1*9a7741deSElliott Hughes /****************************************************************
2*9a7741deSElliott Hughes Copyright (C) Lucent Technologies 1997
3*9a7741deSElliott Hughes All Rights Reserved
4*9a7741deSElliott Hughes
5*9a7741deSElliott Hughes Permission to use, copy, modify, and distribute this software and
6*9a7741deSElliott Hughes its documentation for any purpose and without fee is hereby
7*9a7741deSElliott Hughes granted, provided that the above copyright notice appear in all
8*9a7741deSElliott Hughes copies and that both that the copyright notice and this
9*9a7741deSElliott Hughes permission notice and warranty disclaimer appear in supporting
10*9a7741deSElliott Hughes documentation, and that the name Lucent Technologies or any of
11*9a7741deSElliott Hughes its entities not be used in advertising or publicity pertaining
12*9a7741deSElliott Hughes to distribution of the software without specific, written prior
13*9a7741deSElliott Hughes permission.
14*9a7741deSElliott Hughes
15*9a7741deSElliott Hughes LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16*9a7741deSElliott Hughes INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17*9a7741deSElliott Hughes IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18*9a7741deSElliott Hughes SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19*9a7741deSElliott Hughes WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20*9a7741deSElliott Hughes IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21*9a7741deSElliott Hughes ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22*9a7741deSElliott Hughes THIS SOFTWARE.
23*9a7741deSElliott Hughes ****************************************************************/
24*9a7741deSElliott Hughes
25*9a7741deSElliott Hughes /* lasciate ogne speranza, voi ch'intrate. */
26*9a7741deSElliott Hughes
27*9a7741deSElliott Hughes #define DEBUG
28*9a7741deSElliott Hughes
29*9a7741deSElliott Hughes #include <ctype.h>
30*9a7741deSElliott Hughes #include <limits.h>
31*9a7741deSElliott Hughes #include <stdio.h>
32*9a7741deSElliott Hughes #include <string.h>
33*9a7741deSElliott Hughes #include <stdlib.h>
34*9a7741deSElliott Hughes #include "awk.h"
35*9a7741deSElliott Hughes #include "awkgram.tab.h"
36*9a7741deSElliott Hughes
37*9a7741deSElliott Hughes #define MAXLIN 22
38*9a7741deSElliott Hughes
39*9a7741deSElliott Hughes #define type(v) (v)->nobj /* badly overloaded here */
40*9a7741deSElliott Hughes #define info(v) (v)->ntype /* badly overloaded here */
41*9a7741deSElliott Hughes #define left(v) (v)->narg[0]
42*9a7741deSElliott Hughes #define right(v) (v)->narg[1]
43*9a7741deSElliott Hughes #define parent(v) (v)->nnext
44*9a7741deSElliott Hughes
45*9a7741deSElliott Hughes #define LEAF case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL:
46*9a7741deSElliott Hughes #define ELEAF case EMPTYRE: /* empty string in regexp */
47*9a7741deSElliott Hughes #define UNARY case STAR: case PLUS: case QUEST:
48*9a7741deSElliott Hughes
49*9a7741deSElliott Hughes /* encoding in tree Nodes:
50*9a7741deSElliott Hughes leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL, EMPTYRE):
51*9a7741deSElliott Hughes left is index, right contains value or pointer to value
52*9a7741deSElliott Hughes unary (STAR, PLUS, QUEST): left is child, right is null
53*9a7741deSElliott Hughes binary (CAT, OR): left and right are children
54*9a7741deSElliott Hughes parent contains pointer to parent
55*9a7741deSElliott Hughes */
56*9a7741deSElliott Hughes
57*9a7741deSElliott Hughes
58*9a7741deSElliott Hughes int *setvec;
59*9a7741deSElliott Hughes int *tmpset;
60*9a7741deSElliott Hughes int maxsetvec = 0;
61*9a7741deSElliott Hughes
62*9a7741deSElliott Hughes int rtok; /* next token in current re */
63*9a7741deSElliott Hughes int rlxval;
64*9a7741deSElliott Hughes static const uschar *rlxstr;
65*9a7741deSElliott Hughes static const uschar *prestr; /* current position in current re */
66*9a7741deSElliott Hughes static const uschar *lastre; /* origin of last re */
67*9a7741deSElliott Hughes static const uschar *lastatom; /* origin of last Atom */
68*9a7741deSElliott Hughes static const uschar *starttok;
69*9a7741deSElliott Hughes static const uschar *basestr; /* starts with original, replaced during
70*9a7741deSElliott Hughes repetition processing */
71*9a7741deSElliott Hughes static const uschar *firstbasestr;
72*9a7741deSElliott Hughes
73*9a7741deSElliott Hughes static int setcnt;
74*9a7741deSElliott Hughes static int poscnt;
75*9a7741deSElliott Hughes
76*9a7741deSElliott Hughes const char *patbeg;
77*9a7741deSElliott Hughes int patlen;
78*9a7741deSElliott Hughes
79*9a7741deSElliott Hughes #define NFA 128 /* cache this many dynamic fa's */
80*9a7741deSElliott Hughes fa *fatab[NFA];
81*9a7741deSElliott Hughes int nfatab = 0; /* entries in fatab */
82*9a7741deSElliott Hughes
83*9a7741deSElliott Hughes extern int u8_nextlen(const char *s);
84*9a7741deSElliott Hughes
85*9a7741deSElliott Hughes
86*9a7741deSElliott Hughes /* utf-8 mechanism:
87*9a7741deSElliott Hughes
88*9a7741deSElliott Hughes For most of Awk, utf-8 strings just "work", since they look like
89*9a7741deSElliott Hughes null-terminated sequences of 8-bit bytes.
90*9a7741deSElliott Hughes
91*9a7741deSElliott Hughes Functions like length(), index(), and substr() have to operate
92*9a7741deSElliott Hughes in units of utf-8 characters. The u8_* functions in run.c
93*9a7741deSElliott Hughes handle this.
94*9a7741deSElliott Hughes
95*9a7741deSElliott Hughes Regular expressions are more complicated, since the basic
96*9a7741deSElliott Hughes mechanism of the goto table used 8-bit byte indices into the
97*9a7741deSElliott Hughes gototab entries to compute the next state. Unicode is a lot
98*9a7741deSElliott Hughes bigger, so the gototab entries are now structs with a character
99*9a7741deSElliott Hughes and a next state. These are sorted by code point and binary
100*9a7741deSElliott Hughes searched.
101*9a7741deSElliott Hughes
102*9a7741deSElliott Hughes Throughout the RE mechanism in b.c, utf-8 characters are
103*9a7741deSElliott Hughes converted to their utf-32 value. This mostly shows up in
104*9a7741deSElliott Hughes cclenter, which expands character class ranges like a-z and now
105*9a7741deSElliott Hughes alpha-omega. The size of a gototab array is still about 256.
106*9a7741deSElliott Hughes This should be dynamic, but for now things work ok for a single
107*9a7741deSElliott Hughes code page of Unicode, which is the most likely case.
108*9a7741deSElliott Hughes
109*9a7741deSElliott Hughes The code changes are localized in run.c and b.c. I have added a
110*9a7741deSElliott Hughes handful of functions to somewhat better hide the implementation,
111*9a7741deSElliott Hughes but a lot more could be done.
112*9a7741deSElliott Hughes
113*9a7741deSElliott Hughes */
114*9a7741deSElliott Hughes
115*9a7741deSElliott Hughes static int entry_cmp(const void *l, const void *r);
116*9a7741deSElliott Hughes static int get_gototab(fa*, int, int);
117*9a7741deSElliott Hughes static int set_gototab(fa*, int, int, int);
118*9a7741deSElliott Hughes static void clear_gototab(fa*, int);
119*9a7741deSElliott Hughes extern int u8_rune(int *, const char *);
120*9a7741deSElliott Hughes
121*9a7741deSElliott Hughes static int *
intalloc(size_t n,const char * f)122*9a7741deSElliott Hughes intalloc(size_t n, const char *f)
123*9a7741deSElliott Hughes {
124*9a7741deSElliott Hughes int *p = (int *) calloc(n, sizeof(int));
125*9a7741deSElliott Hughes if (p == NULL)
126*9a7741deSElliott Hughes overflo(f);
127*9a7741deSElliott Hughes return p;
128*9a7741deSElliott Hughes }
129*9a7741deSElliott Hughes
130*9a7741deSElliott Hughes static void
resizesetvec(const char * f)131*9a7741deSElliott Hughes resizesetvec(const char *f)
132*9a7741deSElliott Hughes {
133*9a7741deSElliott Hughes if (maxsetvec == 0)
134*9a7741deSElliott Hughes maxsetvec = MAXLIN;
135*9a7741deSElliott Hughes else
136*9a7741deSElliott Hughes maxsetvec *= 4;
137*9a7741deSElliott Hughes setvec = (int *) realloc(setvec, maxsetvec * sizeof(*setvec));
138*9a7741deSElliott Hughes tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(*tmpset));
139*9a7741deSElliott Hughes if (setvec == NULL || tmpset == NULL)
140*9a7741deSElliott Hughes overflo(f);
141*9a7741deSElliott Hughes }
142*9a7741deSElliott Hughes
143*9a7741deSElliott Hughes static void
resize_state(fa * f,int state)144*9a7741deSElliott Hughes resize_state(fa *f, int state)
145*9a7741deSElliott Hughes {
146*9a7741deSElliott Hughes gtt *p;
147*9a7741deSElliott Hughes uschar *p2;
148*9a7741deSElliott Hughes int **p3;
149*9a7741deSElliott Hughes int i, new_count;
150*9a7741deSElliott Hughes
151*9a7741deSElliott Hughes if (++state < f->state_count)
152*9a7741deSElliott Hughes return;
153*9a7741deSElliott Hughes
154*9a7741deSElliott Hughes new_count = state + 10; /* needs to be tuned */
155*9a7741deSElliott Hughes
156*9a7741deSElliott Hughes p = (gtt *) realloc(f->gototab, new_count * sizeof(gtt));
157*9a7741deSElliott Hughes if (p == NULL)
158*9a7741deSElliott Hughes goto out;
159*9a7741deSElliott Hughes f->gototab = p;
160*9a7741deSElliott Hughes
161*9a7741deSElliott Hughes p2 = (uschar *) realloc(f->out, new_count * sizeof(f->out[0]));
162*9a7741deSElliott Hughes if (p2 == NULL)
163*9a7741deSElliott Hughes goto out;
164*9a7741deSElliott Hughes f->out = p2;
165*9a7741deSElliott Hughes
166*9a7741deSElliott Hughes p3 = (int **) realloc(f->posns, new_count * sizeof(f->posns[0]));
167*9a7741deSElliott Hughes if (p3 == NULL)
168*9a7741deSElliott Hughes goto out;
169*9a7741deSElliott Hughes f->posns = p3;
170*9a7741deSElliott Hughes
171*9a7741deSElliott Hughes for (i = f->state_count; i < new_count; ++i) {
172*9a7741deSElliott Hughes f->gototab[i].entries = (gtte *) calloc(NCHARS, sizeof(gtte));
173*9a7741deSElliott Hughes if (f->gototab[i].entries == NULL)
174*9a7741deSElliott Hughes goto out;
175*9a7741deSElliott Hughes f->gototab[i].allocated = NCHARS;
176*9a7741deSElliott Hughes f->gototab[i].inuse = 0;
177*9a7741deSElliott Hughes f->out[i] = 0;
178*9a7741deSElliott Hughes f->posns[i] = NULL;
179*9a7741deSElliott Hughes }
180*9a7741deSElliott Hughes f->state_count = new_count;
181*9a7741deSElliott Hughes return;
182*9a7741deSElliott Hughes out:
183*9a7741deSElliott Hughes overflo(__func__);
184*9a7741deSElliott Hughes }
185*9a7741deSElliott Hughes
makedfa(const char * s,bool anchor)186*9a7741deSElliott Hughes fa *makedfa(const char *s, bool anchor) /* returns dfa for reg expr s */
187*9a7741deSElliott Hughes {
188*9a7741deSElliott Hughes int i, use, nuse;
189*9a7741deSElliott Hughes fa *pfa;
190*9a7741deSElliott Hughes static int now = 1;
191*9a7741deSElliott Hughes
192*9a7741deSElliott Hughes if (setvec == NULL) { /* first time through any RE */
193*9a7741deSElliott Hughes resizesetvec(__func__);
194*9a7741deSElliott Hughes }
195*9a7741deSElliott Hughes
196*9a7741deSElliott Hughes if (compile_time != RUNNING) /* a constant for sure */
197*9a7741deSElliott Hughes return mkdfa(s, anchor);
198*9a7741deSElliott Hughes for (i = 0; i < nfatab; i++) /* is it there already? */
199*9a7741deSElliott Hughes if (fatab[i]->anchor == anchor
200*9a7741deSElliott Hughes && strcmp((const char *) fatab[i]->restr, s) == 0) {
201*9a7741deSElliott Hughes fatab[i]->use = now++;
202*9a7741deSElliott Hughes return fatab[i];
203*9a7741deSElliott Hughes }
204*9a7741deSElliott Hughes pfa = mkdfa(s, anchor);
205*9a7741deSElliott Hughes if (nfatab < NFA) { /* room for another */
206*9a7741deSElliott Hughes fatab[nfatab] = pfa;
207*9a7741deSElliott Hughes fatab[nfatab]->use = now++;
208*9a7741deSElliott Hughes nfatab++;
209*9a7741deSElliott Hughes return pfa;
210*9a7741deSElliott Hughes }
211*9a7741deSElliott Hughes use = fatab[0]->use; /* replace least-recently used */
212*9a7741deSElliott Hughes nuse = 0;
213*9a7741deSElliott Hughes for (i = 1; i < nfatab; i++)
214*9a7741deSElliott Hughes if (fatab[i]->use < use) {
215*9a7741deSElliott Hughes use = fatab[i]->use;
216*9a7741deSElliott Hughes nuse = i;
217*9a7741deSElliott Hughes }
218*9a7741deSElliott Hughes freefa(fatab[nuse]);
219*9a7741deSElliott Hughes fatab[nuse] = pfa;
220*9a7741deSElliott Hughes pfa->use = now++;
221*9a7741deSElliott Hughes return pfa;
222*9a7741deSElliott Hughes }
223*9a7741deSElliott Hughes
mkdfa(const char * s,bool anchor)224*9a7741deSElliott Hughes fa *mkdfa(const char *s, bool anchor) /* does the real work of making a dfa */
225*9a7741deSElliott Hughes /* anchor = true for anchored matches, else false */
226*9a7741deSElliott Hughes {
227*9a7741deSElliott Hughes Node *p, *p1;
228*9a7741deSElliott Hughes fa *f;
229*9a7741deSElliott Hughes
230*9a7741deSElliott Hughes firstbasestr = (const uschar *) s;
231*9a7741deSElliott Hughes basestr = firstbasestr;
232*9a7741deSElliott Hughes p = reparse(s);
233*9a7741deSElliott Hughes p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p);
234*9a7741deSElliott Hughes /* put ALL STAR in front of reg. exp. */
235*9a7741deSElliott Hughes p1 = op2(CAT, p1, op2(FINAL, NIL, NIL));
236*9a7741deSElliott Hughes /* put FINAL after reg. exp. */
237*9a7741deSElliott Hughes
238*9a7741deSElliott Hughes poscnt = 0;
239*9a7741deSElliott Hughes penter(p1); /* enter parent pointers and leaf indices */
240*9a7741deSElliott Hughes if ((f = (fa *) calloc(1, sizeof(fa) + poscnt * sizeof(rrow))) == NULL)
241*9a7741deSElliott Hughes overflo(__func__);
242*9a7741deSElliott Hughes f->accept = poscnt-1; /* penter has computed number of positions in re */
243*9a7741deSElliott Hughes cfoll(f, p1); /* set up follow sets */
244*9a7741deSElliott Hughes freetr(p1);
245*9a7741deSElliott Hughes resize_state(f, 1);
246*9a7741deSElliott Hughes f->posns[0] = intalloc(*(f->re[0].lfollow), __func__);
247*9a7741deSElliott Hughes f->posns[1] = intalloc(1, __func__);
248*9a7741deSElliott Hughes *f->posns[1] = 0;
249*9a7741deSElliott Hughes f->initstat = makeinit(f, anchor);
250*9a7741deSElliott Hughes f->anchor = anchor;
251*9a7741deSElliott Hughes f->restr = (uschar *) tostring(s);
252*9a7741deSElliott Hughes if (firstbasestr != basestr) {
253*9a7741deSElliott Hughes if (basestr)
254*9a7741deSElliott Hughes xfree(basestr);
255*9a7741deSElliott Hughes }
256*9a7741deSElliott Hughes return f;
257*9a7741deSElliott Hughes }
258*9a7741deSElliott Hughes
makeinit(fa * f,bool anchor)259*9a7741deSElliott Hughes int makeinit(fa *f, bool anchor)
260*9a7741deSElliott Hughes {
261*9a7741deSElliott Hughes int i, k;
262*9a7741deSElliott Hughes
263*9a7741deSElliott Hughes f->curstat = 2;
264*9a7741deSElliott Hughes f->out[2] = 0;
265*9a7741deSElliott Hughes k = *(f->re[0].lfollow);
266*9a7741deSElliott Hughes xfree(f->posns[2]);
267*9a7741deSElliott Hughes f->posns[2] = intalloc(k + 1, __func__);
268*9a7741deSElliott Hughes for (i = 0; i <= k; i++) {
269*9a7741deSElliott Hughes (f->posns[2])[i] = (f->re[0].lfollow)[i];
270*9a7741deSElliott Hughes }
271*9a7741deSElliott Hughes if ((f->posns[2])[1] == f->accept)
272*9a7741deSElliott Hughes f->out[2] = 1;
273*9a7741deSElliott Hughes clear_gototab(f, 2);
274*9a7741deSElliott Hughes f->curstat = cgoto(f, 2, HAT);
275*9a7741deSElliott Hughes if (anchor) {
276*9a7741deSElliott Hughes *f->posns[2] = k-1; /* leave out position 0 */
277*9a7741deSElliott Hughes for (i = 0; i < k; i++) {
278*9a7741deSElliott Hughes (f->posns[0])[i] = (f->posns[2])[i];
279*9a7741deSElliott Hughes }
280*9a7741deSElliott Hughes
281*9a7741deSElliott Hughes f->out[0] = f->out[2];
282*9a7741deSElliott Hughes if (f->curstat != 2)
283*9a7741deSElliott Hughes --(*f->posns[f->curstat]);
284*9a7741deSElliott Hughes }
285*9a7741deSElliott Hughes return f->curstat;
286*9a7741deSElliott Hughes }
287*9a7741deSElliott Hughes
penter(Node * p)288*9a7741deSElliott Hughes void penter(Node *p) /* set up parent pointers and leaf indices */
289*9a7741deSElliott Hughes {
290*9a7741deSElliott Hughes switch (type(p)) {
291*9a7741deSElliott Hughes ELEAF
292*9a7741deSElliott Hughes LEAF
293*9a7741deSElliott Hughes info(p) = poscnt;
294*9a7741deSElliott Hughes poscnt++;
295*9a7741deSElliott Hughes break;
296*9a7741deSElliott Hughes UNARY
297*9a7741deSElliott Hughes penter(left(p));
298*9a7741deSElliott Hughes parent(left(p)) = p;
299*9a7741deSElliott Hughes break;
300*9a7741deSElliott Hughes case CAT:
301*9a7741deSElliott Hughes case OR:
302*9a7741deSElliott Hughes penter(left(p));
303*9a7741deSElliott Hughes penter(right(p));
304*9a7741deSElliott Hughes parent(left(p)) = p;
305*9a7741deSElliott Hughes parent(right(p)) = p;
306*9a7741deSElliott Hughes break;
307*9a7741deSElliott Hughes case ZERO:
308*9a7741deSElliott Hughes break;
309*9a7741deSElliott Hughes default: /* can't happen */
310*9a7741deSElliott Hughes FATAL("can't happen: unknown type %d in penter", type(p));
311*9a7741deSElliott Hughes break;
312*9a7741deSElliott Hughes }
313*9a7741deSElliott Hughes }
314*9a7741deSElliott Hughes
freetr(Node * p)315*9a7741deSElliott Hughes void freetr(Node *p) /* free parse tree */
316*9a7741deSElliott Hughes {
317*9a7741deSElliott Hughes switch (type(p)) {
318*9a7741deSElliott Hughes ELEAF
319*9a7741deSElliott Hughes LEAF
320*9a7741deSElliott Hughes xfree(p);
321*9a7741deSElliott Hughes break;
322*9a7741deSElliott Hughes UNARY
323*9a7741deSElliott Hughes case ZERO:
324*9a7741deSElliott Hughes freetr(left(p));
325*9a7741deSElliott Hughes xfree(p);
326*9a7741deSElliott Hughes break;
327*9a7741deSElliott Hughes case CAT:
328*9a7741deSElliott Hughes case OR:
329*9a7741deSElliott Hughes freetr(left(p));
330*9a7741deSElliott Hughes freetr(right(p));
331*9a7741deSElliott Hughes xfree(p);
332*9a7741deSElliott Hughes break;
333*9a7741deSElliott Hughes default: /* can't happen */
334*9a7741deSElliott Hughes FATAL("can't happen: unknown type %d in freetr", type(p));
335*9a7741deSElliott Hughes break;
336*9a7741deSElliott Hughes }
337*9a7741deSElliott Hughes }
338*9a7741deSElliott Hughes
339*9a7741deSElliott Hughes /* in the parsing of regular expressions, metacharacters like . have */
340*9a7741deSElliott Hughes /* to be seen literally; \056 is not a metacharacter. */
341*9a7741deSElliott Hughes
hexstr(const uschar ** pp,int max)342*9a7741deSElliott Hughes int hexstr(const uschar **pp, int max) /* find and eval hex string at pp, return new p */
343*9a7741deSElliott Hughes { /* only pick up one 8-bit byte (2 chars) */
344*9a7741deSElliott Hughes const uschar *p;
345*9a7741deSElliott Hughes int n = 0;
346*9a7741deSElliott Hughes int i;
347*9a7741deSElliott Hughes
348*9a7741deSElliott Hughes for (i = 0, p = *pp; i < max && isxdigit(*p); i++, p++) {
349*9a7741deSElliott Hughes if (isdigit((int) *p))
350*9a7741deSElliott Hughes n = 16 * n + *p - '0';
351*9a7741deSElliott Hughes else if (*p >= 'a' && *p <= 'f')
352*9a7741deSElliott Hughes n = 16 * n + *p - 'a' + 10;
353*9a7741deSElliott Hughes else if (*p >= 'A' && *p <= 'F')
354*9a7741deSElliott Hughes n = 16 * n + *p - 'A' + 10;
355*9a7741deSElliott Hughes }
356*9a7741deSElliott Hughes *pp = p;
357*9a7741deSElliott Hughes return n;
358*9a7741deSElliott Hughes }
359*9a7741deSElliott Hughes
360*9a7741deSElliott Hughes
361*9a7741deSElliott Hughes
362*9a7741deSElliott Hughes #define isoctdigit(c) ((c) >= '0' && (c) <= '7') /* multiple use of arg */
363*9a7741deSElliott Hughes
quoted(const uschar ** pp)364*9a7741deSElliott Hughes int quoted(const uschar **pp) /* pick up next thing after a \\ */
365*9a7741deSElliott Hughes /* and increment *pp */
366*9a7741deSElliott Hughes {
367*9a7741deSElliott Hughes const uschar *p = *pp;
368*9a7741deSElliott Hughes int c;
369*9a7741deSElliott Hughes
370*9a7741deSElliott Hughes /* BUG: should advance by utf-8 char even if makes no sense */
371*9a7741deSElliott Hughes
372*9a7741deSElliott Hughes switch ((c = *p++)) {
373*9a7741deSElliott Hughes case 't':
374*9a7741deSElliott Hughes c = '\t';
375*9a7741deSElliott Hughes break;
376*9a7741deSElliott Hughes case 'n':
377*9a7741deSElliott Hughes c = '\n';
378*9a7741deSElliott Hughes break;
379*9a7741deSElliott Hughes case 'f':
380*9a7741deSElliott Hughes c = '\f';
381*9a7741deSElliott Hughes break;
382*9a7741deSElliott Hughes case 'r':
383*9a7741deSElliott Hughes c = '\r';
384*9a7741deSElliott Hughes break;
385*9a7741deSElliott Hughes case 'b':
386*9a7741deSElliott Hughes c = '\b';
387*9a7741deSElliott Hughes break;
388*9a7741deSElliott Hughes case 'v':
389*9a7741deSElliott Hughes c = '\v';
390*9a7741deSElliott Hughes break;
391*9a7741deSElliott Hughes case 'a':
392*9a7741deSElliott Hughes c = '\a';
393*9a7741deSElliott Hughes break;
394*9a7741deSElliott Hughes case '\\':
395*9a7741deSElliott Hughes c = '\\';
396*9a7741deSElliott Hughes break;
397*9a7741deSElliott Hughes case 'x': /* 2 hex digits follow */
398*9a7741deSElliott Hughes c = hexstr(&p, 2); /* this adds a null if number is invalid */
399*9a7741deSElliott Hughes break;
400*9a7741deSElliott Hughes case 'u': /* unicode char number up to 8 hex digits */
401*9a7741deSElliott Hughes c = hexstr(&p, 8);
402*9a7741deSElliott Hughes break;
403*9a7741deSElliott Hughes default:
404*9a7741deSElliott Hughes if (isoctdigit(c)) { /* \d \dd \ddd */
405*9a7741deSElliott Hughes int n = c - '0';
406*9a7741deSElliott Hughes if (isoctdigit(*p)) {
407*9a7741deSElliott Hughes n = 8 * n + *p++ - '0';
408*9a7741deSElliott Hughes if (isoctdigit(*p))
409*9a7741deSElliott Hughes n = 8 * n + *p++ - '0';
410*9a7741deSElliott Hughes }
411*9a7741deSElliott Hughes c = n;
412*9a7741deSElliott Hughes }
413*9a7741deSElliott Hughes }
414*9a7741deSElliott Hughes
415*9a7741deSElliott Hughes *pp = p;
416*9a7741deSElliott Hughes return c;
417*9a7741deSElliott Hughes }
418*9a7741deSElliott Hughes
cclenter(const char * argp)419*9a7741deSElliott Hughes int *cclenter(const char *argp) /* add a character class */
420*9a7741deSElliott Hughes {
421*9a7741deSElliott Hughes int i, c, c2;
422*9a7741deSElliott Hughes int n;
423*9a7741deSElliott Hughes const uschar *p = (const uschar *) argp;
424*9a7741deSElliott Hughes int *bp, *retp;
425*9a7741deSElliott Hughes static int *buf = NULL;
426*9a7741deSElliott Hughes static int bufsz = 100;
427*9a7741deSElliott Hughes
428*9a7741deSElliott Hughes if (buf == NULL && (buf = (int *) calloc(bufsz, sizeof(int))) == NULL)
429*9a7741deSElliott Hughes FATAL("out of space for character class [%.10s...] 1", p);
430*9a7741deSElliott Hughes bp = buf;
431*9a7741deSElliott Hughes for (i = 0; *p != 0; ) {
432*9a7741deSElliott Hughes n = u8_rune(&c, (const char *) p);
433*9a7741deSElliott Hughes p += n;
434*9a7741deSElliott Hughes if (c == '\\') {
435*9a7741deSElliott Hughes c = quoted(&p);
436*9a7741deSElliott Hughes } else if (c == '-' && i > 0 && bp[-1] != 0) {
437*9a7741deSElliott Hughes if (*p != 0) {
438*9a7741deSElliott Hughes c = bp[-1];
439*9a7741deSElliott Hughes /* c2 = *p++; */
440*9a7741deSElliott Hughes n = u8_rune(&c2, (const char *) p);
441*9a7741deSElliott Hughes p += n;
442*9a7741deSElliott Hughes if (c2 == '\\')
443*9a7741deSElliott Hughes c2 = quoted(&p); /* BUG: sets p, has to be u8 size */
444*9a7741deSElliott Hughes if (c > c2) { /* empty; ignore */
445*9a7741deSElliott Hughes bp--;
446*9a7741deSElliott Hughes i--;
447*9a7741deSElliott Hughes continue;
448*9a7741deSElliott Hughes }
449*9a7741deSElliott Hughes while (c < c2) {
450*9a7741deSElliott Hughes if (i >= bufsz) {
451*9a7741deSElliott Hughes bufsz *= 2;
452*9a7741deSElliott Hughes buf = (int *) realloc(buf, bufsz * sizeof(int));
453*9a7741deSElliott Hughes if (buf == NULL)
454*9a7741deSElliott Hughes FATAL("out of space for character class [%.10s...] 2", p);
455*9a7741deSElliott Hughes bp = buf + i;
456*9a7741deSElliott Hughes }
457*9a7741deSElliott Hughes *bp++ = ++c;
458*9a7741deSElliott Hughes i++;
459*9a7741deSElliott Hughes }
460*9a7741deSElliott Hughes continue;
461*9a7741deSElliott Hughes }
462*9a7741deSElliott Hughes }
463*9a7741deSElliott Hughes if (i >= bufsz) {
464*9a7741deSElliott Hughes bufsz *= 2;
465*9a7741deSElliott Hughes buf = (int *) realloc(buf, bufsz * sizeof(int));
466*9a7741deSElliott Hughes if (buf == NULL)
467*9a7741deSElliott Hughes FATAL("out of space for character class [%.10s...] 2", p);
468*9a7741deSElliott Hughes bp = buf + i;
469*9a7741deSElliott Hughes }
470*9a7741deSElliott Hughes *bp++ = c;
471*9a7741deSElliott Hughes i++;
472*9a7741deSElliott Hughes }
473*9a7741deSElliott Hughes *bp = 0;
474*9a7741deSElliott Hughes /* DPRINTF("cclenter: in = |%s|, out = |%s|\n", op, buf); BUG: can't print array of int */
475*9a7741deSElliott Hughes /* xfree(op); BUG: what are we freeing here? */
476*9a7741deSElliott Hughes retp = (int *) calloc(bp-buf+1, sizeof(int));
477*9a7741deSElliott Hughes for (i = 0; i < bp-buf+1; i++)
478*9a7741deSElliott Hughes retp[i] = buf[i];
479*9a7741deSElliott Hughes return retp;
480*9a7741deSElliott Hughes }
481*9a7741deSElliott Hughes
overflo(const char * s)482*9a7741deSElliott Hughes void overflo(const char *s)
483*9a7741deSElliott Hughes {
484*9a7741deSElliott Hughes FATAL("regular expression too big: out of space in %.30s...", s);
485*9a7741deSElliott Hughes }
486*9a7741deSElliott Hughes
cfoll(fa * f,Node * v)487*9a7741deSElliott Hughes void cfoll(fa *f, Node *v) /* enter follow set of each leaf of vertex v into lfollow[leaf] */
488*9a7741deSElliott Hughes {
489*9a7741deSElliott Hughes int i;
490*9a7741deSElliott Hughes int *p;
491*9a7741deSElliott Hughes
492*9a7741deSElliott Hughes switch (type(v)) {
493*9a7741deSElliott Hughes ELEAF
494*9a7741deSElliott Hughes LEAF
495*9a7741deSElliott Hughes f->re[info(v)].ltype = type(v);
496*9a7741deSElliott Hughes f->re[info(v)].lval.np = right(v);
497*9a7741deSElliott Hughes while (f->accept >= maxsetvec) { /* guessing here! */
498*9a7741deSElliott Hughes resizesetvec(__func__);
499*9a7741deSElliott Hughes }
500*9a7741deSElliott Hughes for (i = 0; i <= f->accept; i++)
501*9a7741deSElliott Hughes setvec[i] = 0;
502*9a7741deSElliott Hughes setcnt = 0;
503*9a7741deSElliott Hughes follow(v); /* computes setvec and setcnt */
504*9a7741deSElliott Hughes p = intalloc(setcnt + 1, __func__);
505*9a7741deSElliott Hughes f->re[info(v)].lfollow = p;
506*9a7741deSElliott Hughes *p = setcnt;
507*9a7741deSElliott Hughes for (i = f->accept; i >= 0; i--)
508*9a7741deSElliott Hughes if (setvec[i] == 1)
509*9a7741deSElliott Hughes *++p = i;
510*9a7741deSElliott Hughes break;
511*9a7741deSElliott Hughes UNARY
512*9a7741deSElliott Hughes cfoll(f,left(v));
513*9a7741deSElliott Hughes break;
514*9a7741deSElliott Hughes case CAT:
515*9a7741deSElliott Hughes case OR:
516*9a7741deSElliott Hughes cfoll(f,left(v));
517*9a7741deSElliott Hughes cfoll(f,right(v));
518*9a7741deSElliott Hughes break;
519*9a7741deSElliott Hughes case ZERO:
520*9a7741deSElliott Hughes break;
521*9a7741deSElliott Hughes default: /* can't happen */
522*9a7741deSElliott Hughes FATAL("can't happen: unknown type %d in cfoll", type(v));
523*9a7741deSElliott Hughes }
524*9a7741deSElliott Hughes }
525*9a7741deSElliott Hughes
first(Node * p)526*9a7741deSElliott Hughes int first(Node *p) /* collects initially active leaves of p into setvec */
527*9a7741deSElliott Hughes /* returns 0 if p matches empty string */
528*9a7741deSElliott Hughes {
529*9a7741deSElliott Hughes int b, lp;
530*9a7741deSElliott Hughes
531*9a7741deSElliott Hughes switch (type(p)) {
532*9a7741deSElliott Hughes ELEAF
533*9a7741deSElliott Hughes LEAF
534*9a7741deSElliott Hughes lp = info(p); /* look for high-water mark of subscripts */
535*9a7741deSElliott Hughes while (setcnt >= maxsetvec || lp >= maxsetvec) { /* guessing here! */
536*9a7741deSElliott Hughes resizesetvec(__func__);
537*9a7741deSElliott Hughes }
538*9a7741deSElliott Hughes if (type(p) == EMPTYRE) {
539*9a7741deSElliott Hughes setvec[lp] = 0;
540*9a7741deSElliott Hughes return(0);
541*9a7741deSElliott Hughes }
542*9a7741deSElliott Hughes if (setvec[lp] != 1) {
543*9a7741deSElliott Hughes setvec[lp] = 1;
544*9a7741deSElliott Hughes setcnt++;
545*9a7741deSElliott Hughes }
546*9a7741deSElliott Hughes if (type(p) == CCL && (*(int *) right(p)) == 0)
547*9a7741deSElliott Hughes return(0); /* empty CCL */
548*9a7741deSElliott Hughes return(1);
549*9a7741deSElliott Hughes case PLUS:
550*9a7741deSElliott Hughes if (first(left(p)) == 0)
551*9a7741deSElliott Hughes return(0);
552*9a7741deSElliott Hughes return(1);
553*9a7741deSElliott Hughes case STAR:
554*9a7741deSElliott Hughes case QUEST:
555*9a7741deSElliott Hughes first(left(p));
556*9a7741deSElliott Hughes return(0);
557*9a7741deSElliott Hughes case CAT:
558*9a7741deSElliott Hughes if (first(left(p)) == 0 && first(right(p)) == 0) return(0);
559*9a7741deSElliott Hughes return(1);
560*9a7741deSElliott Hughes case OR:
561*9a7741deSElliott Hughes b = first(right(p));
562*9a7741deSElliott Hughes if (first(left(p)) == 0 || b == 0) return(0);
563*9a7741deSElliott Hughes return(1);
564*9a7741deSElliott Hughes case ZERO:
565*9a7741deSElliott Hughes return 0;
566*9a7741deSElliott Hughes }
567*9a7741deSElliott Hughes FATAL("can't happen: unknown type %d in first", type(p)); /* can't happen */
568*9a7741deSElliott Hughes return(-1);
569*9a7741deSElliott Hughes }
570*9a7741deSElliott Hughes
follow(Node * v)571*9a7741deSElliott Hughes void follow(Node *v) /* collects leaves that can follow v into setvec */
572*9a7741deSElliott Hughes {
573*9a7741deSElliott Hughes Node *p;
574*9a7741deSElliott Hughes
575*9a7741deSElliott Hughes if (type(v) == FINAL)
576*9a7741deSElliott Hughes return;
577*9a7741deSElliott Hughes p = parent(v);
578*9a7741deSElliott Hughes switch (type(p)) {
579*9a7741deSElliott Hughes case STAR:
580*9a7741deSElliott Hughes case PLUS:
581*9a7741deSElliott Hughes first(v);
582*9a7741deSElliott Hughes follow(p);
583*9a7741deSElliott Hughes return;
584*9a7741deSElliott Hughes
585*9a7741deSElliott Hughes case OR:
586*9a7741deSElliott Hughes case QUEST:
587*9a7741deSElliott Hughes follow(p);
588*9a7741deSElliott Hughes return;
589*9a7741deSElliott Hughes
590*9a7741deSElliott Hughes case CAT:
591*9a7741deSElliott Hughes if (v == left(p)) { /* v is left child of p */
592*9a7741deSElliott Hughes if (first(right(p)) == 0) {
593*9a7741deSElliott Hughes follow(p);
594*9a7741deSElliott Hughes return;
595*9a7741deSElliott Hughes }
596*9a7741deSElliott Hughes } else /* v is right child */
597*9a7741deSElliott Hughes follow(p);
598*9a7741deSElliott Hughes return;
599*9a7741deSElliott Hughes }
600*9a7741deSElliott Hughes }
601*9a7741deSElliott Hughes
member(int c,int * sarg)602*9a7741deSElliott Hughes int member(int c, int *sarg) /* is c in s? */
603*9a7741deSElliott Hughes {
604*9a7741deSElliott Hughes int *s = (int *) sarg;
605*9a7741deSElliott Hughes
606*9a7741deSElliott Hughes while (*s)
607*9a7741deSElliott Hughes if (c == *s++)
608*9a7741deSElliott Hughes return(1);
609*9a7741deSElliott Hughes return(0);
610*9a7741deSElliott Hughes }
611*9a7741deSElliott Hughes
resize_gototab(fa * f,int state)612*9a7741deSElliott Hughes static void resize_gototab(fa *f, int state)
613*9a7741deSElliott Hughes {
614*9a7741deSElliott Hughes size_t new_size = f->gototab[state].allocated * 2;
615*9a7741deSElliott Hughes gtte *p = (gtte *) realloc(f->gototab[state].entries, new_size * sizeof(gtte));
616*9a7741deSElliott Hughes if (p == NULL)
617*9a7741deSElliott Hughes overflo(__func__);
618*9a7741deSElliott Hughes
619*9a7741deSElliott Hughes // need to initialized the new memory to zero
620*9a7741deSElliott Hughes size_t orig_size = f->gototab[state].allocated; // 2nd half of new mem is this size
621*9a7741deSElliott Hughes memset(p + orig_size, 0, orig_size * sizeof(gtte)); // clean it out
622*9a7741deSElliott Hughes
623*9a7741deSElliott Hughes f->gototab[state].allocated = new_size; // update gototab info
624*9a7741deSElliott Hughes f->gototab[state].entries = p;
625*9a7741deSElliott Hughes }
626*9a7741deSElliott Hughes
get_gototab(fa * f,int state,int ch)627*9a7741deSElliott Hughes static int get_gototab(fa *f, int state, int ch) /* hide gototab implementation */
628*9a7741deSElliott Hughes {
629*9a7741deSElliott Hughes gtte key;
630*9a7741deSElliott Hughes gtte *item;
631*9a7741deSElliott Hughes
632*9a7741deSElliott Hughes key.ch = ch;
633*9a7741deSElliott Hughes key.state = 0; /* irrelevant */
634*9a7741deSElliott Hughes item = (gtte *) bsearch(& key, f->gototab[state].entries,
635*9a7741deSElliott Hughes f->gototab[state].inuse, sizeof(gtte),
636*9a7741deSElliott Hughes entry_cmp);
637*9a7741deSElliott Hughes
638*9a7741deSElliott Hughes if (item == NULL)
639*9a7741deSElliott Hughes return 0;
640*9a7741deSElliott Hughes else
641*9a7741deSElliott Hughes return item->state;
642*9a7741deSElliott Hughes }
643*9a7741deSElliott Hughes
entry_cmp(const void * l,const void * r)644*9a7741deSElliott Hughes static int entry_cmp(const void *l, const void *r)
645*9a7741deSElliott Hughes {
646*9a7741deSElliott Hughes const gtte *left, *right;
647*9a7741deSElliott Hughes
648*9a7741deSElliott Hughes left = (const gtte *) l;
649*9a7741deSElliott Hughes right = (const gtte *) r;
650*9a7741deSElliott Hughes
651*9a7741deSElliott Hughes return left->ch - right->ch;
652*9a7741deSElliott Hughes }
653*9a7741deSElliott Hughes
set_gototab(fa * f,int state,int ch,int val)654*9a7741deSElliott Hughes static int set_gototab(fa *f, int state, int ch, int val) /* hide gototab implementation */
655*9a7741deSElliott Hughes {
656*9a7741deSElliott Hughes if (f->gototab[state].inuse == 0) {
657*9a7741deSElliott Hughes f->gototab[state].entries[0].ch = ch;
658*9a7741deSElliott Hughes f->gototab[state].entries[0].state = val;
659*9a7741deSElliott Hughes f->gototab[state].inuse++;
660*9a7741deSElliott Hughes return val;
661*9a7741deSElliott Hughes } else if ((unsigned)ch > f->gototab[state].entries[f->gototab[state].inuse-1].ch) {
662*9a7741deSElliott Hughes // not seen yet, insert and return
663*9a7741deSElliott Hughes gtt *tab = & f->gototab[state];
664*9a7741deSElliott Hughes if (tab->inuse + 1 >= tab->allocated)
665*9a7741deSElliott Hughes resize_gototab(f, state);
666*9a7741deSElliott Hughes
667*9a7741deSElliott Hughes f->gototab[state].entries[f->gototab[state].inuse].ch = ch;
668*9a7741deSElliott Hughes f->gototab[state].entries[f->gototab[state].inuse].state = val;
669*9a7741deSElliott Hughes f->gototab[state].inuse++;
670*9a7741deSElliott Hughes return val;
671*9a7741deSElliott Hughes } else {
672*9a7741deSElliott Hughes // maybe we have it, maybe we don't
673*9a7741deSElliott Hughes gtte key;
674*9a7741deSElliott Hughes gtte *item;
675*9a7741deSElliott Hughes
676*9a7741deSElliott Hughes key.ch = ch;
677*9a7741deSElliott Hughes key.state = 0; /* irrelevant */
678*9a7741deSElliott Hughes item = (gtte *) bsearch(& key, f->gototab[state].entries,
679*9a7741deSElliott Hughes f->gototab[state].inuse, sizeof(gtte),
680*9a7741deSElliott Hughes entry_cmp);
681*9a7741deSElliott Hughes
682*9a7741deSElliott Hughes if (item != NULL) {
683*9a7741deSElliott Hughes // we have it, update state and return
684*9a7741deSElliott Hughes item->state = val;
685*9a7741deSElliott Hughes return item->state;
686*9a7741deSElliott Hughes }
687*9a7741deSElliott Hughes // otherwise, fall through to insert and reallocate.
688*9a7741deSElliott Hughes }
689*9a7741deSElliott Hughes
690*9a7741deSElliott Hughes gtt *tab = & f->gototab[state];
691*9a7741deSElliott Hughes if (tab->inuse + 1 >= tab->allocated)
692*9a7741deSElliott Hughes resize_gototab(f, state);
693*9a7741deSElliott Hughes f->gototab[state].entries[tab->inuse].ch = ch;
694*9a7741deSElliott Hughes f->gototab[state].entries[tab->inuse].state = val;
695*9a7741deSElliott Hughes ++tab->inuse;
696*9a7741deSElliott Hughes
697*9a7741deSElliott Hughes qsort(f->gototab[state].entries,
698*9a7741deSElliott Hughes f->gototab[state].inuse, sizeof(gtte), entry_cmp);
699*9a7741deSElliott Hughes
700*9a7741deSElliott Hughes return val; /* not used anywhere at the moment */
701*9a7741deSElliott Hughes }
702*9a7741deSElliott Hughes
clear_gototab(fa * f,int state)703*9a7741deSElliott Hughes static void clear_gototab(fa *f, int state)
704*9a7741deSElliott Hughes {
705*9a7741deSElliott Hughes memset(f->gototab[state].entries, 0,
706*9a7741deSElliott Hughes f->gototab[state].allocated * sizeof(gtte));
707*9a7741deSElliott Hughes f->gototab[state].inuse = 0;
708*9a7741deSElliott Hughes }
709*9a7741deSElliott Hughes
match(fa * f,const char * p0)710*9a7741deSElliott Hughes int match(fa *f, const char *p0) /* shortest match ? */
711*9a7741deSElliott Hughes {
712*9a7741deSElliott Hughes int s, ns;
713*9a7741deSElliott Hughes int n;
714*9a7741deSElliott Hughes int rune;
715*9a7741deSElliott Hughes const uschar *p = (const uschar *) p0;
716*9a7741deSElliott Hughes
717*9a7741deSElliott Hughes /* return pmatch(f, p0); does it matter whether longest or shortest? */
718*9a7741deSElliott Hughes
719*9a7741deSElliott Hughes s = f->initstat;
720*9a7741deSElliott Hughes assert (s < f->state_count);
721*9a7741deSElliott Hughes
722*9a7741deSElliott Hughes if (f->out[s])
723*9a7741deSElliott Hughes return(1);
724*9a7741deSElliott Hughes do {
725*9a7741deSElliott Hughes /* assert(*p < NCHARS); */
726*9a7741deSElliott Hughes n = u8_rune(&rune, (const char *) p);
727*9a7741deSElliott Hughes if ((ns = get_gototab(f, s, rune)) != 0)
728*9a7741deSElliott Hughes s = ns;
729*9a7741deSElliott Hughes else
730*9a7741deSElliott Hughes s = cgoto(f, s, rune);
731*9a7741deSElliott Hughes if (f->out[s])
732*9a7741deSElliott Hughes return(1);
733*9a7741deSElliott Hughes if (*p == 0)
734*9a7741deSElliott Hughes break;
735*9a7741deSElliott Hughes p += n;
736*9a7741deSElliott Hughes } while (1); /* was *p++ != 0 */
737*9a7741deSElliott Hughes return(0);
738*9a7741deSElliott Hughes }
739*9a7741deSElliott Hughes
pmatch(fa * f,const char * p0)740*9a7741deSElliott Hughes int pmatch(fa *f, const char *p0) /* longest match, for sub */
741*9a7741deSElliott Hughes {
742*9a7741deSElliott Hughes int s, ns;
743*9a7741deSElliott Hughes int n;
744*9a7741deSElliott Hughes int rune;
745*9a7741deSElliott Hughes const uschar *p = (const uschar *) p0;
746*9a7741deSElliott Hughes const uschar *q;
747*9a7741deSElliott Hughes
748*9a7741deSElliott Hughes s = f->initstat;
749*9a7741deSElliott Hughes assert(s < f->state_count);
750*9a7741deSElliott Hughes
751*9a7741deSElliott Hughes patbeg = (const char *)p;
752*9a7741deSElliott Hughes patlen = -1;
753*9a7741deSElliott Hughes do {
754*9a7741deSElliott Hughes q = p;
755*9a7741deSElliott Hughes do {
756*9a7741deSElliott Hughes if (f->out[s]) /* final state */
757*9a7741deSElliott Hughes patlen = q-p;
758*9a7741deSElliott Hughes /* assert(*q < NCHARS); */
759*9a7741deSElliott Hughes n = u8_rune(&rune, (const char *) q);
760*9a7741deSElliott Hughes if ((ns = get_gototab(f, s, rune)) != 0)
761*9a7741deSElliott Hughes s = ns;
762*9a7741deSElliott Hughes else
763*9a7741deSElliott Hughes s = cgoto(f, s, rune);
764*9a7741deSElliott Hughes
765*9a7741deSElliott Hughes assert(s < f->state_count);
766*9a7741deSElliott Hughes
767*9a7741deSElliott Hughes if (s == 1) { /* no transition */
768*9a7741deSElliott Hughes if (patlen >= 0) {
769*9a7741deSElliott Hughes patbeg = (const char *) p;
770*9a7741deSElliott Hughes return(1);
771*9a7741deSElliott Hughes }
772*9a7741deSElliott Hughes else
773*9a7741deSElliott Hughes goto nextin; /* no match */
774*9a7741deSElliott Hughes }
775*9a7741deSElliott Hughes if (*q == 0)
776*9a7741deSElliott Hughes break;
777*9a7741deSElliott Hughes q += n;
778*9a7741deSElliott Hughes } while (1);
779*9a7741deSElliott Hughes q++; /* was *q++ */
780*9a7741deSElliott Hughes if (f->out[s])
781*9a7741deSElliott Hughes patlen = q-p-1; /* don't count $ */
782*9a7741deSElliott Hughes if (patlen >= 0) {
783*9a7741deSElliott Hughes patbeg = (const char *) p;
784*9a7741deSElliott Hughes return(1);
785*9a7741deSElliott Hughes }
786*9a7741deSElliott Hughes nextin:
787*9a7741deSElliott Hughes s = 2;
788*9a7741deSElliott Hughes if (*p == 0)
789*9a7741deSElliott Hughes break;
790*9a7741deSElliott Hughes n = u8_rune(&rune, (const char *) p);
791*9a7741deSElliott Hughes p += n;
792*9a7741deSElliott Hughes } while (1); /* was *p++ */
793*9a7741deSElliott Hughes return (0);
794*9a7741deSElliott Hughes }
795*9a7741deSElliott Hughes
nematch(fa * f,const char * p0)796*9a7741deSElliott Hughes int nematch(fa *f, const char *p0) /* non-empty match, for sub */
797*9a7741deSElliott Hughes {
798*9a7741deSElliott Hughes int s, ns;
799*9a7741deSElliott Hughes int n;
800*9a7741deSElliott Hughes int rune;
801*9a7741deSElliott Hughes const uschar *p = (const uschar *) p0;
802*9a7741deSElliott Hughes const uschar *q;
803*9a7741deSElliott Hughes
804*9a7741deSElliott Hughes s = f->initstat;
805*9a7741deSElliott Hughes assert(s < f->state_count);
806*9a7741deSElliott Hughes
807*9a7741deSElliott Hughes patbeg = (const char *)p;
808*9a7741deSElliott Hughes patlen = -1;
809*9a7741deSElliott Hughes while (*p) {
810*9a7741deSElliott Hughes q = p;
811*9a7741deSElliott Hughes do {
812*9a7741deSElliott Hughes if (f->out[s]) /* final state */
813*9a7741deSElliott Hughes patlen = q-p;
814*9a7741deSElliott Hughes /* assert(*q < NCHARS); */
815*9a7741deSElliott Hughes n = u8_rune(&rune, (const char *) q);
816*9a7741deSElliott Hughes if ((ns = get_gototab(f, s, rune)) != 0)
817*9a7741deSElliott Hughes s = ns;
818*9a7741deSElliott Hughes else
819*9a7741deSElliott Hughes s = cgoto(f, s, rune);
820*9a7741deSElliott Hughes if (s == 1) { /* no transition */
821*9a7741deSElliott Hughes if (patlen > 0) {
822*9a7741deSElliott Hughes patbeg = (const char *) p;
823*9a7741deSElliott Hughes return(1);
824*9a7741deSElliott Hughes } else
825*9a7741deSElliott Hughes goto nnextin; /* no nonempty match */
826*9a7741deSElliott Hughes }
827*9a7741deSElliott Hughes if (*q == 0)
828*9a7741deSElliott Hughes break;
829*9a7741deSElliott Hughes q += n;
830*9a7741deSElliott Hughes } while (1);
831*9a7741deSElliott Hughes q++;
832*9a7741deSElliott Hughes if (f->out[s])
833*9a7741deSElliott Hughes patlen = q-p-1; /* don't count $ */
834*9a7741deSElliott Hughes if (patlen > 0 ) {
835*9a7741deSElliott Hughes patbeg = (const char *) p;
836*9a7741deSElliott Hughes return(1);
837*9a7741deSElliott Hughes }
838*9a7741deSElliott Hughes nnextin:
839*9a7741deSElliott Hughes s = 2;
840*9a7741deSElliott Hughes p++;
841*9a7741deSElliott Hughes }
842*9a7741deSElliott Hughes return (0);
843*9a7741deSElliott Hughes }
844*9a7741deSElliott Hughes
845*9a7741deSElliott Hughes
846*9a7741deSElliott Hughes /*
847*9a7741deSElliott Hughes * NAME
848*9a7741deSElliott Hughes * fnematch
849*9a7741deSElliott Hughes *
850*9a7741deSElliott Hughes * DESCRIPTION
851*9a7741deSElliott Hughes * A stream-fed version of nematch which transfers characters to a
852*9a7741deSElliott Hughes * null-terminated buffer. All characters up to and including the last
853*9a7741deSElliott Hughes * character of the matching text or EOF are placed in the buffer. If
854*9a7741deSElliott Hughes * a match is found, patbeg and patlen are set appropriately.
855*9a7741deSElliott Hughes *
856*9a7741deSElliott Hughes * RETURN VALUES
857*9a7741deSElliott Hughes * false No match found.
858*9a7741deSElliott Hughes * true Match found.
859*9a7741deSElliott Hughes */
860*9a7741deSElliott Hughes
fnematch(fa * pfa,FILE * f,char ** pbuf,int * pbufsize,int quantum)861*9a7741deSElliott Hughes bool fnematch(fa *pfa, FILE *f, char **pbuf, int *pbufsize, int quantum)
862*9a7741deSElliott Hughes {
863*9a7741deSElliott Hughes char *i, *j, *k, *buf = *pbuf;
864*9a7741deSElliott Hughes int bufsize = *pbufsize;
865*9a7741deSElliott Hughes int c, n, ns, s;
866*9a7741deSElliott Hughes
867*9a7741deSElliott Hughes s = pfa->initstat;
868*9a7741deSElliott Hughes patlen = 0;
869*9a7741deSElliott Hughes
870*9a7741deSElliott Hughes /*
871*9a7741deSElliott Hughes * buf <= i <= j <= k <= buf+bufsize
872*9a7741deSElliott Hughes *
873*9a7741deSElliott Hughes * i: origin of active substring
874*9a7741deSElliott Hughes * j: current character
875*9a7741deSElliott Hughes * k: destination of the next getc
876*9a7741deSElliott Hughes */
877*9a7741deSElliott Hughes
878*9a7741deSElliott Hughes i = j = k = buf;
879*9a7741deSElliott Hughes
880*9a7741deSElliott Hughes do {
881*9a7741deSElliott Hughes /*
882*9a7741deSElliott Hughes * Call u8_rune with at least awk_mb_cur_max ahead in
883*9a7741deSElliott Hughes * the buffer until EOF interferes.
884*9a7741deSElliott Hughes */
885*9a7741deSElliott Hughes if (k - j < (int)awk_mb_cur_max) {
886*9a7741deSElliott Hughes if (k + awk_mb_cur_max > buf + bufsize) {
887*9a7741deSElliott Hughes char *obuf = buf;
888*9a7741deSElliott Hughes adjbuf((char **) &buf, &bufsize,
889*9a7741deSElliott Hughes bufsize + awk_mb_cur_max,
890*9a7741deSElliott Hughes quantum, 0, "fnematch");
891*9a7741deSElliott Hughes
892*9a7741deSElliott Hughes /* buf resized, maybe moved. update pointers */
893*9a7741deSElliott Hughes *pbufsize = bufsize;
894*9a7741deSElliott Hughes if (obuf != buf) {
895*9a7741deSElliott Hughes i = buf + (i - obuf);
896*9a7741deSElliott Hughes j = buf + (j - obuf);
897*9a7741deSElliott Hughes k = buf + (k - obuf);
898*9a7741deSElliott Hughes *pbuf = buf;
899*9a7741deSElliott Hughes if (patlen)
900*9a7741deSElliott Hughes patbeg = buf + (patbeg - obuf);
901*9a7741deSElliott Hughes }
902*9a7741deSElliott Hughes }
903*9a7741deSElliott Hughes for (n = awk_mb_cur_max ; n > 0; n--) {
904*9a7741deSElliott Hughes *k++ = (c = getc(f)) != EOF ? c : 0;
905*9a7741deSElliott Hughes if (c == EOF) {
906*9a7741deSElliott Hughes if (ferror(f))
907*9a7741deSElliott Hughes FATAL("fnematch: getc error");
908*9a7741deSElliott Hughes break;
909*9a7741deSElliott Hughes }
910*9a7741deSElliott Hughes }
911*9a7741deSElliott Hughes }
912*9a7741deSElliott Hughes
913*9a7741deSElliott Hughes j += u8_rune(&c, j);
914*9a7741deSElliott Hughes
915*9a7741deSElliott Hughes if ((ns = get_gototab(pfa, s, c)) != 0)
916*9a7741deSElliott Hughes s = ns;
917*9a7741deSElliott Hughes else
918*9a7741deSElliott Hughes s = cgoto(pfa, s, c);
919*9a7741deSElliott Hughes
920*9a7741deSElliott Hughes if (pfa->out[s]) { /* final state */
921*9a7741deSElliott Hughes patbeg = i;
922*9a7741deSElliott Hughes patlen = j - i;
923*9a7741deSElliott Hughes if (c == 0) /* don't count $ */
924*9a7741deSElliott Hughes patlen--;
925*9a7741deSElliott Hughes }
926*9a7741deSElliott Hughes
927*9a7741deSElliott Hughes if (c && s != 1)
928*9a7741deSElliott Hughes continue; /* origin i still viable, next j */
929*9a7741deSElliott Hughes if (patlen)
930*9a7741deSElliott Hughes break; /* best match found */
931*9a7741deSElliott Hughes
932*9a7741deSElliott Hughes /* no match at origin i, next i and start over */
933*9a7741deSElliott Hughes i += u8_rune(&c, i);
934*9a7741deSElliott Hughes if (c == 0)
935*9a7741deSElliott Hughes break; /* no match */
936*9a7741deSElliott Hughes j = i;
937*9a7741deSElliott Hughes s = 2;
938*9a7741deSElliott Hughes } while (1);
939*9a7741deSElliott Hughes
940*9a7741deSElliott Hughes if (patlen) {
941*9a7741deSElliott Hughes /*
942*9a7741deSElliott Hughes * Under no circumstances is the last character fed to
943*9a7741deSElliott Hughes * the automaton part of the match. It is EOF's nullbyte,
944*9a7741deSElliott Hughes * or it sent the automaton into a state with no further
945*9a7741deSElliott Hughes * transitions available (s==1), or both. Room for a
946*9a7741deSElliott Hughes * terminating nullbyte is guaranteed.
947*9a7741deSElliott Hughes *
948*9a7741deSElliott Hughes * ungetc any chars after the end of matching text
949*9a7741deSElliott Hughes * (except for EOF's nullbyte, if present) and null
950*9a7741deSElliott Hughes * terminate the buffer.
951*9a7741deSElliott Hughes */
952*9a7741deSElliott Hughes do
953*9a7741deSElliott Hughes if (*--k && ungetc(*k, f) == EOF)
954*9a7741deSElliott Hughes FATAL("unable to ungetc '%c'", *k);
955*9a7741deSElliott Hughes while (k > patbeg + patlen);
956*9a7741deSElliott Hughes *k = '\0';
957*9a7741deSElliott Hughes return true;
958*9a7741deSElliott Hughes }
959*9a7741deSElliott Hughes else
960*9a7741deSElliott Hughes return false;
961*9a7741deSElliott Hughes }
962*9a7741deSElliott Hughes
reparse(const char * p)963*9a7741deSElliott Hughes Node *reparse(const char *p) /* parses regular expression pointed to by p */
964*9a7741deSElliott Hughes { /* uses relex() to scan regular expression */
965*9a7741deSElliott Hughes Node *np;
966*9a7741deSElliott Hughes
967*9a7741deSElliott Hughes DPRINTF("reparse <%s>\n", p);
968*9a7741deSElliott Hughes lastre = prestr = (const uschar *) p; /* prestr points to string to be parsed */
969*9a7741deSElliott Hughes rtok = relex();
970*9a7741deSElliott Hughes /* GNU compatibility: an empty regexp matches anything */
971*9a7741deSElliott Hughes if (rtok == '\0') {
972*9a7741deSElliott Hughes /* FATAL("empty regular expression"); previous */
973*9a7741deSElliott Hughes return(op2(EMPTYRE, NIL, NIL));
974*9a7741deSElliott Hughes }
975*9a7741deSElliott Hughes np = regexp();
976*9a7741deSElliott Hughes if (rtok != '\0')
977*9a7741deSElliott Hughes FATAL("syntax error in regular expression %s at %s", lastre, prestr);
978*9a7741deSElliott Hughes return(np);
979*9a7741deSElliott Hughes }
980*9a7741deSElliott Hughes
regexp(void)981*9a7741deSElliott Hughes Node *regexp(void) /* top-level parse of reg expr */
982*9a7741deSElliott Hughes {
983*9a7741deSElliott Hughes return (alt(concat(primary())));
984*9a7741deSElliott Hughes }
985*9a7741deSElliott Hughes
primary(void)986*9a7741deSElliott Hughes Node *primary(void)
987*9a7741deSElliott Hughes {
988*9a7741deSElliott Hughes Node *np;
989*9a7741deSElliott Hughes int savelastatom;
990*9a7741deSElliott Hughes
991*9a7741deSElliott Hughes switch (rtok) {
992*9a7741deSElliott Hughes case CHAR:
993*9a7741deSElliott Hughes lastatom = starttok;
994*9a7741deSElliott Hughes np = op2(CHAR, NIL, itonp(rlxval));
995*9a7741deSElliott Hughes rtok = relex();
996*9a7741deSElliott Hughes return (unary(np));
997*9a7741deSElliott Hughes case ALL:
998*9a7741deSElliott Hughes rtok = relex();
999*9a7741deSElliott Hughes return (unary(op2(ALL, NIL, NIL)));
1000*9a7741deSElliott Hughes case EMPTYRE:
1001*9a7741deSElliott Hughes rtok = relex();
1002*9a7741deSElliott Hughes return (unary(op2(EMPTYRE, NIL, NIL)));
1003*9a7741deSElliott Hughes case DOT:
1004*9a7741deSElliott Hughes lastatom = starttok;
1005*9a7741deSElliott Hughes rtok = relex();
1006*9a7741deSElliott Hughes return (unary(op2(DOT, NIL, NIL)));
1007*9a7741deSElliott Hughes case CCL:
1008*9a7741deSElliott Hughes np = op2(CCL, NIL, (Node*) cclenter((const char *) rlxstr));
1009*9a7741deSElliott Hughes lastatom = starttok;
1010*9a7741deSElliott Hughes rtok = relex();
1011*9a7741deSElliott Hughes return (unary(np));
1012*9a7741deSElliott Hughes case NCCL:
1013*9a7741deSElliott Hughes np = op2(NCCL, NIL, (Node *) cclenter((const char *) rlxstr));
1014*9a7741deSElliott Hughes lastatom = starttok;
1015*9a7741deSElliott Hughes rtok = relex();
1016*9a7741deSElliott Hughes return (unary(np));
1017*9a7741deSElliott Hughes case '^':
1018*9a7741deSElliott Hughes rtok = relex();
1019*9a7741deSElliott Hughes return (unary(op2(CHAR, NIL, itonp(HAT))));
1020*9a7741deSElliott Hughes case '$':
1021*9a7741deSElliott Hughes rtok = relex();
1022*9a7741deSElliott Hughes return (unary(op2(CHAR, NIL, NIL)));
1023*9a7741deSElliott Hughes case '(':
1024*9a7741deSElliott Hughes lastatom = starttok;
1025*9a7741deSElliott Hughes savelastatom = starttok - basestr; /* Retain over recursion */
1026*9a7741deSElliott Hughes rtok = relex();
1027*9a7741deSElliott Hughes if (rtok == ')') { /* special pleading for () */
1028*9a7741deSElliott Hughes rtok = relex();
1029*9a7741deSElliott Hughes return unary(op2(CCL, NIL, (Node *) cclenter("")));
1030*9a7741deSElliott Hughes }
1031*9a7741deSElliott Hughes np = regexp();
1032*9a7741deSElliott Hughes if (rtok == ')') {
1033*9a7741deSElliott Hughes lastatom = basestr + savelastatom; /* Restore */
1034*9a7741deSElliott Hughes rtok = relex();
1035*9a7741deSElliott Hughes return (unary(np));
1036*9a7741deSElliott Hughes }
1037*9a7741deSElliott Hughes else
1038*9a7741deSElliott Hughes FATAL("syntax error in regular expression %s at %s", lastre, prestr);
1039*9a7741deSElliott Hughes default:
1040*9a7741deSElliott Hughes FATAL("illegal primary in regular expression %s at %s", lastre, prestr);
1041*9a7741deSElliott Hughes }
1042*9a7741deSElliott Hughes return 0; /*NOTREACHED*/
1043*9a7741deSElliott Hughes }
1044*9a7741deSElliott Hughes
concat(Node * np)1045*9a7741deSElliott Hughes Node *concat(Node *np)
1046*9a7741deSElliott Hughes {
1047*9a7741deSElliott Hughes switch (rtok) {
1048*9a7741deSElliott Hughes case CHAR: case DOT: case ALL: case CCL: case NCCL: case '$': case '(':
1049*9a7741deSElliott Hughes return (concat(op2(CAT, np, primary())));
1050*9a7741deSElliott Hughes case EMPTYRE:
1051*9a7741deSElliott Hughes rtok = relex();
1052*9a7741deSElliott Hughes return (concat(op2(CAT, op2(CCL, NIL, (Node *) cclenter("")),
1053*9a7741deSElliott Hughes primary())));
1054*9a7741deSElliott Hughes }
1055*9a7741deSElliott Hughes return (np);
1056*9a7741deSElliott Hughes }
1057*9a7741deSElliott Hughes
alt(Node * np)1058*9a7741deSElliott Hughes Node *alt(Node *np)
1059*9a7741deSElliott Hughes {
1060*9a7741deSElliott Hughes if (rtok == OR) {
1061*9a7741deSElliott Hughes rtok = relex();
1062*9a7741deSElliott Hughes return (alt(op2(OR, np, concat(primary()))));
1063*9a7741deSElliott Hughes }
1064*9a7741deSElliott Hughes return (np);
1065*9a7741deSElliott Hughes }
1066*9a7741deSElliott Hughes
unary(Node * np)1067*9a7741deSElliott Hughes Node *unary(Node *np)
1068*9a7741deSElliott Hughes {
1069*9a7741deSElliott Hughes switch (rtok) {
1070*9a7741deSElliott Hughes case STAR:
1071*9a7741deSElliott Hughes rtok = relex();
1072*9a7741deSElliott Hughes return (unary(op2(STAR, np, NIL)));
1073*9a7741deSElliott Hughes case PLUS:
1074*9a7741deSElliott Hughes rtok = relex();
1075*9a7741deSElliott Hughes return (unary(op2(PLUS, np, NIL)));
1076*9a7741deSElliott Hughes case QUEST:
1077*9a7741deSElliott Hughes rtok = relex();
1078*9a7741deSElliott Hughes return (unary(op2(QUEST, np, NIL)));
1079*9a7741deSElliott Hughes case ZERO:
1080*9a7741deSElliott Hughes rtok = relex();
1081*9a7741deSElliott Hughes return (unary(op2(ZERO, np, NIL)));
1082*9a7741deSElliott Hughes default:
1083*9a7741deSElliott Hughes return (np);
1084*9a7741deSElliott Hughes }
1085*9a7741deSElliott Hughes }
1086*9a7741deSElliott Hughes
1087*9a7741deSElliott Hughes /*
1088*9a7741deSElliott Hughes * Character class definitions conformant to the POSIX locale as
1089*9a7741deSElliott Hughes * defined in IEEE P1003.1 draft 7 of June 2001, assuming the source
1090*9a7741deSElliott Hughes * and operating character sets are both ASCII (ISO646) or supersets
1091*9a7741deSElliott Hughes * thereof.
1092*9a7741deSElliott Hughes *
1093*9a7741deSElliott Hughes * Note that to avoid overflowing the temporary buffer used in
1094*9a7741deSElliott Hughes * relex(), the expanded character class (prior to range expansion)
1095*9a7741deSElliott Hughes * must be less than twice the size of their full name.
1096*9a7741deSElliott Hughes */
1097*9a7741deSElliott Hughes
1098*9a7741deSElliott Hughes /* Because isblank doesn't show up in any of the header files on any
1099*9a7741deSElliott Hughes * system i use, it's defined here. if some other locale has a richer
1100*9a7741deSElliott Hughes * definition of "blank", define HAS_ISBLANK and provide your own
1101*9a7741deSElliott Hughes * version.
1102*9a7741deSElliott Hughes * the parentheses here are an attempt to find a path through the maze
1103*9a7741deSElliott Hughes * of macro definition and/or function and/or version provided. thanks
1104*9a7741deSElliott Hughes * to nelson beebe for the suggestion; let's see if it works everywhere.
1105*9a7741deSElliott Hughes */
1106*9a7741deSElliott Hughes
1107*9a7741deSElliott Hughes /* #define HAS_ISBLANK */
1108*9a7741deSElliott Hughes #ifndef HAS_ISBLANK
1109*9a7741deSElliott Hughes
1110*9a7741deSElliott Hughes int (xisblank)(int c)
1111*9a7741deSElliott Hughes {
1112*9a7741deSElliott Hughes return c==' ' || c=='\t';
1113*9a7741deSElliott Hughes }
1114*9a7741deSElliott Hughes
1115*9a7741deSElliott Hughes #endif
1116*9a7741deSElliott Hughes
1117*9a7741deSElliott Hughes static const struct charclass {
1118*9a7741deSElliott Hughes const char *cc_name;
1119*9a7741deSElliott Hughes int cc_namelen;
1120*9a7741deSElliott Hughes int (*cc_func)(int);
1121*9a7741deSElliott Hughes } charclasses[] = {
1122*9a7741deSElliott Hughes { "alnum", 5, isalnum },
1123*9a7741deSElliott Hughes { "alpha", 5, isalpha },
1124*9a7741deSElliott Hughes #ifndef HAS_ISBLANK
1125*9a7741deSElliott Hughes { "blank", 5, xisblank },
1126*9a7741deSElliott Hughes #else
1127*9a7741deSElliott Hughes { "blank", 5, isblank },
1128*9a7741deSElliott Hughes #endif
1129*9a7741deSElliott Hughes { "cntrl", 5, iscntrl },
1130*9a7741deSElliott Hughes { "digit", 5, isdigit },
1131*9a7741deSElliott Hughes { "graph", 5, isgraph },
1132*9a7741deSElliott Hughes { "lower", 5, islower },
1133*9a7741deSElliott Hughes { "print", 5, isprint },
1134*9a7741deSElliott Hughes { "punct", 5, ispunct },
1135*9a7741deSElliott Hughes { "space", 5, isspace },
1136*9a7741deSElliott Hughes { "upper", 5, isupper },
1137*9a7741deSElliott Hughes { "xdigit", 6, isxdigit },
1138*9a7741deSElliott Hughes { NULL, 0, NULL },
1139*9a7741deSElliott Hughes };
1140*9a7741deSElliott Hughes
1141*9a7741deSElliott Hughes #define REPEAT_SIMPLE 0
1142*9a7741deSElliott Hughes #define REPEAT_PLUS_APPENDED 1
1143*9a7741deSElliott Hughes #define REPEAT_WITH_Q 2
1144*9a7741deSElliott Hughes #define REPEAT_ZERO 3
1145*9a7741deSElliott Hughes
1146*9a7741deSElliott Hughes static int
replace_repeat(const uschar * reptok,int reptoklen,const uschar * atom,int atomlen,int firstnum,int secondnum,int special_case)1147*9a7741deSElliott Hughes replace_repeat(const uschar *reptok, int reptoklen, const uschar *atom,
1148*9a7741deSElliott Hughes int atomlen, int firstnum, int secondnum, int special_case)
1149*9a7741deSElliott Hughes {
1150*9a7741deSElliott Hughes int i, j;
1151*9a7741deSElliott Hughes uschar *buf = 0;
1152*9a7741deSElliott Hughes int ret = 1;
1153*9a7741deSElliott Hughes int init_q = (firstnum == 0); /* first added char will be ? */
1154*9a7741deSElliott Hughes int n_q_reps = secondnum-firstnum; /* m>n, so reduce until {1,m-n} left */
1155*9a7741deSElliott Hughes int prefix_length = reptok - basestr; /* prefix includes first rep */
1156*9a7741deSElliott Hughes int suffix_length = strlen((const char *) reptok) - reptoklen; /* string after rep specifier */
1157*9a7741deSElliott Hughes int size = prefix_length + suffix_length;
1158*9a7741deSElliott Hughes
1159*9a7741deSElliott Hughes if (firstnum > 1) { /* add room for reps 2 through firstnum */
1160*9a7741deSElliott Hughes size += atomlen*(firstnum-1);
1161*9a7741deSElliott Hughes }
1162*9a7741deSElliott Hughes
1163*9a7741deSElliott Hughes /* Adjust size of buffer for special cases */
1164*9a7741deSElliott Hughes if (special_case == REPEAT_PLUS_APPENDED) {
1165*9a7741deSElliott Hughes size++; /* for the final + */
1166*9a7741deSElliott Hughes } else if (special_case == REPEAT_WITH_Q) {
1167*9a7741deSElliott Hughes size += init_q + (atomlen+1)* (n_q_reps-init_q);
1168*9a7741deSElliott Hughes } else if (special_case == REPEAT_ZERO) {
1169*9a7741deSElliott Hughes size += 2; /* just a null ERE: () */
1170*9a7741deSElliott Hughes }
1171*9a7741deSElliott Hughes if ((buf = (uschar *) malloc(size + 1)) == NULL)
1172*9a7741deSElliott Hughes FATAL("out of space in reg expr %.10s..", lastre);
1173*9a7741deSElliott Hughes memcpy(buf, basestr, prefix_length); /* copy prefix */
1174*9a7741deSElliott Hughes j = prefix_length;
1175*9a7741deSElliott Hughes if (special_case == REPEAT_ZERO) {
1176*9a7741deSElliott Hughes j -= atomlen;
1177*9a7741deSElliott Hughes buf[j++] = '(';
1178*9a7741deSElliott Hughes buf[j++] = ')';
1179*9a7741deSElliott Hughes }
1180*9a7741deSElliott Hughes for (i = 1; i < firstnum; i++) { /* copy x reps */
1181*9a7741deSElliott Hughes memcpy(&buf[j], atom, atomlen);
1182*9a7741deSElliott Hughes j += atomlen;
1183*9a7741deSElliott Hughes }
1184*9a7741deSElliott Hughes if (special_case == REPEAT_PLUS_APPENDED) {
1185*9a7741deSElliott Hughes buf[j++] = '+';
1186*9a7741deSElliott Hughes } else if (special_case == REPEAT_WITH_Q) {
1187*9a7741deSElliott Hughes if (init_q)
1188*9a7741deSElliott Hughes buf[j++] = '?';
1189*9a7741deSElliott Hughes for (i = init_q; i < n_q_reps; i++) { /* copy x? reps */
1190*9a7741deSElliott Hughes memcpy(&buf[j], atom, atomlen);
1191*9a7741deSElliott Hughes j += atomlen;
1192*9a7741deSElliott Hughes buf[j++] = '?';
1193*9a7741deSElliott Hughes }
1194*9a7741deSElliott Hughes }
1195*9a7741deSElliott Hughes memcpy(&buf[j], reptok+reptoklen, suffix_length);
1196*9a7741deSElliott Hughes j += suffix_length;
1197*9a7741deSElliott Hughes buf[j] = '\0';
1198*9a7741deSElliott Hughes /* free old basestr */
1199*9a7741deSElliott Hughes if (firstbasestr != basestr) {
1200*9a7741deSElliott Hughes if (basestr)
1201*9a7741deSElliott Hughes xfree(basestr);
1202*9a7741deSElliott Hughes }
1203*9a7741deSElliott Hughes basestr = buf;
1204*9a7741deSElliott Hughes prestr = buf + prefix_length;
1205*9a7741deSElliott Hughes if (special_case == REPEAT_ZERO) {
1206*9a7741deSElliott Hughes prestr -= atomlen;
1207*9a7741deSElliott Hughes ret++;
1208*9a7741deSElliott Hughes }
1209*9a7741deSElliott Hughes return ret;
1210*9a7741deSElliott Hughes }
1211*9a7741deSElliott Hughes
repeat(const uschar * reptok,int reptoklen,const uschar * atom,int atomlen,int firstnum,int secondnum)1212*9a7741deSElliott Hughes static int repeat(const uschar *reptok, int reptoklen, const uschar *atom,
1213*9a7741deSElliott Hughes int atomlen, int firstnum, int secondnum)
1214*9a7741deSElliott Hughes {
1215*9a7741deSElliott Hughes /*
1216*9a7741deSElliott Hughes In general, the repetition specifier or "bound" is replaced here
1217*9a7741deSElliott Hughes by an equivalent ERE string, repeating the immediately previous atom
1218*9a7741deSElliott Hughes and appending ? and + as needed. Note that the first copy of the
1219*9a7741deSElliott Hughes atom is left in place, except in the special_case of a zero-repeat
1220*9a7741deSElliott Hughes (i.e., {0}).
1221*9a7741deSElliott Hughes */
1222*9a7741deSElliott Hughes if (secondnum < 0) { /* means {n,} -> repeat n-1 times followed by PLUS */
1223*9a7741deSElliott Hughes if (firstnum < 2) {
1224*9a7741deSElliott Hughes /* 0 or 1: should be handled before you get here */
1225*9a7741deSElliott Hughes FATAL("internal error");
1226*9a7741deSElliott Hughes } else {
1227*9a7741deSElliott Hughes return replace_repeat(reptok, reptoklen, atom, atomlen,
1228*9a7741deSElliott Hughes firstnum, secondnum, REPEAT_PLUS_APPENDED);
1229*9a7741deSElliott Hughes }
1230*9a7741deSElliott Hughes } else if (firstnum == secondnum) { /* {n} or {n,n} -> simply repeat n-1 times */
1231*9a7741deSElliott Hughes if (firstnum == 0) { /* {0} or {0,0} */
1232*9a7741deSElliott Hughes /* This case is unusual because the resulting
1233*9a7741deSElliott Hughes replacement string might actually be SMALLER than
1234*9a7741deSElliott Hughes the original ERE */
1235*9a7741deSElliott Hughes return replace_repeat(reptok, reptoklen, atom, atomlen,
1236*9a7741deSElliott Hughes firstnum, secondnum, REPEAT_ZERO);
1237*9a7741deSElliott Hughes } else { /* (firstnum >= 1) */
1238*9a7741deSElliott Hughes return replace_repeat(reptok, reptoklen, atom, atomlen,
1239*9a7741deSElliott Hughes firstnum, secondnum, REPEAT_SIMPLE);
1240*9a7741deSElliott Hughes }
1241*9a7741deSElliott Hughes } else if (firstnum < secondnum) { /* {n,m} -> repeat n-1 times then alternate */
1242*9a7741deSElliott Hughes /* x{n,m} => xx...x{1, m-n+1} => xx...x?x?x?..x? */
1243*9a7741deSElliott Hughes return replace_repeat(reptok, reptoklen, atom, atomlen,
1244*9a7741deSElliott Hughes firstnum, secondnum, REPEAT_WITH_Q);
1245*9a7741deSElliott Hughes } else { /* Error - shouldn't be here (n>m) */
1246*9a7741deSElliott Hughes FATAL("internal error");
1247*9a7741deSElliott Hughes }
1248*9a7741deSElliott Hughes return 0;
1249*9a7741deSElliott Hughes }
1250*9a7741deSElliott Hughes
relex(void)1251*9a7741deSElliott Hughes int relex(void) /* lexical analyzer for reparse */
1252*9a7741deSElliott Hughes {
1253*9a7741deSElliott Hughes int c, n;
1254*9a7741deSElliott Hughes int cflag;
1255*9a7741deSElliott Hughes static uschar *buf = NULL;
1256*9a7741deSElliott Hughes static int bufsz = 100;
1257*9a7741deSElliott Hughes uschar *bp;
1258*9a7741deSElliott Hughes const struct charclass *cc;
1259*9a7741deSElliott Hughes int i;
1260*9a7741deSElliott Hughes int num, m;
1261*9a7741deSElliott Hughes bool commafound, digitfound;
1262*9a7741deSElliott Hughes const uschar *startreptok;
1263*9a7741deSElliott Hughes static int parens = 0;
1264*9a7741deSElliott Hughes
1265*9a7741deSElliott Hughes rescan:
1266*9a7741deSElliott Hughes starttok = prestr;
1267*9a7741deSElliott Hughes
1268*9a7741deSElliott Hughes if ((n = u8_rune(&rlxval, (const char *) prestr)) > 1) {
1269*9a7741deSElliott Hughes prestr += n;
1270*9a7741deSElliott Hughes starttok = prestr;
1271*9a7741deSElliott Hughes return CHAR;
1272*9a7741deSElliott Hughes }
1273*9a7741deSElliott Hughes
1274*9a7741deSElliott Hughes switch (c = *prestr++) {
1275*9a7741deSElliott Hughes case '|': return OR;
1276*9a7741deSElliott Hughes case '*': return STAR;
1277*9a7741deSElliott Hughes case '+': return PLUS;
1278*9a7741deSElliott Hughes case '?': return QUEST;
1279*9a7741deSElliott Hughes case '.': return DOT;
1280*9a7741deSElliott Hughes case '\0': prestr--; return '\0';
1281*9a7741deSElliott Hughes case '^':
1282*9a7741deSElliott Hughes case '$':
1283*9a7741deSElliott Hughes return c;
1284*9a7741deSElliott Hughes case '(':
1285*9a7741deSElliott Hughes parens++;
1286*9a7741deSElliott Hughes return c;
1287*9a7741deSElliott Hughes case ')':
1288*9a7741deSElliott Hughes if (parens) {
1289*9a7741deSElliott Hughes parens--;
1290*9a7741deSElliott Hughes return c;
1291*9a7741deSElliott Hughes }
1292*9a7741deSElliott Hughes /* unmatched close parenthesis; per POSIX, treat as literal */
1293*9a7741deSElliott Hughes rlxval = c;
1294*9a7741deSElliott Hughes return CHAR;
1295*9a7741deSElliott Hughes case '\\':
1296*9a7741deSElliott Hughes rlxval = quoted(&prestr);
1297*9a7741deSElliott Hughes return CHAR;
1298*9a7741deSElliott Hughes default:
1299*9a7741deSElliott Hughes rlxval = c;
1300*9a7741deSElliott Hughes return CHAR;
1301*9a7741deSElliott Hughes case '[':
1302*9a7741deSElliott Hughes if (buf == NULL && (buf = (uschar *) malloc(bufsz)) == NULL)
1303*9a7741deSElliott Hughes FATAL("out of space in reg expr %.10s..", lastre);
1304*9a7741deSElliott Hughes bp = buf;
1305*9a7741deSElliott Hughes if (*prestr == '^') {
1306*9a7741deSElliott Hughes cflag = 1;
1307*9a7741deSElliott Hughes prestr++;
1308*9a7741deSElliott Hughes }
1309*9a7741deSElliott Hughes else
1310*9a7741deSElliott Hughes cflag = 0;
1311*9a7741deSElliott Hughes n = 5 * strlen((const char *) prestr)+1; /* BUG: was 2. what value? */
1312*9a7741deSElliott Hughes if (!adjbuf((char **) &buf, &bufsz, n, n, (char **) &bp, "relex1"))
1313*9a7741deSElliott Hughes FATAL("out of space for reg expr %.10s...", lastre);
1314*9a7741deSElliott Hughes for (; ; ) {
1315*9a7741deSElliott Hughes if ((n = u8_rune(&rlxval, (const char *) prestr)) > 1) {
1316*9a7741deSElliott Hughes for (i = 0; i < n; i++)
1317*9a7741deSElliott Hughes *bp++ = *prestr++;
1318*9a7741deSElliott Hughes continue;
1319*9a7741deSElliott Hughes }
1320*9a7741deSElliott Hughes if ((c = *prestr++) == '\\') {
1321*9a7741deSElliott Hughes *bp++ = '\\';
1322*9a7741deSElliott Hughes if ((c = *prestr++) == '\0')
1323*9a7741deSElliott Hughes FATAL("nonterminated character class %.20s...", lastre);
1324*9a7741deSElliott Hughes *bp++ = c;
1325*9a7741deSElliott Hughes /* } else if (c == '\n') { */
1326*9a7741deSElliott Hughes /* FATAL("newline in character class %.20s...", lastre); */
1327*9a7741deSElliott Hughes } else if (c == '[' && *prestr == ':') {
1328*9a7741deSElliott Hughes /* POSIX char class names, Dag-Erling Smorgrav, [email protected] */
1329*9a7741deSElliott Hughes for (cc = charclasses; cc->cc_name; cc++)
1330*9a7741deSElliott Hughes if (strncmp((const char *) prestr + 1, (const char *) cc->cc_name, cc->cc_namelen) == 0)
1331*9a7741deSElliott Hughes break;
1332*9a7741deSElliott Hughes if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' &&
1333*9a7741deSElliott Hughes prestr[2 + cc->cc_namelen] == ']') {
1334*9a7741deSElliott Hughes prestr += cc->cc_namelen + 3;
1335*9a7741deSElliott Hughes /*
1336*9a7741deSElliott Hughes * BUG: We begin at 1, instead of 0, since we
1337*9a7741deSElliott Hughes * would otherwise prematurely terminate the
1338*9a7741deSElliott Hughes * string for classes like [[:cntrl:]]. This
1339*9a7741deSElliott Hughes * means that we can't match the NUL character,
1340*9a7741deSElliott Hughes * not without first adapting the entire
1341*9a7741deSElliott Hughes * program to track each string's length.
1342*9a7741deSElliott Hughes */
1343*9a7741deSElliott Hughes for (i = 1; i <= UCHAR_MAX; i++) {
1344*9a7741deSElliott Hughes if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, "relex2"))
1345*9a7741deSElliott Hughes FATAL("out of space for reg expr %.10s...", lastre);
1346*9a7741deSElliott Hughes if (cc->cc_func(i)) {
1347*9a7741deSElliott Hughes /* escape backslash */
1348*9a7741deSElliott Hughes if (i == '\\') {
1349*9a7741deSElliott Hughes *bp++ = '\\';
1350*9a7741deSElliott Hughes n++;
1351*9a7741deSElliott Hughes }
1352*9a7741deSElliott Hughes
1353*9a7741deSElliott Hughes *bp++ = i;
1354*9a7741deSElliott Hughes n++;
1355*9a7741deSElliott Hughes }
1356*9a7741deSElliott Hughes }
1357*9a7741deSElliott Hughes } else
1358*9a7741deSElliott Hughes *bp++ = c;
1359*9a7741deSElliott Hughes } else if (c == '[' && *prestr == '.') {
1360*9a7741deSElliott Hughes char collate_char;
1361*9a7741deSElliott Hughes prestr++;
1362*9a7741deSElliott Hughes collate_char = *prestr++;
1363*9a7741deSElliott Hughes if (*prestr == '.' && prestr[1] == ']') {
1364*9a7741deSElliott Hughes prestr += 2;
1365*9a7741deSElliott Hughes /* Found it: map via locale TBD: for
1366*9a7741deSElliott Hughes now, simply return this char. This
1367*9a7741deSElliott Hughes is sufficient to pass conformance
1368*9a7741deSElliott Hughes test awk.ex 156
1369*9a7741deSElliott Hughes */
1370*9a7741deSElliott Hughes if (*prestr == ']') {
1371*9a7741deSElliott Hughes prestr++;
1372*9a7741deSElliott Hughes rlxval = collate_char;
1373*9a7741deSElliott Hughes return CHAR;
1374*9a7741deSElliott Hughes }
1375*9a7741deSElliott Hughes }
1376*9a7741deSElliott Hughes } else if (c == '[' && *prestr == '=') {
1377*9a7741deSElliott Hughes char equiv_char;
1378*9a7741deSElliott Hughes prestr++;
1379*9a7741deSElliott Hughes equiv_char = *prestr++;
1380*9a7741deSElliott Hughes if (*prestr == '=' && prestr[1] == ']') {
1381*9a7741deSElliott Hughes prestr += 2;
1382*9a7741deSElliott Hughes /* Found it: map via locale TBD: for now
1383*9a7741deSElliott Hughes simply return this char. This is
1384*9a7741deSElliott Hughes sufficient to pass conformance test
1385*9a7741deSElliott Hughes awk.ex 156
1386*9a7741deSElliott Hughes */
1387*9a7741deSElliott Hughes if (*prestr == ']') {
1388*9a7741deSElliott Hughes prestr++;
1389*9a7741deSElliott Hughes rlxval = equiv_char;
1390*9a7741deSElliott Hughes return CHAR;
1391*9a7741deSElliott Hughes }
1392*9a7741deSElliott Hughes }
1393*9a7741deSElliott Hughes } else if (c == '\0') {
1394*9a7741deSElliott Hughes FATAL("nonterminated character class %.20s", lastre);
1395*9a7741deSElliott Hughes } else if (bp == buf) { /* 1st char is special */
1396*9a7741deSElliott Hughes *bp++ = c;
1397*9a7741deSElliott Hughes } else if (c == ']') {
1398*9a7741deSElliott Hughes *bp++ = 0;
1399*9a7741deSElliott Hughes rlxstr = (uschar *) tostring((char *) buf);
1400*9a7741deSElliott Hughes if (cflag == 0)
1401*9a7741deSElliott Hughes return CCL;
1402*9a7741deSElliott Hughes else
1403*9a7741deSElliott Hughes return NCCL;
1404*9a7741deSElliott Hughes } else
1405*9a7741deSElliott Hughes *bp++ = c;
1406*9a7741deSElliott Hughes }
1407*9a7741deSElliott Hughes break;
1408*9a7741deSElliott Hughes case '{':
1409*9a7741deSElliott Hughes if (isdigit((int) *(prestr))) {
1410*9a7741deSElliott Hughes num = 0; /* Process as a repetition */
1411*9a7741deSElliott Hughes n = -1; m = -1;
1412*9a7741deSElliott Hughes commafound = false;
1413*9a7741deSElliott Hughes digitfound = false;
1414*9a7741deSElliott Hughes startreptok = prestr-1;
1415*9a7741deSElliott Hughes /* Remember start of previous atom here ? */
1416*9a7741deSElliott Hughes } else { /* just a { char, not a repetition */
1417*9a7741deSElliott Hughes rlxval = c;
1418*9a7741deSElliott Hughes return CHAR;
1419*9a7741deSElliott Hughes }
1420*9a7741deSElliott Hughes for (; ; ) {
1421*9a7741deSElliott Hughes if ((c = *prestr++) == '}') {
1422*9a7741deSElliott Hughes if (commafound) {
1423*9a7741deSElliott Hughes if (digitfound) { /* {n,m} */
1424*9a7741deSElliott Hughes m = num;
1425*9a7741deSElliott Hughes if (m < n)
1426*9a7741deSElliott Hughes FATAL("illegal repetition expression: class %.20s",
1427*9a7741deSElliott Hughes lastre);
1428*9a7741deSElliott Hughes if (n == 0 && m == 1) {
1429*9a7741deSElliott Hughes return QUEST;
1430*9a7741deSElliott Hughes }
1431*9a7741deSElliott Hughes } else { /* {n,} */
1432*9a7741deSElliott Hughes if (n == 0)
1433*9a7741deSElliott Hughes return STAR;
1434*9a7741deSElliott Hughes else if (n == 1)
1435*9a7741deSElliott Hughes return PLUS;
1436*9a7741deSElliott Hughes }
1437*9a7741deSElliott Hughes } else {
1438*9a7741deSElliott Hughes if (digitfound) { /* {n} same as {n,n} */
1439*9a7741deSElliott Hughes n = num;
1440*9a7741deSElliott Hughes m = num;
1441*9a7741deSElliott Hughes } else { /* {} */
1442*9a7741deSElliott Hughes FATAL("illegal repetition expression: class %.20s",
1443*9a7741deSElliott Hughes lastre);
1444*9a7741deSElliott Hughes }
1445*9a7741deSElliott Hughes }
1446*9a7741deSElliott Hughes if (repeat(starttok, prestr-starttok, lastatom,
1447*9a7741deSElliott Hughes startreptok - lastatom, n, m) > 0) {
1448*9a7741deSElliott Hughes if (n == 0 && m == 0) {
1449*9a7741deSElliott Hughes return ZERO;
1450*9a7741deSElliott Hughes }
1451*9a7741deSElliott Hughes /* must rescan input for next token */
1452*9a7741deSElliott Hughes goto rescan;
1453*9a7741deSElliott Hughes }
1454*9a7741deSElliott Hughes /* Failed to replace: eat up {...} characters
1455*9a7741deSElliott Hughes and treat like just PLUS */
1456*9a7741deSElliott Hughes return PLUS;
1457*9a7741deSElliott Hughes } else if (c == '\0') {
1458*9a7741deSElliott Hughes FATAL("nonterminated character class %.20s",
1459*9a7741deSElliott Hughes lastre);
1460*9a7741deSElliott Hughes } else if (isdigit(c)) {
1461*9a7741deSElliott Hughes num = 10 * num + c - '0';
1462*9a7741deSElliott Hughes digitfound = true;
1463*9a7741deSElliott Hughes } else if (c == ',') {
1464*9a7741deSElliott Hughes if (commafound)
1465*9a7741deSElliott Hughes FATAL("illegal repetition expression: class %.20s",
1466*9a7741deSElliott Hughes lastre);
1467*9a7741deSElliott Hughes /* looking for {n,} or {n,m} */
1468*9a7741deSElliott Hughes commafound = true;
1469*9a7741deSElliott Hughes n = num;
1470*9a7741deSElliott Hughes digitfound = false; /* reset */
1471*9a7741deSElliott Hughes num = 0;
1472*9a7741deSElliott Hughes } else {
1473*9a7741deSElliott Hughes FATAL("illegal repetition expression: class %.20s",
1474*9a7741deSElliott Hughes lastre);
1475*9a7741deSElliott Hughes }
1476*9a7741deSElliott Hughes }
1477*9a7741deSElliott Hughes break;
1478*9a7741deSElliott Hughes }
1479*9a7741deSElliott Hughes }
1480*9a7741deSElliott Hughes
cgoto(fa * f,int s,int c)1481*9a7741deSElliott Hughes int cgoto(fa *f, int s, int c)
1482*9a7741deSElliott Hughes {
1483*9a7741deSElliott Hughes int *p, *q;
1484*9a7741deSElliott Hughes int i, j, k;
1485*9a7741deSElliott Hughes
1486*9a7741deSElliott Hughes /* assert(c == HAT || c < NCHARS); BUG: seg fault if disable test */
1487*9a7741deSElliott Hughes while (f->accept >= maxsetvec) { /* guessing here! */
1488*9a7741deSElliott Hughes resizesetvec(__func__);
1489*9a7741deSElliott Hughes }
1490*9a7741deSElliott Hughes for (i = 0; i <= f->accept; i++)
1491*9a7741deSElliott Hughes setvec[i] = 0;
1492*9a7741deSElliott Hughes setcnt = 0;
1493*9a7741deSElliott Hughes resize_state(f, s);
1494*9a7741deSElliott Hughes /* compute positions of gototab[s,c] into setvec */
1495*9a7741deSElliott Hughes p = f->posns[s];
1496*9a7741deSElliott Hughes for (i = 1; i <= *p; i++) {
1497*9a7741deSElliott Hughes if ((k = f->re[p[i]].ltype) != FINAL) {
1498*9a7741deSElliott Hughes if ((k == CHAR && c == ptoi(f->re[p[i]].lval.np))
1499*9a7741deSElliott Hughes || (k == DOT && c != 0 && c != HAT)
1500*9a7741deSElliott Hughes || (k == ALL && c != 0)
1501*9a7741deSElliott Hughes || (k == EMPTYRE && c != 0)
1502*9a7741deSElliott Hughes || (k == CCL && member(c, (int *) f->re[p[i]].lval.rp))
1503*9a7741deSElliott Hughes || (k == NCCL && !member(c, (int *) f->re[p[i]].lval.rp) && c != 0 && c != HAT)) {
1504*9a7741deSElliott Hughes q = f->re[p[i]].lfollow;
1505*9a7741deSElliott Hughes for (j = 1; j <= *q; j++) {
1506*9a7741deSElliott Hughes if (q[j] >= maxsetvec) {
1507*9a7741deSElliott Hughes resizesetvec(__func__);
1508*9a7741deSElliott Hughes }
1509*9a7741deSElliott Hughes if (setvec[q[j]] == 0) {
1510*9a7741deSElliott Hughes setcnt++;
1511*9a7741deSElliott Hughes setvec[q[j]] = 1;
1512*9a7741deSElliott Hughes }
1513*9a7741deSElliott Hughes }
1514*9a7741deSElliott Hughes }
1515*9a7741deSElliott Hughes }
1516*9a7741deSElliott Hughes }
1517*9a7741deSElliott Hughes /* determine if setvec is a previous state */
1518*9a7741deSElliott Hughes tmpset[0] = setcnt;
1519*9a7741deSElliott Hughes j = 1;
1520*9a7741deSElliott Hughes for (i = f->accept; i >= 0; i--)
1521*9a7741deSElliott Hughes if (setvec[i]) {
1522*9a7741deSElliott Hughes tmpset[j++] = i;
1523*9a7741deSElliott Hughes }
1524*9a7741deSElliott Hughes resize_state(f, f->curstat > s ? f->curstat : s);
1525*9a7741deSElliott Hughes /* tmpset == previous state? */
1526*9a7741deSElliott Hughes for (i = 1; i <= f->curstat; i++) {
1527*9a7741deSElliott Hughes p = f->posns[i];
1528*9a7741deSElliott Hughes if ((k = tmpset[0]) != p[0])
1529*9a7741deSElliott Hughes goto different;
1530*9a7741deSElliott Hughes for (j = 1; j <= k; j++)
1531*9a7741deSElliott Hughes if (tmpset[j] != p[j])
1532*9a7741deSElliott Hughes goto different;
1533*9a7741deSElliott Hughes /* setvec is state i */
1534*9a7741deSElliott Hughes if (c != HAT)
1535*9a7741deSElliott Hughes set_gototab(f, s, c, i);
1536*9a7741deSElliott Hughes return i;
1537*9a7741deSElliott Hughes different:;
1538*9a7741deSElliott Hughes }
1539*9a7741deSElliott Hughes
1540*9a7741deSElliott Hughes /* add tmpset to current set of states */
1541*9a7741deSElliott Hughes ++(f->curstat);
1542*9a7741deSElliott Hughes resize_state(f, f->curstat);
1543*9a7741deSElliott Hughes clear_gototab(f, f->curstat);
1544*9a7741deSElliott Hughes xfree(f->posns[f->curstat]);
1545*9a7741deSElliott Hughes p = intalloc(setcnt + 1, __func__);
1546*9a7741deSElliott Hughes
1547*9a7741deSElliott Hughes f->posns[f->curstat] = p;
1548*9a7741deSElliott Hughes if (c != HAT)
1549*9a7741deSElliott Hughes set_gototab(f, s, c, f->curstat);
1550*9a7741deSElliott Hughes for (i = 0; i <= setcnt; i++)
1551*9a7741deSElliott Hughes p[i] = tmpset[i];
1552*9a7741deSElliott Hughes if (setvec[f->accept])
1553*9a7741deSElliott Hughes f->out[f->curstat] = 1;
1554*9a7741deSElliott Hughes else
1555*9a7741deSElliott Hughes f->out[f->curstat] = 0;
1556*9a7741deSElliott Hughes return f->curstat;
1557*9a7741deSElliott Hughes }
1558*9a7741deSElliott Hughes
1559*9a7741deSElliott Hughes
freefa(fa * f)1560*9a7741deSElliott Hughes void freefa(fa *f) /* free a finite automaton */
1561*9a7741deSElliott Hughes {
1562*9a7741deSElliott Hughes int i;
1563*9a7741deSElliott Hughes
1564*9a7741deSElliott Hughes if (f == NULL)
1565*9a7741deSElliott Hughes return;
1566*9a7741deSElliott Hughes for (i = 0; i < f->state_count; i++)
1567*9a7741deSElliott Hughes xfree(f->gototab[i].entries);
1568*9a7741deSElliott Hughes xfree(f->gototab);
1569*9a7741deSElliott Hughes for (i = 0; i <= f->curstat; i++)
1570*9a7741deSElliott Hughes xfree(f->posns[i]);
1571*9a7741deSElliott Hughes for (i = 0; i <= f->accept; i++) {
1572*9a7741deSElliott Hughes xfree(f->re[i].lfollow);
1573*9a7741deSElliott Hughes if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL)
1574*9a7741deSElliott Hughes xfree(f->re[i].lval.np);
1575*9a7741deSElliott Hughes }
1576*9a7741deSElliott Hughes xfree(f->restr);
1577*9a7741deSElliott Hughes xfree(f->out);
1578*9a7741deSElliott Hughes xfree(f->posns);
1579*9a7741deSElliott Hughes xfree(f->gototab);
1580*9a7741deSElliott Hughes xfree(f);
1581*9a7741deSElliott Hughes }
1582