/****************************************************************
Copyright (C) Lucent Technologies 1997
All Rights Reserved

Permission to use, copy, modify, and distribute this software and
its documentation for any purpose and without fee is hereby
granted, provided that the above copyright notice appear in all
copies and that both that the copyright notice and this
permission notice and warranty disclaimer appear in supporting
documentation, and that the name Lucent Technologies or any of
its entities not be used in advertising or publicity pertaining
to distribution of the software without specific, written prior
permission.

LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
THIS SOFTWARE.
****************************************************************/
24*9a7741deSElliott Hughes
25*9a7741deSElliott Hughes #include <stdio.h>
26*9a7741deSElliott Hughes #include <stdlib.h>
27*9a7741deSElliott Hughes #include <string.h>
28*9a7741deSElliott Hughes #include <ctype.h>
29*9a7741deSElliott Hughes #include "awk.h"
30*9a7741deSElliott Hughes #include "awkgram.tab.h"
31*9a7741deSElliott Hughes
32*9a7741deSElliott Hughes extern YYSTYPE yylval;
33*9a7741deSElliott Hughes extern bool infunc;
34*9a7741deSElliott Hughes
35*9a7741deSElliott Hughes int lineno = 1;
36*9a7741deSElliott Hughes int bracecnt = 0;
37*9a7741deSElliott Hughes int brackcnt = 0;
38*9a7741deSElliott Hughes int parencnt = 0;
39*9a7741deSElliott Hughes
/*
 * One entry of the reserved-word table searched by word().
 */
typedef struct Keyword {
	const char	*word;	/* spelling compared against the input name */
	int		sub;	/* stored in yylval.i when the word matches */
	int		type;	/* token code handed back to the parser */
} Keyword;
45*9a7741deSElliott Hughes
46*9a7741deSElliott Hughes const Keyword keywords[] = { /* keep sorted: binary searched */
47*9a7741deSElliott Hughes { "BEGIN", XBEGIN, XBEGIN },
48*9a7741deSElliott Hughes { "END", XEND, XEND },
49*9a7741deSElliott Hughes { "NF", VARNF, VARNF },
50*9a7741deSElliott Hughes { "atan2", FATAN, BLTIN },
51*9a7741deSElliott Hughes { "break", BREAK, BREAK },
52*9a7741deSElliott Hughes { "close", CLOSE, CLOSE },
53*9a7741deSElliott Hughes { "continue", CONTINUE, CONTINUE },
54*9a7741deSElliott Hughes { "cos", FCOS, BLTIN },
55*9a7741deSElliott Hughes { "delete", DELETE, DELETE },
56*9a7741deSElliott Hughes { "do", DO, DO },
57*9a7741deSElliott Hughes { "else", ELSE, ELSE },
58*9a7741deSElliott Hughes { "exit", EXIT, EXIT },
59*9a7741deSElliott Hughes { "exp", FEXP, BLTIN },
60*9a7741deSElliott Hughes { "fflush", FFLUSH, BLTIN },
61*9a7741deSElliott Hughes { "for", FOR, FOR },
62*9a7741deSElliott Hughes { "func", FUNC, FUNC },
63*9a7741deSElliott Hughes { "function", FUNC, FUNC },
64*9a7741deSElliott Hughes { "getline", GETLINE, GETLINE },
65*9a7741deSElliott Hughes { "gsub", GSUB, GSUB },
66*9a7741deSElliott Hughes { "if", IF, IF },
67*9a7741deSElliott Hughes { "in", IN, IN },
68*9a7741deSElliott Hughes { "index", INDEX, INDEX },
69*9a7741deSElliott Hughes { "int", FINT, BLTIN },
70*9a7741deSElliott Hughes { "length", FLENGTH, BLTIN },
71*9a7741deSElliott Hughes { "log", FLOG, BLTIN },
72*9a7741deSElliott Hughes { "match", MATCHFCN, MATCHFCN },
73*9a7741deSElliott Hughes { "next", NEXT, NEXT },
74*9a7741deSElliott Hughes { "nextfile", NEXTFILE, NEXTFILE },
75*9a7741deSElliott Hughes { "print", PRINT, PRINT },
76*9a7741deSElliott Hughes { "printf", PRINTF, PRINTF },
77*9a7741deSElliott Hughes { "rand", FRAND, BLTIN },
78*9a7741deSElliott Hughes { "return", RETURN, RETURN },
79*9a7741deSElliott Hughes { "sin", FSIN, BLTIN },
80*9a7741deSElliott Hughes { "split", SPLIT, SPLIT },
81*9a7741deSElliott Hughes { "sprintf", SPRINTF, SPRINTF },
82*9a7741deSElliott Hughes { "sqrt", FSQRT, BLTIN },
83*9a7741deSElliott Hughes { "srand", FSRAND, BLTIN },
84*9a7741deSElliott Hughes { "sub", SUB, SUB },
85*9a7741deSElliott Hughes { "substr", SUBSTR, SUBSTR },
86*9a7741deSElliott Hughes { "system", FSYSTEM, BLTIN },
87*9a7741deSElliott Hughes { "tolower", FTOLOWER, BLTIN },
88*9a7741deSElliott Hughes { "toupper", FTOUPPER, BLTIN },
89*9a7741deSElliott Hughes { "while", WHILE, WHILE },
90*9a7741deSElliott Hughes };
91*9a7741deSElliott Hughes
/*
 * RET(x) - return token x from the enclosing function, first printing
 * its name when dbg is set.
 *
 * Wrapped in do { } while (0) so that "RET(x);" is a single statement
 * and is safe in an un-braced if/else; x is evaluated exactly once.
 */
#define	RET(x)	do { \
		int RET_tok_ = (x); \
		if (dbg) \
			printf("lex %s\n", tokname(RET_tok_)); \
		return RET_tok_; \
	} while (0)
93*9a7741deSElliott Hughes
/*
 * peek - report the next input character without consuming it.
 *
 * Implemented as input() followed by unput(), so it carries unput()'s
 * side effects as well (including its lineno adjustment for '\n').
 */
static int peek(void)
{
	const int next = input();

	unput(next);
	return next;
}
100*9a7741deSElliott Hughes
gettok(char ** pbuf,int * psz)101*9a7741deSElliott Hughes static int gettok(char **pbuf, int *psz) /* get next input token */
102*9a7741deSElliott Hughes {
103*9a7741deSElliott Hughes int c, retc;
104*9a7741deSElliott Hughes char *buf = *pbuf;
105*9a7741deSElliott Hughes int sz = *psz;
106*9a7741deSElliott Hughes char *bp = buf;
107*9a7741deSElliott Hughes
108*9a7741deSElliott Hughes c = input();
109*9a7741deSElliott Hughes if (c == 0)
110*9a7741deSElliott Hughes return 0;
111*9a7741deSElliott Hughes buf[0] = c;
112*9a7741deSElliott Hughes buf[1] = 0;
113*9a7741deSElliott Hughes if (!isalnum(c) && c != '.' && c != '_')
114*9a7741deSElliott Hughes return c;
115*9a7741deSElliott Hughes
116*9a7741deSElliott Hughes *bp++ = c;
117*9a7741deSElliott Hughes if (isalpha(c) || c == '_') { /* it's a varname */
118*9a7741deSElliott Hughes for ( ; (c = input()) != 0; ) {
119*9a7741deSElliott Hughes if (bp-buf >= sz)
120*9a7741deSElliott Hughes if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok"))
121*9a7741deSElliott Hughes FATAL( "out of space for name %.10s...", buf );
122*9a7741deSElliott Hughes if (isalnum(c) || c == '_')
123*9a7741deSElliott Hughes *bp++ = c;
124*9a7741deSElliott Hughes else {
125*9a7741deSElliott Hughes *bp = 0;
126*9a7741deSElliott Hughes unput(c);
127*9a7741deSElliott Hughes break;
128*9a7741deSElliott Hughes }
129*9a7741deSElliott Hughes }
130*9a7741deSElliott Hughes *bp = 0;
131*9a7741deSElliott Hughes retc = 'a'; /* alphanumeric */
132*9a7741deSElliott Hughes } else { /* maybe it's a number, but could be . */
133*9a7741deSElliott Hughes char *rem;
134*9a7741deSElliott Hughes /* read input until can't be a number */
135*9a7741deSElliott Hughes for ( ; (c = input()) != 0; ) {
136*9a7741deSElliott Hughes if (bp-buf >= sz)
137*9a7741deSElliott Hughes if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok"))
138*9a7741deSElliott Hughes FATAL( "out of space for number %.10s...", buf );
139*9a7741deSElliott Hughes if (isdigit(c) || c == 'e' || c == 'E'
140*9a7741deSElliott Hughes || c == '.' || c == '+' || c == '-')
141*9a7741deSElliott Hughes *bp++ = c;
142*9a7741deSElliott Hughes else {
143*9a7741deSElliott Hughes unput(c);
144*9a7741deSElliott Hughes break;
145*9a7741deSElliott Hughes }
146*9a7741deSElliott Hughes }
147*9a7741deSElliott Hughes *bp = 0;
148*9a7741deSElliott Hughes strtod(buf, &rem); /* parse the number */
149*9a7741deSElliott Hughes if (rem == buf) { /* it wasn't a valid number at all */
150*9a7741deSElliott Hughes buf[1] = 0; /* return one character as token */
151*9a7741deSElliott Hughes retc = (uschar)buf[0]; /* character is its own type */
152*9a7741deSElliott Hughes unputstr(rem+1); /* put rest back for later */
153*9a7741deSElliott Hughes } else { /* some prefix was a number */
154*9a7741deSElliott Hughes unputstr(rem); /* put rest back for later */
155*9a7741deSElliott Hughes rem[0] = 0; /* truncate buf after number part */
156*9a7741deSElliott Hughes retc = '0'; /* type is number */
157*9a7741deSElliott Hughes }
158*9a7741deSElliott Hughes }
159*9a7741deSElliott Hughes *pbuf = buf;
160*9a7741deSElliott Hughes *psz = sz;
161*9a7741deSElliott Hughes return retc;
162*9a7741deSElliott Hughes }
163*9a7741deSElliott Hughes
164*9a7741deSElliott Hughes int word(char *);
165*9a7741deSElliott Hughes int string(void);
166*9a7741deSElliott Hughes int regexpr(void);
167*9a7741deSElliott Hughes bool sc = false; /* true => return a } right now */
168*9a7741deSElliott Hughes bool reg = false; /* true => return a REGEXPR now */
169*9a7741deSElliott Hughes
yylex(void)170*9a7741deSElliott Hughes int yylex(void)
171*9a7741deSElliott Hughes {
172*9a7741deSElliott Hughes int c;
173*9a7741deSElliott Hughes static char *buf = NULL;
174*9a7741deSElliott Hughes static int bufsize = 5; /* BUG: setting this small causes core dump! */
175*9a7741deSElliott Hughes
176*9a7741deSElliott Hughes if (buf == NULL && (buf = (char *) malloc(bufsize)) == NULL)
177*9a7741deSElliott Hughes FATAL( "out of space in yylex" );
178*9a7741deSElliott Hughes if (sc) {
179*9a7741deSElliott Hughes sc = false;
180*9a7741deSElliott Hughes RET('}');
181*9a7741deSElliott Hughes }
182*9a7741deSElliott Hughes if (reg) {
183*9a7741deSElliott Hughes reg = false;
184*9a7741deSElliott Hughes return regexpr();
185*9a7741deSElliott Hughes }
186*9a7741deSElliott Hughes for (;;) {
187*9a7741deSElliott Hughes c = gettok(&buf, &bufsize);
188*9a7741deSElliott Hughes if (c == 0)
189*9a7741deSElliott Hughes return 0;
190*9a7741deSElliott Hughes if (isalpha(c) || c == '_')
191*9a7741deSElliott Hughes return word(buf);
192*9a7741deSElliott Hughes if (isdigit(c)) {
193*9a7741deSElliott Hughes char *cp = tostring(buf);
194*9a7741deSElliott Hughes double result;
195*9a7741deSElliott Hughes
196*9a7741deSElliott Hughes if (is_number(cp, & result))
197*9a7741deSElliott Hughes yylval.cp = setsymtab(buf, cp, result, CON|NUM, symtab);
198*9a7741deSElliott Hughes else
199*9a7741deSElliott Hughes yylval.cp = setsymtab(buf, cp, 0.0, STR, symtab);
200*9a7741deSElliott Hughes free(cp);
201*9a7741deSElliott Hughes /* should this also have STR set? */
202*9a7741deSElliott Hughes RET(NUMBER);
203*9a7741deSElliott Hughes }
204*9a7741deSElliott Hughes
205*9a7741deSElliott Hughes yylval.i = c;
206*9a7741deSElliott Hughes switch (c) {
207*9a7741deSElliott Hughes case '\n': /* {EOL} */
208*9a7741deSElliott Hughes lineno++;
209*9a7741deSElliott Hughes RET(NL);
210*9a7741deSElliott Hughes case '\r': /* assume \n is coming */
211*9a7741deSElliott Hughes case ' ': /* {WS}+ */
212*9a7741deSElliott Hughes case '\t':
213*9a7741deSElliott Hughes break;
214*9a7741deSElliott Hughes case '#': /* #.* strip comments */
215*9a7741deSElliott Hughes while ((c = input()) != '\n' && c != 0)
216*9a7741deSElliott Hughes ;
217*9a7741deSElliott Hughes unput(c);
218*9a7741deSElliott Hughes /*
219*9a7741deSElliott Hughes * Next line is a hack, it compensates for
220*9a7741deSElliott Hughes * unput's treatment of \n.
221*9a7741deSElliott Hughes */
222*9a7741deSElliott Hughes lineno++;
223*9a7741deSElliott Hughes break;
224*9a7741deSElliott Hughes case ';':
225*9a7741deSElliott Hughes RET(';');
226*9a7741deSElliott Hughes case '\\':
227*9a7741deSElliott Hughes if (peek() == '\n') {
228*9a7741deSElliott Hughes input();
229*9a7741deSElliott Hughes lineno++;
230*9a7741deSElliott Hughes } else if (peek() == '\r') {
231*9a7741deSElliott Hughes input(); input(); /* \n */
232*9a7741deSElliott Hughes lineno++;
233*9a7741deSElliott Hughes } else {
234*9a7741deSElliott Hughes RET(c);
235*9a7741deSElliott Hughes }
236*9a7741deSElliott Hughes break;
237*9a7741deSElliott Hughes case '&':
238*9a7741deSElliott Hughes if (peek() == '&') {
239*9a7741deSElliott Hughes input(); RET(AND);
240*9a7741deSElliott Hughes } else
241*9a7741deSElliott Hughes RET('&');
242*9a7741deSElliott Hughes case '|':
243*9a7741deSElliott Hughes if (peek() == '|') {
244*9a7741deSElliott Hughes input(); RET(BOR);
245*9a7741deSElliott Hughes } else
246*9a7741deSElliott Hughes RET('|');
247*9a7741deSElliott Hughes case '!':
248*9a7741deSElliott Hughes if (peek() == '=') {
249*9a7741deSElliott Hughes input(); yylval.i = NE; RET(NE);
250*9a7741deSElliott Hughes } else if (peek() == '~') {
251*9a7741deSElliott Hughes input(); yylval.i = NOTMATCH; RET(MATCHOP);
252*9a7741deSElliott Hughes } else
253*9a7741deSElliott Hughes RET(NOT);
254*9a7741deSElliott Hughes case '~':
255*9a7741deSElliott Hughes yylval.i = MATCH;
256*9a7741deSElliott Hughes RET(MATCHOP);
257*9a7741deSElliott Hughes case '<':
258*9a7741deSElliott Hughes if (peek() == '=') {
259*9a7741deSElliott Hughes input(); yylval.i = LE; RET(LE);
260*9a7741deSElliott Hughes } else {
261*9a7741deSElliott Hughes yylval.i = LT; RET(LT);
262*9a7741deSElliott Hughes }
263*9a7741deSElliott Hughes case '=':
264*9a7741deSElliott Hughes if (peek() == '=') {
265*9a7741deSElliott Hughes input(); yylval.i = EQ; RET(EQ);
266*9a7741deSElliott Hughes } else {
267*9a7741deSElliott Hughes yylval.i = ASSIGN; RET(ASGNOP);
268*9a7741deSElliott Hughes }
269*9a7741deSElliott Hughes case '>':
270*9a7741deSElliott Hughes if (peek() == '=') {
271*9a7741deSElliott Hughes input(); yylval.i = GE; RET(GE);
272*9a7741deSElliott Hughes } else if (peek() == '>') {
273*9a7741deSElliott Hughes input(); yylval.i = APPEND; RET(APPEND);
274*9a7741deSElliott Hughes } else {
275*9a7741deSElliott Hughes yylval.i = GT; RET(GT);
276*9a7741deSElliott Hughes }
277*9a7741deSElliott Hughes case '+':
278*9a7741deSElliott Hughes if (peek() == '+') {
279*9a7741deSElliott Hughes input(); yylval.i = INCR; RET(INCR);
280*9a7741deSElliott Hughes } else if (peek() == '=') {
281*9a7741deSElliott Hughes input(); yylval.i = ADDEQ; RET(ASGNOP);
282*9a7741deSElliott Hughes } else
283*9a7741deSElliott Hughes RET('+');
284*9a7741deSElliott Hughes case '-':
285*9a7741deSElliott Hughes if (peek() == '-') {
286*9a7741deSElliott Hughes input(); yylval.i = DECR; RET(DECR);
287*9a7741deSElliott Hughes } else if (peek() == '=') {
288*9a7741deSElliott Hughes input(); yylval.i = SUBEQ; RET(ASGNOP);
289*9a7741deSElliott Hughes } else
290*9a7741deSElliott Hughes RET('-');
291*9a7741deSElliott Hughes case '*':
292*9a7741deSElliott Hughes if (peek() == '=') { /* *= */
293*9a7741deSElliott Hughes input(); yylval.i = MULTEQ; RET(ASGNOP);
294*9a7741deSElliott Hughes } else if (peek() == '*') { /* ** or **= */
295*9a7741deSElliott Hughes input(); /* eat 2nd * */
296*9a7741deSElliott Hughes if (peek() == '=') {
297*9a7741deSElliott Hughes input(); yylval.i = POWEQ; RET(ASGNOP);
298*9a7741deSElliott Hughes } else {
299*9a7741deSElliott Hughes RET(POWER);
300*9a7741deSElliott Hughes }
301*9a7741deSElliott Hughes } else
302*9a7741deSElliott Hughes RET('*');
303*9a7741deSElliott Hughes case '/':
304*9a7741deSElliott Hughes RET('/');
305*9a7741deSElliott Hughes case '%':
306*9a7741deSElliott Hughes if (peek() == '=') {
307*9a7741deSElliott Hughes input(); yylval.i = MODEQ; RET(ASGNOP);
308*9a7741deSElliott Hughes } else
309*9a7741deSElliott Hughes RET('%');
310*9a7741deSElliott Hughes case '^':
311*9a7741deSElliott Hughes if (peek() == '=') {
312*9a7741deSElliott Hughes input(); yylval.i = POWEQ; RET(ASGNOP);
313*9a7741deSElliott Hughes } else
314*9a7741deSElliott Hughes RET(POWER);
315*9a7741deSElliott Hughes
316*9a7741deSElliott Hughes case '$':
317*9a7741deSElliott Hughes /* BUG: awkward, if not wrong */
318*9a7741deSElliott Hughes c = gettok(&buf, &bufsize);
319*9a7741deSElliott Hughes if (isalpha(c)) {
320*9a7741deSElliott Hughes if (strcmp(buf, "NF") == 0) { /* very special */
321*9a7741deSElliott Hughes unputstr("(NF)");
322*9a7741deSElliott Hughes RET(INDIRECT);
323*9a7741deSElliott Hughes }
324*9a7741deSElliott Hughes c = peek();
325*9a7741deSElliott Hughes if (c == '(' || c == '[' || (infunc && isarg(buf) >= 0)) {
326*9a7741deSElliott Hughes unputstr(buf);
327*9a7741deSElliott Hughes RET(INDIRECT);
328*9a7741deSElliott Hughes }
329*9a7741deSElliott Hughes yylval.cp = setsymtab(buf, "", 0.0, STR|NUM, symtab);
330*9a7741deSElliott Hughes RET(IVAR);
331*9a7741deSElliott Hughes } else if (c == 0) { /* */
332*9a7741deSElliott Hughes SYNTAX( "unexpected end of input after $" );
333*9a7741deSElliott Hughes RET(';');
334*9a7741deSElliott Hughes } else {
335*9a7741deSElliott Hughes unputstr(buf);
336*9a7741deSElliott Hughes RET(INDIRECT);
337*9a7741deSElliott Hughes }
338*9a7741deSElliott Hughes
339*9a7741deSElliott Hughes case '}':
340*9a7741deSElliott Hughes if (--bracecnt < 0)
341*9a7741deSElliott Hughes SYNTAX( "extra }" );
342*9a7741deSElliott Hughes sc = true;
343*9a7741deSElliott Hughes RET(';');
344*9a7741deSElliott Hughes case ']':
345*9a7741deSElliott Hughes if (--brackcnt < 0)
346*9a7741deSElliott Hughes SYNTAX( "extra ]" );
347*9a7741deSElliott Hughes RET(']');
348*9a7741deSElliott Hughes case ')':
349*9a7741deSElliott Hughes if (--parencnt < 0)
350*9a7741deSElliott Hughes SYNTAX( "extra )" );
351*9a7741deSElliott Hughes RET(')');
352*9a7741deSElliott Hughes case '{':
353*9a7741deSElliott Hughes bracecnt++;
354*9a7741deSElliott Hughes RET('{');
355*9a7741deSElliott Hughes case '[':
356*9a7741deSElliott Hughes brackcnt++;
357*9a7741deSElliott Hughes RET('[');
358*9a7741deSElliott Hughes case '(':
359*9a7741deSElliott Hughes parencnt++;
360*9a7741deSElliott Hughes RET('(');
361*9a7741deSElliott Hughes
362*9a7741deSElliott Hughes case '"':
363*9a7741deSElliott Hughes return string(); /* BUG: should be like tran.c ? */
364*9a7741deSElliott Hughes
365*9a7741deSElliott Hughes default:
366*9a7741deSElliott Hughes RET(c);
367*9a7741deSElliott Hughes }
368*9a7741deSElliott Hughes }
369*9a7741deSElliott Hughes }
370*9a7741deSElliott Hughes
371*9a7741deSElliott Hughes extern int runetochar(char *str, int c);
372*9a7741deSElliott Hughes
string(void)373*9a7741deSElliott Hughes int string(void)
374*9a7741deSElliott Hughes {
375*9a7741deSElliott Hughes int c, n;
376*9a7741deSElliott Hughes char *s, *bp;
377*9a7741deSElliott Hughes static char *buf = NULL;
378*9a7741deSElliott Hughes static int bufsz = 500;
379*9a7741deSElliott Hughes
380*9a7741deSElliott Hughes if (buf == NULL && (buf = (char *) malloc(bufsz)) == NULL)
381*9a7741deSElliott Hughes FATAL("out of space for strings");
382*9a7741deSElliott Hughes for (bp = buf; (c = input()) != '"'; ) {
383*9a7741deSElliott Hughes if (!adjbuf(&buf, &bufsz, bp-buf+2, 500, &bp, "string"))
384*9a7741deSElliott Hughes FATAL("out of space for string %.10s...", buf);
385*9a7741deSElliott Hughes switch (c) {
386*9a7741deSElliott Hughes case '\n':
387*9a7741deSElliott Hughes case '\r':
388*9a7741deSElliott Hughes case 0:
389*9a7741deSElliott Hughes *bp = '\0';
390*9a7741deSElliott Hughes SYNTAX( "non-terminated string %.10s...", buf );
391*9a7741deSElliott Hughes if (c == 0) /* hopeless */
392*9a7741deSElliott Hughes FATAL( "giving up" );
393*9a7741deSElliott Hughes lineno++;
394*9a7741deSElliott Hughes break;
395*9a7741deSElliott Hughes case '\\':
396*9a7741deSElliott Hughes c = input();
397*9a7741deSElliott Hughes switch (c) {
398*9a7741deSElliott Hughes case '\n': break;
399*9a7741deSElliott Hughes case '"': *bp++ = '"'; break;
400*9a7741deSElliott Hughes case 'n': *bp++ = '\n'; break;
401*9a7741deSElliott Hughes case 't': *bp++ = '\t'; break;
402*9a7741deSElliott Hughes case 'f': *bp++ = '\f'; break;
403*9a7741deSElliott Hughes case 'r': *bp++ = '\r'; break;
404*9a7741deSElliott Hughes case 'b': *bp++ = '\b'; break;
405*9a7741deSElliott Hughes case 'v': *bp++ = '\v'; break;
406*9a7741deSElliott Hughes case 'a': *bp++ = '\a'; break;
407*9a7741deSElliott Hughes case '\\': *bp++ = '\\'; break;
408*9a7741deSElliott Hughes
409*9a7741deSElliott Hughes case '0': case '1': case '2': /* octal: \d \dd \ddd */
410*9a7741deSElliott Hughes case '3': case '4': case '5': case '6': case '7':
411*9a7741deSElliott Hughes n = c - '0';
412*9a7741deSElliott Hughes if ((c = peek()) >= '0' && c < '8') {
413*9a7741deSElliott Hughes n = 8 * n + input() - '0';
414*9a7741deSElliott Hughes if ((c = peek()) >= '0' && c < '8')
415*9a7741deSElliott Hughes n = 8 * n + input() - '0';
416*9a7741deSElliott Hughes }
417*9a7741deSElliott Hughes *bp++ = n;
418*9a7741deSElliott Hughes break;
419*9a7741deSElliott Hughes
420*9a7741deSElliott Hughes case 'x': /* hex \x0-9a-fA-F (exactly two) */
421*9a7741deSElliott Hughes {
422*9a7741deSElliott Hughes int i;
423*9a7741deSElliott Hughes
424*9a7741deSElliott Hughes if (!isxdigit(peek())) {
425*9a7741deSElliott Hughes unput(c);
426*9a7741deSElliott Hughes break;
427*9a7741deSElliott Hughes }
428*9a7741deSElliott Hughes n = 0;
429*9a7741deSElliott Hughes for (i = 0; i < 2; i++) {
430*9a7741deSElliott Hughes c = input();
431*9a7741deSElliott Hughes if (c == 0)
432*9a7741deSElliott Hughes break;
433*9a7741deSElliott Hughes if (isxdigit(c)) {
434*9a7741deSElliott Hughes c = tolower(c);
435*9a7741deSElliott Hughes n *= 16;
436*9a7741deSElliott Hughes if (isdigit(c))
437*9a7741deSElliott Hughes n += (c - '0');
438*9a7741deSElliott Hughes else
439*9a7741deSElliott Hughes n += 10 + (c - 'a');
440*9a7741deSElliott Hughes } else {
441*9a7741deSElliott Hughes unput(c);
442*9a7741deSElliott Hughes break;
443*9a7741deSElliott Hughes }
444*9a7741deSElliott Hughes }
445*9a7741deSElliott Hughes if (i)
446*9a7741deSElliott Hughes *bp++ = n;
447*9a7741deSElliott Hughes break;
448*9a7741deSElliott Hughes }
449*9a7741deSElliott Hughes
450*9a7741deSElliott Hughes case 'u': /* utf \u0-9a-fA-F (1..8) */
451*9a7741deSElliott Hughes {
452*9a7741deSElliott Hughes int i;
453*9a7741deSElliott Hughes
454*9a7741deSElliott Hughes n = 0;
455*9a7741deSElliott Hughes for (i = 0; i < 8; i++) {
456*9a7741deSElliott Hughes c = input();
457*9a7741deSElliott Hughes if (!isxdigit(c) || c == 0)
458*9a7741deSElliott Hughes break;
459*9a7741deSElliott Hughes c = tolower(c);
460*9a7741deSElliott Hughes n *= 16;
461*9a7741deSElliott Hughes if (isdigit(c))
462*9a7741deSElliott Hughes n += (c - '0');
463*9a7741deSElliott Hughes else
464*9a7741deSElliott Hughes n += 10 + (c - 'a');
465*9a7741deSElliott Hughes }
466*9a7741deSElliott Hughes unput(c);
467*9a7741deSElliott Hughes bp += runetochar(bp, n);
468*9a7741deSElliott Hughes break;
469*9a7741deSElliott Hughes }
470*9a7741deSElliott Hughes
471*9a7741deSElliott Hughes default:
472*9a7741deSElliott Hughes *bp++ = c;
473*9a7741deSElliott Hughes break;
474*9a7741deSElliott Hughes }
475*9a7741deSElliott Hughes break;
476*9a7741deSElliott Hughes default:
477*9a7741deSElliott Hughes *bp++ = c;
478*9a7741deSElliott Hughes break;
479*9a7741deSElliott Hughes }
480*9a7741deSElliott Hughes }
481*9a7741deSElliott Hughes *bp = 0;
482*9a7741deSElliott Hughes s = tostring(buf);
483*9a7741deSElliott Hughes *bp++ = ' '; *bp++ = '\0';
484*9a7741deSElliott Hughes yylval.cp = setsymtab(buf, s, 0.0, CON|STR|DONTFREE, symtab);
485*9a7741deSElliott Hughes free(s);
486*9a7741deSElliott Hughes RET(STRING);
487*9a7741deSElliott Hughes }
488*9a7741deSElliott Hughes
489*9a7741deSElliott Hughes
binsearch(char * w,const Keyword * kp,int n)490*9a7741deSElliott Hughes static int binsearch(char *w, const Keyword *kp, int n)
491*9a7741deSElliott Hughes {
492*9a7741deSElliott Hughes int cond, low, mid, high;
493*9a7741deSElliott Hughes
494*9a7741deSElliott Hughes low = 0;
495*9a7741deSElliott Hughes high = n - 1;
496*9a7741deSElliott Hughes while (low <= high) {
497*9a7741deSElliott Hughes mid = (low + high) / 2;
498*9a7741deSElliott Hughes if ((cond = strcmp(w, kp[mid].word)) < 0)
499*9a7741deSElliott Hughes high = mid - 1;
500*9a7741deSElliott Hughes else if (cond > 0)
501*9a7741deSElliott Hughes low = mid + 1;
502*9a7741deSElliott Hughes else
503*9a7741deSElliott Hughes return mid;
504*9a7741deSElliott Hughes }
505*9a7741deSElliott Hughes return -1;
506*9a7741deSElliott Hughes }
507*9a7741deSElliott Hughes
word(char * w)508*9a7741deSElliott Hughes int word(char *w)
509*9a7741deSElliott Hughes {
510*9a7741deSElliott Hughes const Keyword *kp;
511*9a7741deSElliott Hughes int c, n;
512*9a7741deSElliott Hughes
513*9a7741deSElliott Hughes n = binsearch(w, keywords, sizeof(keywords)/sizeof(keywords[0]));
514*9a7741deSElliott Hughes if (n != -1) { /* found in table */
515*9a7741deSElliott Hughes kp = keywords + n;
516*9a7741deSElliott Hughes yylval.i = kp->sub;
517*9a7741deSElliott Hughes switch (kp->type) { /* special handling */
518*9a7741deSElliott Hughes case BLTIN:
519*9a7741deSElliott Hughes if (kp->sub == FSYSTEM && safe)
520*9a7741deSElliott Hughes SYNTAX( "system is unsafe" );
521*9a7741deSElliott Hughes RET(kp->type);
522*9a7741deSElliott Hughes case FUNC:
523*9a7741deSElliott Hughes if (infunc)
524*9a7741deSElliott Hughes SYNTAX( "illegal nested function" );
525*9a7741deSElliott Hughes RET(kp->type);
526*9a7741deSElliott Hughes case RETURN:
527*9a7741deSElliott Hughes if (!infunc)
528*9a7741deSElliott Hughes SYNTAX( "return not in function" );
529*9a7741deSElliott Hughes RET(kp->type);
530*9a7741deSElliott Hughes case VARNF:
531*9a7741deSElliott Hughes yylval.cp = setsymtab("NF", "", 0.0, NUM, symtab);
532*9a7741deSElliott Hughes RET(VARNF);
533*9a7741deSElliott Hughes default:
534*9a7741deSElliott Hughes RET(kp->type);
535*9a7741deSElliott Hughes }
536*9a7741deSElliott Hughes }
537*9a7741deSElliott Hughes c = peek(); /* look for '(' */
538*9a7741deSElliott Hughes if (c != '(' && infunc && (n=isarg(w)) >= 0) {
539*9a7741deSElliott Hughes yylval.i = n;
540*9a7741deSElliott Hughes RET(ARG);
541*9a7741deSElliott Hughes } else {
542*9a7741deSElliott Hughes yylval.cp = setsymtab(w, "", 0.0, STR|NUM|DONTFREE, symtab);
543*9a7741deSElliott Hughes if (c == '(') {
544*9a7741deSElliott Hughes RET(CALL);
545*9a7741deSElliott Hughes } else {
546*9a7741deSElliott Hughes RET(VAR);
547*9a7741deSElliott Hughes }
548*9a7741deSElliott Hughes }
549*9a7741deSElliott Hughes }
550*9a7741deSElliott Hughes
startreg(void)551*9a7741deSElliott Hughes void startreg(void) /* next call to yylex will return a regular expression */
552*9a7741deSElliott Hughes {
553*9a7741deSElliott Hughes reg = true;
554*9a7741deSElliott Hughes }
555*9a7741deSElliott Hughes
regexpr(void)556*9a7741deSElliott Hughes int regexpr(void)
557*9a7741deSElliott Hughes {
558*9a7741deSElliott Hughes int c;
559*9a7741deSElliott Hughes static char *buf = NULL;
560*9a7741deSElliott Hughes static int bufsz = 500;
561*9a7741deSElliott Hughes char *bp;
562*9a7741deSElliott Hughes
563*9a7741deSElliott Hughes if (buf == NULL && (buf = (char *) malloc(bufsz)) == NULL)
564*9a7741deSElliott Hughes FATAL("out of space for reg expr");
565*9a7741deSElliott Hughes bp = buf;
566*9a7741deSElliott Hughes for ( ; (c = input()) != '/' && c != 0; ) {
567*9a7741deSElliott Hughes if (!adjbuf(&buf, &bufsz, bp-buf+3, 500, &bp, "regexpr"))
568*9a7741deSElliott Hughes FATAL("out of space for reg expr %.10s...", buf);
569*9a7741deSElliott Hughes if (c == '\n') {
570*9a7741deSElliott Hughes *bp = '\0';
571*9a7741deSElliott Hughes SYNTAX( "newline in regular expression %.10s...", buf );
572*9a7741deSElliott Hughes unput('\n');
573*9a7741deSElliott Hughes break;
574*9a7741deSElliott Hughes } else if (c == '\\') {
575*9a7741deSElliott Hughes *bp++ = '\\';
576*9a7741deSElliott Hughes *bp++ = input();
577*9a7741deSElliott Hughes } else {
578*9a7741deSElliott Hughes *bp++ = c;
579*9a7741deSElliott Hughes }
580*9a7741deSElliott Hughes }
581*9a7741deSElliott Hughes *bp = 0;
582*9a7741deSElliott Hughes if (c == 0)
583*9a7741deSElliott Hughes SYNTAX("non-terminated regular expression %.10s...", buf);
584*9a7741deSElliott Hughes yylval.s = tostring(buf);
585*9a7741deSElliott Hughes unput('/');
586*9a7741deSElliott Hughes RET(REGEXPR);
587*9a7741deSElliott Hughes }
588*9a7741deSElliott Hughes
/* low-level lexical stuff, sort of inherited from lex */

/* Record of recently read characters; input() wraps ep back to the start. */
char	ebuf[300];
char	*ep = ebuf;		/* where input() stores the next character */

/* Pushback stack: unput() pushes at yysptr, input() pops from it first. */
char	yysbuf[100];		/* pushback buffer */
char	*yysptr = yysbuf;	/* one past the most recently pushed character */

FILE	*yyin = NULL;		/* not referenced in this part of the file */
596*9a7741deSElliott Hughes
input(void)597*9a7741deSElliott Hughes int input(void) /* get next lexical input character */
598*9a7741deSElliott Hughes {
599*9a7741deSElliott Hughes int c;
600*9a7741deSElliott Hughes extern char *lexprog;
601*9a7741deSElliott Hughes
602*9a7741deSElliott Hughes if (yysptr > yysbuf)
603*9a7741deSElliott Hughes c = (uschar)*--yysptr;
604*9a7741deSElliott Hughes else if (lexprog != NULL) { /* awk '...' */
605*9a7741deSElliott Hughes if ((c = (uschar)*lexprog) != 0)
606*9a7741deSElliott Hughes lexprog++;
607*9a7741deSElliott Hughes } else /* awk -f ... */
608*9a7741deSElliott Hughes c = pgetc();
609*9a7741deSElliott Hughes if (c == EOF)
610*9a7741deSElliott Hughes c = 0;
611*9a7741deSElliott Hughes if (ep >= ebuf + sizeof ebuf)
612*9a7741deSElliott Hughes ep = ebuf;
613*9a7741deSElliott Hughes *ep = c;
614*9a7741deSElliott Hughes if (c != 0) {
615*9a7741deSElliott Hughes ep++;
616*9a7741deSElliott Hughes }
617*9a7741deSElliott Hughes return (c);
618*9a7741deSElliott Hughes }
619*9a7741deSElliott Hughes
unput(int c)620*9a7741deSElliott Hughes void unput(int c) /* put lexical character back on input */
621*9a7741deSElliott Hughes {
622*9a7741deSElliott Hughes if (c == '\n')
623*9a7741deSElliott Hughes lineno--;
624*9a7741deSElliott Hughes if (yysptr >= yysbuf + sizeof(yysbuf))
625*9a7741deSElliott Hughes FATAL("pushed back too much: %.20s...", yysbuf);
626*9a7741deSElliott Hughes *yysptr++ = c;
627*9a7741deSElliott Hughes if (--ep < ebuf)
628*9a7741deSElliott Hughes ep = ebuf + sizeof(ebuf) - 1;
629*9a7741deSElliott Hughes }
630*9a7741deSElliott Hughes
/*
 * unputstr - push the whole string s back onto the input.
 *
 * Characters are pushed last-to-first so that subsequent input() calls
 * return them in their original order.
 */
void unputstr(const char *s)
{
	const char *p = s + strlen(s);

	while (p > s)
		unput(*--p);
}
638