xref: /aosp_15_r20/external/one-true-awk/lex.c (revision 9a7741de182b2776d7b30d6355f2585c0780a51b)
1*9a7741deSElliott Hughes /****************************************************************
2*9a7741deSElliott Hughes Copyright (C) Lucent Technologies 1997
3*9a7741deSElliott Hughes All Rights Reserved
4*9a7741deSElliott Hughes 
5*9a7741deSElliott Hughes Permission to use, copy, modify, and distribute this software and
6*9a7741deSElliott Hughes its documentation for any purpose and without fee is hereby
7*9a7741deSElliott Hughes granted, provided that the above copyright notice appear in all
8*9a7741deSElliott Hughes copies and that both that the copyright notice and this
9*9a7741deSElliott Hughes permission notice and warranty disclaimer appear in supporting
10*9a7741deSElliott Hughes documentation, and that the name Lucent Technologies or any of
11*9a7741deSElliott Hughes its entities not be used in advertising or publicity pertaining
12*9a7741deSElliott Hughes to distribution of the software without specific, written prior
13*9a7741deSElliott Hughes permission.
14*9a7741deSElliott Hughes 
15*9a7741deSElliott Hughes LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16*9a7741deSElliott Hughes INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17*9a7741deSElliott Hughes IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18*9a7741deSElliott Hughes SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19*9a7741deSElliott Hughes WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20*9a7741deSElliott Hughes IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21*9a7741deSElliott Hughes ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22*9a7741deSElliott Hughes THIS SOFTWARE.
23*9a7741deSElliott Hughes ****************************************************************/
24*9a7741deSElliott Hughes 
25*9a7741deSElliott Hughes #include <stdio.h>
26*9a7741deSElliott Hughes #include <stdlib.h>
27*9a7741deSElliott Hughes #include <string.h>
28*9a7741deSElliott Hughes #include <ctype.h>
29*9a7741deSElliott Hughes #include "awk.h"
30*9a7741deSElliott Hughes #include "awkgram.tab.h"
31*9a7741deSElliott Hughes 
32*9a7741deSElliott Hughes extern YYSTYPE	yylval;
33*9a7741deSElliott Hughes extern bool	infunc;
34*9a7741deSElliott Hughes 
35*9a7741deSElliott Hughes int	lineno	= 1;
36*9a7741deSElliott Hughes int	bracecnt = 0;
37*9a7741deSElliott Hughes int	brackcnt  = 0;
38*9a7741deSElliott Hughes int	parencnt = 0;
39*9a7741deSElliott Hughes 
/* One entry of the reserved-word table searched by word(). */
typedef struct Keyword {
	const char *word;	/* spelling, compared with strcmp() in binsearch() */
	int	sub;		/* value word() stores in yylval.i on a match */
	int	type;		/* token code word() switches on and returns */
} Keyword;
45*9a7741deSElliott Hughes 
46*9a7741deSElliott Hughes const Keyword keywords[] = {	/* keep sorted: binary searched */
47*9a7741deSElliott Hughes 	{ "BEGIN",	XBEGIN,		XBEGIN },
48*9a7741deSElliott Hughes 	{ "END",	XEND,		XEND },
49*9a7741deSElliott Hughes 	{ "NF",		VARNF,		VARNF },
50*9a7741deSElliott Hughes 	{ "atan2",	FATAN,		BLTIN },
51*9a7741deSElliott Hughes 	{ "break",	BREAK,		BREAK },
52*9a7741deSElliott Hughes 	{ "close",	CLOSE,		CLOSE },
53*9a7741deSElliott Hughes 	{ "continue",	CONTINUE,	CONTINUE },
54*9a7741deSElliott Hughes 	{ "cos",	FCOS,		BLTIN },
55*9a7741deSElliott Hughes 	{ "delete",	DELETE,		DELETE },
56*9a7741deSElliott Hughes 	{ "do",		DO,		DO },
57*9a7741deSElliott Hughes 	{ "else",	ELSE,		ELSE },
58*9a7741deSElliott Hughes 	{ "exit",	EXIT,		EXIT },
59*9a7741deSElliott Hughes 	{ "exp",	FEXP,		BLTIN },
60*9a7741deSElliott Hughes 	{ "fflush",	FFLUSH,		BLTIN },
61*9a7741deSElliott Hughes 	{ "for",	FOR,		FOR },
62*9a7741deSElliott Hughes 	{ "func",	FUNC,		FUNC },
63*9a7741deSElliott Hughes 	{ "function",	FUNC,		FUNC },
64*9a7741deSElliott Hughes 	{ "getline",	GETLINE,	GETLINE },
65*9a7741deSElliott Hughes 	{ "gsub",	GSUB,		GSUB },
66*9a7741deSElliott Hughes 	{ "if",		IF,		IF },
67*9a7741deSElliott Hughes 	{ "in",		IN,		IN },
68*9a7741deSElliott Hughes 	{ "index",	INDEX,		INDEX },
69*9a7741deSElliott Hughes 	{ "int",	FINT,		BLTIN },
70*9a7741deSElliott Hughes 	{ "length",	FLENGTH,	BLTIN },
71*9a7741deSElliott Hughes 	{ "log",	FLOG,		BLTIN },
72*9a7741deSElliott Hughes 	{ "match",	MATCHFCN,	MATCHFCN },
73*9a7741deSElliott Hughes 	{ "next",	NEXT,		NEXT },
74*9a7741deSElliott Hughes 	{ "nextfile",	NEXTFILE,	NEXTFILE },
75*9a7741deSElliott Hughes 	{ "print",	PRINT,		PRINT },
76*9a7741deSElliott Hughes 	{ "printf",	PRINTF,		PRINTF },
77*9a7741deSElliott Hughes 	{ "rand",	FRAND,		BLTIN },
78*9a7741deSElliott Hughes 	{ "return",	RETURN,		RETURN },
79*9a7741deSElliott Hughes 	{ "sin",	FSIN,		BLTIN },
80*9a7741deSElliott Hughes 	{ "split",	SPLIT,		SPLIT },
81*9a7741deSElliott Hughes 	{ "sprintf",	SPRINTF,	SPRINTF },
82*9a7741deSElliott Hughes 	{ "sqrt",	FSQRT,		BLTIN },
83*9a7741deSElliott Hughes 	{ "srand",	FSRAND,		BLTIN },
84*9a7741deSElliott Hughes 	{ "sub",	SUB,		SUB },
85*9a7741deSElliott Hughes 	{ "substr",	SUBSTR,		SUBSTR },
86*9a7741deSElliott Hughes 	{ "system",	FSYSTEM,	BLTIN },
87*9a7741deSElliott Hughes 	{ "tolower",	FTOLOWER,	BLTIN },
88*9a7741deSElliott Hughes 	{ "toupper",	FTOUPPER,	BLTIN },
89*9a7741deSElliott Hughes 	{ "while",	WHILE,		WHILE },
90*9a7741deSElliott Hughes };
91*9a7741deSElliott Hughes 
/*
 * RET(x): return token x from the enclosing function, first printing its
 * name via tokname() when dbg is set.  x is evaluated twice, so it must
 * have no side effects.  Wrapped in do { } while (0) so that "RET(x);"
 * behaves as a single statement, including in front of an else.
 */
#define	RET(x)	do { if (dbg) printf("lex %s\n", tokname(x)); return (x); } while (0)
93*9a7741deSElliott Hughes 
/*
 * peek: report the next input character without consuming it.
 * Implemented as input() followed by unput(), so it carries unput()'s
 * side effects too (for example, lineno is decremented when the
 * character is a newline).
 */
static int peek(void)
{
	int next;

	next = input();
	unput(next);	/* hand it straight back */
	return next;
}
100*9a7741deSElliott Hughes 
gettok(char ** pbuf,int * psz)101*9a7741deSElliott Hughes static int gettok(char **pbuf, int *psz)	/* get next input token */
102*9a7741deSElliott Hughes {
103*9a7741deSElliott Hughes 	int c, retc;
104*9a7741deSElliott Hughes 	char *buf = *pbuf;
105*9a7741deSElliott Hughes 	int sz = *psz;
106*9a7741deSElliott Hughes 	char *bp = buf;
107*9a7741deSElliott Hughes 
108*9a7741deSElliott Hughes 	c = input();
109*9a7741deSElliott Hughes 	if (c == 0)
110*9a7741deSElliott Hughes 		return 0;
111*9a7741deSElliott Hughes 	buf[0] = c;
112*9a7741deSElliott Hughes 	buf[1] = 0;
113*9a7741deSElliott Hughes 	if (!isalnum(c) && c != '.' && c != '_')
114*9a7741deSElliott Hughes 		return c;
115*9a7741deSElliott Hughes 
116*9a7741deSElliott Hughes 	*bp++ = c;
117*9a7741deSElliott Hughes 	if (isalpha(c) || c == '_') {	/* it's a varname */
118*9a7741deSElliott Hughes 		for ( ; (c = input()) != 0; ) {
119*9a7741deSElliott Hughes 			if (bp-buf >= sz)
120*9a7741deSElliott Hughes 				if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok"))
121*9a7741deSElliott Hughes 					FATAL( "out of space for name %.10s...", buf );
122*9a7741deSElliott Hughes 			if (isalnum(c) || c == '_')
123*9a7741deSElliott Hughes 				*bp++ = c;
124*9a7741deSElliott Hughes 			else {
125*9a7741deSElliott Hughes 				*bp = 0;
126*9a7741deSElliott Hughes 				unput(c);
127*9a7741deSElliott Hughes 				break;
128*9a7741deSElliott Hughes 			}
129*9a7741deSElliott Hughes 		}
130*9a7741deSElliott Hughes 		*bp = 0;
131*9a7741deSElliott Hughes 		retc = 'a';	/* alphanumeric */
132*9a7741deSElliott Hughes 	} else {	/* maybe it's a number, but could be . */
133*9a7741deSElliott Hughes 		char *rem;
134*9a7741deSElliott Hughes 		/* read input until can't be a number */
135*9a7741deSElliott Hughes 		for ( ; (c = input()) != 0; ) {
136*9a7741deSElliott Hughes 			if (bp-buf >= sz)
137*9a7741deSElliott Hughes 				if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok"))
138*9a7741deSElliott Hughes 					FATAL( "out of space for number %.10s...", buf );
139*9a7741deSElliott Hughes 			if (isdigit(c) || c == 'e' || c == 'E'
140*9a7741deSElliott Hughes 			  || c == '.' || c == '+' || c == '-')
141*9a7741deSElliott Hughes 				*bp++ = c;
142*9a7741deSElliott Hughes 			else {
143*9a7741deSElliott Hughes 				unput(c);
144*9a7741deSElliott Hughes 				break;
145*9a7741deSElliott Hughes 			}
146*9a7741deSElliott Hughes 		}
147*9a7741deSElliott Hughes 		*bp = 0;
148*9a7741deSElliott Hughes 		strtod(buf, &rem);	/* parse the number */
149*9a7741deSElliott Hughes 		if (rem == buf) {	/* it wasn't a valid number at all */
150*9a7741deSElliott Hughes 			buf[1] = 0;	/* return one character as token */
151*9a7741deSElliott Hughes 			retc = (uschar)buf[0];	/* character is its own type */
152*9a7741deSElliott Hughes 			unputstr(rem+1); /* put rest back for later */
153*9a7741deSElliott Hughes 		} else {	/* some prefix was a number */
154*9a7741deSElliott Hughes 			unputstr(rem);	/* put rest back for later */
155*9a7741deSElliott Hughes 			rem[0] = 0;	/* truncate buf after number part */
156*9a7741deSElliott Hughes 			retc = '0';	/* type is number */
157*9a7741deSElliott Hughes 		}
158*9a7741deSElliott Hughes 	}
159*9a7741deSElliott Hughes 	*pbuf = buf;
160*9a7741deSElliott Hughes 	*psz = sz;
161*9a7741deSElliott Hughes 	return retc;
162*9a7741deSElliott Hughes }
163*9a7741deSElliott Hughes 
/* Helpers called from yylex() but defined further down. */
int	word(char *);
int	string(void);
int	regexpr(void);

/* One-shot flags tested (and cleared) at the top of yylex(). */
bool	sc	= false;	/* set by the '}' case: return a } on the next call */
bool	reg	= false;	/* set by startreg(): return a REGEXPR on the next call */
169*9a7741deSElliott Hughes 
yylex(void)170*9a7741deSElliott Hughes int yylex(void)
171*9a7741deSElliott Hughes {
172*9a7741deSElliott Hughes 	int c;
173*9a7741deSElliott Hughes 	static char *buf = NULL;
174*9a7741deSElliott Hughes 	static int bufsize = 5; /* BUG: setting this small causes core dump! */
175*9a7741deSElliott Hughes 
176*9a7741deSElliott Hughes 	if (buf == NULL && (buf = (char *) malloc(bufsize)) == NULL)
177*9a7741deSElliott Hughes 		FATAL( "out of space in yylex" );
178*9a7741deSElliott Hughes 	if (sc) {
179*9a7741deSElliott Hughes 		sc = false;
180*9a7741deSElliott Hughes 		RET('}');
181*9a7741deSElliott Hughes 	}
182*9a7741deSElliott Hughes 	if (reg) {
183*9a7741deSElliott Hughes 		reg = false;
184*9a7741deSElliott Hughes 		return regexpr();
185*9a7741deSElliott Hughes 	}
186*9a7741deSElliott Hughes 	for (;;) {
187*9a7741deSElliott Hughes 		c = gettok(&buf, &bufsize);
188*9a7741deSElliott Hughes 		if (c == 0)
189*9a7741deSElliott Hughes 			return 0;
190*9a7741deSElliott Hughes 		if (isalpha(c) || c == '_')
191*9a7741deSElliott Hughes 			return word(buf);
192*9a7741deSElliott Hughes 		if (isdigit(c)) {
193*9a7741deSElliott Hughes 			char *cp = tostring(buf);
194*9a7741deSElliott Hughes 			double result;
195*9a7741deSElliott Hughes 
196*9a7741deSElliott Hughes 			if (is_number(cp, & result))
197*9a7741deSElliott Hughes 				yylval.cp = setsymtab(buf, cp, result, CON|NUM, symtab);
198*9a7741deSElliott Hughes 			else
199*9a7741deSElliott Hughes 				yylval.cp = setsymtab(buf, cp, 0.0, STR, symtab);
200*9a7741deSElliott Hughes 			free(cp);
201*9a7741deSElliott Hughes 			/* should this also have STR set? */
202*9a7741deSElliott Hughes 			RET(NUMBER);
203*9a7741deSElliott Hughes 		}
204*9a7741deSElliott Hughes 
205*9a7741deSElliott Hughes 		yylval.i = c;
206*9a7741deSElliott Hughes 		switch (c) {
207*9a7741deSElliott Hughes 		case '\n':	/* {EOL} */
208*9a7741deSElliott Hughes 			lineno++;
209*9a7741deSElliott Hughes 			RET(NL);
210*9a7741deSElliott Hughes 		case '\r':	/* assume \n is coming */
211*9a7741deSElliott Hughes 		case ' ':	/* {WS}+ */
212*9a7741deSElliott Hughes 		case '\t':
213*9a7741deSElliott Hughes 			break;
214*9a7741deSElliott Hughes 		case '#':	/* #.* strip comments */
215*9a7741deSElliott Hughes 			while ((c = input()) != '\n' && c != 0)
216*9a7741deSElliott Hughes 				;
217*9a7741deSElliott Hughes 			unput(c);
218*9a7741deSElliott Hughes 			/*
219*9a7741deSElliott Hughes 			 * Next line is a hack, it compensates for
220*9a7741deSElliott Hughes 			 * unput's treatment of \n.
221*9a7741deSElliott Hughes 			 */
222*9a7741deSElliott Hughes 			lineno++;
223*9a7741deSElliott Hughes 			break;
224*9a7741deSElliott Hughes 		case ';':
225*9a7741deSElliott Hughes 			RET(';');
226*9a7741deSElliott Hughes 		case '\\':
227*9a7741deSElliott Hughes 			if (peek() == '\n') {
228*9a7741deSElliott Hughes 				input();
229*9a7741deSElliott Hughes 				lineno++;
230*9a7741deSElliott Hughes 			} else if (peek() == '\r') {
231*9a7741deSElliott Hughes 				input(); input();	/* \n */
232*9a7741deSElliott Hughes 				lineno++;
233*9a7741deSElliott Hughes 			} else {
234*9a7741deSElliott Hughes 				RET(c);
235*9a7741deSElliott Hughes 			}
236*9a7741deSElliott Hughes 			break;
237*9a7741deSElliott Hughes 		case '&':
238*9a7741deSElliott Hughes 			if (peek() == '&') {
239*9a7741deSElliott Hughes 				input(); RET(AND);
240*9a7741deSElliott Hughes 			} else
241*9a7741deSElliott Hughes 				RET('&');
242*9a7741deSElliott Hughes 		case '|':
243*9a7741deSElliott Hughes 			if (peek() == '|') {
244*9a7741deSElliott Hughes 				input(); RET(BOR);
245*9a7741deSElliott Hughes 			} else
246*9a7741deSElliott Hughes 				RET('|');
247*9a7741deSElliott Hughes 		case '!':
248*9a7741deSElliott Hughes 			if (peek() == '=') {
249*9a7741deSElliott Hughes 				input(); yylval.i = NE; RET(NE);
250*9a7741deSElliott Hughes 			} else if (peek() == '~') {
251*9a7741deSElliott Hughes 				input(); yylval.i = NOTMATCH; RET(MATCHOP);
252*9a7741deSElliott Hughes 			} else
253*9a7741deSElliott Hughes 				RET(NOT);
254*9a7741deSElliott Hughes 		case '~':
255*9a7741deSElliott Hughes 			yylval.i = MATCH;
256*9a7741deSElliott Hughes 			RET(MATCHOP);
257*9a7741deSElliott Hughes 		case '<':
258*9a7741deSElliott Hughes 			if (peek() == '=') {
259*9a7741deSElliott Hughes 				input(); yylval.i = LE; RET(LE);
260*9a7741deSElliott Hughes 			} else {
261*9a7741deSElliott Hughes 				yylval.i = LT; RET(LT);
262*9a7741deSElliott Hughes 			}
263*9a7741deSElliott Hughes 		case '=':
264*9a7741deSElliott Hughes 			if (peek() == '=') {
265*9a7741deSElliott Hughes 				input(); yylval.i = EQ; RET(EQ);
266*9a7741deSElliott Hughes 			} else {
267*9a7741deSElliott Hughes 				yylval.i = ASSIGN; RET(ASGNOP);
268*9a7741deSElliott Hughes 			}
269*9a7741deSElliott Hughes 		case '>':
270*9a7741deSElliott Hughes 			if (peek() == '=') {
271*9a7741deSElliott Hughes 				input(); yylval.i = GE; RET(GE);
272*9a7741deSElliott Hughes 			} else if (peek() == '>') {
273*9a7741deSElliott Hughes 				input(); yylval.i = APPEND; RET(APPEND);
274*9a7741deSElliott Hughes 			} else {
275*9a7741deSElliott Hughes 				yylval.i = GT; RET(GT);
276*9a7741deSElliott Hughes 			}
277*9a7741deSElliott Hughes 		case '+':
278*9a7741deSElliott Hughes 			if (peek() == '+') {
279*9a7741deSElliott Hughes 				input(); yylval.i = INCR; RET(INCR);
280*9a7741deSElliott Hughes 			} else if (peek() == '=') {
281*9a7741deSElliott Hughes 				input(); yylval.i = ADDEQ; RET(ASGNOP);
282*9a7741deSElliott Hughes 			} else
283*9a7741deSElliott Hughes 				RET('+');
284*9a7741deSElliott Hughes 		case '-':
285*9a7741deSElliott Hughes 			if (peek() == '-') {
286*9a7741deSElliott Hughes 				input(); yylval.i = DECR; RET(DECR);
287*9a7741deSElliott Hughes 			} else if (peek() == '=') {
288*9a7741deSElliott Hughes 				input(); yylval.i = SUBEQ; RET(ASGNOP);
289*9a7741deSElliott Hughes 			} else
290*9a7741deSElliott Hughes 				RET('-');
291*9a7741deSElliott Hughes 		case '*':
292*9a7741deSElliott Hughes 			if (peek() == '=') {	/* *= */
293*9a7741deSElliott Hughes 				input(); yylval.i = MULTEQ; RET(ASGNOP);
294*9a7741deSElliott Hughes 			} else if (peek() == '*') {	/* ** or **= */
295*9a7741deSElliott Hughes 				input();	/* eat 2nd * */
296*9a7741deSElliott Hughes 				if (peek() == '=') {
297*9a7741deSElliott Hughes 					input(); yylval.i = POWEQ; RET(ASGNOP);
298*9a7741deSElliott Hughes 				} else {
299*9a7741deSElliott Hughes 					RET(POWER);
300*9a7741deSElliott Hughes 				}
301*9a7741deSElliott Hughes 			} else
302*9a7741deSElliott Hughes 				RET('*');
303*9a7741deSElliott Hughes 		case '/':
304*9a7741deSElliott Hughes 			RET('/');
305*9a7741deSElliott Hughes 		case '%':
306*9a7741deSElliott Hughes 			if (peek() == '=') {
307*9a7741deSElliott Hughes 				input(); yylval.i = MODEQ; RET(ASGNOP);
308*9a7741deSElliott Hughes 			} else
309*9a7741deSElliott Hughes 				RET('%');
310*9a7741deSElliott Hughes 		case '^':
311*9a7741deSElliott Hughes 			if (peek() == '=') {
312*9a7741deSElliott Hughes 				input(); yylval.i = POWEQ; RET(ASGNOP);
313*9a7741deSElliott Hughes 			} else
314*9a7741deSElliott Hughes 				RET(POWER);
315*9a7741deSElliott Hughes 
316*9a7741deSElliott Hughes 		case '$':
317*9a7741deSElliott Hughes 			/* BUG: awkward, if not wrong */
318*9a7741deSElliott Hughes 			c = gettok(&buf, &bufsize);
319*9a7741deSElliott Hughes 			if (isalpha(c)) {
320*9a7741deSElliott Hughes 				if (strcmp(buf, "NF") == 0) {	/* very special */
321*9a7741deSElliott Hughes 					unputstr("(NF)");
322*9a7741deSElliott Hughes 					RET(INDIRECT);
323*9a7741deSElliott Hughes 				}
324*9a7741deSElliott Hughes 				c = peek();
325*9a7741deSElliott Hughes 				if (c == '(' || c == '[' || (infunc && isarg(buf) >= 0)) {
326*9a7741deSElliott Hughes 					unputstr(buf);
327*9a7741deSElliott Hughes 					RET(INDIRECT);
328*9a7741deSElliott Hughes 				}
329*9a7741deSElliott Hughes 				yylval.cp = setsymtab(buf, "", 0.0, STR|NUM, symtab);
330*9a7741deSElliott Hughes 				RET(IVAR);
331*9a7741deSElliott Hughes 			} else if (c == 0) {	/*  */
332*9a7741deSElliott Hughes 				SYNTAX( "unexpected end of input after $" );
333*9a7741deSElliott Hughes 				RET(';');
334*9a7741deSElliott Hughes 			} else {
335*9a7741deSElliott Hughes 				unputstr(buf);
336*9a7741deSElliott Hughes 				RET(INDIRECT);
337*9a7741deSElliott Hughes 			}
338*9a7741deSElliott Hughes 
339*9a7741deSElliott Hughes 		case '}':
340*9a7741deSElliott Hughes 			if (--bracecnt < 0)
341*9a7741deSElliott Hughes 				SYNTAX( "extra }" );
342*9a7741deSElliott Hughes 			sc = true;
343*9a7741deSElliott Hughes 			RET(';');
344*9a7741deSElliott Hughes 		case ']':
345*9a7741deSElliott Hughes 			if (--brackcnt < 0)
346*9a7741deSElliott Hughes 				SYNTAX( "extra ]" );
347*9a7741deSElliott Hughes 			RET(']');
348*9a7741deSElliott Hughes 		case ')':
349*9a7741deSElliott Hughes 			if (--parencnt < 0)
350*9a7741deSElliott Hughes 				SYNTAX( "extra )" );
351*9a7741deSElliott Hughes 			RET(')');
352*9a7741deSElliott Hughes 		case '{':
353*9a7741deSElliott Hughes 			bracecnt++;
354*9a7741deSElliott Hughes 			RET('{');
355*9a7741deSElliott Hughes 		case '[':
356*9a7741deSElliott Hughes 			brackcnt++;
357*9a7741deSElliott Hughes 			RET('[');
358*9a7741deSElliott Hughes 		case '(':
359*9a7741deSElliott Hughes 			parencnt++;
360*9a7741deSElliott Hughes 			RET('(');
361*9a7741deSElliott Hughes 
362*9a7741deSElliott Hughes 		case '"':
363*9a7741deSElliott Hughes 			return string();	/* BUG: should be like tran.c ? */
364*9a7741deSElliott Hughes 
365*9a7741deSElliott Hughes 		default:
366*9a7741deSElliott Hughes 			RET(c);
367*9a7741deSElliott Hughes 		}
368*9a7741deSElliott Hughes 	}
369*9a7741deSElliott Hughes }
370*9a7741deSElliott Hughes 
371*9a7741deSElliott Hughes extern int runetochar(char *str, int c);
372*9a7741deSElliott Hughes 
string(void)373*9a7741deSElliott Hughes int string(void)
374*9a7741deSElliott Hughes {
375*9a7741deSElliott Hughes 	int c, n;
376*9a7741deSElliott Hughes 	char *s, *bp;
377*9a7741deSElliott Hughes 	static char *buf = NULL;
378*9a7741deSElliott Hughes 	static int bufsz = 500;
379*9a7741deSElliott Hughes 
380*9a7741deSElliott Hughes 	if (buf == NULL && (buf = (char *) malloc(bufsz)) == NULL)
381*9a7741deSElliott Hughes 		FATAL("out of space for strings");
382*9a7741deSElliott Hughes 	for (bp = buf; (c = input()) != '"'; ) {
383*9a7741deSElliott Hughes 		if (!adjbuf(&buf, &bufsz, bp-buf+2, 500, &bp, "string"))
384*9a7741deSElliott Hughes 			FATAL("out of space for string %.10s...", buf);
385*9a7741deSElliott Hughes 		switch (c) {
386*9a7741deSElliott Hughes 		case '\n':
387*9a7741deSElliott Hughes 		case '\r':
388*9a7741deSElliott Hughes 		case 0:
389*9a7741deSElliott Hughes 			*bp = '\0';
390*9a7741deSElliott Hughes 			SYNTAX( "non-terminated string %.10s...", buf );
391*9a7741deSElliott Hughes 			if (c == 0)	/* hopeless */
392*9a7741deSElliott Hughes 				FATAL( "giving up" );
393*9a7741deSElliott Hughes 			lineno++;
394*9a7741deSElliott Hughes 			break;
395*9a7741deSElliott Hughes 		case '\\':
396*9a7741deSElliott Hughes 			c = input();
397*9a7741deSElliott Hughes 			switch (c) {
398*9a7741deSElliott Hughes 			case '\n': break;
399*9a7741deSElliott Hughes 			case '"': *bp++ = '"'; break;
400*9a7741deSElliott Hughes 			case 'n': *bp++ = '\n'; break;
401*9a7741deSElliott Hughes 			case 't': *bp++ = '\t'; break;
402*9a7741deSElliott Hughes 			case 'f': *bp++ = '\f'; break;
403*9a7741deSElliott Hughes 			case 'r': *bp++ = '\r'; break;
404*9a7741deSElliott Hughes 			case 'b': *bp++ = '\b'; break;
405*9a7741deSElliott Hughes 			case 'v': *bp++ = '\v'; break;
406*9a7741deSElliott Hughes 			case 'a': *bp++ = '\a'; break;
407*9a7741deSElliott Hughes 			case '\\': *bp++ = '\\'; break;
408*9a7741deSElliott Hughes 
409*9a7741deSElliott Hughes 			case '0': case '1': case '2': /* octal: \d \dd \ddd */
410*9a7741deSElliott Hughes 			case '3': case '4': case '5': case '6': case '7':
411*9a7741deSElliott Hughes 				n = c - '0';
412*9a7741deSElliott Hughes 				if ((c = peek()) >= '0' && c < '8') {
413*9a7741deSElliott Hughes 					n = 8 * n + input() - '0';
414*9a7741deSElliott Hughes 					if ((c = peek()) >= '0' && c < '8')
415*9a7741deSElliott Hughes 						n = 8 * n + input() - '0';
416*9a7741deSElliott Hughes 				}
417*9a7741deSElliott Hughes 				*bp++ = n;
418*9a7741deSElliott Hughes 				break;
419*9a7741deSElliott Hughes 
420*9a7741deSElliott Hughes 			case 'x':	/* hex  \x0-9a-fA-F (exactly two) */
421*9a7741deSElliott Hughes 			    {
422*9a7741deSElliott Hughes 				int i;
423*9a7741deSElliott Hughes 
424*9a7741deSElliott Hughes 				if (!isxdigit(peek())) {
425*9a7741deSElliott Hughes 					unput(c);
426*9a7741deSElliott Hughes 					break;
427*9a7741deSElliott Hughes 				}
428*9a7741deSElliott Hughes 				n = 0;
429*9a7741deSElliott Hughes 				for (i = 0; i < 2; i++) {
430*9a7741deSElliott Hughes 					c = input();
431*9a7741deSElliott Hughes 					if (c == 0)
432*9a7741deSElliott Hughes 						break;
433*9a7741deSElliott Hughes 					if (isxdigit(c)) {
434*9a7741deSElliott Hughes 						c = tolower(c);
435*9a7741deSElliott Hughes 						n *= 16;
436*9a7741deSElliott Hughes 						if (isdigit(c))
437*9a7741deSElliott Hughes 							n += (c - '0');
438*9a7741deSElliott Hughes 						else
439*9a7741deSElliott Hughes 							n += 10 + (c - 'a');
440*9a7741deSElliott Hughes 					} else {
441*9a7741deSElliott Hughes 						unput(c);
442*9a7741deSElliott Hughes 						break;
443*9a7741deSElliott Hughes 					}
444*9a7741deSElliott Hughes 				}
445*9a7741deSElliott Hughes 				if (i)
446*9a7741deSElliott Hughes 					*bp++ = n;
447*9a7741deSElliott Hughes 				break;
448*9a7741deSElliott Hughes 			    }
449*9a7741deSElliott Hughes 
450*9a7741deSElliott Hughes 			case 'u':	/* utf  \u0-9a-fA-F (1..8) */
451*9a7741deSElliott Hughes 			    {
452*9a7741deSElliott Hughes 				int i;
453*9a7741deSElliott Hughes 
454*9a7741deSElliott Hughes 				n = 0;
455*9a7741deSElliott Hughes 				for (i = 0; i < 8; i++) {
456*9a7741deSElliott Hughes 					c = input();
457*9a7741deSElliott Hughes 					if (!isxdigit(c) || c == 0)
458*9a7741deSElliott Hughes 						break;
459*9a7741deSElliott Hughes 					c = tolower(c);
460*9a7741deSElliott Hughes 					n *= 16;
461*9a7741deSElliott Hughes 					if (isdigit(c))
462*9a7741deSElliott Hughes 						n += (c - '0');
463*9a7741deSElliott Hughes 					else
464*9a7741deSElliott Hughes 						n += 10 + (c - 'a');
465*9a7741deSElliott Hughes 				}
466*9a7741deSElliott Hughes 				unput(c);
467*9a7741deSElliott Hughes 				bp += runetochar(bp, n);
468*9a7741deSElliott Hughes 				break;
469*9a7741deSElliott Hughes 			    }
470*9a7741deSElliott Hughes 
471*9a7741deSElliott Hughes 			default:
472*9a7741deSElliott Hughes 				*bp++ = c;
473*9a7741deSElliott Hughes 				break;
474*9a7741deSElliott Hughes 			}
475*9a7741deSElliott Hughes 			break;
476*9a7741deSElliott Hughes 		default:
477*9a7741deSElliott Hughes 			*bp++ = c;
478*9a7741deSElliott Hughes 			break;
479*9a7741deSElliott Hughes 		}
480*9a7741deSElliott Hughes 	}
481*9a7741deSElliott Hughes 	*bp = 0;
482*9a7741deSElliott Hughes 	s = tostring(buf);
483*9a7741deSElliott Hughes 	*bp++ = ' '; *bp++ = '\0';
484*9a7741deSElliott Hughes 	yylval.cp = setsymtab(buf, s, 0.0, CON|STR|DONTFREE, symtab);
485*9a7741deSElliott Hughes 	free(s);
486*9a7741deSElliott Hughes 	RET(STRING);
487*9a7741deSElliott Hughes }
488*9a7741deSElliott Hughes 
489*9a7741deSElliott Hughes 
binsearch(char * w,const Keyword * kp,int n)490*9a7741deSElliott Hughes static int binsearch(char *w, const Keyword *kp, int n)
491*9a7741deSElliott Hughes {
492*9a7741deSElliott Hughes 	int cond, low, mid, high;
493*9a7741deSElliott Hughes 
494*9a7741deSElliott Hughes 	low = 0;
495*9a7741deSElliott Hughes 	high = n - 1;
496*9a7741deSElliott Hughes 	while (low <= high) {
497*9a7741deSElliott Hughes 		mid = (low + high) / 2;
498*9a7741deSElliott Hughes 		if ((cond = strcmp(w, kp[mid].word)) < 0)
499*9a7741deSElliott Hughes 			high = mid - 1;
500*9a7741deSElliott Hughes 		else if (cond > 0)
501*9a7741deSElliott Hughes 			low = mid + 1;
502*9a7741deSElliott Hughes 		else
503*9a7741deSElliott Hughes 			return mid;
504*9a7741deSElliott Hughes 	}
505*9a7741deSElliott Hughes 	return -1;
506*9a7741deSElliott Hughes }
507*9a7741deSElliott Hughes 
word(char * w)508*9a7741deSElliott Hughes int word(char *w)
509*9a7741deSElliott Hughes {
510*9a7741deSElliott Hughes 	const Keyword *kp;
511*9a7741deSElliott Hughes 	int c, n;
512*9a7741deSElliott Hughes 
513*9a7741deSElliott Hughes 	n = binsearch(w, keywords, sizeof(keywords)/sizeof(keywords[0]));
514*9a7741deSElliott Hughes 	if (n != -1) {	/* found in table */
515*9a7741deSElliott Hughes 		kp = keywords + n;
516*9a7741deSElliott Hughes 		yylval.i = kp->sub;
517*9a7741deSElliott Hughes 		switch (kp->type) {	/* special handling */
518*9a7741deSElliott Hughes 		case BLTIN:
519*9a7741deSElliott Hughes 			if (kp->sub == FSYSTEM && safe)
520*9a7741deSElliott Hughes 				SYNTAX( "system is unsafe" );
521*9a7741deSElliott Hughes 			RET(kp->type);
522*9a7741deSElliott Hughes 		case FUNC:
523*9a7741deSElliott Hughes 			if (infunc)
524*9a7741deSElliott Hughes 				SYNTAX( "illegal nested function" );
525*9a7741deSElliott Hughes 			RET(kp->type);
526*9a7741deSElliott Hughes 		case RETURN:
527*9a7741deSElliott Hughes 			if (!infunc)
528*9a7741deSElliott Hughes 				SYNTAX( "return not in function" );
529*9a7741deSElliott Hughes 			RET(kp->type);
530*9a7741deSElliott Hughes 		case VARNF:
531*9a7741deSElliott Hughes 			yylval.cp = setsymtab("NF", "", 0.0, NUM, symtab);
532*9a7741deSElliott Hughes 			RET(VARNF);
533*9a7741deSElliott Hughes 		default:
534*9a7741deSElliott Hughes 			RET(kp->type);
535*9a7741deSElliott Hughes 		}
536*9a7741deSElliott Hughes 	}
537*9a7741deSElliott Hughes 	c = peek();	/* look for '(' */
538*9a7741deSElliott Hughes 	if (c != '(' && infunc && (n=isarg(w)) >= 0) {
539*9a7741deSElliott Hughes 		yylval.i = n;
540*9a7741deSElliott Hughes 		RET(ARG);
541*9a7741deSElliott Hughes 	} else {
542*9a7741deSElliott Hughes 		yylval.cp = setsymtab(w, "", 0.0, STR|NUM|DONTFREE, symtab);
543*9a7741deSElliott Hughes 		if (c == '(') {
544*9a7741deSElliott Hughes 			RET(CALL);
545*9a7741deSElliott Hughes 		} else {
546*9a7741deSElliott Hughes 			RET(VAR);
547*9a7741deSElliott Hughes 		}
548*9a7741deSElliott Hughes 	}
549*9a7741deSElliott Hughes }
550*9a7741deSElliott Hughes 
startreg(void)551*9a7741deSElliott Hughes void startreg(void)	/* next call to yylex will return a regular expression */
552*9a7741deSElliott Hughes {
553*9a7741deSElliott Hughes 	reg = true;
554*9a7741deSElliott Hughes }
555*9a7741deSElliott Hughes 
regexpr(void)556*9a7741deSElliott Hughes int regexpr(void)
557*9a7741deSElliott Hughes {
558*9a7741deSElliott Hughes 	int c;
559*9a7741deSElliott Hughes 	static char *buf = NULL;
560*9a7741deSElliott Hughes 	static int bufsz = 500;
561*9a7741deSElliott Hughes 	char *bp;
562*9a7741deSElliott Hughes 
563*9a7741deSElliott Hughes 	if (buf == NULL && (buf = (char *) malloc(bufsz)) == NULL)
564*9a7741deSElliott Hughes 		FATAL("out of space for reg expr");
565*9a7741deSElliott Hughes 	bp = buf;
566*9a7741deSElliott Hughes 	for ( ; (c = input()) != '/' && c != 0; ) {
567*9a7741deSElliott Hughes 		if (!adjbuf(&buf, &bufsz, bp-buf+3, 500, &bp, "regexpr"))
568*9a7741deSElliott Hughes 			FATAL("out of space for reg expr %.10s...", buf);
569*9a7741deSElliott Hughes 		if (c == '\n') {
570*9a7741deSElliott Hughes 			*bp = '\0';
571*9a7741deSElliott Hughes 			SYNTAX( "newline in regular expression %.10s...", buf );
572*9a7741deSElliott Hughes 			unput('\n');
573*9a7741deSElliott Hughes 			break;
574*9a7741deSElliott Hughes 		} else if (c == '\\') {
575*9a7741deSElliott Hughes 			*bp++ = '\\';
576*9a7741deSElliott Hughes 			*bp++ = input();
577*9a7741deSElliott Hughes 		} else {
578*9a7741deSElliott Hughes 			*bp++ = c;
579*9a7741deSElliott Hughes 		}
580*9a7741deSElliott Hughes 	}
581*9a7741deSElliott Hughes 	*bp = 0;
582*9a7741deSElliott Hughes 	if (c == 0)
583*9a7741deSElliott Hughes 		SYNTAX("non-terminated regular expression %.10s...", buf);
584*9a7741deSElliott Hughes 	yylval.s = tostring(buf);
585*9a7741deSElliott Hughes 	unput('/');
586*9a7741deSElliott Hughes 	RET(REGEXPR);
587*9a7741deSElliott Hughes }
588*9a7741deSElliott Hughes 
589*9a7741deSElliott Hughes /* low-level lexical stuff, sort of inherited from lex */
590*9a7741deSElliott Hughes 
/*
 * ebuf is a circular record of the characters handed out by input();
 * ep is the next slot to fill.  unput() steps ep back by one.
 * NOTE(review): nothing in this file reads ebuf back - presumably it is
 * used for error context elsewhere; confirm.
 */
char	ebuf[300];
char	*ep = ebuf;

/* Stack of pushed-back characters; yysptr points one past the top. */
char	yysbuf[100];
char	*yysptr = yysbuf;

/* NOTE(review): not used in this file - presumably read by pgetc(); confirm. */
FILE	*yyin = NULL;
596*9a7741deSElliott Hughes 
input(void)597*9a7741deSElliott Hughes int input(void)	/* get next lexical input character */
598*9a7741deSElliott Hughes {
599*9a7741deSElliott Hughes 	int c;
600*9a7741deSElliott Hughes 	extern char *lexprog;
601*9a7741deSElliott Hughes 
602*9a7741deSElliott Hughes 	if (yysptr > yysbuf)
603*9a7741deSElliott Hughes 		c = (uschar)*--yysptr;
604*9a7741deSElliott Hughes 	else if (lexprog != NULL) {	/* awk '...' */
605*9a7741deSElliott Hughes 		if ((c = (uschar)*lexprog) != 0)
606*9a7741deSElliott Hughes 			lexprog++;
607*9a7741deSElliott Hughes 	} else				/* awk -f ... */
608*9a7741deSElliott Hughes 		c = pgetc();
609*9a7741deSElliott Hughes 	if (c == EOF)
610*9a7741deSElliott Hughes 		c = 0;
611*9a7741deSElliott Hughes 	if (ep >= ebuf + sizeof ebuf)
612*9a7741deSElliott Hughes 		ep = ebuf;
613*9a7741deSElliott Hughes 	*ep = c;
614*9a7741deSElliott Hughes 	if (c != 0) {
615*9a7741deSElliott Hughes 		ep++;
616*9a7741deSElliott Hughes 	}
617*9a7741deSElliott Hughes 	return (c);
618*9a7741deSElliott Hughes }
619*9a7741deSElliott Hughes 
unput(int c)620*9a7741deSElliott Hughes void unput(int c)	/* put lexical character back on input */
621*9a7741deSElliott Hughes {
622*9a7741deSElliott Hughes 	if (c == '\n')
623*9a7741deSElliott Hughes 		lineno--;
624*9a7741deSElliott Hughes 	if (yysptr >= yysbuf + sizeof(yysbuf))
625*9a7741deSElliott Hughes 		FATAL("pushed back too much: %.20s...", yysbuf);
626*9a7741deSElliott Hughes 	*yysptr++ = c;
627*9a7741deSElliott Hughes 	if (--ep < ebuf)
628*9a7741deSElliott Hughes 		ep = ebuf + sizeof(ebuf) - 1;
629*9a7741deSElliott Hughes }
630*9a7741deSElliott Hughes 
/*
 * unputstr: push the whole string s back onto the input.
 * Characters are pushed last-to-first so that input() returns them in
 * their original order.  An empty string pushes nothing.
 */
void unputstr(const char *s)
{
	const char *p = s + strlen(s);

	while (p > s)
		unput(*--p);
}
638