/* xref: /aosp_15_r20/external/one-true-awk/main.c (revision 9a7741de182b2776d7b30d6355f2585c0780a51b) */
/****************************************************************
Copyright (C) Lucent Technologies 1997
All Rights Reserved

Permission to use, copy, modify, and distribute this software and
its documentation for any purpose and without fee is hereby
granted, provided that the above copyright notice appear in all
copies and that both that the copyright notice and this
permission notice and warranty disclaimer appear in supporting
documentation, and that the name Lucent Technologies or any of
its entities not be used in advertising or publicity pertaining
to distribution of the software without specific, written prior
permission.

LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
THIS SOFTWARE.
****************************************************************/

/* Release identifier; main() prints it as "awk <version>" for -version, --version and -d. */
const char	*version = "version 20240728";

#define DEBUG
#include <stdio.h>
#include <ctype.h>
#include <locale.h>
#include <stdlib.h>
#include <string.h>
#include <signal.h>
#include "awk.h"

36*9a7741deSElliott Hughes extern	char	**environ;
37*9a7741deSElliott Hughes extern	int	nfields;
38*9a7741deSElliott Hughes 
39*9a7741deSElliott Hughes int	dbg	= 0;
40*9a7741deSElliott Hughes Awkfloat	srand_seed = 1;
41*9a7741deSElliott Hughes char	*cmdname;	/* gets argv[0] for error messages */
42*9a7741deSElliott Hughes extern	FILE	*yyin;	/* lex input file */
43*9a7741deSElliott Hughes char	*lexprog;	/* points to program argument if it exists */
44*9a7741deSElliott Hughes extern	int errorflag;	/* non-zero if any syntax errors; set by yyerror */
45*9a7741deSElliott Hughes enum compile_states	compile_time = ERROR_PRINTING;
46*9a7741deSElliott Hughes 
47*9a7741deSElliott Hughes static char	**pfile;	/* program filenames from -f's */
48*9a7741deSElliott Hughes static size_t	maxpfile;	/* max program filename */
49*9a7741deSElliott Hughes static size_t	npfile;		/* number of filenames */
50*9a7741deSElliott Hughes static size_t	curpfile;	/* current filename */
51*9a7741deSElliott Hughes 
52*9a7741deSElliott Hughes bool	CSV = false;	/* true for csv input */
53*9a7741deSElliott Hughes 
54*9a7741deSElliott Hughes bool	safe = false;	/* true => "safe" mode */
55*9a7741deSElliott Hughes 
56*9a7741deSElliott Hughes size_t	awk_mb_cur_max = 1;
57*9a7741deSElliott Hughes 
/*
 * SIGFPE handler: report a floating point exception through FATAL().
 *
 * When SA_SIGINFO is defined, main() installs this with sigaction() and
 * the handler takes the extended (n, si, uc) arguments; si->si_code is
 * translated into a readable reason that is appended to the message.
 * Without SA_SIGINFO it is a plain signal() handler taking only the signal
 * number, and just the generic text is reported.
 *
 * Declared noreturn, so FATAL() is relied upon not to return
 * (NOTE(review): FATAL is defined outside this file -- confirm).
 */
static noreturn void fpecatch(int n
#ifdef SA_SIGINFO
	, siginfo_t *si, void *uc
#endif
)
{
#ifdef SA_SIGINFO
	const char *mesg = NULL;

	/* Map the SIGFPE sub-code onto human-readable text. */
	switch (si->si_code) {
	case FPE_INTDIV:
		mesg = "Integer divide by zero";
		break;
	case FPE_INTOVF:
		mesg = "Integer overflow";
		break;
	case FPE_FLTDIV:
		mesg = "Floating point divide by zero";
		break;
	case FPE_FLTOVF:
		mesg = "Floating point overflow";
		break;
	case FPE_FLTUND:
		mesg = "Floating point underflow";
		break;
	case FPE_FLTRES:
		mesg = "Floating point inexact result";
		break;
	case FPE_FLTINV:
		mesg = "Invalid Floating point operation";
		break;
	case FPE_FLTSUB:
		mesg = "Subscript out of range";
		break;
	case 0:
	default:
		mesg = "Unknown error";
		break;
	}
#endif
	/* The format string and its argument are extended only when a reason is available. */
	FATAL("floating point exception"
#ifdef SA_SIGINFO
		": %s", mesg
#endif
	    );
}

/* Can this work with recursive calls?  I don't think so.
void segvcatch(int n)
{
	FATAL("segfault.  Do you have an unbounded recursive call?", n);
}
*/

/*
 * Translate a -F option argument into the field separator string to use.
 *
 * The one-character string "t" is special-cased to mean a tab and yields
 * the literal "\t".  Any other string, including the empty string, is
 * returned unchanged (the same pointer that was passed in).
 * p must not be NULL.
 */
static const char *
setfs(char *p)
{
	/* wart: t=>\t */
	if (p[0] == 't' && p[1] == '\0')
		return "\t";
	return p;
}

/*
 * Fetch the argument of the two-character option currently at (*argv)[1].
 *
 * argc, argv: pointers to the caller's argument cursor; updated in place
 *             when the option's value is a separate word.
 * msg:        text passed to FATAL() when the value is missing.
 *
 * "-fvalue": returns a pointer to the text following the option letter and
 *            leaves the cursor untouched.
 * "-f value": steps the cursor forward by one word and returns that next
 *            word; if no word follows, FATAL() is called with msg.
 *
 * In both cases the returned string is at or inside (*argv)[1] on return,
 * so the caller's usual argc--/argv++ moves past it.
 */
static char *
getarg(int *argc, char ***argv, const char *msg)
{
	if ((*argv)[1][2] != '\0') {	/* arg is -fsomething */
		return &(*argv)[1][2];
	} else {			/* arg is -f something */
		(*argc)--; (*argv)++;
		if (*argc <= 1)
			FATAL("%s", msg);
		return (*argv)[1];
	}
}

main(int argc,char * argv[])134*9a7741deSElliott Hughes int main(int argc, char *argv[])
135*9a7741deSElliott Hughes {
136*9a7741deSElliott Hughes 	const char *fs = NULL;
137*9a7741deSElliott Hughes 	char *fn, *vn;
138*9a7741deSElliott Hughes 
139*9a7741deSElliott Hughes 	setlocale(LC_CTYPE, "");
140*9a7741deSElliott Hughes 	setlocale(LC_NUMERIC, "C"); /* for parsing cmdline & prog */
141*9a7741deSElliott Hughes 	awk_mb_cur_max = MB_CUR_MAX;
142*9a7741deSElliott Hughes 	cmdname = argv[0];
143*9a7741deSElliott Hughes 	if (argc == 1) {
144*9a7741deSElliott Hughes 		fprintf(stderr,
145*9a7741deSElliott Hughes 		  "usage: %s [-F fs | --csv] [-v var=value] [-f progfile | 'prog'] [file ...]\n",
146*9a7741deSElliott Hughes 		  cmdname);
147*9a7741deSElliott Hughes 		exit(1);
148*9a7741deSElliott Hughes 	}
149*9a7741deSElliott Hughes #ifdef SA_SIGINFO
150*9a7741deSElliott Hughes 	{
151*9a7741deSElliott Hughes 		struct sigaction sa;
152*9a7741deSElliott Hughes 		sa.sa_sigaction = fpecatch;
153*9a7741deSElliott Hughes 		sa.sa_flags = SA_SIGINFO;
154*9a7741deSElliott Hughes 		sigemptyset(&sa.sa_mask);
155*9a7741deSElliott Hughes 		(void)sigaction(SIGFPE, &sa, NULL);
156*9a7741deSElliott Hughes 	}
157*9a7741deSElliott Hughes #else
158*9a7741deSElliott Hughes 	(void)signal(SIGFPE, fpecatch);
159*9a7741deSElliott Hughes #endif
160*9a7741deSElliott Hughes 	/*signal(SIGSEGV, segvcatch); experiment */
161*9a7741deSElliott Hughes 
162*9a7741deSElliott Hughes 	/* Set and keep track of the random seed */
163*9a7741deSElliott Hughes 	srand_seed = 1;
164*9a7741deSElliott Hughes 	srandom((unsigned long) srand_seed);
165*9a7741deSElliott Hughes 
166*9a7741deSElliott Hughes 	yyin = NULL;
167*9a7741deSElliott Hughes 	symtab = makesymtab(NSYMTAB/NSYMTAB);
168*9a7741deSElliott Hughes 	while (argc > 1 && argv[1][0] == '-' && argv[1][1] != '\0') {
169*9a7741deSElliott Hughes 		if (strcmp(argv[1], "-version") == 0 || strcmp(argv[1], "--version") == 0) {
170*9a7741deSElliott Hughes 			printf("awk %s\n", version);
171*9a7741deSElliott Hughes 			return 0;
172*9a7741deSElliott Hughes 		}
173*9a7741deSElliott Hughes 		if (strcmp(argv[1], "--") == 0) {	/* explicit end of args */
174*9a7741deSElliott Hughes 			argc--;
175*9a7741deSElliott Hughes 			argv++;
176*9a7741deSElliott Hughes 			break;
177*9a7741deSElliott Hughes 		}
178*9a7741deSElliott Hughes 		if (strcmp(argv[1], "--csv") == 0) {	/* turn on csv input processing */
179*9a7741deSElliott Hughes 			CSV = true;
180*9a7741deSElliott Hughes 			argc--;
181*9a7741deSElliott Hughes 			argv++;
182*9a7741deSElliott Hughes 			continue;
183*9a7741deSElliott Hughes 		}
184*9a7741deSElliott Hughes 		switch (argv[1][1]) {
185*9a7741deSElliott Hughes 		case 's':
186*9a7741deSElliott Hughes 			if (strcmp(argv[1], "-safe") == 0)
187*9a7741deSElliott Hughes 				safe = true;
188*9a7741deSElliott Hughes 			break;
189*9a7741deSElliott Hughes 		case 'f':	/* next argument is program filename */
190*9a7741deSElliott Hughes 			fn = getarg(&argc, &argv, "no program filename");
191*9a7741deSElliott Hughes 			if (npfile >= maxpfile) {
192*9a7741deSElliott Hughes 				maxpfile += 20;
193*9a7741deSElliott Hughes 				pfile = (char **) realloc(pfile, maxpfile * sizeof(*pfile));
194*9a7741deSElliott Hughes 				if (pfile == NULL)
195*9a7741deSElliott Hughes 					FATAL("error allocating space for -f options");
196*9a7741deSElliott Hughes  			}
197*9a7741deSElliott Hughes 			pfile[npfile++] = fn;
198*9a7741deSElliott Hughes  			break;
199*9a7741deSElliott Hughes 		case 'F':	/* set field separator */
200*9a7741deSElliott Hughes 			fs = setfs(getarg(&argc, &argv, "no field separator"));
201*9a7741deSElliott Hughes 			break;
202*9a7741deSElliott Hughes 		case 'v':	/* -v a=1 to be done NOW.  one -v for each */
203*9a7741deSElliott Hughes 			vn = getarg(&argc, &argv, "no variable name");
204*9a7741deSElliott Hughes 			if (isclvar(vn))
205*9a7741deSElliott Hughes 				setclvar(vn);
206*9a7741deSElliott Hughes 			else
207*9a7741deSElliott Hughes 				FATAL("invalid -v option argument: %s", vn);
208*9a7741deSElliott Hughes 			break;
209*9a7741deSElliott Hughes 		case 'd':
210*9a7741deSElliott Hughes 			dbg = atoi(&argv[1][2]);
211*9a7741deSElliott Hughes 			if (dbg == 0)
212*9a7741deSElliott Hughes 				dbg = 1;
213*9a7741deSElliott Hughes 			printf("awk %s\n", version);
214*9a7741deSElliott Hughes 			break;
215*9a7741deSElliott Hughes 		default:
216*9a7741deSElliott Hughes 			WARNING("unknown option %s ignored", argv[1]);
217*9a7741deSElliott Hughes 			break;
218*9a7741deSElliott Hughes 		}
219*9a7741deSElliott Hughes 		argc--;
220*9a7741deSElliott Hughes 		argv++;
221*9a7741deSElliott Hughes 	}
222*9a7741deSElliott Hughes 
223*9a7741deSElliott Hughes 	if (CSV && (fs != NULL || lookup("FS", symtab) != NULL))
224*9a7741deSElliott Hughes 		WARNING("danger: don't set FS when --csv is in effect");
225*9a7741deSElliott Hughes 
226*9a7741deSElliott Hughes 	/* argv[1] is now the first argument */
227*9a7741deSElliott Hughes 	if (npfile == 0) {	/* no -f; first argument is program */
228*9a7741deSElliott Hughes 		if (argc <= 1) {
229*9a7741deSElliott Hughes 			if (dbg)
230*9a7741deSElliott Hughes 				exit(0);
231*9a7741deSElliott Hughes 			FATAL("no program given");
232*9a7741deSElliott Hughes 		}
233*9a7741deSElliott Hughes 		DPRINTF("program = |%s|\n", argv[1]);
234*9a7741deSElliott Hughes 		lexprog = argv[1];
235*9a7741deSElliott Hughes 		argc--;
236*9a7741deSElliott Hughes 		argv++;
237*9a7741deSElliott Hughes 	}
238*9a7741deSElliott Hughes 	recinit(recsize);
239*9a7741deSElliott Hughes 	syminit();
240*9a7741deSElliott Hughes 	compile_time = COMPILING;
241*9a7741deSElliott Hughes 	argv[0] = cmdname;	/* put prog name at front of arglist */
242*9a7741deSElliott Hughes 	DPRINTF("argc=%d, argv[0]=%s\n", argc, argv[0]);
243*9a7741deSElliott Hughes 	arginit(argc, argv);
244*9a7741deSElliott Hughes 	if (!safe)
245*9a7741deSElliott Hughes 		envinit(environ);
246*9a7741deSElliott Hughes 	yyparse();
247*9a7741deSElliott Hughes #if 0
248*9a7741deSElliott Hughes 	// Doing this would comply with POSIX, but is not compatible with
249*9a7741deSElliott Hughes 	// other awks and with what most users expect. So comment it out.
250*9a7741deSElliott Hughes 	setlocale(LC_NUMERIC, ""); /* back to whatever it is locally */
251*9a7741deSElliott Hughes #endif
252*9a7741deSElliott Hughes 	if (fs)
253*9a7741deSElliott Hughes 		*FS = qstring(fs, '\0');
254*9a7741deSElliott Hughes 	DPRINTF("errorflag=%d\n", errorflag);
255*9a7741deSElliott Hughes 	if (errorflag == 0) {
256*9a7741deSElliott Hughes 		compile_time = RUNNING;
257*9a7741deSElliott Hughes 		run(winner);
258*9a7741deSElliott Hughes 	} else
259*9a7741deSElliott Hughes 		bracecheck();
260*9a7741deSElliott Hughes 	return(errorflag);
261*9a7741deSElliott Hughes }
262*9a7741deSElliott Hughes 
pgetc(void)263*9a7741deSElliott Hughes int pgetc(void)		/* get 1 character from awk program */
264*9a7741deSElliott Hughes {
265*9a7741deSElliott Hughes 	int c;
266*9a7741deSElliott Hughes 
267*9a7741deSElliott Hughes 	for (;;) {
268*9a7741deSElliott Hughes 		if (yyin == NULL) {
269*9a7741deSElliott Hughes 			if (curpfile >= npfile)
270*9a7741deSElliott Hughes 				return EOF;
271*9a7741deSElliott Hughes 			if (strcmp(pfile[curpfile], "-") == 0)
272*9a7741deSElliott Hughes 				yyin = stdin;
273*9a7741deSElliott Hughes 			else if ((yyin = fopen(pfile[curpfile], "r")) == NULL)
274*9a7741deSElliott Hughes 				FATAL("can't open file %s", pfile[curpfile]);
275*9a7741deSElliott Hughes 			lineno = 1;
276*9a7741deSElliott Hughes 		}
277*9a7741deSElliott Hughes 		if ((c = getc(yyin)) != EOF)
278*9a7741deSElliott Hughes 			return c;
279*9a7741deSElliott Hughes 		if (yyin != stdin)
280*9a7741deSElliott Hughes 			fclose(yyin);
281*9a7741deSElliott Hughes 		yyin = NULL;
282*9a7741deSElliott Hughes 		curpfile++;
283*9a7741deSElliott Hughes 	}
284*9a7741deSElliott Hughes }
285*9a7741deSElliott Hughes 
cursource(void)286*9a7741deSElliott Hughes char *cursource(void)	/* current source file name */
287*9a7741deSElliott Hughes {
288*9a7741deSElliott Hughes 	if (npfile > 0)
289*9a7741deSElliott Hughes 		return pfile[curpfile < npfile ? curpfile : curpfile - 1];
290*9a7741deSElliott Hughes 	else
291*9a7741deSElliott Hughes 		return NULL;
292*9a7741deSElliott Hughes }
293