xref: /aosp_15_r20/external/lua/src/llex.c (revision 088332b5b69e7ab13924864b272aabfc2509d2d5)
/*
** $Id: llex.c $
** Lexical Analyzer
** See Copyright Notice in lua.h
*/
6*088332b5SXin Li 
7*088332b5SXin Li #define llex_c
8*088332b5SXin Li #define LUA_CORE
9*088332b5SXin Li 
10*088332b5SXin Li #include "lprefix.h"
11*088332b5SXin Li 
12*088332b5SXin Li 
13*088332b5SXin Li #include <locale.h>
14*088332b5SXin Li #include <string.h>
15*088332b5SXin Li 
16*088332b5SXin Li #include "lua.h"
17*088332b5SXin Li 
18*088332b5SXin Li #include "lctype.h"
19*088332b5SXin Li #include "ldebug.h"
20*088332b5SXin Li #include "ldo.h"
21*088332b5SXin Li #include "lgc.h"
22*088332b5SXin Li #include "llex.h"
23*088332b5SXin Li #include "lobject.h"
24*088332b5SXin Li #include "lparser.h"
25*088332b5SXin Li #include "lstate.h"
26*088332b5SXin Li #include "lstring.h"
27*088332b5SXin Li #include "ltable.h"
28*088332b5SXin Li #include "lzio.h"
29*088332b5SXin Li 
30*088332b5SXin Li 
31*088332b5SXin Li 
/*
** Advance the lexer by one character: read the next character from the
** input stream 'ls->z' into 'ls->current'. The expression yields the
** character just read. ('ls' is evaluated more than once, so it must be
** free of side effects.)
*/
#define next(ls)	((ls)->current = zgetc((ls)->z))


/* True when the current character is a line-break character. */
#define currIsNewline(ls) \
	((ls)->current == '\n' || (ls)->current == '\r')
37*088332b5SXin Li 
38*088332b5SXin Li 
/*
** Printable form of every multi-character token, indexed by
** (token - FIRST_RESERVED).
** ORDER RESERVED: the entries must stay in this exact order.
*/
static const char *const luaX_tokens [] = {
    /* reserved words */
    "and", "break", "do", "else", "elseif", "end",
    "false", "for", "function", "goto", "if", "in",
    "local", "nil", "not", "or", "repeat", "return",
    "then", "true", "until", "while",
    /* multi-character operators */
    "//", "..", "...",
    "==", ">=", "<=", "~=",
    "<<", ">>", "::",
    /* end of stream */
    "<eof>",
    /* tokens that carry a value */
    "<number>", "<integer>", "<name>", "<string>"
};
49*088332b5SXin Li 
50*088332b5SXin Li 
51*088332b5SXin Li #define save_and_next(ls) (save(ls, ls->current), next(ls))
52*088332b5SXin Li 
53*088332b5SXin Li 
54*088332b5SXin Li static l_noret lexerror (LexState *ls, const char *msg, int token);
55*088332b5SXin Li 
56*088332b5SXin Li 
/*
** Appends character 'c' to the lexer's token buffer. When the buffer is
** full its size is doubled first; a buffer that already reached
** MAX_SIZE/2 raises a "lexical element too long" error instead.
*/
static void save (LexState *ls, int c) {
  Mbuffer *buf = ls->buff;
  if (luaZ_bufflen(buf) + 1 > luaZ_sizebuffer(buf)) {  /* no room left? */
    size_t grown;
    if (luaZ_sizebuffer(buf) >= MAX_SIZE/2)  /* doubling would be too much */
      lexerror(ls, "lexical element too long", 0);
    grown = luaZ_sizebuffer(buf) * 2;
    luaZ_resizebuffer(ls->L, buf, grown);
  }
  buf->buffer[luaZ_bufflen(buf)] = cast_char(c);
  luaZ_bufflen(buf)++;
}
68*088332b5SXin Li 
69*088332b5SXin Li 
/*
** Creates the strings the lexer depends on: the environment name and
** every reserved word. All of them are fixed so that they are never
** collected, and each reserved word stores its 1-based position in
** 'luaX_tokens' in its 'extra' field.
*/
void luaX_init (lua_State *L) {
  int rank;  /* 1-based index of the reserved word */
  TString *envname = luaS_newliteral(L, LUA_ENV);  /* create env name */
  luaC_fix(L, obj2gco(envname));  /* never collect this name */
  for (rank = 1; rank <= NUM_RESERVED; rank++) {
    TString *word = luaS_new(L, luaX_tokens[rank - 1]);
    luaC_fix(L, obj2gco(word));  /* reserved words are never collected */
    word->extra = cast_byte(rank);  /* marks the string as reserved */
  }
}
80*088332b5SXin Li 
81*088332b5SXin Li 
/*
** Returns a printable description of 'token'.
** - single-byte tokens: the quoted character, or '<\N>' (N = its code)
**   when it is not printable;
** - reserved words and multi-character symbols: the quoted text;
** - <eof>, names, strings and numerals: the bare entry of 'luaX_tokens'.
** Formatted results are built with 'luaO_pushfstring'.
*/
const char *luaX_token2str (LexState *ls, int token) {
  const char *text;
  if (token < FIRST_RESERVED) {  /* single-byte symbol? */
    const char *fmt = lisprint(token) ? "'%c'" : "'<\\%d>'";
    return luaO_pushfstring(ls->L, fmt, token);
  }
  text = luaX_tokens[token - FIRST_RESERVED];
  if (token >= TK_EOS)  /* names, strings, and numerals */
    return text;
  /* fixed format (symbols and reserved words) */
  return luaO_pushfstring(ls->L, "'%s'", text);
}
97*088332b5SXin Li 
98*088332b5SXin Li 
/*
** Text used to describe 'token' in an error message. Tokens that carry
** a value (names, strings, numerals) are shown as the quoted contents of
** the token buffer; all others use 'luaX_token2str'.
*/
static const char *txtToken (LexState *ls, int token) {
  if (token == TK_NAME || token == TK_STRING ||
      token == TK_FLT || token == TK_INT) {
    save(ls, '\0');  /* terminate buffer so it can be used as a C string */
    return luaO_pushfstring(ls->L, "'%s'", luaZ_buffer(ls->buff));
  }
  return luaX_token2str(ls, token);
}
109*088332b5SXin Li 
110*088332b5SXin Li 
lexerror(LexState * ls,const char * msg,int token)111*088332b5SXin Li static l_noret lexerror (LexState *ls, const char *msg, int token) {
112*088332b5SXin Li   msg = luaG_addinfo(ls->L, msg, ls->source, ls->linenumber);
113*088332b5SXin Li   if (token)
114*088332b5SXin Li     luaO_pushfstring(ls->L, "%s near %s", msg, txtToken(ls, token));
115*088332b5SXin Li   luaD_throw(ls->L, LUA_ERRSYNTAX);
116*088332b5SXin Li }
117*088332b5SXin Li 
118*088332b5SXin Li 
luaX_syntaxerror(LexState * ls,const char * msg)119*088332b5SXin Li l_noret luaX_syntaxerror (LexState *ls, const char *msg) {
120*088332b5SXin Li   lexerror(ls, msg, ls->t.token);
121*088332b5SXin Li }
122*088332b5SXin Li 
123*088332b5SXin Li 
124*088332b5SXin Li /*
125*088332b5SXin Li ** creates a new string and anchors it in scanner's table so that
126*088332b5SXin Li ** it will not be collected until the end of the compilation
127*088332b5SXin Li ** (by that time it should be anchored somewhere)
128*088332b5SXin Li */
luaX_newstring(LexState * ls,const char * str,size_t l)129*088332b5SXin Li TString *luaX_newstring (LexState *ls, const char *str, size_t l) {
130*088332b5SXin Li   lua_State *L = ls->L;
131*088332b5SXin Li   TValue *o;  /* entry for 'str' */
132*088332b5SXin Li   TString *ts = luaS_newlstr(L, str, l);  /* create new string */
133*088332b5SXin Li   setsvalue2s(L, L->top++, ts);  /* temporarily anchor it in stack */
134*088332b5SXin Li   o = luaH_set(L, ls->h, s2v(L->top - 1));
135*088332b5SXin Li   if (isempty(o)) {  /* not in use yet? */
136*088332b5SXin Li     /* boolean value does not need GC barrier;
137*088332b5SXin Li        table is not a metatable, so it does not need to invalidate cache */
138*088332b5SXin Li     setbtvalue(o);  /* t[string] = true */
139*088332b5SXin Li     luaC_checkGC(L);
140*088332b5SXin Li   }
141*088332b5SXin Li   else {  /* string already present */
142*088332b5SXin Li     ts = keystrval(nodefromval(o));  /* re-use value previously stored */
143*088332b5SXin Li   }
144*088332b5SXin Li   L->top--;  /* remove string from stack */
145*088332b5SXin Li   return ts;
146*088332b5SXin Li }
147*088332b5SXin Li 
148*088332b5SXin Li 
149*088332b5SXin Li /*
150*088332b5SXin Li ** increment line number and skips newline sequence (any of
151*088332b5SXin Li ** \n, \r, \n\r, or \r\n)
152*088332b5SXin Li */
inclinenumber(LexState * ls)153*088332b5SXin Li static void inclinenumber (LexState *ls) {
154*088332b5SXin Li   int old = ls->current;
155*088332b5SXin Li   lua_assert(currIsNewline(ls));
156*088332b5SXin Li   next(ls);  /* skip '\n' or '\r' */
157*088332b5SXin Li   if (currIsNewline(ls) && ls->current != old)
158*088332b5SXin Li     next(ls);  /* skip '\n\r' or '\r\n' */
159*088332b5SXin Li   if (++ls->linenumber >= MAX_INT)
160*088332b5SXin Li     lexerror(ls, "chunk has too many lines", 0);
161*088332b5SXin Li }
162*088332b5SXin Li 
163*088332b5SXin Li 
/*
** Prepares 'ls' to scan a new chunk: binds the Lua state, the input
** stream 'z' and the source name, uses 'firstchar' as the current
** character, resets the token and line bookkeeping, and sizes the token
** buffer to LUA_MINBUFFER.
*/
void luaX_setinput (lua_State *L, LexState *ls, ZIO *z, TString *source,
                    int firstchar) {
  /* where the input comes from */
  ls->L = L;
  ls->z = z;
  ls->source = source;
  ls->current = firstchar;
  /* token state */
  ls->t.token = 0;
  ls->lookahead.token = TK_EOS;  /* no look-ahead token */
  ls->fs = NULL;
  /* both line counters start at line 1 */
  ls->linenumber = 1;
  ls->lastline = 1;
  ls->envn = luaS_newliteral(L, LUA_ENV);  /* get env name */
  luaZ_resizebuffer(ls->L, ls->buff, LUA_MINBUFFER);  /* initialize buffer */
}
178*088332b5SXin Li 
179*088332b5SXin Li 
180*088332b5SXin Li 
/*
** =======================================================
** LEXICAL ANALYZER
** =======================================================
*/
186*088332b5SXin Li 
187*088332b5SXin Li 
/*
** If the current character is 'c', skips it (without saving it) and
** returns 1; otherwise returns 0 and consumes nothing.
*/
static int check_next1 (LexState *ls, int c) {
  if (ls->current != c)
    return 0;
  next(ls);
  return 1;
}
195*088332b5SXin Li 
196*088332b5SXin Li 
197*088332b5SXin Li /*
198*088332b5SXin Li ** Check whether current char is in set 'set' (with two chars) and
199*088332b5SXin Li ** saves it
200*088332b5SXin Li */
check_next2(LexState * ls,const char * set)201*088332b5SXin Li static int check_next2 (LexState *ls, const char *set) {
202*088332b5SXin Li   lua_assert(set[2] == '\0');
203*088332b5SXin Li   if (ls->current == set[0] || ls->current == set[1]) {
204*088332b5SXin Li     save_and_next(ls);
205*088332b5SXin Li     return 1;
206*088332b5SXin Li   }
207*088332b5SXin Li   else return 0;
208*088332b5SXin Li }
209*088332b5SXin Li 
210*088332b5SXin Li 
211*088332b5SXin Li /* LUA_NUMBER */
212*088332b5SXin Li /*
213*088332b5SXin Li ** This function is quite liberal in what it accepts, as 'luaO_str2num'
214*088332b5SXin Li ** will reject ill-formed numerals. Roughly, it accepts the following
215*088332b5SXin Li ** pattern:
216*088332b5SXin Li **
217*088332b5SXin Li **   %d(%x|%.|([Ee][+-]?))* | 0[Xx](%x|%.|([Pp][+-]?))*
218*088332b5SXin Li **
219*088332b5SXin Li ** The only tricky part is to accept [+-] only after a valid exponent
220*088332b5SXin Li ** mark, to avoid reading '3-4' or '0xe+1' as a single number.
221*088332b5SXin Li **
222*088332b5SXin Li ** The caller might have already read an initial dot.
223*088332b5SXin Li */
read_numeral(LexState * ls,SemInfo * seminfo)224*088332b5SXin Li static int read_numeral (LexState *ls, SemInfo *seminfo) {
225*088332b5SXin Li   TValue obj;
226*088332b5SXin Li   const char *expo = "Ee";
227*088332b5SXin Li   int first = ls->current;
228*088332b5SXin Li   lua_assert(lisdigit(ls->current));
229*088332b5SXin Li   save_and_next(ls);
230*088332b5SXin Li   if (first == '0' && check_next2(ls, "xX"))  /* hexadecimal? */
231*088332b5SXin Li     expo = "Pp";
232*088332b5SXin Li   for (;;) {
233*088332b5SXin Li     if (check_next2(ls, expo))  /* exponent mark? */
234*088332b5SXin Li       check_next2(ls, "-+");  /* optional exponent sign */
235*088332b5SXin Li     else if (lisxdigit(ls->current) || ls->current == '.')  /* '%x|%.' */
236*088332b5SXin Li       save_and_next(ls);
237*088332b5SXin Li     else break;
238*088332b5SXin Li   }
239*088332b5SXin Li   if (lislalpha(ls->current))  /* is numeral touching a letter? */
240*088332b5SXin Li     save_and_next(ls);  /* force an error */
241*088332b5SXin Li   save(ls, '\0');
242*088332b5SXin Li   if (luaO_str2num(luaZ_buffer(ls->buff), &obj) == 0)  /* format error? */
243*088332b5SXin Li     lexerror(ls, "malformed number", TK_FLT);
244*088332b5SXin Li   if (ttisinteger(&obj)) {
245*088332b5SXin Li     seminfo->i = ivalue(&obj);
246*088332b5SXin Li     return TK_INT;
247*088332b5SXin Li   }
248*088332b5SXin Li   else {
249*088332b5SXin Li     lua_assert(ttisfloat(&obj));
250*088332b5SXin Li     seminfo->r = fltvalue(&obj);
251*088332b5SXin Li     return TK_FLT;
252*088332b5SXin Li   }
253*088332b5SXin Li }
254*088332b5SXin Li 
255*088332b5SXin Li 
256*088332b5SXin Li /*
257*088332b5SXin Li ** reads a sequence '[=*[' or ']=*]', leaving the last bracket.
258*088332b5SXin Li ** If sequence is well formed, return its number of '='s + 2; otherwise,
259*088332b5SXin Li ** return 1 if there is no '='s or 0 otherwise (an unfinished '[==...').
260*088332b5SXin Li */
skip_sep(LexState * ls)261*088332b5SXin Li static size_t skip_sep (LexState *ls) {
262*088332b5SXin Li   size_t count = 0;
263*088332b5SXin Li   int s = ls->current;
264*088332b5SXin Li   lua_assert(s == '[' || s == ']');
265*088332b5SXin Li   save_and_next(ls);
266*088332b5SXin Li   while (ls->current == '=') {
267*088332b5SXin Li     save_and_next(ls);
268*088332b5SXin Li     count++;
269*088332b5SXin Li   }
270*088332b5SXin Li   return (ls->current == s) ? count + 2
271*088332b5SXin Li          : (count == 0) ? 1
272*088332b5SXin Li          : 0;
273*088332b5SXin Li }
274*088332b5SXin Li 
275*088332b5SXin Li 
/*
** Reads the body of a long string or long comment whose opening
** delimiter yielded 'sep' from 'skip_sep'. With a non-NULL 'seminfo' the
** contents (without delimiters) are stored in 'seminfo->ts'; with a NULL
** 'seminfo' the text is a comment and is discarded. Newline sequences
** are stored as '\n', and a newline right after the opening bracket is
** skipped. Reaching the end of input raises an "unfinished long ..."
** error.
*/
static void read_long_string (LexState *ls, SemInfo *seminfo, size_t sep) {
  int startline = ls->linenumber;  /* initial line (for error message) */
  int closed = 0;  /* set once the matching closing delimiter is read */
  save_and_next(ls);  /* skip 2nd '[' */
  if (currIsNewline(ls))  /* string starts with a newline? */
    inclinenumber(ls);  /* skip it */
  while (!closed) {
    int ch = ls->current;
    if (ch == EOZ) {  /* error */
      const char *what = (seminfo ? "string" : "comment");
      const char *msg = luaO_pushfstring(ls->L,
                   "unfinished long %s (starting at line %d)", what, startline);
      lexerror(ls, msg, TK_EOS);
    }
    else if (ch == ']') {
      if (skip_sep(ls) == sep) {  /* same level as the opening one? */
        save_and_next(ls);  /* skip 2nd ']' */
        closed = 1;
      }
    }
    else if (ch == '\n' || ch == '\r') {
      save(ls, '\n');
      inclinenumber(ls);
      if (!seminfo) luaZ_resetbuffer(ls->buff);  /* avoid wasting space */
    }
    else if (seminfo)
      save_and_next(ls);
    else
      next(ls);  /* comment text is not kept */
  }
  if (seminfo)
    seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + sep,
                                     luaZ_bufflen(ls->buff) - 2 * sep);
}
313*088332b5SXin Li 
314*088332b5SXin Li 
/*
** Escape-sequence assertion: does nothing when 'c' is true. Otherwise
** raises a string error with 'msg', first adding the offending character
** (unless it is the end of input) to the buffer for the error message.
*/
static void esccheck (LexState *ls, int c, const char *msg) {
  if (c)
    return;  /* condition holds */
  if (ls->current != EOZ)
    save_and_next(ls);  /* add current to buffer for error message */
  lexerror(ls, msg, TK_STRING);
}
322*088332b5SXin Li 
323*088332b5SXin Li 
/*
** Saves the current character, advances, and requires the new current
** character to be a hexadecimal digit. Returns that digit's value; the
** digit itself is left as the current character.
*/
static int gethexa (LexState *ls) {
  int ishex;
  save_and_next(ls);
  ishex = lisxdigit(ls->current);
  esccheck(ls, ishex, "hexadecimal digit expected");
  return luaO_hexavalue(ls->current);
}
329*088332b5SXin Li 
330*088332b5SXin Li 
/*
** Reads the two hexadecimal digits of a '\xXX' escape and returns the
** value they encode. The two characters saved while reading are removed
** from the buffer again.
*/
static int readhexaesc (LexState *ls) {
  int high = gethexa(ls);
  int low = gethexa(ls);
  luaZ_buffremove(ls->buff, 2);  /* remove saved chars from buffer */
  return (high << 4) + low;
}
337*088332b5SXin Li 
338*088332b5SXin Li 
/*
** Reads a '\u{XXX}' escape (current char is the 'u') and returns the
** value of its hexadecimal digits. Raises an error on a missing brace,
** a missing digit, or a value that does not fit in 31 bits. All chars
** saved for error messages are removed from the buffer on success.
*/
static unsigned long readutf8esc (LexState *ls) {
  unsigned long code;
  int nsaved = 4;  /* chars to be removed: '\', 'u', '{', and first digit */
  save_and_next(ls);  /* skip 'u' */
  esccheck(ls, ls->current == '{', "missing '{'");
  code = gethexa(ls);  /* must have at least one digit */
  for (;;) {
    save_and_next(ls);  /* keep previous char; look at the next one */
    if (!lisxdigit(ls->current))
      break;
    nsaved++;
    esccheck(ls, code <= (0x7FFFFFFFu >> 4), "UTF-8 value too large");
    code = (code << 4) + luaO_hexavalue(ls->current);
  }
  esccheck(ls, ls->current == '}', "missing '}'");
  next(ls);  /* skip '}' */
  luaZ_buffremove(ls->buff, nsaved);  /* remove saved chars from buffer */
  return code;
}
355*088332b5SXin Li 
356*088332b5SXin Li 
/*
** Handles a '\u{XXX}' escape: reads the value and appends the bytes
** produced by 'luaO_utf8esc' to the token buffer. Those bytes are the
** last 'len' bytes of the scratch array.
*/
static void utf8esc (LexState *ls) {
  char scratch[UTF8BUFFSZ];
  int len = luaO_utf8esc(scratch, readutf8esc(ls));
  int pos = UTF8BUFFSZ - len;  /* index of first encoded byte */
  while (pos < UTF8BUFFSZ) {  /* add encoded bytes to string */
    save(ls, scratch[pos]);
    pos++;
  }
}
363*088332b5SXin Li 
364*088332b5SXin Li 
/*
** Reads a decimal escape '\ddd' (one to three digits) and returns its
** value. Raises an error when the value exceeds UCHAR_MAX. The digits
** saved while reading are removed from the buffer again.
*/
static int readdecesc (LexState *ls) {
  int ndigits = 0;
  int value = 0;  /* result accumulator */
  while (ndigits < 3 && lisdigit(ls->current)) {  /* read up to 3 digits */
    value = 10*value + ls->current - '0';
    save_and_next(ls);
    ndigits++;
  }
  esccheck(ls, value <= UCHAR_MAX, "decimal escape too large");
  luaZ_buffremove(ls->buff, ndigits);  /* remove read digits from buffer */
  return value;
}
376*088332b5SXin Li 
377*088332b5SXin Li 
/*
** Reads a short literal string delimited by 'del' (the current char)
** and stores its contents, with escape sequences translated, in
** 'seminfo->ts'. The delimiters and the pieces of an escape being read
** are kept in the buffer so that error messages can show them; they are
** removed or skipped before the final string is created. An unescaped
** newline or the end of input raises "unfinished string".
*/
static void read_string (LexState *ls, int del, SemInfo *seminfo) {
  save_and_next(ls);  /* keep delimiter (for error messages) */
  while (ls->current != del) {
    int ch = ls->current;
    if (ch == EOZ)
      lexerror(ls, "unfinished string", TK_EOS);
    else if (ch == '\n' || ch == '\r')
      lexerror(ls, "unfinished string", TK_STRING);
    else if (ch != '\\')
      save_and_next(ls);  /* ordinary character */
    else {  /* escape sequences */
      /* what remains to be done once the escape has been decoded */
      enum {
        ESC_DONE,       /* nothing else to do */
        ESC_SAVE,       /* replace saved '\\' by 'c' */
        ESC_SKIP_SAVE   /* skip current char, then replace '\\' by 'c' */
      } todo = ESC_DONE;
      int c = 0;  /* final character to be saved */
      save_and_next(ls);  /* keep '\\' for error messages */
      switch (ls->current) {
        case 'a': c = '\a'; todo = ESC_SKIP_SAVE; break;
        case 'b': c = '\b'; todo = ESC_SKIP_SAVE; break;
        case 'f': c = '\f'; todo = ESC_SKIP_SAVE; break;
        case 'n': c = '\n'; todo = ESC_SKIP_SAVE; break;
        case 'r': c = '\r'; todo = ESC_SKIP_SAVE; break;
        case 't': c = '\t'; todo = ESC_SKIP_SAVE; break;
        case 'v': c = '\v'; todo = ESC_SKIP_SAVE; break;
        case 'x': c = readhexaesc(ls); todo = ESC_SKIP_SAVE; break;
        case 'u': utf8esc(ls); todo = ESC_DONE; break;
        case '\n': case '\r':
          inclinenumber(ls); c = '\n'; todo = ESC_SAVE; break;
        case '\\': case '\"': case '\'':
          c = ls->current; todo = ESC_SKIP_SAVE; break;
        case EOZ:
          todo = ESC_DONE;  /* will raise an error next loop */
          break;
        case 'z': {  /* zap following span of spaces */
          luaZ_buffremove(ls->buff, 1);  /* remove '\\' */
          next(ls);  /* skip the 'z' */
          while (lisspace(ls->current)) {
            if (currIsNewline(ls)) inclinenumber(ls);
            else next(ls);
          }
          todo = ESC_DONE;
          break;
        }
        default: {
          esccheck(ls, lisdigit(ls->current), "invalid escape sequence");
          c = readdecesc(ls);  /* digital escape '\ddd' */
          todo = ESC_SAVE;
          break;
        }
      }
      if (todo == ESC_SKIP_SAVE)
        next(ls);  /* skip the char that selected the escape */
      if (todo != ESC_DONE) {
        luaZ_buffremove(ls->buff, 1);  /* remove '\\' */
        save(ls, c);
      }
    }
  }
  save_and_next(ls);  /* skip delimiter */
  seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + 1,
                                   luaZ_bufflen(ls->buff) - 2);
}
439*088332b5SXin Li 
440*088332b5SXin Li 
/*
** Core scanner: skips white space and comments, then reads one token
** starting at the current character. Returns the token code (the
** character itself for single-char tokens, TK_EOS at end of input) and
** stores the value of names, strings and numerals in 'seminfo'.
*/
static int llex (LexState *ls, SemInfo *seminfo) {
  luaZ_resetbuffer(ls->buff);
  for (;;) {
    /* 'continue' restarts the scan after something that is not a token */
    switch (ls->current) {
      case '\n': case '\r':  /* line breaks */
        inclinenumber(ls);
        continue;
      case ' ': case '\f': case '\t': case '\v':  /* spaces */
        next(ls);
        continue;
      case '-': {  /* '-' or '--' (comment) */
        next(ls);
        if (ls->current != '-') return '-';
        /* else is a comment */
        next(ls);
        if (ls->current == '[') {  /* long comment? */
          size_t level = skip_sep(ls);
          luaZ_resetbuffer(ls->buff);  /* 'skip_sep' may dirty the buffer */
          if (level >= 2) {
            read_long_string(ls, NULL, level);  /* skip long comment */
            luaZ_resetbuffer(ls->buff);  /* previous call may dirty the buff. */
            continue;
          }
        }
        /* else short comment: skip until end of line (or end of file) */
        while (ls->current != EOZ && !currIsNewline(ls))
          next(ls);
        continue;
      }
      case '[': {  /* long string or simply '[' */
        size_t level = skip_sep(ls);
        if (level >= 2) {
          read_long_string(ls, seminfo, level);
          return TK_STRING;
        }
        if (level == 0)  /* '[=...' missing second bracket? */
          lexerror(ls, "invalid long string delimiter", TK_STRING);
        return '[';
      }
      case '=': {
        next(ls);
        if (check_next1(ls, '=')) return TK_EQ;
        return '=';
      }
      case '<': {
        next(ls);
        if (check_next1(ls, '=')) return TK_LE;
        if (check_next1(ls, '<')) return TK_SHL;
        return '<';
      }
      case '>': {
        next(ls);
        if (check_next1(ls, '=')) return TK_GE;
        if (check_next1(ls, '>')) return TK_SHR;
        return '>';
      }
      case '/': {
        next(ls);
        if (check_next1(ls, '/')) return TK_IDIV;
        return '/';
      }
      case '~': {
        next(ls);
        if (check_next1(ls, '=')) return TK_NE;
        return '~';
      }
      case ':': {
        next(ls);
        if (check_next1(ls, ':')) return TK_DBCOLON;
        return ':';
      }
      case '"': case '\'': {  /* short literal strings */
        read_string(ls, ls->current, seminfo);
        return TK_STRING;
      }
      case '.': {  /* '.', '..', '...', or number */
        save_and_next(ls);
        if (check_next1(ls, '.')) {
          if (check_next1(ls, '.'))
            return TK_DOTS;   /* '...' */
          return TK_CONCAT;   /* '..' */
        }
        if (lisdigit(ls->current))  /* numeral with a leading dot */
          return read_numeral(ls, seminfo);
        return '.';
      }
      case '0': case '1': case '2': case '3': case '4':
      case '5': case '6': case '7': case '8': case '9': {
        return read_numeral(ls, seminfo);
      }
      case EOZ: {
        return TK_EOS;
      }
      default: {
        TString *word;
        if (!lislalpha(ls->current)) {  /* single-char tokens (+ - / ...) */
          int single = ls->current;
          next(ls);
          return single;
        }
        /* identifier or reserved word */
        do {
          save_and_next(ls);
        } while (lislalnum(ls->current));
        word = luaX_newstring(ls, luaZ_buffer(ls->buff),
                                  luaZ_bufflen(ls->buff));
        seminfo->ts = word;
        if (isreserved(word))  /* reserved word? */
          return word->extra - 1 + FIRST_RESERVED;
        return TK_NAME;
      }
    }
  }
}
559*088332b5SXin Li 
560*088332b5SXin Li 
/*
** Advances to the next token, which becomes 'ls->t'. A pending
** look-ahead token is consumed first; otherwise a new token is scanned.
** 'ls->lastline' records the line number before the advance.
*/
void luaX_next (LexState *ls) {
  ls->lastline = ls->linenumber;
  if (ls->lookahead.token == TK_EOS) {  /* no look-ahead token? */
    ls->t.token = llex(ls, &ls->t.seminfo);  /* read next token */
    return;
  }
  ls->t = ls->lookahead;  /* use the look-ahead token */
  ls->lookahead.token = TK_EOS;  /* and discharge it */
}
570*088332b5SXin Li 
571*088332b5SXin Li 
/*
** Scans one token ahead without consuming the current one: the token is
** stored in 'ls->lookahead' and its code is returned. There must be no
** look-ahead token pending.
*/
int luaX_lookahead (LexState *ls) {
  int ahead;
  lua_assert(ls->lookahead.token == TK_EOS);
  ahead = llex(ls, &ls->lookahead.seminfo);
  ls->lookahead.token = ahead;
  return ahead;
}
577*088332b5SXin Li 
578