xref: /nrf52832-nimble/rt-thread/components/finsh/finsh_token.c (revision 104654410c56c573564690304ae786df310c91fc)
1 /*
2  * Copyright (c) 2006-2018, RT-Thread Development Team
3  *
4  * SPDX-License-Identifier: Apache-2.0
5  *
6  * Change Logs:
7  * Date           Author       Notes
8  * 2010-03-22     Bernard      first version
9  * 2013-04-03     Bernard      strip more characters.
10  */
11 #include <finsh.h>
12 #include <stdlib.h>
13 
14 #include "finsh_token.h"
15 #include "finsh_error.h"
16 
/*
 * Character classification helpers.
 *
 * Every expansion is fully parenthesized so the macros compose safely with
 * unary and binary operators at the call site (e.g. `!is_alpha(c)`).
 * The letter tests fold case with `| 0x20` and rely on the comparison with
 * an unsigned constant to reject values below 'a' in a single test.
 * Note that each macro may evaluate its argument more than once.
 */
#define is_alpha(ch)     ((((ch) | 0x20) - 'a') < 26u)
#define is_digit(ch)     ((ch) >= '0' && (ch) <= '9')
#define is_xdigit(ch)    (((ch) >= '0' && (ch) <= '9') || ((((ch) | 0x20) - 'a') < 6u))
/* anything that cannot be part of an identifier: not a letter, digit or '_' */
#define is_separator(ch) (!(((ch) >= 'a' && (ch) <= 'z') \
     || ((ch) >= 'A' && (ch) <= 'Z') || ((ch) >= '0' && (ch) <= '9') || ((ch) == '_')))
/* end-of-input flag of a tokenizer */
#define is_eof(self)     ((self)->eof)
23 
24 struct name_table
25 {
26     char* name;
27     enum finsh_token_type type;
28 };
29 
30 /* keyword */
31 static const struct name_table finsh_name_table[] =
32 {
33     {"void",        finsh_token_type_void},
34     {"char",        finsh_token_type_char},
35     {"short",       finsh_token_type_short},
36     {"int",         finsh_token_type_int},
37     {"long",        finsh_token_type_long},
38     {"unsigned",    finsh_token_type_unsigned},
39 
40     {"NULL",        finsh_token_type_value_null},
41     {"null",        finsh_token_type_value_null}
42 };
43 
/* Forward declarations of the file-local helpers, grouped by role. */

/* moving over the characters of the input line */
static char token_next_char(struct finsh_token* self);
static void token_prev_char(struct finsh_token* self);
static void token_trim_space(struct finsh_token* self);

/* recognising the next token */
static void token_run(struct finsh_token* self);
static int  token_match_name(struct finsh_token* self, const char* str);

/* parsing literals */
static void token_proc_number(struct finsh_token* self);
static long token_spec_number(char* string, int length, int b);
static char token_proc_char(struct finsh_token* self);
static uint8_t* token_proc_string(struct finsh_token* self);
static int token_proc_escape(struct finsh_token* self);
54 
finsh_token_init(struct finsh_token * self,uint8_t * line)55 void finsh_token_init(struct finsh_token* self, uint8_t* line)
56 {
57     memset(self, 0, sizeof(struct finsh_token));
58 
59     self->line = line;
60 }
61 
finsh_token_token(struct finsh_token * self)62 enum finsh_token_type finsh_token_token(struct finsh_token* self)
63 {
64     if ( self->replay ) self->replay = 0;
65     else token_run(self);
66 
67     return (enum finsh_token_type)self->current_token;
68 }
69 
finsh_token_get_token(struct finsh_token * self,uint8_t * token)70 void finsh_token_get_token(struct finsh_token* self, uint8_t* token)
71 {
72     strncpy((char*)token, (char*)self->string, FINSH_NAME_MAX);
73 }
74 
token_get_string(struct finsh_token * self,uint8_t * str)75 int token_get_string(struct finsh_token* self, uint8_t* str)
76 {
77     unsigned char *p=str;
78     char ch;
79 
80     ch = token_next_char(self);
81     if (is_eof(self)) return -1;
82 
83     str[0] = '\0';
84 
85     if ( is_digit(ch) )/*the first character of identifier is not a digit.*/
86     {
87         token_prev_char(self);
88         return -1;
89     }
90 
91     while (!is_separator(ch) && !is_eof(self))
92     {
93         *p++ = ch;
94 
95         ch = token_next_char(self);
96     }
97     self->eof = 0;
98 
99     token_prev_char(self);
100     *p = '\0';
101 
102     return 0;
103 }
104 
105 /*
106 get next character.
107 */
token_next_char(struct finsh_token * self)108 static char token_next_char(struct finsh_token* self)
109 {
110     if (self->eof) return '\0';
111 
112     if (self->position == (int)strlen((char*)self->line) || self->line[self->position] =='\n')
113     {
114             self->eof = 1;
115             self->position = 0;
116             return '\0';
117     }
118 
119     return self->line[self->position++];
120 }
121 
token_prev_char(struct finsh_token * self)122 static void token_prev_char(struct finsh_token* self)
123 {
124     if ( self->eof ) return;
125 
126     if ( self->position == 0 ) return;
127     else self->position--;
128 }
129 
token_run(struct finsh_token * self)130 static void token_run(struct finsh_token* self)
131 {
132     char ch;
133 
134     token_trim_space(self); /* first trim space and tab. */
135     token_get_string(self, &(self->string[0]));
136 
137     if ( is_eof(self) ) /*if it is eof, break;*/
138     {
139         self->current_token = finsh_token_type_eof;
140         return ;
141     }
142 
143     if (self->string[0] != '\0') /*It is a key word or a identifier.*/
144     {
145         if ( !token_match_name(self, (char*)self->string) )
146         {
147             self->current_token = finsh_token_type_identifier;
148         }
149     }
150     else/*It is a operator character.*/
151     {
152         ch = token_next_char(self);
153 
154         switch ( ch )
155         {
156         case '(':
157             self->current_token = finsh_token_type_left_paren;
158             break;
159 
160         case ')':
161             self->current_token = finsh_token_type_right_paren;
162             break;
163 
164         case ',':
165             self->current_token = finsh_token_type_comma;
166             break;
167 
168         case ';':
169             self->current_token = finsh_token_type_semicolon;
170             break;
171 
172         case '&':
173             self->current_token = finsh_token_type_and;
174             break;
175 
176         case '*':
177             self->current_token = finsh_token_type_mul;
178             break;
179 
180         case '+':
181             ch = token_next_char(self);
182 
183             if ( ch == '+' )
184             {
185                 self->current_token = finsh_token_type_inc;
186             }
187             else
188             {
189                 token_prev_char(self);
190                 self->current_token = finsh_token_type_add;
191             }
192             break;
193 
194         case '-':
195             ch = token_next_char(self);
196 
197             if ( ch == '-' )
198             {
199                 self->current_token = finsh_token_type_dec;
200             }
201             else
202             {
203                 token_prev_char(self);
204                 self->current_token = finsh_token_type_sub;
205             }
206             break;
207 
208         case '/':
209             ch = token_next_char(self);
210             if (ch == '/')
211             {
212                 /* line comments, set to end of file */
213                 self->current_token = finsh_token_type_eof;
214             }
215             else
216             {
217                 token_prev_char(self);
218                 self->current_token = finsh_token_type_div;
219             }
220             break;
221 
222         case '<':
223             ch = token_next_char(self);
224 
225             if ( ch == '<' )
226             {
227                 self->current_token = finsh_token_type_shl;
228             }
229             else
230             {
231                 token_prev_char(self);
232                 self->current_token = finsh_token_type_bad;
233             }
234             break;
235 
236         case '>':
237             ch = token_next_char(self);
238 
239             if ( ch == '>' )
240             {
241                 self->current_token = finsh_token_type_shr;
242             }
243             else
244             {
245                 token_prev_char(self);
246                 self->current_token = finsh_token_type_bad;
247             }
248             break;
249 
250         case '|':
251             self->current_token = finsh_token_type_or;
252             break;
253 
254         case '%':
255             self->current_token = finsh_token_type_mod;
256             break;
257 
258         case '~':
259             self->current_token = finsh_token_type_bitwise;
260             break;
261 
262         case '^':
263             self->current_token = finsh_token_type_xor;
264             break;
265 
266         case '=':
267             self->current_token = finsh_token_type_assign;
268             break;
269 
270         case '\'':
271             self->value.char_value = token_proc_char(self);
272             self->current_token = finsh_token_type_value_char;
273             break;
274 
275         case '"':
276             token_proc_string(self);
277             self->current_token = finsh_token_type_value_string;
278             break;
279 
280         default:
281             if ( is_digit(ch) )
282             {
283                 token_prev_char(self);
284                 token_proc_number(self);
285                 break;
286             }
287 
288             finsh_error_set(FINSH_ERROR_UNKNOWN_TOKEN);
289             self->current_token = finsh_token_type_bad;
290 
291             break;
292         }
293     }
294 }
295 
token_match_name(struct finsh_token * self,const char * str)296 static int token_match_name(struct finsh_token* self, const char* str)
297 {
298     int i;
299 
300     for (i = 0; i < sizeof(finsh_name_table)/sizeof(struct name_table); i++)
301     {
302         if ( strcmp(finsh_name_table[i].name, str)==0 )
303         {
304             self->current_token = finsh_name_table[i].type;
305             return 1;
306         }
307     }
308 
309     return 0;
310 }
311 
/*
 * Skip blanks (space, tab, carriage return) so that the next character read
 * is the first one that is not a blank.
 */
static void token_trim_space(struct finsh_token* self)
{
    char c;

    do
    {
        c = token_next_char(self);
    } while ( c == ' ' || c == '\t' || c == '\r' );

    /* the loop read one character too many: hand it back */
    token_prev_char(self);
}
321 
token_proc_char(struct finsh_token * self)322 static char token_proc_char(struct finsh_token* self)
323 {
324     char ch;
325     char buf[4], *p;
326 
327     p = buf;
328     ch = token_next_char(self);
329 
330     if ( ch == '\\' )
331     {
332         ch = token_next_char(self);
333         switch ( ch )
334         {
335         case 'n': ch = '\n'; break;
336         case 't': ch = '\t'; break;
337         case 'v': ch = '\v'; break;
338         case 'b': ch = '\b'; break;
339         case 'r': ch = '\r'; break;
340         case '\\': ch = '\\';  break;
341         case '\'': ch = '\'';  break;
342         default :
343             while ( is_digit(ch) )/*for '\113' char*/
344             {
345                 ch = token_next_char(self);
346                 *p++ = ch;
347             }
348 
349             token_prev_char(self);
350             *p = '\0';
351             ch = atoi(p);
352             break;
353         }
354     }
355 
356     if ( token_next_char(self) != '\'' )
357     {
358         token_prev_char(self);
359         finsh_error_set(FINSH_ERROR_EXPECT_CHAR);
360         return ch;
361     }
362 
363     return ch;
364 }
365 
token_proc_string(struct finsh_token * self)366 static uint8_t* token_proc_string(struct finsh_token* self)
367 {
368     uint8_t* p;
369 
370     for ( p = &self->string[0]; p - &(self->string[0]) < FINSH_STRING_MAX; )
371     {
372         char ch = token_next_char(self);
373 
374         if ( is_eof(self) )
375         {
376             finsh_error_set(FINSH_ERROR_UNEXPECT_END);
377             return NULL;;
378         }
379         if ( ch == '\\' )
380         {
381             ch = token_proc_escape(self);
382         }
383         else if ( ch == '"' )/*end of string.*/
384         {
385             *p = '\0';
386             return self->string;
387         }
388 
389         *p++ = ch;
390     }
391 
392     return NULL;
393 }
394 
/*
 * Translate one escape sequence of a string literal; the backslash has
 * already been consumed by the caller.
 *
 * Supported: \n \t \v \b \r \f \a \" \\ \' , \x or \X followed by hex digits,
 * and a run of octal digits. Any other character yields 0.
 *
 * Numeric escapes keep only the low 8 bits while accumulating: the caller
 * stores the result in a single char, and this keeps an over-long digit run
 * from overflowing the signed accumulator.
 *
 * Returns the value of the escaped character.
 */
static int token_proc_escape(struct finsh_token* self)
{
    char ch;
    int result=0;

    ch = token_next_char(self);
    switch (ch)
    {
    case 'n':
        result = '\n';
        break;
    case 't':
        result = '\t';
        break;
    case 'v':
        result = '\v';
        break;
    case 'b':
        result = '\b';
        break;
    case 'r':
        result = '\r';
        break;
    case 'f':
        result = '\f';
        break;
    case 'a':
        result = '\007';
        break;
    case '"':
        result = '"';
        break;
    case '\\':
        /* without this case an escaped backslash produced a NUL in the string */
        result = '\\';
        break;
    case '\'':
        result = '\'';
        break;
    case 'x':
    case 'X':
        result = 0;
        ch  = token_next_char(self);
        while (is_xdigit(ch))
        {
            int digit = (ch < 'A') ? (ch - '0') : (ch | 0x20) - 'a' + 10;

            result = (result * 16 + digit) & 0xFF;
            ch = token_next_char(self);
        }
        /* hand back the character that ended the digit run */
        token_prev_char(self);
        break;
    default:
        /* the unsigned comparison accepts '0'..'7' only */
        if ( (ch - '0') < 8u)
        {
            result = 0;
            while ( (ch - '0') < 8u )
            {
                result = (result*8 + ch - '0') & 0xFF;
                ch = token_next_char(self);
            }

            token_prev_char(self);
        }
        break;
    }

    return result;
}
455 
456 /*
457 (0|0x|0X|0b|0B)number+(l|L)
458 */
token_proc_number(struct finsh_token * self)459 static void token_proc_number(struct finsh_token* self)
460 {
461     char ch;
462     char *p, buf[128];
463     long value;
464 
465     value = 0;
466     p = buf;
467 
468     ch  = token_next_char(self);
469     if ( ch == '0' )
470     {
471         int b;
472         ch = token_next_char(self);
473         if ( ch == 'x' || ch == 'X' )/*it's a hex number*/
474         {
475             b = 16;
476             ch = token_next_char(self);
477             while ( is_digit(ch) || is_alpha(ch) )
478             {
479                 *p++ = ch;
480                 ch = token_next_char(self);
481             }
482 
483             *p = '\0';
484         }
485         else if ( ch == 'b' || ch == 'B' )
486         {
487             b = 2;
488             ch = token_next_char(self);
489             while ( (ch=='0')||(ch=='1') )
490             {
491                 *p++ = ch;
492                 ch = token_next_char(self);
493             }
494 
495             *p = '\0';
496         }
497         else if ( '0' <= ch && ch <= '7' )
498         {
499             b = 8;
500             while ( '0' <= ch && ch <= '7' )
501             {
502                 *p++ = ch;
503                 ch = token_next_char(self);
504             }
505 
506             *p = '\0';
507         }
508         else
509         {
510             token_prev_char(self);
511 
512             /* made as 0 value */
513             self->value.int_value = 0;
514             self->current_token = finsh_token_type_value_int;
515             return;
516         }
517 
518         self->value.int_value = token_spec_number(buf, strlen(buf), b);
519         self->current_token = finsh_token_type_value_int;
520     }
521     else
522     {
523         while ( is_digit(ch) )
524         {
525             value = value*10 + ( ch - '0' );
526             ch = token_next_char(self);
527         }
528 
529         self->value.int_value = value;
530         self->current_token = finsh_token_type_value_int;
531     }
532 
533     switch ( ch )
534     {
535     case 'l':
536     case 'L':
537         self->current_token = finsh_token_type_value_long;
538         break;
539 
540     default:
541         token_prev_char(self);
542         break;
543     }
544 }
545 
/*
 * Convert a run of digits in a power-of-two base to a number.
 *
 * string: digits to convert; it need not be NUL-terminated.
 * length: number of characters of string to read.
 * b:      base; 16 and 8 use 4 and 3 bits per digit, any other value is
 *         treated as base 2.
 *
 * Conversion stops at the first character that is not a digit of the base,
 * so stray characters cannot corrupt the value. Only the low 32 bits of the
 * result are kept, whatever the width of the host's integer types; the
 * former two-word accumulator (and its BN_SIZE constant) computed a high
 * word that was never returned.
 *
 * Returns the converted value, or 0 when no digit was read.
 */
static long token_spec_number(char* string, int length, int b)
{
    unsigned long acc = 0;
    unsigned int shift;
    int i;

    switch ( b )
    {
    case 16: shift = 4;
        break;
    case 8:  shift = 3;
        break;
    default: shift = 1;
        break;
    }

    for ( i = 0; i < length; i++ )
    {
        int c = string[i];
        unsigned int digit;

        if ( c >= '0' && c <= '9' )
        {
            digit = (unsigned int)(c - '0');
        }
        else if ( c >= 'a' && c <= 'f' )
        {
            digit = (unsigned int)(c - 'a') + 10u;
        }
        else if ( c >= 'A' && c <= 'F' )
        {
            digit = (unsigned int)(c - 'A') + 10u;
        }
        else break;

        /* a digit must fit in the bits one position of this base provides */
        if ( digit >= (1u << shift) ) break;

        acc = ((acc << shift) | digit) & 0xFFFFFFFFUL;
    }

    return (long)acc;
}
599