1 /*
2 * Copyright (c) 2006-2018, RT-Thread Development Team
3 *
4 * SPDX-License-Identifier: Apache-2.0
5 *
6 * Change Logs:
7 * Date Author Notes
8 * 2010-03-22 Bernard first version
9 * 2013-04-03 Bernard strip more characters.
10 */
11 #include <finsh.h>
12 #include <stdlib.h>
13
14 #include "finsh_token.h"
15 #include "finsh_error.h"
16
/*
 * Character-class helpers used by the tokenizer.
 *
 * Every expansion is fully parenthesized so that the macros compose safely
 * with unary and binary operators at the call site (e.g. `!is_alpha(c)`).
 * The argument is still evaluated more than once, so do not pass an
 * expression with side effects.
 *
 * is_alpha / is_xdigit fold the letter to lower case with `| 0x20` and rely
 * on the comparison against an unsigned constant: a value below 'a' becomes
 * a huge unsigned number and fails the test.
 */
#define is_alpha(ch)     ((((ch) | 0x20) - 'a') < 26u)
#define is_digit(ch)     ((ch) >= '0' && (ch) <= '9')
#define is_xdigit(ch)    (((ch) >= '0' && (ch) <= '9') || ((((ch) | 0x20) - 'a') < 6u))
/* Anything that cannot be part of an identifier: not [A-Za-z0-9_]. */
#define is_separator(ch) (!(((ch) >= 'a' && (ch) <= 'z') \
    || ((ch) >= 'A' && (ch) <= 'Z') || ((ch) >= '0' && (ch) <= '9') || ((ch) == '_')))
/* Non-zero once the tokenizer has reached the end of the current line. */
#define is_eof(self)     ((self)->eof)
23
24 struct name_table
25 {
26 char* name;
27 enum finsh_token_type type;
28 };
29
/*
 * Keyword table.
 *
 * token_match_name() walks this table with strcmp(), so matching is exact
 * and case-sensitive; that is why both "NULL" and "null" are listed and map
 * to the same token type.
 */
static const struct name_table finsh_name_table[] =
{
    {"void", finsh_token_type_void},
    {"char", finsh_token_type_char},
    {"short", finsh_token_type_short},
    {"int", finsh_token_type_int},
    {"long", finsh_token_type_long},
    {"unsigned", finsh_token_type_unsigned},

    {"NULL", finsh_token_type_value_null},
    {"null", finsh_token_type_value_null}
};
43
/* Forward declarations of the file-local helpers defined further down. */

/* character-level cursor over the input line */
static char token_next_char(struct finsh_token* self);
static void token_prev_char(struct finsh_token* self);
/* converts the digits of a base 2/8/16 literal */
static long token_spec_number(char* string, int length, int b);
/* scans one token and stores its type in self->current_token */
static void token_run(struct finsh_token* self);
/* keyword lookup in finsh_name_table */
static int token_match_name(struct finsh_token* self, const char* str);
/* literal scanners: numbers, strings, characters and escape sequences */
static void token_proc_number(struct finsh_token* self);
static uint8_t* token_proc_string(struct finsh_token* self);
static void token_trim_space(struct finsh_token* self);
static char token_proc_char(struct finsh_token* self);
static int token_proc_escape(struct finsh_token* self);
54
finsh_token_init(struct finsh_token * self,uint8_t * line)55 void finsh_token_init(struct finsh_token* self, uint8_t* line)
56 {
57 memset(self, 0, sizeof(struct finsh_token));
58
59 self->line = line;
60 }
61
finsh_token_token(struct finsh_token * self)62 enum finsh_token_type finsh_token_token(struct finsh_token* self)
63 {
64 if ( self->replay ) self->replay = 0;
65 else token_run(self);
66
67 return (enum finsh_token_type)self->current_token;
68 }
69
finsh_token_get_token(struct finsh_token * self,uint8_t * token)70 void finsh_token_get_token(struct finsh_token* self, uint8_t* token)
71 {
72 strncpy((char*)token, (char*)self->string, FINSH_NAME_MAX);
73 }
74
token_get_string(struct finsh_token * self,uint8_t * str)75 int token_get_string(struct finsh_token* self, uint8_t* str)
76 {
77 unsigned char *p=str;
78 char ch;
79
80 ch = token_next_char(self);
81 if (is_eof(self)) return -1;
82
83 str[0] = '\0';
84
85 if ( is_digit(ch) )/*the first character of identifier is not a digit.*/
86 {
87 token_prev_char(self);
88 return -1;
89 }
90
91 while (!is_separator(ch) && !is_eof(self))
92 {
93 *p++ = ch;
94
95 ch = token_next_char(self);
96 }
97 self->eof = 0;
98
99 token_prev_char(self);
100 *p = '\0';
101
102 return 0;
103 }
104
105 /*
106 get next character.
107 */
token_next_char(struct finsh_token * self)108 static char token_next_char(struct finsh_token* self)
109 {
110 if (self->eof) return '\0';
111
112 if (self->position == (int)strlen((char*)self->line) || self->line[self->position] =='\n')
113 {
114 self->eof = 1;
115 self->position = 0;
116 return '\0';
117 }
118
119 return self->line[self->position++];
120 }
121
token_prev_char(struct finsh_token * self)122 static void token_prev_char(struct finsh_token* self)
123 {
124 if ( self->eof ) return;
125
126 if ( self->position == 0 ) return;
127 else self->position--;
128 }
129
token_run(struct finsh_token * self)130 static void token_run(struct finsh_token* self)
131 {
132 char ch;
133
134 token_trim_space(self); /* first trim space and tab. */
135 token_get_string(self, &(self->string[0]));
136
137 if ( is_eof(self) ) /*if it is eof, break;*/
138 {
139 self->current_token = finsh_token_type_eof;
140 return ;
141 }
142
143 if (self->string[0] != '\0') /*It is a key word or a identifier.*/
144 {
145 if ( !token_match_name(self, (char*)self->string) )
146 {
147 self->current_token = finsh_token_type_identifier;
148 }
149 }
150 else/*It is a operator character.*/
151 {
152 ch = token_next_char(self);
153
154 switch ( ch )
155 {
156 case '(':
157 self->current_token = finsh_token_type_left_paren;
158 break;
159
160 case ')':
161 self->current_token = finsh_token_type_right_paren;
162 break;
163
164 case ',':
165 self->current_token = finsh_token_type_comma;
166 break;
167
168 case ';':
169 self->current_token = finsh_token_type_semicolon;
170 break;
171
172 case '&':
173 self->current_token = finsh_token_type_and;
174 break;
175
176 case '*':
177 self->current_token = finsh_token_type_mul;
178 break;
179
180 case '+':
181 ch = token_next_char(self);
182
183 if ( ch == '+' )
184 {
185 self->current_token = finsh_token_type_inc;
186 }
187 else
188 {
189 token_prev_char(self);
190 self->current_token = finsh_token_type_add;
191 }
192 break;
193
194 case '-':
195 ch = token_next_char(self);
196
197 if ( ch == '-' )
198 {
199 self->current_token = finsh_token_type_dec;
200 }
201 else
202 {
203 token_prev_char(self);
204 self->current_token = finsh_token_type_sub;
205 }
206 break;
207
208 case '/':
209 ch = token_next_char(self);
210 if (ch == '/')
211 {
212 /* line comments, set to end of file */
213 self->current_token = finsh_token_type_eof;
214 }
215 else
216 {
217 token_prev_char(self);
218 self->current_token = finsh_token_type_div;
219 }
220 break;
221
222 case '<':
223 ch = token_next_char(self);
224
225 if ( ch == '<' )
226 {
227 self->current_token = finsh_token_type_shl;
228 }
229 else
230 {
231 token_prev_char(self);
232 self->current_token = finsh_token_type_bad;
233 }
234 break;
235
236 case '>':
237 ch = token_next_char(self);
238
239 if ( ch == '>' )
240 {
241 self->current_token = finsh_token_type_shr;
242 }
243 else
244 {
245 token_prev_char(self);
246 self->current_token = finsh_token_type_bad;
247 }
248 break;
249
250 case '|':
251 self->current_token = finsh_token_type_or;
252 break;
253
254 case '%':
255 self->current_token = finsh_token_type_mod;
256 break;
257
258 case '~':
259 self->current_token = finsh_token_type_bitwise;
260 break;
261
262 case '^':
263 self->current_token = finsh_token_type_xor;
264 break;
265
266 case '=':
267 self->current_token = finsh_token_type_assign;
268 break;
269
270 case '\'':
271 self->value.char_value = token_proc_char(self);
272 self->current_token = finsh_token_type_value_char;
273 break;
274
275 case '"':
276 token_proc_string(self);
277 self->current_token = finsh_token_type_value_string;
278 break;
279
280 default:
281 if ( is_digit(ch) )
282 {
283 token_prev_char(self);
284 token_proc_number(self);
285 break;
286 }
287
288 finsh_error_set(FINSH_ERROR_UNKNOWN_TOKEN);
289 self->current_token = finsh_token_type_bad;
290
291 break;
292 }
293 }
294 }
295
token_match_name(struct finsh_token * self,const char * str)296 static int token_match_name(struct finsh_token* self, const char* str)
297 {
298 int i;
299
300 for (i = 0; i < sizeof(finsh_name_table)/sizeof(struct name_table); i++)
301 {
302 if ( strcmp(finsh_name_table[i].name, str)==0 )
303 {
304 self->current_token = finsh_name_table[i].type;
305 return 1;
306 }
307 }
308
309 return 0;
310 }
311
/*
 * Skip blanks (space, tab, carriage return) at the current position, leaving
 * the cursor on the first character that is not a blank.
 */
static void token_trim_space(struct finsh_token* self)
{
    char c;

    do
    {
        c = token_next_char(self);
    } while (c == ' ' || c == '\t' || c == '\r');

    /* the loop read one character too many; hand it back */
    token_prev_char(self);
}
321
token_proc_char(struct finsh_token * self)322 static char token_proc_char(struct finsh_token* self)
323 {
324 char ch;
325 char buf[4], *p;
326
327 p = buf;
328 ch = token_next_char(self);
329
330 if ( ch == '\\' )
331 {
332 ch = token_next_char(self);
333 switch ( ch )
334 {
335 case 'n': ch = '\n'; break;
336 case 't': ch = '\t'; break;
337 case 'v': ch = '\v'; break;
338 case 'b': ch = '\b'; break;
339 case 'r': ch = '\r'; break;
340 case '\\': ch = '\\'; break;
341 case '\'': ch = '\''; break;
342 default :
343 while ( is_digit(ch) )/*for '\113' char*/
344 {
345 ch = token_next_char(self);
346 *p++ = ch;
347 }
348
349 token_prev_char(self);
350 *p = '\0';
351 ch = atoi(p);
352 break;
353 }
354 }
355
356 if ( token_next_char(self) != '\'' )
357 {
358 token_prev_char(self);
359 finsh_error_set(FINSH_ERROR_EXPECT_CHAR);
360 return ch;
361 }
362
363 return ch;
364 }
365
token_proc_string(struct finsh_token * self)366 static uint8_t* token_proc_string(struct finsh_token* self)
367 {
368 uint8_t* p;
369
370 for ( p = &self->string[0]; p - &(self->string[0]) < FINSH_STRING_MAX; )
371 {
372 char ch = token_next_char(self);
373
374 if ( is_eof(self) )
375 {
376 finsh_error_set(FINSH_ERROR_UNEXPECT_END);
377 return NULL;;
378 }
379 if ( ch == '\\' )
380 {
381 ch = token_proc_escape(self);
382 }
383 else if ( ch == '"' )/*end of string.*/
384 {
385 *p = '\0';
386 return self->string;
387 }
388
389 *p++ = ch;
390 }
391
392 return NULL;
393 }
394
/*
 * Decode one escape sequence inside a string literal; the backslash has
 * already been consumed by the caller.
 *
 * Supported: \n \t \v \b \r \f \a \" \' \\, \x or \X followed by hex digits,
 * and octal digits. Any other character yields 0 (it is consumed).
 *
 * \\ and \' were previously missing and therefore decoded to 0, which put a
 * NUL in the middle of the string. Numeric escapes are accumulated in an
 * unsigned variable so that a long digit run wraps around instead of
 * overflowing a signed int.
 */
static int token_proc_escape(struct finsh_token* self)
{
    char ch;
    int result = 0;
    unsigned int acc;

    ch = token_next_char(self);
    switch (ch)
    {
    case 'n':
        result = '\n';
        break;
    case 't':
        result = '\t';
        break;
    case 'v':
        result = '\v';
        break;
    case 'b':
        result = '\b';
        break;
    case 'r':
        result = '\r';
        break;
    case 'f':
        result = '\f';
        break;
    case 'a':
        result = '\007';
        break;
    case '"':
        result = '"';
        break;
    case '\'':
        result = '\'';
        break;
    case '\\':
        result = '\\';
        break;
    case 'x':
    case 'X':
        acc = 0;
        ch = token_next_char(self);
        while (is_xdigit(ch))
        {
            acc = acc * 16 + (unsigned int)((ch < 'A') ? (ch - '0') : (ch | 0x20) - 'a' + 10);
            ch = token_next_char(self);
        }
        /* hand back the character that ended the digit run */
        token_prev_char(self);
        result = (int)acc;
        break;
    default:
        /* the unsigned comparison is true only for '0'..'7' */
        if ( (ch - '0') < 8u)
        {
            acc = 0;
            while ( (ch - '0') < 8u )
            {
                acc = acc * 8 + (unsigned int)(ch - '0');
                ch = token_next_char(self);
            }

            token_prev_char(self);
            result = (int)acc;
        }
        break;
    }

    return result;
}
455
456 /*
457 (0|0x|0X|0b|0B)number+(l|L)
458 */
token_proc_number(struct finsh_token * self)459 static void token_proc_number(struct finsh_token* self)
460 {
461 char ch;
462 char *p, buf[128];
463 long value;
464
465 value = 0;
466 p = buf;
467
468 ch = token_next_char(self);
469 if ( ch == '0' )
470 {
471 int b;
472 ch = token_next_char(self);
473 if ( ch == 'x' || ch == 'X' )/*it's a hex number*/
474 {
475 b = 16;
476 ch = token_next_char(self);
477 while ( is_digit(ch) || is_alpha(ch) )
478 {
479 *p++ = ch;
480 ch = token_next_char(self);
481 }
482
483 *p = '\0';
484 }
485 else if ( ch == 'b' || ch == 'B' )
486 {
487 b = 2;
488 ch = token_next_char(self);
489 while ( (ch=='0')||(ch=='1') )
490 {
491 *p++ = ch;
492 ch = token_next_char(self);
493 }
494
495 *p = '\0';
496 }
497 else if ( '0' <= ch && ch <= '7' )
498 {
499 b = 8;
500 while ( '0' <= ch && ch <= '7' )
501 {
502 *p++ = ch;
503 ch = token_next_char(self);
504 }
505
506 *p = '\0';
507 }
508 else
509 {
510 token_prev_char(self);
511
512 /* made as 0 value */
513 self->value.int_value = 0;
514 self->current_token = finsh_token_type_value_int;
515 return;
516 }
517
518 self->value.int_value = token_spec_number(buf, strlen(buf), b);
519 self->current_token = finsh_token_type_value_int;
520 }
521 else
522 {
523 while ( is_digit(ch) )
524 {
525 value = value*10 + ( ch - '0' );
526 ch = token_next_char(self);
527 }
528
529 self->value.int_value = value;
530 self->current_token = finsh_token_type_value_int;
531 }
532
533 switch ( ch )
534 {
535 case 'l':
536 case 'L':
537 self->current_token = finsh_token_type_value_long;
538 break;
539
540 default:
541 token_prev_char(self);
542 break;
543 }
544 }
545
/* The accumulator is BN_SIZE words of 32 bits each, i.e. 64 bits in total. */
#define BN_SIZE 2

/*
 * Convert the first `length` characters of `string`, the digits of a base
 * `b` literal (16, 8 or 2), to a number.
 *
 * Each digit is shifted into a multi-word accumulator from the low end, with
 * the bits that fall out of one word carried into the next. Only the lowest
 * word is returned, so the result is the literal's value modulo 2^32.
 * Characters are not validated: anything outside a-f / A-F is treated as
 * `c - '0'`. A base other than 16 or 8 is handled like base 2.
 */
static long token_spec_number(char* string, int length, int b)
{
    unsigned int words[BN_SIZE];
    int bits;
    int idx;
    int k;

    /* number of bits contributed by one digit */
    if (b == 16)
    {
        bits = 4;
    }
    else if (b == 8)
    {
        bits = 3;
    }
    else
    {
        bits = 1;
    }

    for (k = 0; k < BN_SIZE; k++)
    {
        words[k] = 0;
    }

    for (idx = 0; idx < length; idx++)
    {
        /* starts as the digit value, then becomes the carry between words */
        int carry = string[idx];

        if (carry >= 'a' && carry <= 'f')
        {
            carry = carry - 'a' + 10;
        }
        else if (carry >= 'A' && carry <= 'F')
        {
            carry = carry - 'A' + 10;
        }
        else
        {
            carry = carry - '0';
        }

        for (k = 0; k < BN_SIZE; k++)
        {
            unsigned int old = words[k];

            words[k] = (old << bits) | carry;
            carry = old >> (32 - bits);
        }
    }

    return (long)words[0];
}
599