components/finsh/finsh_token.c

/*
 * Copyright (c) 2006-2018, RT-Thread Development Team
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Change Logs:
 * Date           Author       Notes
 * 2010-03-22     Bernard      first version
 * 2013-04-03     Bernard      strip more characters.
 */
#include <finsh.h>
#include <stdlib.h>

#include "finsh_token.h"
#include "finsh_error.h"

/*
 * ASCII character classification helpers.
 *
 * Every expansion is wrapped in parentheses so the macros compose safely with
 * unary and binary operators (e.g. !is_alpha(c)), and every use of the
 * argument is parenthesized.  Arguments may be evaluated more than once, so
 * do not pass expressions with side effects.
 *
 * is_alpha / is_xdigit fold the case bit (0x20) and rely on unsigned
 * wrap-around: anything below 'a' becomes a huge unsigned value and fails the
 * range test.
 */
#define is_alpha(ch)    ((unsigned)(((ch) | 0x20) - 'a') < 26u)
#define is_digit(ch)    ((ch) >= '0' && (ch) <= '9')
#define is_xdigit(ch)   (((ch) >= '0' && (ch) <= '9') || ((unsigned)(((ch) | 0x20) - 'a') < 6u))
/* a separator is any character that cannot be part of an identifier */
#define is_separator(ch) (!(((ch) >= 'a' && (ch) <= 'z') \
     || ((ch) >= 'A' && (ch) <= 'Z') || ((ch) >= '0' && (ch) <= '9') || ((ch) == '_')))
/* non-zero once the tokenizer has reached the end of its input line */
#define is_eof(self) ((self)->eof)

/* One reserved word: its spelling and the token type reported for it. */
struct name_table
{
    const char* name;               /* keyword text; always a string literal, never written */
    enum finsh_token_type type;     /* token type assigned when the text matches */
};

/*
 * Reserved words recognised by the tokenizer.
 *
 * token_match_name() compares a scanned word against each entry with
 * strcmp(), so matching is case-sensitive; that is why both "NULL" and
 * "null" are listed for the null value.
 */
static const struct name_table finsh_name_table[] =
{
    /* type keywords */
    {"void",        finsh_token_type_void},
    {"char",        finsh_token_type_char},
    {"short",       finsh_token_type_short},
    {"int",         finsh_token_type_int},
    {"long",        finsh_token_type_long},
    {"unsigned",    finsh_token_type_unsigned},

    /* null literal, in both spellings */
    {"NULL",        finsh_token_type_value_null},
    {"null",        finsh_token_type_value_null}
};

/* Forward declarations of the file-local helpers defined further down. */
static char token_next_char(struct finsh_token* self);      /* read one character */
static void token_prev_char(struct finsh_token* self);      /* un-read one character */
static long token_spec_number(char* string, int length, int b); /* convert base 2/8/16 digits */
static void token_run(struct finsh_token* self);            /* scan the next token */
static int  token_match_name(struct finsh_token* self, const char* str); /* keyword lookup */
static void token_proc_number(struct finsh_token* self);    /* scan a numeric literal */
static uint8_t* token_proc_string(struct finsh_token* self);/* scan a "..." literal */
static void token_trim_space(struct finsh_token* self);     /* skip blanks */
static char token_proc_char(struct finsh_token* self);      /* scan a '.' literal */
static int token_proc_escape(struct finsh_token* self);     /* decode a string escape */

/*
 * Reset a tokenizer and attach it to a new input line.
 *
 * self - tokenizer state; every byte of it is zeroed first
 * line - text to scan; only the pointer is stored, the text is not copied
 */
void finsh_token_init(struct finsh_token* self, uint8_t* line)
{
    /* zeroing clears the read position and the eof / replay flags */
    memset(self, 0, sizeof(*self));
    self->line = line;
}

/*
 * Return the type of the next token.
 *
 * When the replay flag is set, the previously scanned token is handed out
 * again and the flag is cleared; otherwise a new token is scanned.
 */
enum finsh_token_type finsh_token_token(struct finsh_token* self)
{
    if (!self->replay)
    {
        token_run(self);
    }
    else
    {
        /* one-shot: the same token is returned once more */
        self->replay = 0;
    }

    return (enum finsh_token_type)self->current_token;
}

/*
 * Copy the text of the current token into a caller-supplied buffer.
 *
 * At most FINSH_NAME_MAX bytes are written.
 * NOTE(review): strncpy() adds no terminator when the token text is
 * FINSH_NAME_MAX characters or longer; confirm that callers pass a buffer
 * that is zero-filled or terminate it themselves.
 */
void finsh_token_get_token(struct finsh_token* self, uint8_t* token)
{
    const char* text = (const char*)self->string;
    char* out = (char*)token;

    strncpy(out, text, FINSH_NAME_MAX);
}

/*
 * Read an identifier-like word (letters, digits, '_') from the input.
 *
 * self - tokenizer state
 * str  - receives the NUL-terminated word.  At most FINSH_STRING_MAX - 1
 *        characters are stored (the same capacity token_proc_string() assumes
 *        for self->string); longer words are consumed but truncated.
 *        NOTE(review): callers passing a smaller buffer must be checked.
 *
 * Returns 0 when scanning ran (str may be empty if the next character is a
 * separator, which is then left unread), or -1 when the input is exhausted
 * or the word would start with a digit (the digit is left unread).
 */
int token_get_string(struct finsh_token* self, uint8_t* str)
{
    uint8_t* p = str;
    int last_pos;
    char ch;

    ch = token_next_char(self);
    if (is_eof(self)) return -1;

    str[0] = '\0';

    if ( is_digit(ch) ) /* an identifier never starts with a digit */
    {
        token_prev_char(self);
        return -1;
    }

    last_pos = self->position;
    while (!is_separator(ch) && !is_eof(self))
    {
        /* keep one byte for the terminator; drop whatever does not fit */
        if (p - str < FINSH_STRING_MAX - 1) *p++ = (uint8_t)ch;

        /* remember where the next read starts in case it hits the end */
        last_pos = self->position;
        ch = token_next_char(self);
    }
    *p = '\0';

    if (is_eof(self))
    {
        /*
         * The word ran up to the end of the input.  Report the word now and
         * let the following scan report the end: token_next_char() has reset
         * the position to 0, so put it back where the end was detected
         * instead of letting the line be scanned again from the start.
         */
        self->eof = 0;
        self->position = last_pos;
    }
    else
    {
        /* give the terminating separator back to the stream */
        token_prev_char(self);
    }

    return 0;
}

/*
 * Fetch the next character of the input line.
 *
 * The end of the input is a NUL terminator or a '\n'.  When it is reached the
 * eof flag is set, the read position is reset to 0 and '\0' is returned; all
 * later calls keep returning '\0' until the flag is cleared.
 *
 * The terminator is detected by looking at the current byte rather than
 * calling strlen() for every character, which keeps a full scan linear.
 */
static char token_next_char(struct finsh_token* self)
{
    char ch;

    if (self->eof) return '\0';

    ch = (char)self->line[self->position];
    if (ch == '\0' || ch == '\n')
    {
        self->eof = 1;
        self->position = 0;
        return '\0';
    }

    self->position++;
    return ch;
}

/*
 * Step the read position back by one character.
 *
 * Does nothing once the end of input has been flagged, or when the position
 * is already at the start of the line.
 */
static void token_prev_char(struct finsh_token* self)
{
    if (!self->eof && self->position != 0)
    {
        self->position--;
    }
}

static void token_run(struct finsh_token* self)
{
    char ch;

    token_trim_space(self); /* first trim space and tab. */
    token_get_string(self, &(self->string[0]));

    if ( is_eof(self) ) /*if it is eof, break;*/
    {
        self->current_token = finsh_token_type_eof;
        return ;
    }

    if (self->string[0] != '\0') /*It is a key word or a identifier.*/
    {
        if ( !token_match_name(self, (char*)self->string) )
        {
            self->current_token = finsh_token_type_identifier;
        }
    }
    else/*It is a operator character.*/
    {
        ch = token_next_char(self);

        switch ( ch )
        {
        case '(':
            self->current_token = finsh_token_type_left_paren;
            break;

        case ')':
            self->current_token = finsh_token_type_right_paren;
            break;

        case ',':
            self->current_token = finsh_token_type_comma;
            break;

        case ';':
            self->current_token = finsh_token_type_semicolon;
            break;

        case '&':
            self->current_token = finsh_token_type_and;
            break;

        case '*':
            self->current_token = finsh_token_type_mul;
            break;

        case '+':
            ch = token_next_char(self);

            if ( ch == '+' )
            {
                self->current_token = finsh_token_type_inc;
            }
            else
            {
                token_prev_char(self);
                self->current_token = finsh_token_type_add;
            }
            break;

        case '-':
            ch = token_next_char(self);

            if ( ch == '-' )
            {
                self->current_token = finsh_token_type_dec;
            }
            else
            {
                token_prev_char(self);
                self->current_token = finsh_token_type_sub;
            }
            break;

        case '/':
            ch = token_next_char(self);
            if (ch == '/')
            {
                /* line comments, set to end of file */
                self->current_token = finsh_token_type_eof;
            }
            else
            {
                token_prev_char(self);
                self->current_token = finsh_token_type_div;
            }
            break;

        case '<':
            ch = token_next_char(self);

            if ( ch == '<' )
            {
                self->current_token = finsh_token_type_shl;
            }
            else
            {
                token_prev_char(self);
                self->current_token = finsh_token_type_bad;
            }
            break;

        case '>':
            ch = token_next_char(self);

            if ( ch == '>' )
            {
                self->current_token = finsh_token_type_shr;
            }
            else
            {
                token_prev_char(self);
                self->current_token = finsh_token_type_bad;
            }
            break;

        case '|':
            self->current_token = finsh_token_type_or;
            break;

        case '%':
            self->current_token = finsh_token_type_mod;
            break;

        case '~':
            self->current_token = finsh_token_type_bitwise;
            break;

        case '^':
            self->current_token = finsh_token_type_xor;
            break;

        case '=':
            self->current_token = finsh_token_type_assign;
            break;

        case '\'':
            self->value.char_value = token_proc_char(self);
            self->current_token = finsh_token_type_value_char;
            break;

        case '"':
            token_proc_string(self);
            self->current_token = finsh_token_type_value_string;
            break;

        default:
            if ( is_digit(ch) )
            {
                token_prev_char(self);
                token_proc_number(self);
                break;
            }

            finsh_error_set(FINSH_ERROR_UNKNOWN_TOKEN);
            self->current_token = finsh_token_type_bad;

            break;
        }
    }
}

static int token_match_name(struct finsh_token* self, const char* str)
{
    int i;

    for (i = 0; i < sizeof(finsh_name_table)/sizeof(struct name_table); i++)
    {
        if ( strcmp(finsh_name_table[i].name, str)==0 )
        {
            self->current_token = finsh_name_table[i].type;
            return 1;
        }
    }

    return 0;
}

/*
 * Skip spaces, tabs and carriage returns, leaving the read position on the
 * first character that is none of those.
 */
static void token_trim_space(struct finsh_token* self)
{
    char ch;

    do
    {
        ch = token_next_char(self);
    } while ( ch == ' ' || ch == '\t' || ch == '\r' );

    /* the loop consumed one character too many; give it back */
    token_prev_char(self);
}

/*
 * Scan the body of a character literal; the opening quote has already been
 * consumed by the caller.
 *
 * Supports a plain character, the escapes \n \t \v \b \r \\ \' and a numeric
 * escape of up to three digits.  The digits are read as a decimal number,
 * which is what the original atoi()-based code asked for.
 * NOTE(review): token_proc_escape() reads the same \ddd syntax in string
 * literals as octal; confirm which interpretation is wanted here.
 *
 * Any other escape yields 0 and leaves the offending character unread, so the
 * closing-quote check below reports it.
 *
 * Returns the character value.  If the closing quote is missing,
 * FINSH_ERROR_EXPECT_CHAR is set and the unexpected character is left unread.
 */
static char token_proc_char(struct finsh_token* self)
{
    char ch;

    ch = token_next_char(self);

    if ( ch == '\\' )
    {
        ch = token_next_char(self);
        switch ( ch )
        {
        case 'n': ch = '\n'; break;
        case 't': ch = '\t'; break;
        case 'v': ch = '\v'; break;
        case 'b': ch = '\b'; break;
        case 'r': ch = '\r'; break;
        case '\\': ch = '\\';  break;
        case '\'': ch = '\'';  break;
        default :
        {
            int value = 0;
            int digits = 0;

            /*
             * Accumulate the digits directly: every digit counts (including
             * the first one) and no scratch buffer can overflow.
             */
            while ( is_digit(ch) && digits < 3 )
            {
                value = value * 10 + (ch - '0');
                digits++;
                ch = token_next_char(self);
            }

            /* give back the first character that is not part of the number */
            token_prev_char(self);
            ch = (char)value;
            break;
        }
        }
    }

    if ( token_next_char(self) != '\'' )
    {
        token_prev_char(self);
        finsh_error_set(FINSH_ERROR_EXPECT_CHAR);
    }

    return ch;
}

/*
 * Scan the body of a string literal into self->string; the opening quote has
 * already been consumed by the caller.
 *
 * Returns self->string when the closing quote is found.  Returns NULL when
 * the input ends first (FINSH_ERROR_UNEXPECT_END is set) or when the literal
 * does not fit in FINSH_STRING_MAX bytes including its terminator.
 *
 * self->string is NUL-terminated on every path, so it is always safe to read
 * even when NULL is returned.
 */
static uint8_t* token_proc_string(struct finsh_token* self)
{
    uint8_t* p = &self->string[0];

    while ( p - &(self->string[0]) < FINSH_STRING_MAX )
    {
        char ch = token_next_char(self);

        if ( is_eof(self) )
        {
            /* unterminated literal: keep what was read as a valid string */
            *p = '\0';
            finsh_error_set(FINSH_ERROR_UNEXPECT_END);
            return NULL;
        }
        if ( ch == '\\' )
        {
            /* an escaped quote is stored, it does not end the literal */
            ch = token_proc_escape(self);
        }
        else if ( ch == '"' )/*end of string.*/
        {
            *p = '\0';
            return self->string;
        }

        *p++ = ch;
    }

    /* buffer filled before the closing quote: truncate to a valid string */
    self->string[FINSH_STRING_MAX - 1] = '\0';
    return NULL;
}

/*
 * Decode one escape sequence inside a string literal; the backslash has
 * already been consumed by the caller.
 *
 * Recognised forms:
 *   \n \t \v \b \r \f \a     control characters
 *   \" \' \\                 the quoted character itself
 *   \xH.. / \XH..            hexadecimal value, any number of hex digits
 *   \O..                     octal value, any number of octal digits
 * Anything else yields 0.
 *
 * Returns the decoded value; for numeric forms the first character after the
 * digits is left unread.
 */
static int token_proc_escape(struct finsh_token* self)
{
    char ch;
    int result=0;

    ch = token_next_char(self);
    switch (ch)
    {
    case 'n':
        result = '\n';
        break;
    case 't':
        result = '\t';
        break;
    case 'v':
        result = '\v';
        break;
    case 'b':
        result = '\b';
        break;
    case 'r':
        result = '\r';
        break;
    case 'f':
        result = '\f';
        break;
    case 'a':
        result = '\007';
        break;
    case '"':
        result = '"';
        break;
    case '\'':
        result = '\'';
        break;
    case '\\':
        /* without this case "\\" decoded to NUL and cut the string short */
        result = '\\';
        break;
    case 'x':
    case 'X':
        result = 0;
        ch  = token_next_char(self);
        while (is_xdigit(ch))
        {
            /* '0'..'9' sort below 'A'; letters are case-folded with 0x20 */
            result = result * 16 + ((ch < 'A') ? (ch - '0') : (ch | 0x20) - 'a' + 10);
            ch = token_next_char(self);
        }
        token_prev_char(self);
        break;
    default:
        /* unsigned compare: true only for '0'..'7' */
        if ( (ch - '0') < 8u)
        {
            result = 0;
            while ( (ch - '0') < 8u )
            {
                result = result*8 + ch - '0';
                ch = token_next_char(self);
            }

            token_prev_char(self);
        }
        break;
    }

    return result;
}

/*
 * Scan an integer literal:  (0x|0X)hex | (0b|0B)binary | 0octal | decimal,
 * optionally followed by 'l' or 'L'.
 *
 * The value is stored in self->value.int_value.  The token type is
 * finsh_token_type_value_int, or finsh_token_type_value_long when the suffix
 * is present.  The first character that does not belong to the literal is
 * left unread.
 *
 * Only real hex digits are accepted after "0x", so the 'L' suffix is no
 * longer swallowed as a digit.  Digits beyond the scratch buffer's capacity
 * are consumed but ignored instead of being written past its end.
 */
static void token_proc_number(struct finsh_token* self)
{
    char ch;
    char buf[128];
    char* p = buf;
    char* const limit = buf + sizeof(buf) - 1;  /* last slot is for the NUL */

    ch  = token_next_char(self);
    if ( ch == '0' )
    {
        int b = 0;  /* numeric base; 0 means a plain "0" */

        ch = token_next_char(self);
        if ( ch == 'x' || ch == 'X' )/*it's a hex number*/
        {
            b = 16;
            ch = token_next_char(self);
            while ( is_xdigit(ch) )
            {
                if ( p < limit ) *p++ = ch;
                ch = token_next_char(self);
            }
        }
        else if ( ch == 'b' || ch == 'B' )
        {
            b = 2;
            ch = token_next_char(self);
            while ( (ch=='0')||(ch=='1') )
            {
                if ( p < limit ) *p++ = ch;
                ch = token_next_char(self);
            }
        }
        else if ( '0' <= ch && ch <= '7' )
        {
            b = 8;
            while ( '0' <= ch && ch <= '7' )
            {
                if ( p < limit ) *p++ = ch;
                ch = token_next_char(self);
            }
        }

        *p = '\0';

        if ( b != 0 )
        {
            self->value.int_value = token_spec_number(buf, (int)(p - buf), b);
        }
        else
        {
            /* a lone zero; ch is examined by the suffix check below */
            self->value.int_value = 0;
        }
    }
    else
    {
        /* unsigned accumulation: an over-long literal wraps instead of
           overflowing a signed long */
        unsigned long value = 0;

        while ( is_digit(ch) )
        {
            value = value*10 + (unsigned long)( ch - '0' );
            ch = token_next_char(self);
        }

        self->value.int_value = (long)value;
    }

    self->current_token = finsh_token_type_value_int;

    /* ch is the first character after the digits */
    switch ( ch )
    {
    case 'l':
    case 'L':
        self->current_token = finsh_token_type_value_long;
        break;

    default:
        token_prev_char(self);
        break;
    }
}

/* the accumulator is made of BN_SIZE limbs of 32 bits each (64 bits total) */
#define BN_SIZE 2

/*
 * Convert a run of digits in base 16, 8 or 2 to a number.
 *
 * string - digit characters; 'a'-'f' and 'A'-'F' count as 10..15, every other
 *          character is taken as (character - '0') without validation
 * length - number of characters to convert
 * b      - base: 16 uses 4 bits per digit, 8 uses 3, anything else uses 1
 *
 * The digits are shifted into a multi-limb accumulator, carrying the bits
 * that fall out of one limb into the next.  Only the lowest limb is returned,
 * so the result holds the low 32 bits of the value.
 */
static long token_spec_number(char* string, int length, int b)
{
    unsigned int limbs[BN_SIZE];
    int bits;
    int idx, k;

    if ( b == 16 )     bits = 4;
    else if ( b == 8 ) bits = 3;
    else               bits = 1;

    for ( k = 0; k < BN_SIZE; k++ ) limbs[k] = 0;

    for ( idx = 0; idx < length; idx++ )
    {
        /* starts as the digit value, then becomes the carry between limbs */
        int carry = string[idx];

        if ( carry >= 'a' && carry <= 'f' )
        {
            carry = carry - 'a' + 10;
        }
        else if ( carry >= 'A' && carry <= 'F' )
        {
            carry = carry - 'A' + 10;
        }
        else
        {
            carry = carry - '0';
        }

        for ( k = 0; k < BN_SIZE; k++ )
        {
            unsigned int old = limbs[k];

            limbs[k] = (old << bits) | carry;
            carry = old >> (32 - bits);     /* bits pushed out of this limb */
        }
    }

    return (long)limbs[0];
}