/* xref: /aosp_15_r20/external/grpc-grpc/third_party/utf8_range/lookup.c (revision cc02d7e222339f7a4f6ba5f422e6413f4bd931f2) */
#include <stdio.h>

/* http://bjoern.hoehrmann.de/utf-8/decoder/dfa */
/* Optimized version based on Rich Felker's variant. */

/* DFA states are stored pre-multiplied by the number of character classes
 * (12), so a state value is directly the offset of its row in utf8s[]. */
#define UTF8_ACCEPT	0	/* a complete, well-formed sequence has just ended */
#define UTF8_REJECT	12	/* malformed input seen; this state is never left  */

/* Maps each of the 256 possible byte values to one of 12 character classes
 * (0..11). Working on classes instead of raw bytes keeps the transition
 * table below small. */
static const unsigned char utf8d[] = {
    /* 0x00..0x7F: ASCII, class 0 */
     0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
     0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
     0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
     0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0x80..0xBF: continuation bytes, classes 1, 9 and 7 */
     1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,  9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
     7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
    /* 0xC0..0xDF: 0xC0/0xC1 are class 8 (always rejected), the rest class 2 */
     8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
    /* 0xE0..0xFF: three- and four-byte lead bytes; 0xF5..0xFF are class 8 */
    10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8
};

/* Note: Splitting the table improves performance on ARM due to its simpler
 * addressing modes not being able to encode x[y + 256]. */

/* Transition table: utf8s[state + class] is the next state. Each state owns
 * a row of 12 entries (one per character class); there are 9 rows, for
 * states 0, 12, 24, ..., 96. Every entry of row 12 (UTF8_REJECT) is 12. */
static const unsigned char utf8s[] = {
     0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12,
    12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12,
    12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12,
    12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12,
    12,36,12,12,12,12,12,12,12,12,12,12
};

/*
 * Check whether the first len bytes at data form well-formed UTF-8.
 *
 * data: bytes to validate; only dereferenced when len > 0. Embedded NUL
 *       bytes are ordinary class-0 input and do not end the scan.
 * len:  number of bytes to examine. A negative value is rejected.
 *
 * Returns 0 if every byte was consumed and the automaton finished in
 * UTF8_ACCEPT (an empty buffer therefore succeeds). Returns -1 if the
 * automaton reached UTF8_REJECT, if the buffer ends in the middle of a
 * multi-byte sequence, or if len is negative.
 */
int utf8_lookup(const unsigned char *data, int len)
{
    int state = UTF8_ACCEPT;

    /* A negative length cannot describe a buffer. Counting it "down" to
     * zero would walk far past the end of data, so refuse it up front. */
    if (len < 0) {
        return -1;
    }

    /* Stop early on UTF8_REJECT: once entered it is never left. */
    while (len-- > 0 && state != UTF8_REJECT) {
        state = utf8s[state + utf8d[*data++]];
    }

    /* Any state other than ACCEPT means rejected or truncated input. */
    return state == UTF8_ACCEPT ? 0 : -1;
}