1*cc02d7e2SAndroid Build Coastguard Worker #include <stdio.h>
2*cc02d7e2SAndroid Build Coastguard Worker
/* http://bjoern.hoehrmann.de/utf-8/decoder/dfa */
/* Optimized version based on Rich Felker's variant. */
/* DFA state values; each state is the offset of its 12-entry row in the
 * transition table. */
#define UTF8_ACCEPT 0	/* start state, and the state after each complete sequence */
#define UTF8_REJECT 12	/* error state; every transition out of it leads back to it */
7*cc02d7e2SAndroid Build Coastguard Worker
/* Byte -> character class map. The explicit dimension guarantees that any
 * 8-bit byte value is a valid index. */
static const unsigned char utf8d[256] = {
	/* The first part of the table maps bytes to character classes in order
	 * to reduce the size of the transition table and create bitmasks. */
	/* 0x00..0x7F: class 0 (ASCII) */
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	/* 0x80..0x8F: class 1, 0x90..0x9F: class 9 */
	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
	/* 0xA0..0xBF: class 7 */
	7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
	/* 0xC0..0xC1: class 8, 0xC2..0xDF: class 2 */
	8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
	/* 0xE0: class 10, 0xED: class 4, other 0xE1..0xEF: class 3;
	 * 0xF0: class 11, 0xF1..0xF3: class 6, 0xF4: class 5, 0xF5..0xFF: class 8 */
	10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8
};
/* Note: Splitting the table improves performance on ARM due to its simpler
 * addressing modes not being able to encode x[y + 256]. */
static const unsigned char utf8s[108] = {
	/* The second part is a transition table that maps a combination
	 * of a state of the automaton and a character class to a state.
	 * It is indexed as utf8s[state + class]: one row of 12 classes per
	 * state, and each state value is the offset of its own row. */
	/* class:     0  1  2  3  4  5  6  7  8  9 10 11 */
	/* state  0 */  0,12,24,36,60,96,84,12,12,12,48,72,
	/* state 12 */ 12,12,12,12,12,12,12,12,12,12,12,12,
	/* state 24 */ 12, 0,12,12,12,12,12, 0,12, 0,12,12,
	/* state 36 */ 12,24,12,12,12,12,12,24,12,24,12,12,
	/* state 48 */ 12,12,12,12,12,12,12,24,12,12,12,12,
	/* state 60 */ 12,24,12,12,12,12,12,12,12,24,12,12,
	/* state 72 */ 12,12,12,12,12,12,12,36,12,36,12,12,
	/* state 84 */ 12,36,12,12,12,12,12,36,12,36,12,12,
	/* state 96 */ 12,36,12,12,12,12,12,12,12,12,12,12
};
31*cc02d7e2SAndroid Build Coastguard Worker
32*cc02d7e2SAndroid Build Coastguard Worker /* Return 0 on success, -1 on error */
utf8_lookup(const unsigned char * data,int len)33*cc02d7e2SAndroid Build Coastguard Worker int utf8_lookup(const unsigned char *data, int len)
34*cc02d7e2SAndroid Build Coastguard Worker {
35*cc02d7e2SAndroid Build Coastguard Worker int state = 0;
36*cc02d7e2SAndroid Build Coastguard Worker
37*cc02d7e2SAndroid Build Coastguard Worker while (len-- && state != UTF8_REJECT)
38*cc02d7e2SAndroid Build Coastguard Worker state = utf8s[state + utf8d[*data++]];
39*cc02d7e2SAndroid Build Coastguard Worker
40*cc02d7e2SAndroid Build Coastguard Worker return state == UTF8_ACCEPT ? 0 : -1;
41*cc02d7e2SAndroid Build Coastguard Worker }
42