Lines Matching +full:0 +full:xd800

50 int verbose = 0;
63 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
98 return 0; in age_valid()
100 return 0; in age_valid()
102 return 0; in age_valid()
119 * if offlen == 0 (non-branching node)
124 * if offlen != 0 (branching node)
133 #define BITNUM 0x07
134 #define NEXTBYTE 0x08
135 #define OFFLEN 0x30
137 #define RIGHTPATH 0x40
138 #define TRIENODE 0x80
139 #define RIGHTNODE 0x40
140 #define LEFTNODE 0x80
148 * leaf[0]: The unicode version, stored as a generation number that is
151 * defined. The CCC of a non-defined code point is 0.
155 * a CCC of 0, or at the begin or end of a string.
157 * between 0 and 254 inclusive, which leaves 255 available as
159 * Code points with CCC 0 are known as stoppers.
167 * These do affect normalization, as they all have CCC 0.
175 #define LEAF_GEN(LEAF) ((LEAF)[0])
181 #define MINCCC (0)
183 #define STOPPER (0)
209 * 0x00000000 0x0000007F: 0xxxxxxx
210 * 0x00000000 0x000007FF: 110xxxxx 10xxxxxx
211 * 0x00000000 0x0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
212 * 0x00000000 0x001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
213 * 0x00000000 0x03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
214 * 0x00000000 0x7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
221 * 0x00000000 0x0000007F: 0xxxxxxx
222 * 0x00000080 0x000007FF: 110xxxxx 10xxxxxx
223 * 0x00000800 0x0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
224 * 0x00010000 0x001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
225 * 0x00200000 0x03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
226 * 0x04000000 0x7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
228 * Actual unicode characters are limited to the range 0x0 - 0x10FFFF,
232 * 0 - 0x7f: 0 0x7f
233 * 0x80 - 0x7ff: 0xc2 0x80 0xdf 0xbf
234 * 0x800 - 0xffff: 0xe0 0xa0 0x80 0xef 0xbf 0xbf
235 * 0x10000 - 0x10ffff: 0xf0 0x90 0x80 0x80 0xf4 0x8f 0xbf 0xbf
238 * 0xd800 - 0xdfff should never be seen.
251 #define UTF8_2_BITS 0xC0
252 #define UTF8_3_BITS 0xE0
253 #define UTF8_4_BITS 0xF0
254 #define UTF8_N_BITS 0x80
255 #define UTF8_2_MASK 0xE0
256 #define UTF8_3_MASK 0xF0
257 #define UTF8_4_MASK 0xF8
258 #define UTF8_N_MASK 0xC0
259 #define UTF8_V_MASK 0x3F
266 if (val < 0x80) { in utf8encode()
267 str[0] = val; in utf8encode()
269 } else if (val < 0x800) { in utf8encode()
273 str[0] = val; in utf8encode()
274 str[0] |= UTF8_2_BITS; in utf8encode()
276 } else if (val < 0x10000) { in utf8encode()
283 str[0] = val; in utf8encode()
284 str[0] |= UTF8_3_BITS; in utf8encode()
286 } else if (val < 0x110000) { in utf8encode()
296 str[0] = val; in utf8encode()
297 str[0] |= UTF8_4_BITS; in utf8encode()
301 len = 0; in utf8encode()
309 unsigned int unichar = 0; in utf8decode()
311 if (*s < 0x80) { in utf8decode()
314 unichar = *s++ & 0x1F; in utf8decode()
316 unichar |= *s & 0x3F; in utf8decode()
318 unichar = *s++ & 0x0F; in utf8decode()
320 unichar |= *s++ & 0x3F; in utf8decode()
322 unichar |= *s & 0x3F; in utf8decode()
324 unichar = *s++ & 0x0F; in utf8decode()
326 unichar |= *s++ & 0x3F; in utf8decode()
328 unichar |= *s++ & 0x3F; in utf8decode()
330 unichar |= *s & 0x3F; in utf8decode()
337 return unichar < 0x110000; in utf32valid()
340 #define HANGUL_SYLLABLE(U) ((U) >= 0xAC00 && (U) <= 0xD7A3)
343 #define LEAF 0
357 int leafindex[0x110000];
426 nodes = singletons = leaves = 0; in tree_walk()
436 leftmask = rightmask = 0; in tree_walk()
450 if ((leftmask & bitmask) == 0) { in tree_walk()
464 if ((rightmask & bitmask) == 0) { in tree_walk()
502 node->keybits = 0; in alloc_node()
503 node->keymask = 0; in alloc_node()
504 node->mark = 0; in alloc_node()
505 node->index = 0; in alloc_node()
511 if ((bitnum & 7) == 0) { in alloc_node()
516 node->nextbyte = 0; in alloc_node()
520 node->nextbyte = 0; in alloc_node()
588 parent->keymask = 0; in insert()
589 parent->keybits = 0; in insert()
597 parent->keymask = 0; in insert()
598 parent->keybits = 0; in insert()
605 assert(0); in insert()
617 if (node->keymask == 0) { in insert()
618 parent->keymask = 0; in insert()
619 parent->keybits = 0; in insert()
621 parent->keymask = 0; in insert()
622 parent->keybits = 0; in insert()
624 assert((parent->keymask & node->keymask) == 0); in insert()
634 return 0; in insert()
667 if (verbose > 0) in prune()
670 count = 0; in prune()
676 leftmask = rightmask = 0; in prune()
691 if (left->keymask == 0) in prune()
693 if (right->keymask == 0) in prune()
711 assert(0); in prune()
725 assert(0); in prune()
741 assert(0); in prune()
791 if ((leftmask & bitmask) == 0 && in prune()
796 } else if ((rightmask & bitmask) == 0 && in prune()
807 if (verbose > 0) in prune()
824 marked = 0; in mark_nodes()
825 if (verbose > 0) in mark_nodes()
832 leftmask = rightmask = 0; in mark_nodes()
835 if ((leftmask & bitmask) == 0) { in mark_nodes()
853 if ((rightmask & bitmask) == 0) { in mark_nodes()
880 leftmask = rightmask = 0; in mark_nodes()
883 if ((leftmask & bitmask) == 0) { in mark_nodes()
905 if ((rightmask & bitmask) == 0) { in mark_nodes()
933 if (verbose > 0) in mark_nodes()
956 count = 0; in index_nodes()
958 if (verbose > 0) in index_nodes()
967 leftmask = rightmask = 0; in index_nodes()
978 if (node->mark && (leftmask & bitmask) == 0) { in index_nodes()
993 if (node->mark && (rightmask & bitmask) == 0) { in index_nodes()
1017 if (verbose > 0) in index_nodes()
1030 return 0; in mark_subtree()
1066 changed = 0; in size_nodes()
1067 size = 0; in size_nodes()
1069 if (verbose > 0) in size_nodes()
1075 pathbits = 0; in size_nodes()
1076 pathmask = 0; in size_nodes()
1078 leftmask = rightmask = 0; in size_nodes()
1082 offset = 0; in size_nodes()
1126 assert(offset >= 0); in size_nodes()
1127 assert(offset <= 0xffffff); in size_nodes()
1128 if (offset <= 0xff) { in size_nodes()
1130 } else if (offset <= 0xffff) { in size_nodes()
1132 } else { /* offset <= 0xffffff */ in size_nodes()
1145 if (node->mark && (leftmask & bitmask) == 0) { in size_nodes()
1156 if (node->mark && (rightmask & bitmask) == 0) { in size_nodes()
1177 if (verbose > 0) in size_nodes()
1201 nodes[0] = nodes[1] = nodes[2] = nodes[3] = 0; in emit()
1202 leaves = 0; in emit()
1203 bytes = 0; in emit()
1207 if (verbose > 0) in emit()
1220 leftmask = rightmask = 0; in emit()
1227 byte = 0; in emit()
1236 if (node->offset <= 0xff) in emit()
1238 else if (node->offset <= 0xffff) in emit()
1248 *data++ = offset & 0xff; in emit()
1255 nodes[0]++; in emit()
1262 nodes[0]++; in emit()
1266 assert(0); in emit()
1271 if (node->mark && (leftmask & bitmask) == 0) { in emit()
1288 if (node->mark && (rightmask & bitmask) == 0) { in emit()
1312 if (verbose > 0) { in emit()
1316 nodes[0] + nodes[1] + nodes[2] + nodes[3], in emit()
1317 nodes[0], nodes[1], nodes[2], nodes[3]); in emit()
1350 struct unicode_data unicode_data[0x110000];
1368 for (i = 0; i != corrections_count; i++) in corrections_lookup()
1380 return 0; in nfdi_equal()
1382 return 0; in nfdi_equal()
1384 strcmp(left->utf8nfdi, right->utf8nfdi) == 0) in nfdi_equal()
1387 return 0; in nfdi_equal()
1397 return 0; in nfdicf_equal()
1399 return 0; in nfdicf_equal()
1401 strcmp(left->utf8nfdicf, right->utf8nfdicf) == 0) in nfdicf_equal()
1404 return 0; in nfdicf_equal()
1406 return 0; in nfdicf_equal()
1408 strcmp(left->utf8nfdi, right->utf8nfdi) == 0) in nfdicf_equal()
1411 return 0; in nfdicf_equal()
1422 if (leaf->utf8nfdi && leaf->utf8nfdi[0] == HANGUL) in nfdi_print()
1439 else if (leaf->utf8nfdi && leaf->utf8nfdi[0] == HANGUL) in nfdicf_print()
1457 return 0; in nfdicf_mark()
1520 while ((*data++ = *s++) != 0) in nfdi_emit()
1541 while ((*data++ = *s++) != 0) in nfdicf_emit()
1546 while ((*data++ = *s++) != 0) in nfdicf_emit()
1562 assert(data->utf8nfdi[0] == HANGUL); in utf8_create()
1569 for (i = 0; um[i]; i++) in utf8_create()
1571 *u = '\0'; in utf8_create()
1577 for (i = 0; um[i]; i++) in utf8_create()
1579 *u = '\0'; in utf8_create()
1590 for (unichar = 0; unichar != 0x110000; unichar++) in utf8_init()
1593 for (i = 0; i != corrections_count; i++) in utf8_init()
1607 count = 0; in trees_init()
1611 nextage = 0; in trees_init()
1612 for (i = 0; i <= corrections_count; i++) { in trees_init()
1632 nextage = 0; in trees_init()
1633 for (i = 0; i <= corrections_count; i++) { in trees_init()
1642 for (i = 0; i != trees_count; i++) { in trees_init()
1643 j = 0; in trees_init()
1653 for (i = 0; i != trees_count-2; i += 2) { in trees_init()
1661 for (i = 0; i != trees_count; i += 2) { in trees_init()
1678 for (i = 0; i != trees_count; i++) in trees_init()
1690 for (i = 0; i != trees_count; i++) { in trees_populate()
1691 if (verbose > 0) { in trees_populate()
1695 for (unichar = 0; unichar != 0x110000; unichar++) { in trees_populate()
1696 if (unicode_data[unichar].gen < 0) in trees_populate()
1713 for (i = 0; i != trees_count; i++) in trees_reduce()
1715 for (i = 0; i != trees_count; i++) in trees_reduce()
1718 size = 0; in trees_reduce()
1719 for (i = 0; i != trees_count; i++) in trees_reduce()
1721 changed = 0; in trees_reduce()
1722 for (i = 0; i != trees_count; i++) in trees_reduce()
1728 for (i = 0; i != trees_count; i++) in trees_reduce()
1731 if (verbose > 0) { in trees_reduce()
1732 for (i = 0; i != trees_count; i++) { in trees_reduce()
1755 if (verbose > 0) in verify()
1759 for (unichar = 0; unichar != 0x110000; unichar++) { in verify()
1760 report = 0; in verify()
1770 if (unichar < 0xd800 || unichar > 0xdfff) in verify()
1773 if (unichar >= 0xd800 && unichar <= 0xdfff) in verify()
1781 if (data->utf8nfdi[0] != HANGUL) in verify()
1830 for (i = 0; i != trees_count; i++) in trees_verify()
1863 printf("by version 11.0.0 of the Unicode Character Database.\n"); in help()
1911 for (i = 0; utf32str[i]; i++) in print_utf32()
1944 if (verbose > 0) in age_init()
1950 count = 0; in age_init()
1952 gen = 0; in age_init()
1970 if (!age_valid(major, minor, 0)) in age_init()
1979 if (ages_count == 0 || ages_count > MAXGEN) in age_init()
1982 /* There is a 0 entry. */ in age_init()
1989 count = 0; in age_init()
1990 gen = 0; in age_init()
2006 ages[++gen] = UNICODE_AGE(major, minor, 0); in age_init()
2010 if (!age_valid(major, minor, 0)) in age_init()
2043 for (unichar = 0xd800; unichar <= 0xdfff; unichar++) in age_init()
2046 if (verbose > 0) in age_init()
2048 if (count == 0) in age_init()
2062 if (verbose > 0) in ccc_init()
2069 count = 0; in ccc_init()
2096 if (verbose > 0) in ccc_init()
2098 if (count == 0) in ccc_init()
2110 for (i = 0 ; i < ARRAY_SIZE(ignored_types); i++) in ignore_compatibility_form()
2111 if (strcmp(type, ignored_types[i]) == 0) in ignore_compatibility_form()
2113 return 0; in ignore_compatibility_form()
2128 if (verbose > 0) in nfdi_init()
2134 count = 0; in nfdi_init()
2148 *s++ = '\0'; in nfdi_init()
2153 i = 0; in nfdi_init()
2160 mapping[i++] = 0; in nfdi_init()
2171 if (verbose > 0) in nfdi_init()
2173 if (count == 0) in nfdi_init()
2189 if (verbose > 0) in nfdicf_init()
2195 count = 0; in nfdicf_init()
2209 i = 0; in nfdicf_init()
2216 mapping[i++] = 0; in nfdicf_init()
2227 if (verbose > 0) in nfdicf_init()
2229 if (count == 0) in nfdicf_init()
2243 if (verbose > 0) in ignore_init()
2249 count = 0; in ignore_init()
2260 *um = 0; in ignore_init()
2264 *um = 0; in ignore_init()
2281 *um = 0; in ignore_init()
2285 *um = 0; in ignore_init()
2296 if (verbose > 0) in ignore_init()
2298 if (count == 0) in ignore_init()
2317 if (verbose > 0) in corrections_init()
2323 count = 0; in corrections_init()
2338 count = 0; in corrections_init()
2352 i = 0; in corrections_init()
2360 mapping[i++] = 0; in corrections_init()
2373 if (verbose > 0) in corrections_init()
2375 if (count == 0) in corrections_init()
2382 * Hangul decomposition (algorithm from Section 3.12 of Unicode 6.3.0)
2384 * AC00;<Hangul Syllable, First>;Lo;0;L;;;;;N;;;;;
2385 * D7A3;<Hangul Syllable, Last>;Lo;0;L;;;;;N;;;;;
2387 * SBase = 0xAC00
2388 * LBase = 0x1100
2389 * VBase = 0x1161
2390 * TBase = 0x11A7
2418 * if (TIndex == 0) {
2429 unsigned int sb = 0xAC00; in hangul_decompose()
2430 unsigned int lb = 0x1100; in hangul_decompose()
2431 unsigned int vb = 0x1161; in hangul_decompose()
2432 unsigned int tb = 0x11a7; in hangul_decompose()
2444 if (verbose > 0) in hangul_decompose()
2447 count = 0; in hangul_decompose()
2448 for (unichar = 0xAC00; unichar <= 0xD7A3; unichar++) { in hangul_decompose()
2454 i = 0; in hangul_decompose()
2459 mapping[i++] = 0; in hangul_decompose()
2477 unicode_data[unichar].utf8nfdi[0] = HANGUL; in hangul_decompose()
2478 unicode_data[unichar].utf8nfdi[1] = '\0'; in hangul_decompose()
2485 if (verbose > 0) in hangul_decompose()
2500 if (verbose > 0) in nfdi_decompose()
2503 count = 0; in nfdi_decompose()
2504 for (unichar = 0; unichar != 0x110000; unichar++) { in nfdi_decompose()
2509 i = 0; in nfdi_decompose()
2514 for (j = 0; dc[j]; j++) in nfdi_decompose()
2516 ret = 0; in nfdi_decompose()
2522 mapping[i++] = 0; in nfdi_decompose()
2540 if (verbose > 0) in nfdi_decompose()
2555 if (verbose > 0) in nfdicf_decompose()
2557 count = 0; in nfdicf_decompose()
2558 for (unichar = 0; unichar != 0x110000; unichar++) { in nfdicf_decompose()
2563 i = 0; in nfdicf_decompose()
2568 for (j = 0; dc[j]; j++) in nfdicf_decompose()
2570 ret = 0; in nfdicf_decompose()
2576 mapping[i++] = 0; in nfdicf_decompose()
2588 if (verbose > 0) in nfdicf_decompose()
2606 * Hangul decomposition (algorithm from Section 3.12 of Unicode 6.3.0)
2608 * AC00;<Hangul Syllable, First>;Lo;0;L;;;;;N;;;;;
2609 * D7A3;<Hangul Syllable, Last>;Lo;0;L;;;;;N;;;;;
2611 * SBase = 0xAC00
2612 * LBase = 0x1100
2613 * VBase = 0x1161
2614 * TBase = 0x11A7
2642 * if (TIndex == 0) {
2651 #define SB (0xAC00)
2652 #define LB (0x1100)
2653 #define VB (0x1161)
2654 #define TB (0x11A7)
2693 h[0] = '\0'; in utf8hangul()
2717 if (len == 0) in utf8nlookup()
2724 if (--len == 0) in utf8nlookup()
2766 * codepoints >= 0xAC00 and <= 0xD7A3. Their UTF-8 encoding is in utf8nlookup()
2770 if (LEAF_CCC(trie) == DECOMPOSE && LEAF_STR(trie)[0] == HANGUL) in utf8nlookup()
2795 return 1 + (c >= 0xC0) + (c >= 0xE0) + (c >= 0xF0); in utf8clen()
2801 * Return 0 if only non-assigned code points are used.
2806 int age = 0; in utf8agemax()
2828 * Return 0 if non-assigned code points are used.
2859 int age = 0; in utf8nagemax()
2910 * A string of Default_Ignorable_Code_Point has length 0.
2915 size_t ret = 0; in utf8len()
2942 size_t ret = 0; in utf8nlen()
2988 * Returns -1 on error, 0 on success.
3003 u8c->slen = 0; in utf8ncursor()
3006 u8c->unichar = 0; in utf8ncursor()
3011 if (len > 0 && (*s & 0xC0) == 0x80) in utf8ncursor()
3013 return 0; in utf8ncursor()
3023 * Returns -1 on error, 0 on success.
3042 * and the function returns 0 in that case.
3064 if (u8c->p && *u8c->s == '\0') { in utf8byte()
3070 if (!u8c->p && (u8c->len == 0 || *u8c->s == '\0')) { in utf8byte()
3073 return 0; in utf8byte()
3077 } else if ((*u8c->s & 0xC0) == 0x80) { in utf8byte()
3096 /* Characters that are too new have CCC 0. */ in utf8byte()
3103 /* Empty decomposition implies CCC 0. */ in utf8byte()
3104 if (*u8c->s == '\0') { in utf8byte()
3167 u8c->slen = 0; in utf8byte()
3186 while ((c = utf8byte(&u8c)) > 0) in normalize_line()
3189 if (c < 0) in normalize_line()
3191 if (*t != 0) in normalize_line()
3201 while ((c = utf8byte(&u8c)) > 0) in normalize_line()
3204 if (c < 0) in normalize_line()
3206 if (*t != 0) in normalize_line()
3209 return 0; in normalize_line()
3221 int tests = 0; in normalization_test()
3222 int failures = 0; in normalization_test()
3224 if (verbose > 0) in normalization_test()
3242 *t = '\0'; in normalization_test()
3244 ignorables = 0; in normalization_test()
3255 *t = '\0'; in normalization_test()
3258 if (normalize_line(nfdi_tree) < 0) { in normalization_test()
3267 if (verbose > 0) in normalization_test()
3283 if (verbose > 0) in write_file()
3296 for (i = 0; i != ages_count; i++) in write_file()
3302 t = 0; in write_file()
3303 for (gen = 0; gen < ages_count; gen++) { in write_file()
3314 for (gen = 0; gen < ages_count; gen++) { in write_file()
3325 t = 0; in write_file()
3326 for (i = 0; i != utf8data_size; i += 16) { in write_file()
3335 fprintf(file, "0x%.2x%s", utf8data[j], in write_file()
3367 argv0 = argv[0]; in main()
3400 exit(0); in main()
3408 for (unichar = 0; unichar != 0x110000; unichar++) in main()
3433 return 0; in main()