xref: /aosp_15_r20/external/AFLplusplus/dictionaries/utf8.dict (revision 08b48e0b10e97b33e7b60c5b6e2243bd915777f2)
1# https://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt
2
3
4# Defines byte order and endianness
5byte_order="\xFE\xFF"
6
7# Reorder the display of text for RTL reading
8right_to_left="\x20\x2E"
9
10# Mongolian Vowel Separator: invisible and has the whitespace property
11invisible_separator="\x18\x03"
12
13# Invisible zero-width character.
14word_join="\x20\x60"
15
16# Reserved code point
17reserved="\xfe\xfe"
18
19# Invalid code points (noncharacters U+FFFF, U+1FFFF and U+FDD0)
20invalid1="\xff\xff"
21invalid2="\x01\xff\xff"
22invalid3="\xfd\xd0"
23
24# unassigned code point
25unassigned="\x0f\xed"
26
27# illegal low half-surrogate
28illegal_low="\xde\xad"
29
30# illegal high half-surrogate
31illegal_high="\xda\xad"
32
33# Private Use Area code point used by Apple for its logo
34apple="\xf8\xff"
35
36# hostname normalization
37fullwidth_solidus="\xff\x0f"
38
39# numerical mapping and a value
40bold_eight="\x01\xd7\xd6"
41
42# U+00DF normalizes to "ss" during IDNA2003's mapping phase,
43# different from its IDNA2008 mapping. See http://www.unicode.org/reports/tr46/
44weird="\x00\xdf"
45
46# U+FDFA expands by 11x (UTF-8) and 18x (UTF-16) under NFKC/NFKD
47expansion="\xfd\xfa"
48
49# U+0390 expands by 3x (UTF-8) under NFD
50expansion2="\x03\x90"
51
52# U+1F82 expands by 4x (UTF-16) under NFD
53expansion3="\x1F\x82"
54
55# U+FB2C expands by 3x (UTF-16) under NFC
56expansion4="\xFB\x2C"
57
58# Lowercase expansion: https://twitter.com/jifa/status/625776454479970304
59low_exp1="\x02\x3a"
60low_exp2="\x02\x3e"
61low_exp3="\x00\xdf"
62low_exp4="\x1e\x9e"
63
64# Null byte, followed by 6-byte overlong UTF-8 encodings of NUL (U+0000) and '/' (U+002F)
65null="\x00\x00"
66"\xfc\x80\x80\x80\x80\x80"
67"\xfc\x80\x80\x80\x80\xaf"
68
69# Confusing new lines
70"\x00\x1b"
71"\x00\x85"
72"\x20\x28"
73"\x20\x29"
74