# AFLplusplus/dictionaries/utf8.dict

# https://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt


# Byte order mark (U+FEFF): defines byte order and endianness
byte_order="\xFE\xFF"

# Right-to-left override (U+202E): reorders the display of text for RTL reading
right_to_left="\x20\x2E"

# Mongolian Vowel Separator (U+180E): invisible, and treated as whitespace by
# some implementations (it had the White_Space property before Unicode 6.3).
invisible_separator="\x18\x0E"

# Word joiner (U+2060): invisible zero-width character.
word_join="\x20\x60"

# Reserved code point (U+FEFE)
reserved="\xfe\xfe"

# Invalid code points (noncharacters): U+FFFF, U+1FFFF and U+FDD0
invalid1="\xff\xff"
invalid2="\x01\xff\xff"
# Each byte needs its own \x escape: "\xfdd0" would yield byte 0xFD followed
# by the literal text "d0" rather than the two bytes of U+FDD0.
invalid3="\xfd\xd0"

# Unassigned code point (U+0FED)
unassigned="\x0f\xed"

# Illegal lone low (trailing) half-surrogate (U+DEAD)
illegal_low="\xde\xad"

# Illegal lone high (leading) half-surrogate (U+DAAD)
illegal_high="\xda\xad"

# Private Use Area code point (U+F8FF) used by Apple for its logo
apple="\xf8\xff"

# Fullwidth solidus (U+FF0F): relevant to hostname normalization
fullwidth_solidus="\xff\x0f"

# Mathematical bold digit eight (U+1D7D6): has a numerical mapping and a value
bold_eight="\x01\xd7\xd6"

# U+00DF normalizes to "ss" during IDNA2003's mapping phase,
# different from its IDNA2008 mapping. See http://www.unicode.org/reports/tr46/
weird="\x00\xdf"

# U+FDFA expands by 11x (UTF-8) and 18x (UTF-16) under NFKC/NFKD
expansion="\xfd\xfa"

# U+0390 expands by 3x (UTF-8) under NFD
expansion2="\x03\x90"

# U+1F82 expands by 4x (UTF-16) under NFD
expansion3="\x1F\x82"

# U+FB2C expands by 3x (UTF-16) under NFC
expansion4="\xFB\x2C"

# Lowercase expansion: https://twitter.com/jifa/status/625776454479970304
# Code points, in order: U+023A, U+023E, U+00DF, U+1E9E.
low_exp1="\x02\x3a"
low_exp2="\x02\x3e"
low_exp3="\x00\xdf"
low_exp4="\x1e\x9e"

# Null byte
null="\x00\x00"

# Overlong 6-byte UTF-8 encodings of U+0000 (NUL) and U+002F ("/").
# Every byte must be written as a \xNN escape; a bare "fc" is literal text.
"\xfc\x80\x80\x80\x80\x80"
"\xfc\x80\x80\x80\x80\xaf"

# Confusing newlines: U+0085 (NEXT LINE), U+2028 (LINE SEPARATOR),
# U+2029 (PARAGRAPH SEPARATOR).
# NOTE(review): the first token encodes U+001B (ESC), which is not a line
# break; possibly U+000B (VT) was intended. Confirm before changing.
"\x00\x1b"
"\x00\x85"
"\x20\x28"
"\x20\x29"