1 // Copyright 2014 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "core/fxcrt/fx_unicode.h"
8
9 #include <stddef.h>
10
11 #include <iterator>
12
13 #include "third_party/base/check.h"
14
15 namespace {
16
// Bit layout of each uint16_t entry in kTextLayoutCodeProperties[]:
//   bits 0-4  : FX_BIDICLASS value.
//   bits 5-13 : index into kFXTextLayoutBidiMirror[]; the all-ones value
//               kMirrorMax marks a character that is its own mirror.
constexpr uint16_t kBidiClassBitPos = 0;
constexpr uint16_t kBidiClassBitCount = 5;
constexpr uint16_t kBidiClassBitMask =
    ((1u << kBidiClassBitCount) - 1u) << kBidiClassBitPos;

constexpr uint16_t kMirrorBitPos = 5;
constexpr uint16_t kMirrorBitCount = 9;
// Largest value representable in the mirror field.
constexpr uint16_t kMirrorMax = (1 << kMirrorBitCount) - 1;
26
// Builds kTextLayoutCodeProperties[] by expanding every CHARPROP____ row of
// fx_ucddata.inc into one packed uint16_t: the row's mirror index shifted to
// kMirrorBitPos, OR'ed with its FX_BIDICLASS value shifted to
// kBidiClassBitPos. The |ct| and |bt| columns are not used by this table.
#undef CHARPROP____
#define CHARPROP____(mirror, ct, bd, bt) \
  ((mirror << kMirrorBitPos) |           \
   (static_cast<uint16_t>(FX_BIDICLASS::bd) << kBidiClassBitPos)),
constexpr uint16_t kTextLayoutCodeProperties[] = {
#include "core/fxcrt/fx_ucddata.inc"  // NOLINT(build/include)
};
#undef CHARPROP____

// Number of entries in the table; GetUnicodeProperties() bounds-checks
// against this before indexing.
constexpr size_t kTextLayoutCodePropertiesSize =
    std::size(kTextLayoutCodeProperties);

// The table is indexed directly by character value, so the data file must
// supply exactly 65536 rows.
static_assert(kTextLayoutCodePropertiesSize == 65536, "missing characters");
40
GetUnicodeProperties(wchar_t wch)41 uint16_t GetUnicodeProperties(wchar_t wch) {
42 size_t idx = static_cast<size_t>(wch);
43 if (idx < kTextLayoutCodePropertiesSize)
44 return kTextLayoutCodeProperties[idx];
45 return 0;
46 }
47
48 #ifdef PDF_ENABLE_XFA
// Bit layout of each uint16_t entry in kExtendedTextLayoutCodeProperties[]:
//   bits 0-5 : FX_BREAKPROPERTY value.
//   bits 6-9 : FX_CHARTYPE value.
constexpr uint16_t kBreakTypeBitPos = 0;
constexpr uint16_t kBreakTypeBitCount = 6;
constexpr uint16_t kBreakTypeBitMask =
    ((1u << kBreakTypeBitCount) - 1u) << kBreakTypeBitPos;

constexpr uint16_t kCharTypeBitPos = 6;
constexpr uint16_t kCharTypeBitCount = 4;
constexpr uint16_t kCharTypeBitMask =
    ((1u << kCharTypeBitCount) - 1u) << kCharTypeBitPos;
59
// Builds kExtendedTextLayoutCodeProperties[] by expanding every CHARPROP____
// row of fx_ucddata.inc into one packed uint16_t: the row's FX_CHARTYPE value
// shifted to kCharTypeBitPos, OR'ed with its FX_BREAKPROPERTY value shifted to
// kBreakTypeBitPos. The |mirror| and |bd| columns are not used by this table.
#undef CHARPROP____
#define CHARPROP____(mirror, ct, bd, bt)                           \
  ((static_cast<uint16_t>(FX_CHARTYPE::ct) << kCharTypeBitPos) |   \
   (static_cast<uint16_t>(FX_BREAKPROPERTY::bt) << kBreakTypeBitPos)),
constexpr uint16_t kExtendedTextLayoutCodeProperties[] = {
#include "core/fxcrt/fx_ucddata.inc"  // NOLINT(build/include)
};
#undef CHARPROP____

// Number of entries in the table; GetExtendedUnicodeProperties() bounds-checks
// against this before indexing.
constexpr size_t kExtendedTextLayoutCodePropertiesSize =
    std::size(kExtendedTextLayoutCodeProperties);

// The table is indexed directly by character value, so the data file must
// supply exactly 65536 rows.
static_assert(kExtendedTextLayoutCodePropertiesSize == 65536,
              "missing characters");
74
GetExtendedUnicodeProperties(wchar_t wch)75 uint16_t GetExtendedUnicodeProperties(wchar_t wch) {
76 size_t idx = static_cast<size_t>(wch);
77 if (idx < kExtendedTextLayoutCodePropertiesSize)
78 return kExtendedTextLayoutCodeProperties[idx];
79 return 0;
80 }
81
82 #endif // PDF_ENABLE_XFA
83
// Mirror characters, selected by the mirror-index field packed into each
// kTextLayoutCodeProperties[] entry. GetMirrorChar() returns the element at
// that index. The order of the values is significant: the indices stored in
// fx_ucddata.inc refer to positions in this array.
constexpr uint16_t kFXTextLayoutBidiMirror[] = {
    0x0029, 0x0028, 0x003E, 0x003C, 0x005D, 0x005B, 0x007D, 0x007B, 0x00BB,
    0x00AB, 0x0F3B, 0x0F3A, 0x0F3D, 0x0F3C, 0x169C, 0x169B, 0x2019, 0x2018,
    0x201D, 0x201C, 0x203A, 0x2039, 0x2046, 0x2045, 0x207E, 0x207D, 0x208E,
    0x208D, 0x220B, 0x220C, 0x220D, 0x2208, 0x2209, 0x220A, 0x29F5, 0x223D,
    0x223C, 0x22CD, 0x2253, 0x2252, 0x2255, 0x2254, 0x2265, 0x2264, 0x2267,
    0x2266, 0x2269, 0x2268, 0x226B, 0x226A, 0x226F, 0x226E, 0x2271, 0x2270,
    0x2273, 0x2272, 0x2275, 0x2274, 0x2277, 0x2276, 0x2279, 0x2278, 0x227B,
    0x227A, 0x227D, 0x227C, 0x227F, 0x227E, 0x2281, 0x2280, 0x2283, 0x2282,
    0x2285, 0x2284, 0x2287, 0x2286, 0x2289, 0x2288, 0x228B, 0x228A, 0x2290,
    0x228F, 0x2292, 0x2291, 0x29B8, 0x22A3, 0x22A2, 0x2ADE, 0x2AE4, 0x2AE3,
    0x2AE5, 0x22B1, 0x22B0, 0x22B3, 0x22B2, 0x22B5, 0x22B4, 0x22B7, 0x22B6,
    0x22CA, 0x22C9, 0x22CC, 0x22CB, 0x2243, 0x22D1, 0x22D0, 0x22D7, 0x22D6,
    0x22D9, 0x22D8, 0x22DB, 0x22DA, 0x22DD, 0x22DC, 0x22DF, 0x22DE, 0x22E1,
    0x22E0, 0x22E3, 0x22E2, 0x22E5, 0x22E4, 0x22E7, 0x22E6, 0x22E9, 0x22E8,
    0x22EB, 0x22EA, 0x22ED, 0x22EC, 0x22F1, 0x22F0, 0x22FA, 0x22FB, 0x22FC,
    0x22FD, 0x22FE, 0x22F2, 0x22F3, 0x22F4, 0x22F6, 0x22F7, 0x2309, 0x2308,
    0x230B, 0x230A, 0x232A, 0x2329, 0x2769, 0x2768, 0x276B, 0x276A, 0x276D,
    0x276C, 0x276F, 0x276E, 0x2771, 0x2770, 0x2773, 0x2772, 0x2775, 0x2774,
    0x27C4, 0x27C3, 0x27C6, 0x27C5, 0x27C9, 0x27C8, 0x27D6, 0x27D5, 0x27DE,
    0x27DD, 0x27E3, 0x27E2, 0x27E5, 0x27E4, 0x27E7, 0x27E6, 0x27E9, 0x27E8,
    0x27EB, 0x27EA, 0x27ED, 0x27EC, 0x27EF, 0x27EE, 0x2984, 0x2983, 0x2986,
    0x2985, 0x2988, 0x2987, 0x298A, 0x2989, 0x298C, 0x298B, 0x2990, 0x298F,
    0x298E, 0x298D, 0x2992, 0x2991, 0x2994, 0x2993, 0x2996, 0x2995, 0x2998,
    0x2997, 0x2298, 0x29C1, 0x29C0, 0x29C5, 0x29C4, 0x29D0, 0x29CF, 0x29D2,
    0x29D1, 0x29D5, 0x29D4, 0x29D9, 0x29D8, 0x29DB, 0x29DA, 0x2215, 0x29F9,
    0x29F8, 0x29FD, 0x29FC, 0x2A2C, 0x2A2B, 0x2A2E, 0x2A2D, 0x2A35, 0x2A34,
    0x2A3D, 0x2A3C, 0x2A65, 0x2A64, 0x2A7A, 0x2A79, 0x2A7E, 0x2A7D, 0x2A80,
    0x2A7F, 0x2A82, 0x2A81, 0x2A84, 0x2A83, 0x2A8C, 0x2A8B, 0x2A92, 0x2A91,
    0x2A94, 0x2A93, 0x2A96, 0x2A95, 0x2A98, 0x2A97, 0x2A9A, 0x2A99, 0x2A9C,
    0x2A9B, 0x2AA2, 0x2AA1, 0x2AA7, 0x2AA6, 0x2AA9, 0x2AA8, 0x2AAB, 0x2AAA,
    0x2AAD, 0x2AAC, 0x2AB0, 0x2AAF, 0x2AB4, 0x2AB3, 0x2ABC, 0x2ABB, 0x2ABE,
    0x2ABD, 0x2AC0, 0x2ABF, 0x2AC2, 0x2AC1, 0x2AC4, 0x2AC3, 0x2AC6, 0x2AC5,
    0x2ACE, 0x2ACD, 0x2AD0, 0x2ACF, 0x2AD2, 0x2AD1, 0x2AD4, 0x2AD3, 0x2AD6,
    0x2AD5, 0x22A6, 0x22A9, 0x22A8, 0x22AB, 0x2AED, 0x2AEC, 0x2AF8, 0x2AF7,
    0x2AFA, 0x2AF9, 0x2E03, 0x2E02, 0x2E05, 0x2E04, 0x2E0A, 0x2E09, 0x2E0D,
    0x2E0C, 0x2E1D, 0x2E1C, 0x2E21, 0x2E20, 0x2E23, 0x2E22, 0x2E25, 0x2E24,
    0x2E27, 0x2E26, 0x2E29, 0x2E28, 0x3009, 0x3008, 0x300B, 0x300A, 0x300D,
    0x300C, 0x300F, 0x300E, 0x3011, 0x3010, 0x3015, 0x3014, 0x3017, 0x3016,
    0x3019, 0x3018, 0x301B, 0x301A, 0xFE5A, 0xFE59, 0xFE5C, 0xFE5B, 0xFE5E,
    0xFE5D, 0xFE65, 0xFE64, 0xFF09, 0xFF08, 0xFF1E, 0xFF1C, 0xFF3D, 0xFF3B,
    0xFF5D, 0xFF5B, 0xFF60, 0xFF5F, 0xFF63, 0xFF62,
};

// Number of entries in kFXTextLayoutBidiMirror[]; every mirror index other
// than kMirrorMax must be smaller than this.
constexpr size_t kFXTextLayoutBidiMirrorSize =
    std::size(kFXTextLayoutBidiMirror);
130
// Check that the mirror indices in the fx_ucddata.inc table are in bounds.
// Each CHARPROP____ row expands to a static_assert, so a row whose mirror
// index is neither the kMirrorMax sentinel nor a valid position in
// kFXTextLayoutBidiMirror[] fails the build.
#undef CHARPROP____
#define CHARPROP____(mirror, ct, bd, bt)                                      \
  static_assert(mirror == kMirrorMax || mirror < kFXTextLayoutBidiMirrorSize, \
                "Bad mirror index");
#include "core/fxcrt/fx_ucddata.inc"  // NOLINT(build/include)
#undef CHARPROP____
138
139 } // namespace
140
141 namespace pdfium {
142 namespace unicode {
143
GetMirrorChar(wchar_t wch)144 wchar_t GetMirrorChar(wchar_t wch) {
145 uint16_t prop = GetUnicodeProperties(wch);
146 size_t idx = prop >> kMirrorBitPos;
147 if (idx == kMirrorMax)
148 return wch;
149 DCHECK(idx < kFXTextLayoutBidiMirrorSize);
150 return kFXTextLayoutBidiMirror[idx];
151 }
152
GetBidiClass(wchar_t wch)153 FX_BIDICLASS GetBidiClass(wchar_t wch) {
154 uint16_t prop = GetUnicodeProperties(wch);
155 uint16_t result = (prop & kBidiClassBitMask) >> kBidiClassBitPos;
156 DCHECK(result <= static_cast<uint16_t>(FX_BIDICLASS::kPDF));
157 return static_cast<FX_BIDICLASS>(result);
158 }
159
160 #ifdef PDF_ENABLE_XFA
GetCharType(wchar_t wch)161 FX_CHARTYPE GetCharType(wchar_t wch) {
162 uint16_t prop = GetExtendedUnicodeProperties(wch);
163 uint16_t result = (prop & kCharTypeBitMask) >> kCharTypeBitPos;
164 DCHECK(result <= static_cast<uint16_t>(FX_CHARTYPE::kArabic));
165 return static_cast<FX_CHARTYPE>(result);
166 }
167
GetBreakProperty(wchar_t wch)168 FX_BREAKPROPERTY GetBreakProperty(wchar_t wch) {
169 uint16_t prop = GetExtendedUnicodeProperties(wch);
170 uint16_t result = (prop & kBreakTypeBitMask) >> kBreakTypeBitPos;
171 DCHECK(result <= static_cast<uint16_t>(FX_BREAKPROPERTY::kTB));
172 return static_cast<FX_BREAKPROPERTY>(result);
173 }
174 #endif // PDF_ENABLE_XFA
175
176 } // namespace unicode
177 } // namespace pdfium
178