// xref: /aosp_15_r20/external/icu/libicu/cts_headers/ucptrie_impl.h (revision 0e209d3975ff4a8c132096b14b0e9364a753506e)
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html

// ucptrie_impl.h (modified from utrie2_impl.h)
// created: 2017dec29 Markus W. Scherer

7*0e209d39SAndroid Build Coastguard Worker #ifndef __UCPTRIE_IMPL_H__
8*0e209d39SAndroid Build Coastguard Worker #define __UCPTRIE_IMPL_H__
9*0e209d39SAndroid Build Coastguard Worker 
10*0e209d39SAndroid Build Coastguard Worker #include "unicode/ucptrie.h"
11*0e209d39SAndroid Build Coastguard Worker #ifdef UCPTRIE_DEBUG
12*0e209d39SAndroid Build Coastguard Worker #include "unicode/umutablecptrie.h"
13*0e209d39SAndroid Build Coastguard Worker #endif
14*0e209d39SAndroid Build Coastguard Worker 
// UCPTrie signature values, in platform endianness and opposite endianness.
// The UCPTrie signature ASCII byte values spell "Tri3":
// 0x54='T', 0x72='r', 0x69='i', 0x33='3'.
// UCPTRIE_OE_SIG is the same four bytes in reversed order.
#define UCPTRIE_SIG     0x54726933
#define UCPTRIE_OE_SIG  0x33697254

/**
 * Header data for the binary, memory-mappable representation of a UCPTrie/CodePointTrie.
 * The fields add up to 16 bytes (one uint32_t followed by six uint16_t).
 * @internal
 */
struct UCPTrieHeader {
    /** "Tri3" in big-endian US-ASCII (0x54726933) */
    uint32_t signature;

    /**
     * Options bit field:
     * Bits 15..12: Data length bits 19..16.
     * Bits 11..8: Data null block offset bits 19..16.
     * Bits 7..6: UCPTrieType
     * Bits 5..3: Reserved (0).
     * Bits 2..0: UCPTrieValueWidth
     */
    uint16_t options;

    /** Total length of the index tables. */
    uint16_t indexLength;

    /** Data length bits 15..0. (Bits 19..16 are stored in options bits 15..12.) */
    uint16_t dataLength;

    /** Index-3 null block offset, 0x7fff or 0xffff if none. */
    uint16_t index3NullOffset;

    /**
     * Data null block offset bits 15..0. (Bits 19..16 are stored in options bits 11..8.)
     * The combined 20-bit offset is 0xfffff if there is no data null block,
     * in which case this field holds 0xffff.
     */
    uint16_t dataNullOffset;

    /**
     * First code point of the single-value range ending with U+10ffff,
     * rounded up and then shifted right by UCPTRIE_SHIFT_2.
     */
    uint16_t shiftedHighStart;
};

// Constants for use with UCPTrieHeader.options.
/** Bits 15..12: data length bits 19..16. */
constexpr uint16_t UCPTRIE_OPTIONS_DATA_LENGTH_MASK = 0xf000;
/** Bits 11..8: data null block offset bits 19..16. */
constexpr uint16_t UCPTRIE_OPTIONS_DATA_NULL_OFFSET_MASK = 0xf00;
/** Bits 5..3: reserved, must be 0. */
constexpr uint16_t UCPTRIE_OPTIONS_RESERVED_MASK = 0x38;
/** Bits 2..0: UCPTrieValueWidth. */
constexpr uint16_t UCPTRIE_OPTIONS_VALUE_BITS_MASK = 7;

/**
 * Value for index3NullOffset which indicates that there is no index-3 null block.
 * Bit 15 is unused for this value because this bit is used if the index-3 contains
 * 18-bit indexes.
 */
constexpr int32_t UCPTRIE_NO_INDEX3_NULL_OFFSET = 0x7fff;

/**
 * Value for the 20-bit data null offset which indicates that there is no data null block.
 * The offset is split across the header: bits 15..0 in dataNullOffset,
 * bits 19..16 in options bits 11..8.
 */
constexpr int32_t UCPTRIE_NO_DATA_NULL_OFFSET = 0xfffff;

71*0e209d39SAndroid Build Coastguard Worker // Internal constants.
72*0e209d39SAndroid Build Coastguard Worker 
73*0e209d39SAndroid Build Coastguard Worker /** The length of the BMP index table. 1024=0x400 */
74*0e209d39SAndroid Build Coastguard Worker constexpr int32_t UCPTRIE_BMP_INDEX_LENGTH = 0x10000 >> UCPTRIE_FAST_SHIFT;
75*0e209d39SAndroid Build Coastguard Worker 
76*0e209d39SAndroid Build Coastguard Worker constexpr int32_t UCPTRIE_SMALL_LIMIT = 0x1000;
77*0e209d39SAndroid Build Coastguard Worker constexpr int32_t UCPTRIE_SMALL_INDEX_LENGTH = UCPTRIE_SMALL_LIMIT >> UCPTRIE_FAST_SHIFT;
78*0e209d39SAndroid Build Coastguard Worker 
/** Shift size for getting the index-3 table offset. (4) */
constexpr int32_t UCPTRIE_SHIFT_3 = 4;

/** Shift size for getting the index-2 table offset. (9) */
constexpr int32_t UCPTRIE_SHIFT_2 = 5 + UCPTRIE_SHIFT_3;

/** Shift size for getting the index-1 table offset. (14) */
constexpr int32_t UCPTRIE_SHIFT_1 = 5 + UCPTRIE_SHIFT_2;

/**
 * Difference between two shift sizes,
 * for getting an index-2 offset from an index-3 offset. 5=9-4
 */
constexpr int32_t UCPTRIE_SHIFT_2_3 = UCPTRIE_SHIFT_2 - UCPTRIE_SHIFT_3;

/**
 * Difference between two shift sizes,
 * for getting an index-1 offset from an index-2 offset. 5=14-9
 */
constexpr int32_t UCPTRIE_SHIFT_1_2 = UCPTRIE_SHIFT_1 - UCPTRIE_SHIFT_2;

/**
 * Number of index-1 entries for the BMP. (4)
 * This part of the index-1 table is omitted from the serialized form.
 */
constexpr int32_t UCPTRIE_OMITTED_BMP_INDEX_1_LENGTH = 0x10000 >> UCPTRIE_SHIFT_1;

/** Number of entries in an index-2 block. 32=0x20 */
constexpr int32_t UCPTRIE_INDEX_2_BLOCK_LENGTH = 1 << UCPTRIE_SHIFT_1_2;

/** Mask for getting the lower bits for the in-index-2-block offset. (0x1f) */
constexpr int32_t UCPTRIE_INDEX_2_MASK = UCPTRIE_INDEX_2_BLOCK_LENGTH - 1;

/** Number of code points per index-2 table entry. 512=0x200 */
constexpr int32_t UCPTRIE_CP_PER_INDEX_2_ENTRY = 1 << UCPTRIE_SHIFT_2;

/** Number of entries in an index-3 block. 32=0x20 */
constexpr int32_t UCPTRIE_INDEX_3_BLOCK_LENGTH = 1 << UCPTRIE_SHIFT_2_3;

/** Mask for getting the lower bits for the in-index-3-block offset. (0x1f) */
constexpr int32_t UCPTRIE_INDEX_3_MASK = UCPTRIE_INDEX_3_BLOCK_LENGTH - 1;

/** Number of entries in a small data block. 16=0x10 */
constexpr int32_t UCPTRIE_SMALL_DATA_BLOCK_LENGTH = 1 << UCPTRIE_SHIFT_3;

/** Mask for getting the lower bits for the in-small-data-block offset. (0xf) */
constexpr int32_t UCPTRIE_SMALL_DATA_MASK = UCPTRIE_SMALL_DATA_BLOCK_LENGTH - 1;


128*0e209d39SAndroid Build Coastguard Worker typedef UChar32
129*0e209d39SAndroid Build Coastguard Worker UCPTrieGetRange(const void *trie, UChar32 start,
130*0e209d39SAndroid Build Coastguard Worker                 UCPMapValueFilter *filter, const void *context, uint32_t *pValue);
131*0e209d39SAndroid Build Coastguard Worker 
132*0e209d39SAndroid Build Coastguard Worker U_CFUNC UChar32
133*0e209d39SAndroid Build Coastguard Worker ucptrie_internalGetRange(UCPTrieGetRange *getRange,
134*0e209d39SAndroid Build Coastguard Worker                          const void *trie, UChar32 start,
135*0e209d39SAndroid Build Coastguard Worker                          UCPMapRangeOption option, uint32_t surrogateValue,
136*0e209d39SAndroid Build Coastguard Worker                          UCPMapValueFilter *filter, const void *context, uint32_t *pValue);
137*0e209d39SAndroid Build Coastguard Worker 
// Debug-only helpers; declared only when UCPTRIE_DEBUG is defined.
#ifdef UCPTRIE_DEBUG
/** Debug helper taking a trie and a caller-supplied label string. */
U_CFUNC void
ucptrie_printLengths(const UCPTrie *trie, const char *which);

/** Debug helper taking a mutable-trie builder and a name string. */
U_CFUNC void umutablecptrie_setName(UMutableCPTrie *builder, const char *name);
#endif

/*
 * Format of the binary, memory-mappable representation of a UCPTrie/CodePointTrie.
 * For overview information see https://icu.unicode.org/design/struct/utrie
 *
 * The binary trie data should be 32-bit-aligned.
 * The overall layout is:
 *
 * UCPTrieHeader header; -- 16 bytes, see struct definition above
 * uint16_t index[header.indexLength];
 * uintXY_t data[header.dataLength];
 *
 * The trie data array is an array of uint16_t, uint32_t, or uint8_t,
 * specified via the UCPTrieValueWidth when building the trie.
 * The data array is 32-bit-aligned for uint32_t, otherwise 16-bit-aligned.
 * The overall length of the trie data is a multiple of 4 bytes.
 * (Padding is added at the end of the index array and/or near the end of the data array as needed.)
 *
 * The length of the data array (dataLength) is stored as an integer split across two fields
 * of the header struct (high bits in header.options).
 *
 * The trie type can be "fast" or "small" which determines the index structure,
 * specified via the UCPTrieType when building the trie.
 *
 * The type and valueWidth are stored in the header.options.
 * There are reserved type and valueWidth values, and reserved header.options bits.
 * They could be used in future format extensions.
 * Code reading the trie structure must fail with an error when unknown values or options are set.
 *
 * Values for ASCII characters (U+0000..U+007F) can always be found at the start of the data array.
 *
 * Values for code points below a type-specific fast-indexing limit are found via two-stage lookup.
 * For a "fast" trie, the limit is the BMP/supplementary boundary at U+10000.
 * For a "small" trie, the limit is UCPTRIE_SMALL_MAX+1=U+1000.
 *
 * All code points in the range highStart..U+10FFFF map to a single highValue
 * which is stored at the second-to-last position of the data array.
 * (See UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET.)
 * The highStart value is header.shiftedHighStart<<UCPTRIE_SHIFT_2.
 * (UCPTRIE_SHIFT_2=9)
 *
 * Values for code points fast_limit..highStart-1 are found via four-stage lookup.
 * The data block size is smaller for this range than for the fast range.
 * This together with more index stages with small blocks makes this range
 * more easily compactable.
 *
 * There is also a trie error value stored at the last position of the data array.
 * (See UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET.)
 * It is intended to be returned for inputs that are not Unicode code points
 * (outside U+0000..U+10FFFF), or in string processing for ill-formed input
 * (unpaired surrogate in UTF-16, ill-formed UTF-8 subsequence).
 *
 * For a "fast" trie:
 *
 * The index array starts with the BMP index table for BMP code point lookup.
 * Its length is 1024=0x400.
 *
 * The supplementary index-1 table follows the BMP index table.
 * Variable length, for code points up to highStart-1.
 * Maximum length 64=0x40=0x100000>>UCPTRIE_SHIFT_1.
 * (For 0x100000 supplementary code points U+10000..U+10ffff.)
 *
 * After this index-1 table follow the variable-length index-3 and index-2 tables.
 *
 * The supplementary index tables are omitted completely
 * if there is only BMP data (highStart<=U+10000).
 *
 * For a "small" trie:
 *
 * The index array starts with a fast-index table for lookup of code points U+0000..U+0FFF.
 *
 * The "supplementary" index tables are always stored.
 * The index-1 table starts from U+0000, its maximum length is 68=0x44=0x110000>>UCPTRIE_SHIFT_1.
 *
 * For both trie types:
 *
 * The last index-2 block may be a partial block, storing indexes only for code points
 * below highStart.
 *
 * Lookup for ASCII code point c:
 *
 * Linear access from the start of the data array.
 *
 * value = data[c];
 *
 * Lookup for fast-range code point c:
 *
 * Shift the code point right by UCPTRIE_FAST_SHIFT=6 bits,
 * fetch the index array value at that offset,
 * add the lower code point bits, index into the data array.
 *
 * value = data[index[c>>6] + (c&0x3f)];
 *
 * (This works for ASCII as well.)
 *
 * Lookup for small-range code point c below highStart:
 *
 * Split the code point into four bit fields using several sets of shifts & masks
 * to read consecutive values from the index-1, index-2, index-3 and data tables.
 *
 * If all of the data block offsets in an index-3 block fit within 16 bits (up to 0xffff),
 * then the data block offsets are stored directly as uint16_t.
 *
 * Otherwise (this is very unusual but possible), the index-2 entry for the index-3 block
 * has bit 15 (0x8000) set, and each set of 8 index-3 entries is preceded by
 * an additional uint16_t word. Data block offsets are 18 bits wide, with the top 2 bits stored
 * in the additional word.
 *
 * See ucptrie_internalSmallIndex() for details.
 *
 * (In a "small" trie, this works for ASCII and below-fast_limit code points as well.)
 *
 * Compaction:
 *
 * Multiple code point ranges ("blocks") that are aligned on certain boundaries
 * (determined by the shifting/bit fields of code points) and
 * map to the same data values normally share a single subsequence of the data array.
 * Data blocks can also overlap partially.
 * (Depending on the builder code finding duplicate and overlapping blocks.)
 *
 * Iteration over same-value ranges:
 *
 * Range iteration (ucptrie_getRange()) walks the structure from a start code point
 * until some code point is found that maps to a different value;
 * the end of the returned range is just before that.
 *
 * The header.dataNullOffset (split across two header fields, high bits in header.options)
 * is the offset of a widely shared data block filled with one single value.
 * It helps quickly skip over large ranges of data with that value.
 * The builder must ensure that if the start of any data block (fast or small)
 * matches the dataNullOffset, then the whole block must be filled with the null value.
 * Special care must be taken if there is no fast null data block
 * but a small one, which is shorter, and it matches the *start* of some fast data block.
 *
 * Similarly, the header.index3NullOffset is the index-array offset of an index-3 block
 * where all index entries point to the dataNullOffset.
 * If there is no such data or index-3 block, then these offsets are set to
 * values that cannot be reached (data offset out of range/reserved index offset),
 * normally UCPTRIE_NO_DATA_NULL_OFFSET or UCPTRIE_NO_INDEX3_NULL_OFFSET respectively.
 */

285*0e209d39SAndroid Build Coastguard Worker #endif
286