1*0e209d39SAndroid Build Coastguard Worker // © 2017 and later: Unicode, Inc. and others. 2*0e209d39SAndroid Build Coastguard Worker // License & terms of use: http://www.unicode.org/copyright.html 3*0e209d39SAndroid Build Coastguard Worker 4*0e209d39SAndroid Build Coastguard Worker // ucptrie_impl.h (modified from utrie2_impl.h) 5*0e209d39SAndroid Build Coastguard Worker // created: 2017dec29 Markus W. Scherer 6*0e209d39SAndroid Build Coastguard Worker 7*0e209d39SAndroid Build Coastguard Worker #ifndef __UCPTRIE_IMPL_H__ 8*0e209d39SAndroid Build Coastguard Worker #define __UCPTRIE_IMPL_H__ 9*0e209d39SAndroid Build Coastguard Worker 10*0e209d39SAndroid Build Coastguard Worker #include "unicode/ucptrie.h" 11*0e209d39SAndroid Build Coastguard Worker #ifdef UCPTRIE_DEBUG 12*0e209d39SAndroid Build Coastguard Worker #include "unicode/umutablecptrie.h" 13*0e209d39SAndroid Build Coastguard Worker #endif 14*0e209d39SAndroid Build Coastguard Worker 15*0e209d39SAndroid Build Coastguard Worker // UCPTrie signature values, in platform endianness and opposite endianness. 16*0e209d39SAndroid Build Coastguard Worker // The UCPTrie signature ASCII byte values spell "Tri3". 17*0e209d39SAndroid Build Coastguard Worker #define UCPTRIE_SIG 0x54726933 18*0e209d39SAndroid Build Coastguard Worker #define UCPTRIE_OE_SIG 0x33697254 19*0e209d39SAndroid Build Coastguard Worker 20*0e209d39SAndroid Build Coastguard Worker /** 21*0e209d39SAndroid Build Coastguard Worker * Header data for the binary, memory-mappable representation of a UCPTrie/CodePointTrie. 22*0e209d39SAndroid Build Coastguard Worker * @internal 23*0e209d39SAndroid Build Coastguard Worker */ 24*0e209d39SAndroid Build Coastguard Worker struct UCPTrieHeader { 25*0e209d39SAndroid Build Coastguard Worker /** "Tri3" in big-endian US-ASCII (0x54726933) */ 26*0e209d39SAndroid Build Coastguard Worker uint32_t signature; 27*0e209d39SAndroid Build Coastguard Worker 28*0e209d39SAndroid Build Coastguard Worker /** 29*0e209d39SAndroid Build Coastguard Worker * Options bit field: 30*0e209d39SAndroid Build Coastguard Worker * Bits 15..12: Data length bits 19..16. 31*0e209d39SAndroid Build Coastguard Worker * Bits 11..8: Data null block offset bits 19..16. 32*0e209d39SAndroid Build Coastguard Worker * Bits 7..6: UCPTrieType 33*0e209d39SAndroid Build Coastguard Worker * Bits 5..3: Reserved (0). 34*0e209d39SAndroid Build Coastguard Worker * Bits 2..0: UCPTrieValueWidth 35*0e209d39SAndroid Build Coastguard Worker */ 36*0e209d39SAndroid Build Coastguard Worker uint16_t options; 37*0e209d39SAndroid Build Coastguard Worker 38*0e209d39SAndroid Build Coastguard Worker /** Total length of the index tables. */ 39*0e209d39SAndroid Build Coastguard Worker uint16_t indexLength; 40*0e209d39SAndroid Build Coastguard Worker 41*0e209d39SAndroid Build Coastguard Worker /** Data length bits 15..0. */ 42*0e209d39SAndroid Build Coastguard Worker uint16_t dataLength; 43*0e209d39SAndroid Build Coastguard Worker 44*0e209d39SAndroid Build Coastguard Worker /** Index-3 null block offset, 0x7fff or 0xffff if none. */ 45*0e209d39SAndroid Build Coastguard Worker uint16_t index3NullOffset; 46*0e209d39SAndroid Build Coastguard Worker 47*0e209d39SAndroid Build Coastguard Worker /** Data null block offset bits 15..0, 0xfffff if none. */ 48*0e209d39SAndroid Build Coastguard Worker uint16_t dataNullOffset; 49*0e209d39SAndroid Build Coastguard Worker 50*0e209d39SAndroid Build Coastguard Worker /** 51*0e209d39SAndroid Build Coastguard Worker * First code point of the single-value range ending with U+10ffff, 52*0e209d39SAndroid Build Coastguard Worker * rounded up and then shifted right by UCPTRIE_SHIFT_2. 53*0e209d39SAndroid Build Coastguard Worker */ 54*0e209d39SAndroid Build Coastguard Worker uint16_t shiftedHighStart; 55*0e209d39SAndroid Build Coastguard Worker }; 56*0e209d39SAndroid Build Coastguard Worker 57*0e209d39SAndroid Build Coastguard Worker // Constants for use with UCPTrieHeader.options. 58*0e209d39SAndroid Build Coastguard Worker constexpr uint16_t UCPTRIE_OPTIONS_DATA_LENGTH_MASK = 0xf000; 59*0e209d39SAndroid Build Coastguard Worker constexpr uint16_t UCPTRIE_OPTIONS_DATA_NULL_OFFSET_MASK = 0xf00; 60*0e209d39SAndroid Build Coastguard Worker constexpr uint16_t UCPTRIE_OPTIONS_RESERVED_MASK = 0x38; 61*0e209d39SAndroid Build Coastguard Worker constexpr uint16_t UCPTRIE_OPTIONS_VALUE_BITS_MASK = 7; 62*0e209d39SAndroid Build Coastguard Worker 63*0e209d39SAndroid Build Coastguard Worker /** 64*0e209d39SAndroid Build Coastguard Worker * Value for index3NullOffset which indicates that there is no index-3 null block. 65*0e209d39SAndroid Build Coastguard Worker * Bit 15 is unused for this value because this bit is used if the index-3 contains 66*0e209d39SAndroid Build Coastguard Worker * 18-bit indexes. 67*0e209d39SAndroid Build Coastguard Worker */ 68*0e209d39SAndroid Build Coastguard Worker constexpr int32_t UCPTRIE_NO_INDEX3_NULL_OFFSET = 0x7fff; 69*0e209d39SAndroid Build Coastguard Worker constexpr int32_t UCPTRIE_NO_DATA_NULL_OFFSET = 0xfffff; 70*0e209d39SAndroid Build Coastguard Worker 71*0e209d39SAndroid Build Coastguard Worker // Internal constants. 72*0e209d39SAndroid Build Coastguard Worker 73*0e209d39SAndroid Build Coastguard Worker /** The length of the BMP index table. 1024=0x400 */ 74*0e209d39SAndroid Build Coastguard Worker constexpr int32_t UCPTRIE_BMP_INDEX_LENGTH = 0x10000 >> UCPTRIE_FAST_SHIFT; 75*0e209d39SAndroid Build Coastguard Worker 76*0e209d39SAndroid Build Coastguard Worker constexpr int32_t UCPTRIE_SMALL_LIMIT = 0x1000; 77*0e209d39SAndroid Build Coastguard Worker constexpr int32_t UCPTRIE_SMALL_INDEX_LENGTH = UCPTRIE_SMALL_LIMIT >> UCPTRIE_FAST_SHIFT; 78*0e209d39SAndroid Build Coastguard Worker 79*0e209d39SAndroid Build Coastguard Worker /** Shift size for getting the index-3 table offset. */ 80*0e209d39SAndroid Build Coastguard Worker constexpr int32_t UCPTRIE_SHIFT_3 = 4; 81*0e209d39SAndroid Build Coastguard Worker 82*0e209d39SAndroid Build Coastguard Worker /** Shift size for getting the index-2 table offset. */ 83*0e209d39SAndroid Build Coastguard Worker constexpr int32_t UCPTRIE_SHIFT_2 = 5 + UCPTRIE_SHIFT_3; 84*0e209d39SAndroid Build Coastguard Worker 85*0e209d39SAndroid Build Coastguard Worker /** Shift size for getting the index-1 table offset. */ 86*0e209d39SAndroid Build Coastguard Worker constexpr int32_t UCPTRIE_SHIFT_1 = 5 + UCPTRIE_SHIFT_2; 87*0e209d39SAndroid Build Coastguard Worker 88*0e209d39SAndroid Build Coastguard Worker /** 89*0e209d39SAndroid Build Coastguard Worker * Difference between two shift sizes, 90*0e209d39SAndroid Build Coastguard Worker * for getting an index-2 offset from an index-3 offset. 5=9-4 91*0e209d39SAndroid Build Coastguard Worker */ 92*0e209d39SAndroid Build Coastguard Worker constexpr int32_t UCPTRIE_SHIFT_2_3 = UCPTRIE_SHIFT_2 - UCPTRIE_SHIFT_3; 93*0e209d39SAndroid Build Coastguard Worker 94*0e209d39SAndroid Build Coastguard Worker /** 95*0e209d39SAndroid Build Coastguard Worker * Difference between two shift sizes, 96*0e209d39SAndroid Build Coastguard Worker * for getting an index-1 offset from an index-2 offset. 5=14-9 97*0e209d39SAndroid Build Coastguard Worker */ 98*0e209d39SAndroid Build Coastguard Worker constexpr int32_t UCPTRIE_SHIFT_1_2 = UCPTRIE_SHIFT_1 - UCPTRIE_SHIFT_2; 99*0e209d39SAndroid Build Coastguard Worker 100*0e209d39SAndroid Build Coastguard Worker /** 101*0e209d39SAndroid Build Coastguard Worker * Number of index-1 entries for the BMP. (4) 102*0e209d39SAndroid Build Coastguard Worker * This part of the index-1 table is omitted from the serialized form. 103*0e209d39SAndroid Build Coastguard Worker */ 104*0e209d39SAndroid Build Coastguard Worker constexpr int32_t UCPTRIE_OMITTED_BMP_INDEX_1_LENGTH = 0x10000 >> UCPTRIE_SHIFT_1; 105*0e209d39SAndroid Build Coastguard Worker 106*0e209d39SAndroid Build Coastguard Worker /** Number of entries in an index-2 block. 32=0x20 */ 107*0e209d39SAndroid Build Coastguard Worker constexpr int32_t UCPTRIE_INDEX_2_BLOCK_LENGTH = 1 << UCPTRIE_SHIFT_1_2; 108*0e209d39SAndroid Build Coastguard Worker 109*0e209d39SAndroid Build Coastguard Worker /** Mask for getting the lower bits for the in-index-2-block offset. */ 110*0e209d39SAndroid Build Coastguard Worker constexpr int32_t UCPTRIE_INDEX_2_MASK = UCPTRIE_INDEX_2_BLOCK_LENGTH - 1; 111*0e209d39SAndroid Build Coastguard Worker 112*0e209d39SAndroid Build Coastguard Worker /** Number of code points per index-2 table entry. 512=0x200 */ 113*0e209d39SAndroid Build Coastguard Worker constexpr int32_t UCPTRIE_CP_PER_INDEX_2_ENTRY = 1 << UCPTRIE_SHIFT_2; 114*0e209d39SAndroid Build Coastguard Worker 115*0e209d39SAndroid Build Coastguard Worker /** Number of entries in an index-3 block. 32=0x20 */ 116*0e209d39SAndroid Build Coastguard Worker constexpr int32_t UCPTRIE_INDEX_3_BLOCK_LENGTH = 1 << UCPTRIE_SHIFT_2_3; 117*0e209d39SAndroid Build Coastguard Worker 118*0e209d39SAndroid Build Coastguard Worker /** Mask for getting the lower bits for the in-index-3-block offset. */ 119*0e209d39SAndroid Build Coastguard Worker constexpr int32_t UCPTRIE_INDEX_3_MASK = UCPTRIE_INDEX_3_BLOCK_LENGTH - 1; 120*0e209d39SAndroid Build Coastguard Worker 121*0e209d39SAndroid Build Coastguard Worker /** Number of entries in a small data block. 16=0x10 */ 122*0e209d39SAndroid Build Coastguard Worker constexpr int32_t UCPTRIE_SMALL_DATA_BLOCK_LENGTH = 1 << UCPTRIE_SHIFT_3; 123*0e209d39SAndroid Build Coastguard Worker 124*0e209d39SAndroid Build Coastguard Worker /** Mask for getting the lower bits for the in-small-data-block offset. */ 125*0e209d39SAndroid Build Coastguard Worker constexpr int32_t UCPTRIE_SMALL_DATA_MASK = UCPTRIE_SMALL_DATA_BLOCK_LENGTH - 1; 126*0e209d39SAndroid Build Coastguard Worker 127*0e209d39SAndroid Build Coastguard Worker 128*0e209d39SAndroid Build Coastguard Worker typedef UChar32 129*0e209d39SAndroid Build Coastguard Worker UCPTrieGetRange(const void *trie, UChar32 start, 130*0e209d39SAndroid Build Coastguard Worker UCPMapValueFilter *filter, const void *context, uint32_t *pValue); 131*0e209d39SAndroid Build Coastguard Worker 132*0e209d39SAndroid Build Coastguard Worker U_CFUNC UChar32 133*0e209d39SAndroid Build Coastguard Worker ucptrie_internalGetRange(UCPTrieGetRange *getRange, 134*0e209d39SAndroid Build Coastguard Worker const void *trie, UChar32 start, 135*0e209d39SAndroid Build Coastguard Worker UCPMapRangeOption option, uint32_t surrogateValue, 136*0e209d39SAndroid Build Coastguard Worker UCPMapValueFilter *filter, const void *context, uint32_t *pValue); 137*0e209d39SAndroid Build Coastguard Worker 138*0e209d39SAndroid Build Coastguard Worker #ifdef UCPTRIE_DEBUG 139*0e209d39SAndroid Build Coastguard Worker U_CFUNC void 140*0e209d39SAndroid Build Coastguard Worker ucptrie_printLengths(const UCPTrie *trie, const char *which); 141*0e209d39SAndroid Build Coastguard Worker 142*0e209d39SAndroid Build Coastguard Worker U_CFUNC void umutablecptrie_setName(UMutableCPTrie *builder, const char *name); 143*0e209d39SAndroid Build Coastguard Worker #endif 144*0e209d39SAndroid Build Coastguard Worker 145*0e209d39SAndroid Build Coastguard Worker /* 146*0e209d39SAndroid Build Coastguard Worker * Format of the binary, memory-mappable representation of a UCPTrie/CodePointTrie. 147*0e209d39SAndroid Build Coastguard Worker * For overview information see https://icu.unicode.org/design/struct/utrie 148*0e209d39SAndroid Build Coastguard Worker * 149*0e209d39SAndroid Build Coastguard Worker * The binary trie data should be 32-bit-aligned. 150*0e209d39SAndroid Build Coastguard Worker * The overall layout is: 151*0e209d39SAndroid Build Coastguard Worker * 152*0e209d39SAndroid Build Coastguard Worker * UCPTrieHeader header; -- 16 bytes, see struct definition above 153*0e209d39SAndroid Build Coastguard Worker * uint16_t index[header.indexLength]; 154*0e209d39SAndroid Build Coastguard Worker * uintXY_t data[header.dataLength]; 155*0e209d39SAndroid Build Coastguard Worker * 156*0e209d39SAndroid Build Coastguard Worker * The trie data array is an array of uint16_t, uint32_t, or uint8_t, 157*0e209d39SAndroid Build Coastguard Worker * specified via the UCPTrieValueWidth when building the trie. 158*0e209d39SAndroid Build Coastguard Worker * The data array is 32-bit-aligned for uint32_t, otherwise 16-bit-aligned. 159*0e209d39SAndroid Build Coastguard Worker * The overall length of the trie data is a multiple of 4 bytes. 160*0e209d39SAndroid Build Coastguard Worker * (Padding is added at the end of the index array and/or near the end of the data array as needed.) 161*0e209d39SAndroid Build Coastguard Worker * 162*0e209d39SAndroid Build Coastguard Worker * The length of the data array (dataLength) is stored as an integer split across two fields 163*0e209d39SAndroid Build Coastguard Worker * of the header struct (high bits in header.options). 164*0e209d39SAndroid Build Coastguard Worker * 165*0e209d39SAndroid Build Coastguard Worker * The trie type can be "fast" or "small" which determines the index structure, 166*0e209d39SAndroid Build Coastguard Worker * specified via the UCPTrieType when building the trie. 167*0e209d39SAndroid Build Coastguard Worker * 168*0e209d39SAndroid Build Coastguard Worker * The type and valueWidth are stored in the header.options. 169*0e209d39SAndroid Build Coastguard Worker * There are reserved type and valueWidth values, and reserved header.options bits. 170*0e209d39SAndroid Build Coastguard Worker * They could be used in future format extensions. 171*0e209d39SAndroid Build Coastguard Worker * Code reading the trie structure must fail with an error when unknown values or options are set. 172*0e209d39SAndroid Build Coastguard Worker * 173*0e209d39SAndroid Build Coastguard Worker * Values for ASCII character (U+0000..U+007F) can always be found at the start of the data array. 174*0e209d39SAndroid Build Coastguard Worker * 175*0e209d39SAndroid Build Coastguard Worker * Values for code points below a type-specific fast-indexing limit are found via two-stage lookup. 176*0e209d39SAndroid Build Coastguard Worker * For a "fast" trie, the limit is the BMP/supplementary boundary at U+10000. 177*0e209d39SAndroid Build Coastguard Worker * For a "small" trie, the limit is UCPTRIE_SMALL_MAX+1=U+1000. 178*0e209d39SAndroid Build Coastguard Worker * 179*0e209d39SAndroid Build Coastguard Worker * All code points in the range highStart..U+10FFFF map to a single highValue 180*0e209d39SAndroid Build Coastguard Worker * which is stored at the second-to-last position of the data array. 181*0e209d39SAndroid Build Coastguard Worker * (See UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET.) 182*0e209d39SAndroid Build Coastguard Worker * The highStart value is header.shiftedHighStart<<UCPTRIE_SHIFT_2. 183*0e209d39SAndroid Build Coastguard Worker * (UCPTRIE_SHIFT_2=9) 184*0e209d39SAndroid Build Coastguard Worker * 185*0e209d39SAndroid Build Coastguard Worker * Values for code points fast_limit..highStart-1 are found via four-stage lookup. 186*0e209d39SAndroid Build Coastguard Worker * The data block size is smaller for this range than for the fast range. 187*0e209d39SAndroid Build Coastguard Worker * This together with more index stages with small blocks makes this range 188*0e209d39SAndroid Build Coastguard Worker * more easily compactable. 189*0e209d39SAndroid Build Coastguard Worker * 190*0e209d39SAndroid Build Coastguard Worker * There is also a trie error value stored at the last position of the data array. 191*0e209d39SAndroid Build Coastguard Worker * (See UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET.) 192*0e209d39SAndroid Build Coastguard Worker * It is intended to be returned for inputs that are not Unicode code points 193*0e209d39SAndroid Build Coastguard Worker * (outside U+0000..U+10FFFF), or in string processing for ill-formed input 194*0e209d39SAndroid Build Coastguard Worker * (unpaired surrogate in UTF-16, ill-formed UTF-8 subsequence). 195*0e209d39SAndroid Build Coastguard Worker * 196*0e209d39SAndroid Build Coastguard Worker * For a "fast" trie: 197*0e209d39SAndroid Build Coastguard Worker * 198*0e209d39SAndroid Build Coastguard Worker * The index array starts with the BMP index table for BMP code point lookup. 199*0e209d39SAndroid Build Coastguard Worker * Its length is 1024=0x400. 200*0e209d39SAndroid Build Coastguard Worker * 201*0e209d39SAndroid Build Coastguard Worker * The supplementary index-1 table follows the BMP index table. 202*0e209d39SAndroid Build Coastguard Worker * Variable length, for code points up to highStart-1. 203*0e209d39SAndroid Build Coastguard Worker * Maximum length 64=0x40=0x100000>>UCPTRIE_SHIFT_1. 204*0e209d39SAndroid Build Coastguard Worker * (For 0x100000 supplementary code points U+10000..U+10ffff.) 205*0e209d39SAndroid Build Coastguard Worker * 206*0e209d39SAndroid Build Coastguard Worker * After this index-1 table follow the variable-length index-3 and index-2 tables. 207*0e209d39SAndroid Build Coastguard Worker * 208*0e209d39SAndroid Build Coastguard Worker * The supplementary index tables are omitted completely 209*0e209d39SAndroid Build Coastguard Worker * if there is only BMP data (highStart<=U+10000). 210*0e209d39SAndroid Build Coastguard Worker * 211*0e209d39SAndroid Build Coastguard Worker * For a "small" trie: 212*0e209d39SAndroid Build Coastguard Worker * 213*0e209d39SAndroid Build Coastguard Worker * The index array starts with a fast-index table for lookup of code points U+0000..U+0FFF. 214*0e209d39SAndroid Build Coastguard Worker * 215*0e209d39SAndroid Build Coastguard Worker * The "supplementary" index tables are always stored. 216*0e209d39SAndroid Build Coastguard Worker * The index-1 table starts from U+0000, its maximum length is 68=0x44=0x110000>>UCPTRIE_SHIFT_1. 217*0e209d39SAndroid Build Coastguard Worker * 218*0e209d39SAndroid Build Coastguard Worker * For both trie types: 219*0e209d39SAndroid Build Coastguard Worker * 220*0e209d39SAndroid Build Coastguard Worker * The last index-2 block may be a partial block, storing indexes only for code points 221*0e209d39SAndroid Build Coastguard Worker * below highStart. 222*0e209d39SAndroid Build Coastguard Worker * 223*0e209d39SAndroid Build Coastguard Worker * Lookup for ASCII code point c: 224*0e209d39SAndroid Build Coastguard Worker * 225*0e209d39SAndroid Build Coastguard Worker * Linear access from the start of the data array. 226*0e209d39SAndroid Build Coastguard Worker * 227*0e209d39SAndroid Build Coastguard Worker * value = data[c]; 228*0e209d39SAndroid Build Coastguard Worker * 229*0e209d39SAndroid Build Coastguard Worker * Lookup for fast-range code point c: 230*0e209d39SAndroid Build Coastguard Worker * 231*0e209d39SAndroid Build Coastguard Worker * Shift the code point right by UCPTRIE_FAST_SHIFT=6 bits, 232*0e209d39SAndroid Build Coastguard Worker * fetch the index array value at that offset, 233*0e209d39SAndroid Build Coastguard Worker * add the lower code point bits, index into the data array. 234*0e209d39SAndroid Build Coastguard Worker * 235*0e209d39SAndroid Build Coastguard Worker * value = data[index[c>>6] + (c&0x3f)]; 236*0e209d39SAndroid Build Coastguard Worker * 237*0e209d39SAndroid Build Coastguard Worker * (This works for ASCII as well.) 238*0e209d39SAndroid Build Coastguard Worker * 239*0e209d39SAndroid Build Coastguard Worker * Lookup for small-range code point c below highStart: 240*0e209d39SAndroid Build Coastguard Worker * 241*0e209d39SAndroid Build Coastguard Worker * Split the code point into four bit fields using several sets of shifts & masks 242*0e209d39SAndroid Build Coastguard Worker * to read consecutive values from the index-1, index-2, index-3 and data tables. 243*0e209d39SAndroid Build Coastguard Worker * 244*0e209d39SAndroid Build Coastguard Worker * If all of the data block offsets in an index-3 block fit within 16 bits (up to 0xffff), 245*0e209d39SAndroid Build Coastguard Worker * then the data block offsets are stored directly as uint16_t. 246*0e209d39SAndroid Build Coastguard Worker * 247*0e209d39SAndroid Build Coastguard Worker * Otherwise (this is very unusual but possible), the index-2 entry for the index-3 block 248*0e209d39SAndroid Build Coastguard Worker * has bit 15 (0x8000) set, and each set of 8 index-3 entries is preceded by 249*0e209d39SAndroid Build Coastguard Worker * an additional uint16_t word. Data block offsets are 18 bits wide, with the top 2 bits stored 250*0e209d39SAndroid Build Coastguard Worker * in the additional word. 251*0e209d39SAndroid Build Coastguard Worker * 252*0e209d39SAndroid Build Coastguard Worker * See ucptrie_internalSmallIndex() for details. 253*0e209d39SAndroid Build Coastguard Worker * 254*0e209d39SAndroid Build Coastguard Worker * (In a "small" trie, this works for ASCII and below-fast_limit code points as well.) 255*0e209d39SAndroid Build Coastguard Worker * 256*0e209d39SAndroid Build Coastguard Worker * Compaction: 257*0e209d39SAndroid Build Coastguard Worker * 258*0e209d39SAndroid Build Coastguard Worker * Multiple code point ranges ("blocks") that are aligned on certain boundaries 259*0e209d39SAndroid Build Coastguard Worker * (determined by the shifting/bit fields of code points) and 260*0e209d39SAndroid Build Coastguard Worker * map to the same data values normally share a single subsequence of the data array. 261*0e209d39SAndroid Build Coastguard Worker * Data blocks can also overlap partially. 262*0e209d39SAndroid Build Coastguard Worker * (Depending on the builder code finding duplicate and overlapping blocks.) 263*0e209d39SAndroid Build Coastguard Worker * 264*0e209d39SAndroid Build Coastguard Worker * Iteration over same-value ranges: 265*0e209d39SAndroid Build Coastguard Worker * 266*0e209d39SAndroid Build Coastguard Worker * Range iteration (ucptrie_getRange()) walks the structure from a start code point 267*0e209d39SAndroid Build Coastguard Worker * until some code point is found that maps to a different value; 268*0e209d39SAndroid Build Coastguard Worker * the end of the returned range is just before that. 269*0e209d39SAndroid Build Coastguard Worker * 270*0e209d39SAndroid Build Coastguard Worker * The header.dataNullOffset (split across two header fields, high bits in header.options) 271*0e209d39SAndroid Build Coastguard Worker * is the offset of a widely shared data block filled with one single value. 272*0e209d39SAndroid Build Coastguard Worker * It helps quickly skip over large ranges of data with that value. 273*0e209d39SAndroid Build Coastguard Worker * The builder must ensure that if the start of any data block (fast or small) 274*0e209d39SAndroid Build Coastguard Worker * matches the dataNullOffset, then the whole block must be filled with the null value. 275*0e209d39SAndroid Build Coastguard Worker * Special care must be taken if there is no fast null data block 276*0e209d39SAndroid Build Coastguard Worker * but a small one, which is shorter, and it matches the *start* of some fast data block. 277*0e209d39SAndroid Build Coastguard Worker * 278*0e209d39SAndroid Build Coastguard Worker * Similarly, the header.index3NullOffset is the index-array offset of an index-3 block 279*0e209d39SAndroid Build Coastguard Worker * where all index entries point to the dataNullOffset. 280*0e209d39SAndroid Build Coastguard Worker * If there is no such data or index-3 block, then these offsets are set to 281*0e209d39SAndroid Build Coastguard Worker * values that cannot be reached (data offset out of range/reserved index offset), 282*0e209d39SAndroid Build Coastguard Worker * normally UCPTRIE_NO_DATA_NULL_OFFSET or UCPTRIE_NO_INDEX3_NULL_OFFSET respectively. 283*0e209d39SAndroid Build Coastguard Worker */ 284*0e209d39SAndroid Build Coastguard Worker 285*0e209d39SAndroid Build Coastguard Worker #endif 286