xref: /aosp_15_r20/external/icu/libicu/cts_headers/utrie2_impl.h (revision 0e209d3975ff4a8c132096b14b0e9364a753506e)
1*0e209d39SAndroid Build Coastguard Worker // © 2016 and later: Unicode, Inc. and others.
2*0e209d39SAndroid Build Coastguard Worker // License & terms of use: http://www.unicode.org/copyright.html
3*0e209d39SAndroid Build Coastguard Worker /*
4*0e209d39SAndroid Build Coastguard Worker ******************************************************************************
5*0e209d39SAndroid Build Coastguard Worker *
6*0e209d39SAndroid Build Coastguard Worker *   Copyright (C) 2001-2008, International Business Machines
7*0e209d39SAndroid Build Coastguard Worker *   Corporation and others.  All Rights Reserved.
8*0e209d39SAndroid Build Coastguard Worker *
9*0e209d39SAndroid Build Coastguard Worker ******************************************************************************
10*0e209d39SAndroid Build Coastguard Worker *   file name:  utrie2_impl.h
11*0e209d39SAndroid Build Coastguard Worker *   encoding:   UTF-8
12*0e209d39SAndroid Build Coastguard Worker *   tab size:   8 (not used)
13*0e209d39SAndroid Build Coastguard Worker *   indentation:4
14*0e209d39SAndroid Build Coastguard Worker *
15*0e209d39SAndroid Build Coastguard Worker *   created on: 2008sep26 (split off from utrie2.c)
16*0e209d39SAndroid Build Coastguard Worker *   created by: Markus W. Scherer
17*0e209d39SAndroid Build Coastguard Worker *
*   Definitions needed for both runtime and builder code for UTrie2,
*   used by utrie2.cpp and utrie2_builder.cpp.
20*0e209d39SAndroid Build Coastguard Worker */
21*0e209d39SAndroid Build Coastguard Worker 
22*0e209d39SAndroid Build Coastguard Worker #ifndef __UTRIE2_IMPL_H__
23*0e209d39SAndroid Build Coastguard Worker #define __UTRIE2_IMPL_H__
24*0e209d39SAndroid Build Coastguard Worker 
25*0e209d39SAndroid Build Coastguard Worker #ifdef UCPTRIE_DEBUG
26*0e209d39SAndroid Build Coastguard Worker #include "unicode/umutablecptrie.h"
27*0e209d39SAndroid Build Coastguard Worker #endif
28*0e209d39SAndroid Build Coastguard Worker #include "utrie2.h"
29*0e209d39SAndroid Build Coastguard Worker 
30*0e209d39SAndroid Build Coastguard Worker /* Public UTrie2 API implementation ----------------------------------------- */
31*0e209d39SAndroid Build Coastguard Worker 
32*0e209d39SAndroid Build Coastguard Worker /*
33*0e209d39SAndroid Build Coastguard Worker  * These definitions are mostly needed by utrie2.cpp,
34*0e209d39SAndroid Build Coastguard Worker  * but also by utrie2_serialize() and utrie2_swap().
35*0e209d39SAndroid Build Coastguard Worker  */
36*0e209d39SAndroid Build Coastguard Worker 
// UTrie2 signature values. The ASCII byte values of the signature spell "Tri2".
//
// UTRIE2_SIG    is the signature as it reads in platform endianness.
// UTRIE2_OE_SIG is the same four bytes in reverse order, i.e. what the
//               signature reads as when the data is in the opposite endianness.
#define UTRIE2_SIG      0x54726932
#define UTRIE2_OE_SIG   0x32697254
41*0e209d39SAndroid Build Coastguard Worker 
/**
 * Trie data structure in serialized form:
 *
 * UTrie2Header header;
 * uint16_t index[header.indexLength];
 * uint16_t data[header.shiftedDataLength<<2];  -- or uint32_t data[...]
 *
 * The header consists of one 32-bit field followed by six 16-bit fields.
 * @internal
 */
typedef struct UTrie2Header {
    /** "Tri2" in big-endian US-ASCII (0x54726932) */
    uint32_t signature;

    /**
     * options bit field:
     * 15.. 4   reserved (0)
     *  3.. 0   UTrie2ValueBits valueBits
     */
    uint16_t options;

    /** Length of the index array; UTRIE2_INDEX_1_OFFSET..UTRIE2_MAX_INDEX_LENGTH */
    uint16_t indexLength;

    /**
     * Length of the data array, stored shifted right:
     * (UTRIE2_DATA_START_OFFSET..UTRIE2_MAX_DATA_LENGTH)>>UTRIE2_INDEX_SHIFT
     */
    uint16_t shiftedDataLength;

    /** Null index and data blocks, not shifted. */
    uint16_t index2NullOffset, dataNullOffset;

    /**
     * First code point of the single-value range ending with U+10ffff,
     * rounded up and then shifted right by UTRIE2_SHIFT_1.
     */
    uint16_t shiftedHighStart;
} UTrie2Header;
76*0e209d39SAndroid Build Coastguard Worker 
/**
 * Constants for use with UTrie2Header.options.
 * @internal
 */
enum {
    /**
     * Mask to get the UTrie2ValueBits valueBits from options
     * (the low 4 bits of the options field).
     */
    UTRIE2_OPTIONS_VALUE_BITS_MASK=0xf
};
85*0e209d39SAndroid Build Coastguard Worker 
86*0e209d39SAndroid Build Coastguard Worker /* Building a trie ---------------------------------------------------------- */
87*0e209d39SAndroid Build Coastguard Worker 
/*
 * These definitions are mostly needed by utrie2_builder.cpp, but also by
 * utrie2_get32() and utrie2_enum().
91*0e209d39SAndroid Build Coastguard Worker  */
92*0e209d39SAndroid Build Coastguard Worker 
93*0e209d39SAndroid Build Coastguard Worker enum {
94*0e209d39SAndroid Build Coastguard Worker     /**
95*0e209d39SAndroid Build Coastguard Worker      * At build time, leave a gap in the index-2 table,
96*0e209d39SAndroid Build Coastguard Worker      * at least as long as the maximum lengths of the 2-byte UTF-8 index-2 table
97*0e209d39SAndroid Build Coastguard Worker      * and the supplementary index-1 table.
98*0e209d39SAndroid Build Coastguard Worker      * Round up to UTRIE2_INDEX_2_BLOCK_LENGTH for proper compacting.
99*0e209d39SAndroid Build Coastguard Worker      */
100*0e209d39SAndroid Build Coastguard Worker     UNEWTRIE2_INDEX_GAP_OFFSET=UTRIE2_INDEX_2_BMP_LENGTH,
101*0e209d39SAndroid Build Coastguard Worker     UNEWTRIE2_INDEX_GAP_LENGTH=
102*0e209d39SAndroid Build Coastguard Worker         ((UTRIE2_UTF8_2B_INDEX_2_LENGTH+UTRIE2_MAX_INDEX_1_LENGTH)+UTRIE2_INDEX_2_MASK)&
103*0e209d39SAndroid Build Coastguard Worker         ~UTRIE2_INDEX_2_MASK,
104*0e209d39SAndroid Build Coastguard Worker 
105*0e209d39SAndroid Build Coastguard Worker     /**
106*0e209d39SAndroid Build Coastguard Worker      * Maximum length of the build-time index-2 array.
107*0e209d39SAndroid Build Coastguard Worker      * Maximum number of Unicode code points (0x110000) shifted right by UTRIE2_SHIFT_2,
108*0e209d39SAndroid Build Coastguard Worker      * plus the part of the index-2 table for lead surrogate code points,
109*0e209d39SAndroid Build Coastguard Worker      * plus the build-time index gap,
110*0e209d39SAndroid Build Coastguard Worker      * plus the null index-2 block.
111*0e209d39SAndroid Build Coastguard Worker      */
112*0e209d39SAndroid Build Coastguard Worker     UNEWTRIE2_MAX_INDEX_2_LENGTH=
113*0e209d39SAndroid Build Coastguard Worker         (0x110000>>UTRIE2_SHIFT_2)+
114*0e209d39SAndroid Build Coastguard Worker         UTRIE2_LSCP_INDEX_2_LENGTH+
115*0e209d39SAndroid Build Coastguard Worker         UNEWTRIE2_INDEX_GAP_LENGTH+
116*0e209d39SAndroid Build Coastguard Worker         UTRIE2_INDEX_2_BLOCK_LENGTH,
117*0e209d39SAndroid Build Coastguard Worker 
118*0e209d39SAndroid Build Coastguard Worker     UNEWTRIE2_INDEX_1_LENGTH=0x110000>>UTRIE2_SHIFT_1
119*0e209d39SAndroid Build Coastguard Worker };
120*0e209d39SAndroid Build Coastguard Worker 
/**
 * Maximum length of the build-time data array.
 * One entry for each of the 0x110000 code points,
 * plus the illegal-UTF-8 block (0x40) and the null block (0x40),
 * plus values for the 0x400 surrogate code units.
 */
#define UNEWTRIE2_MAX_DATA_LENGTH (0x110000+0x40+0x40+0x400)
127*0e209d39SAndroid Build Coastguard Worker 
128*0e209d39SAndroid Build Coastguard Worker /*
129*0e209d39SAndroid Build Coastguard Worker  * Build-time trie structure.
130*0e209d39SAndroid Build Coastguard Worker  *
131*0e209d39SAndroid Build Coastguard Worker  * Just using a boolean flag for "repeat use" could lead to data array overflow
132*0e209d39SAndroid Build Coastguard Worker  * because we would not be able to detect when a data block becomes unused.
133*0e209d39SAndroid Build Coastguard Worker  * It also leads to orphan data blocks that are kept through serialization.
134*0e209d39SAndroid Build Coastguard Worker  *
135*0e209d39SAndroid Build Coastguard Worker  * Need to use reference counting for data blocks,
136*0e209d39SAndroid Build Coastguard Worker  * and allocDataBlock() needs to look for a free block before increasing dataLength.
137*0e209d39SAndroid Build Coastguard Worker  *
138*0e209d39SAndroid Build Coastguard Worker  * This scheme seems like overkill for index-2 blocks since the whole index array is
139*0e209d39SAndroid Build Coastguard Worker  * preallocated anyway (unlike the growable data array).
140*0e209d39SAndroid Build Coastguard Worker  * Just allocating multiple index-2 blocks as needed.
141*0e209d39SAndroid Build Coastguard Worker  */
142*0e209d39SAndroid Build Coastguard Worker struct UNewTrie2 {
143*0e209d39SAndroid Build Coastguard Worker     int32_t index1[UNEWTRIE2_INDEX_1_LENGTH];
144*0e209d39SAndroid Build Coastguard Worker     int32_t index2[UNEWTRIE2_MAX_INDEX_2_LENGTH];
145*0e209d39SAndroid Build Coastguard Worker     uint32_t *data;
146*0e209d39SAndroid Build Coastguard Worker #ifdef UCPTRIE_DEBUG
147*0e209d39SAndroid Build Coastguard Worker     UMutableCPTrie *t3;
148*0e209d39SAndroid Build Coastguard Worker #endif
149*0e209d39SAndroid Build Coastguard Worker 
150*0e209d39SAndroid Build Coastguard Worker     uint32_t initialValue, errorValue;
151*0e209d39SAndroid Build Coastguard Worker     int32_t index2Length, dataCapacity, dataLength;
152*0e209d39SAndroid Build Coastguard Worker     int32_t firstFreeBlock;
153*0e209d39SAndroid Build Coastguard Worker     int32_t index2NullOffset, dataNullOffset;
154*0e209d39SAndroid Build Coastguard Worker     UChar32 highStart;
155*0e209d39SAndroid Build Coastguard Worker     UBool isCompacted;
156*0e209d39SAndroid Build Coastguard Worker 
157*0e209d39SAndroid Build Coastguard Worker     /**
158*0e209d39SAndroid Build Coastguard Worker      * Multi-purpose per-data-block table.
159*0e209d39SAndroid Build Coastguard Worker      *
160*0e209d39SAndroid Build Coastguard Worker      * Before compacting:
161*0e209d39SAndroid Build Coastguard Worker      *
162*0e209d39SAndroid Build Coastguard Worker      * Per-data-block reference counters/free-block list.
163*0e209d39SAndroid Build Coastguard Worker      *  0: unused
164*0e209d39SAndroid Build Coastguard Worker      * >0: reference counter (number of index-2 entries pointing here)
165*0e209d39SAndroid Build Coastguard Worker      * <0: next free data block in free-block list
166*0e209d39SAndroid Build Coastguard Worker      *
167*0e209d39SAndroid Build Coastguard Worker      * While compacting:
168*0e209d39SAndroid Build Coastguard Worker      *
169*0e209d39SAndroid Build Coastguard Worker      * Map of adjusted indexes, used in compactData() and compactIndex2().
170*0e209d39SAndroid Build Coastguard Worker      * Maps from original indexes to new ones.
171*0e209d39SAndroid Build Coastguard Worker      */
172*0e209d39SAndroid Build Coastguard Worker     int32_t map[UNEWTRIE2_MAX_DATA_LENGTH>>UTRIE2_SHIFT_2];
173*0e209d39SAndroid Build Coastguard Worker };
174*0e209d39SAndroid Build Coastguard Worker 
175*0e209d39SAndroid Build Coastguard Worker #endif
176