xref: /aosp_15_r20/external/icu/libicu/cts_headers/unicode/ucharstrie.h (revision 0e209d3975ff4a8c132096b14b0e9364a753506e)
1*0e209d39SAndroid Build Coastguard Worker // © 2016 and later: Unicode, Inc. and others.
2*0e209d39SAndroid Build Coastguard Worker // License & terms of use: http://www.unicode.org/copyright.html
3*0e209d39SAndroid Build Coastguard Worker /*
4*0e209d39SAndroid Build Coastguard Worker *******************************************************************************
5*0e209d39SAndroid Build Coastguard Worker *   Copyright (C) 2010-2012, International Business Machines
6*0e209d39SAndroid Build Coastguard Worker *   Corporation and others.  All Rights Reserved.
7*0e209d39SAndroid Build Coastguard Worker *******************************************************************************
8*0e209d39SAndroid Build Coastguard Worker *   file name:  ucharstrie.h
9*0e209d39SAndroid Build Coastguard Worker *   encoding:   UTF-8
10*0e209d39SAndroid Build Coastguard Worker *   tab size:   8 (not used)
11*0e209d39SAndroid Build Coastguard Worker *   indentation:4
12*0e209d39SAndroid Build Coastguard Worker *
13*0e209d39SAndroid Build Coastguard Worker *   created on: 2010nov14
14*0e209d39SAndroid Build Coastguard Worker *   created by: Markus W. Scherer
15*0e209d39SAndroid Build Coastguard Worker */
16*0e209d39SAndroid Build Coastguard Worker 
17*0e209d39SAndroid Build Coastguard Worker #ifndef __UCHARSTRIE_H__
18*0e209d39SAndroid Build Coastguard Worker #define __UCHARSTRIE_H__
19*0e209d39SAndroid Build Coastguard Worker 
20*0e209d39SAndroid Build Coastguard Worker /**
21*0e209d39SAndroid Build Coastguard Worker  * \file
22*0e209d39SAndroid Build Coastguard Worker  * \brief C++ API: Trie for mapping Unicode strings (or 16-bit-unit sequences)
23*0e209d39SAndroid Build Coastguard Worker  *                 to integer values.
24*0e209d39SAndroid Build Coastguard Worker  */
25*0e209d39SAndroid Build Coastguard Worker 
26*0e209d39SAndroid Build Coastguard Worker #include "unicode/utypes.h"
27*0e209d39SAndroid Build Coastguard Worker 
28*0e209d39SAndroid Build Coastguard Worker #if U_SHOW_CPLUSPLUS_API
29*0e209d39SAndroid Build Coastguard Worker 
30*0e209d39SAndroid Build Coastguard Worker #include "unicode/unistr.h"
31*0e209d39SAndroid Build Coastguard Worker #include "unicode/uobject.h"
32*0e209d39SAndroid Build Coastguard Worker #include "unicode/ustringtrie.h"
33*0e209d39SAndroid Build Coastguard Worker 
34*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_BEGIN
35*0e209d39SAndroid Build Coastguard Worker 
36*0e209d39SAndroid Build Coastguard Worker class Appendable;
37*0e209d39SAndroid Build Coastguard Worker class UCharsTrieBuilder;
38*0e209d39SAndroid Build Coastguard Worker class UVector32;
39*0e209d39SAndroid Build Coastguard Worker 
40*0e209d39SAndroid Build Coastguard Worker /**
41*0e209d39SAndroid Build Coastguard Worker  * Light-weight, non-const reader class for a UCharsTrie.
42*0e209d39SAndroid Build Coastguard Worker  * Traverses a char16_t-serialized data structure with minimal state,
43*0e209d39SAndroid Build Coastguard Worker  * for mapping strings (16-bit-unit sequences) to non-negative integer values.
44*0e209d39SAndroid Build Coastguard Worker  *
45*0e209d39SAndroid Build Coastguard Worker  * This class owns the serialized trie data only if it was constructed by
46*0e209d39SAndroid Build Coastguard Worker  * the builder's build() method.
47*0e209d39SAndroid Build Coastguard Worker  * The public constructor and the copy constructor only alias the data (only copy the pointer).
48*0e209d39SAndroid Build Coastguard Worker  * There is no assignment operator.
49*0e209d39SAndroid Build Coastguard Worker  *
50*0e209d39SAndroid Build Coastguard Worker  * This class is not intended for public subclassing.
51*0e209d39SAndroid Build Coastguard Worker  * @stable ICU 4.8
52*0e209d39SAndroid Build Coastguard Worker  */
53*0e209d39SAndroid Build Coastguard Worker class U_COMMON_API UCharsTrie : public UMemory {
54*0e209d39SAndroid Build Coastguard Worker public:
55*0e209d39SAndroid Build Coastguard Worker     /**
56*0e209d39SAndroid Build Coastguard Worker      * Constructs a UCharsTrie reader instance.
57*0e209d39SAndroid Build Coastguard Worker      *
58*0e209d39SAndroid Build Coastguard Worker      * The trieUChars must contain a copy of a char16_t sequence from the UCharsTrieBuilder,
59*0e209d39SAndroid Build Coastguard Worker      * starting with the first char16_t of that sequence.
60*0e209d39SAndroid Build Coastguard Worker      * The UCharsTrie object will not read more char16_ts than
61*0e209d39SAndroid Build Coastguard Worker      * the UCharsTrieBuilder generated in the corresponding build() call.
62*0e209d39SAndroid Build Coastguard Worker      *
63*0e209d39SAndroid Build Coastguard Worker      * The array is not copied/cloned and must not be modified while
64*0e209d39SAndroid Build Coastguard Worker      * the UCharsTrie object is in use.
65*0e209d39SAndroid Build Coastguard Worker      *
66*0e209d39SAndroid Build Coastguard Worker      * @param trieUChars The char16_t array that contains the serialized trie.
67*0e209d39SAndroid Build Coastguard Worker      * @stable ICU 4.8
68*0e209d39SAndroid Build Coastguard Worker      */
UCharsTrie(ConstChar16Ptr trieUChars)
        : ownedArray_(nullptr),
          uchars_(trieUChars),
          pos_(uchars_),
          remainingMatchLength_(-1) {
    // Aliasing constructor: nothing is adopted (ownedArray_ is null),
    // the read position starts at the first unit of trieUChars,
    // and remainingMatchLength_ starts at -1.
}
72*0e209d39SAndroid Build Coastguard Worker 
73*0e209d39SAndroid Build Coastguard Worker     /**
74*0e209d39SAndroid Build Coastguard Worker      * Destructor.
75*0e209d39SAndroid Build Coastguard Worker      * @stable ICU 4.8
76*0e209d39SAndroid Build Coastguard Worker      */
77*0e209d39SAndroid Build Coastguard Worker     ~UCharsTrie();
78*0e209d39SAndroid Build Coastguard Worker 
79*0e209d39SAndroid Build Coastguard Worker     /**
80*0e209d39SAndroid Build Coastguard Worker      * Copy constructor, copies the other trie reader object and its state,
81*0e209d39SAndroid Build Coastguard Worker      * but not the char16_t array which will be shared. (Shallow copy.)
82*0e209d39SAndroid Build Coastguard Worker      * @param other Another UCharsTrie object.
83*0e209d39SAndroid Build Coastguard Worker      * @stable ICU 4.8
84*0e209d39SAndroid Build Coastguard Worker      */
UCharsTrie(const UCharsTrie & other)85*0e209d39SAndroid Build Coastguard Worker     UCharsTrie(const UCharsTrie &other)
86*0e209d39SAndroid Build Coastguard Worker             : ownedArray_(nullptr), uchars_(other.uchars_),
87*0e209d39SAndroid Build Coastguard Worker               pos_(other.pos_), remainingMatchLength_(other.remainingMatchLength_) {}
88*0e209d39SAndroid Build Coastguard Worker 
89*0e209d39SAndroid Build Coastguard Worker     /**
90*0e209d39SAndroid Build Coastguard Worker      * Resets this trie to its initial state.
91*0e209d39SAndroid Build Coastguard Worker      * @return *this
92*0e209d39SAndroid Build Coastguard Worker      * @stable ICU 4.8
93*0e209d39SAndroid Build Coastguard Worker      */
reset()94*0e209d39SAndroid Build Coastguard Worker     UCharsTrie &reset() {
95*0e209d39SAndroid Build Coastguard Worker         pos_=uchars_;
96*0e209d39SAndroid Build Coastguard Worker         remainingMatchLength_=-1;
97*0e209d39SAndroid Build Coastguard Worker         return *this;
98*0e209d39SAndroid Build Coastguard Worker     }
99*0e209d39SAndroid Build Coastguard Worker 
100*0e209d39SAndroid Build Coastguard Worker     /**
101*0e209d39SAndroid Build Coastguard Worker      * Returns the state of this trie as a 64-bit integer.
102*0e209d39SAndroid Build Coastguard Worker      * The state value is never 0.
103*0e209d39SAndroid Build Coastguard Worker      *
104*0e209d39SAndroid Build Coastguard Worker      * @return opaque state value
105*0e209d39SAndroid Build Coastguard Worker      * @see resetToState64
106*0e209d39SAndroid Build Coastguard Worker      * @stable ICU 65
107*0e209d39SAndroid Build Coastguard Worker      */
getState64()108*0e209d39SAndroid Build Coastguard Worker     uint64_t getState64() const {
109*0e209d39SAndroid Build Coastguard Worker         return (static_cast<uint64_t>(remainingMatchLength_ + 2) << kState64RemainingShift) |
110*0e209d39SAndroid Build Coastguard Worker             (uint64_t)(pos_ - uchars_);
111*0e209d39SAndroid Build Coastguard Worker     }
112*0e209d39SAndroid Build Coastguard Worker 
/**
 * Resets this trie to the saved state.
 * Unlike resetToState(State), this method does not validate its input:
 * the 64-bit state value must come from getState64() on the same trie object
 * or on one that was initialized in exactly the same way.
 * Skipping the validation makes this method faster.
 *
 * @param state The opaque trie state value from getState64().
 * @return *this
 * @see getState64
 * @see resetToState
 * @see reset
 * @stable ICU 65
 */
resetToState64(uint64_t state)127*0e209d39SAndroid Build Coastguard Worker     UCharsTrie &resetToState64(uint64_t state) {
128*0e209d39SAndroid Build Coastguard Worker         remainingMatchLength_ = static_cast<int32_t>(state >> kState64RemainingShift) - 2;
129*0e209d39SAndroid Build Coastguard Worker         pos_ = uchars_ + (state & kState64PosMask);
130*0e209d39SAndroid Build Coastguard Worker         return *this;
131*0e209d39SAndroid Build Coastguard Worker     }
132*0e209d39SAndroid Build Coastguard Worker 
133*0e209d39SAndroid Build Coastguard Worker     /**
134*0e209d39SAndroid Build Coastguard Worker      * UCharsTrie state object, for saving a trie's current state
135*0e209d39SAndroid Build Coastguard Worker      * and resetting the trie back to this state later.
136*0e209d39SAndroid Build Coastguard Worker      * @stable ICU 4.8
137*0e209d39SAndroid Build Coastguard Worker      */
138*0e209d39SAndroid Build Coastguard Worker     class State : public UMemory {
139*0e209d39SAndroid Build Coastguard Worker     public:
140*0e209d39SAndroid Build Coastguard Worker         /**
141*0e209d39SAndroid Build Coastguard Worker          * Constructs an empty State.
142*0e209d39SAndroid Build Coastguard Worker          * @stable ICU 4.8
143*0e209d39SAndroid Build Coastguard Worker          */
State()144*0e209d39SAndroid Build Coastguard Worker         State() { uchars=nullptr; }
145*0e209d39SAndroid Build Coastguard Worker     private:
146*0e209d39SAndroid Build Coastguard Worker         friend class UCharsTrie;
147*0e209d39SAndroid Build Coastguard Worker 
148*0e209d39SAndroid Build Coastguard Worker         const char16_t *uchars;
149*0e209d39SAndroid Build Coastguard Worker         const char16_t *pos;
150*0e209d39SAndroid Build Coastguard Worker         int32_t remainingMatchLength;
151*0e209d39SAndroid Build Coastguard Worker     };
152*0e209d39SAndroid Build Coastguard Worker 
153*0e209d39SAndroid Build Coastguard Worker     /**
154*0e209d39SAndroid Build Coastguard Worker      * Saves the state of this trie.
155*0e209d39SAndroid Build Coastguard Worker      * @param state The State object to hold the trie's state.
156*0e209d39SAndroid Build Coastguard Worker      * @return *this
157*0e209d39SAndroid Build Coastguard Worker      * @see resetToState
158*0e209d39SAndroid Build Coastguard Worker      * @stable ICU 4.8
159*0e209d39SAndroid Build Coastguard Worker      */
saveState(State & state)160*0e209d39SAndroid Build Coastguard Worker     const UCharsTrie &saveState(State &state) const {
161*0e209d39SAndroid Build Coastguard Worker         state.uchars=uchars_;
162*0e209d39SAndroid Build Coastguard Worker         state.pos=pos_;
163*0e209d39SAndroid Build Coastguard Worker         state.remainingMatchLength=remainingMatchLength_;
164*0e209d39SAndroid Build Coastguard Worker         return *this;
165*0e209d39SAndroid Build Coastguard Worker     }
166*0e209d39SAndroid Build Coastguard Worker 
167*0e209d39SAndroid Build Coastguard Worker     /**
168*0e209d39SAndroid Build Coastguard Worker      * Resets this trie to the saved state.
169*0e209d39SAndroid Build Coastguard Worker      * If the state object contains no state, or the state of a different trie,
170*0e209d39SAndroid Build Coastguard Worker      * then this trie remains unchanged.
171*0e209d39SAndroid Build Coastguard Worker      * @param state The State object which holds a saved trie state.
172*0e209d39SAndroid Build Coastguard Worker      * @return *this
173*0e209d39SAndroid Build Coastguard Worker      * @see saveState
174*0e209d39SAndroid Build Coastguard Worker      * @see reset
175*0e209d39SAndroid Build Coastguard Worker      * @stable ICU 4.8
176*0e209d39SAndroid Build Coastguard Worker      */
resetToState(const State & state)177*0e209d39SAndroid Build Coastguard Worker     UCharsTrie &resetToState(const State &state) {
178*0e209d39SAndroid Build Coastguard Worker         if(uchars_==state.uchars && uchars_!=nullptr) {
179*0e209d39SAndroid Build Coastguard Worker             pos_=state.pos;
180*0e209d39SAndroid Build Coastguard Worker             remainingMatchLength_=state.remainingMatchLength;
181*0e209d39SAndroid Build Coastguard Worker         }
182*0e209d39SAndroid Build Coastguard Worker         return *this;
183*0e209d39SAndroid Build Coastguard Worker     }
184*0e209d39SAndroid Build Coastguard Worker 
185*0e209d39SAndroid Build Coastguard Worker     /**
186*0e209d39SAndroid Build Coastguard Worker      * Determines whether the string so far matches, whether it has a value,
187*0e209d39SAndroid Build Coastguard Worker      * and whether another input char16_t can continue a matching string.
188*0e209d39SAndroid Build Coastguard Worker      * @return The match/value Result.
189*0e209d39SAndroid Build Coastguard Worker      * @stable ICU 4.8
190*0e209d39SAndroid Build Coastguard Worker      */
191*0e209d39SAndroid Build Coastguard Worker     UStringTrieResult current() const;
192*0e209d39SAndroid Build Coastguard Worker 
193*0e209d39SAndroid Build Coastguard Worker     /**
194*0e209d39SAndroid Build Coastguard Worker      * Traverses the trie from the initial state for this input char16_t.
195*0e209d39SAndroid Build Coastguard Worker      * Equivalent to reset().next(uchar).
196*0e209d39SAndroid Build Coastguard Worker      * @param uchar Input char value. Values below 0 and above 0xffff will never match.
197*0e209d39SAndroid Build Coastguard Worker      * @return The match/value Result.
198*0e209d39SAndroid Build Coastguard Worker      * @stable ICU 4.8
199*0e209d39SAndroid Build Coastguard Worker      */
first(int32_t uchar)200*0e209d39SAndroid Build Coastguard Worker     inline UStringTrieResult first(int32_t uchar) {
201*0e209d39SAndroid Build Coastguard Worker         remainingMatchLength_=-1;
202*0e209d39SAndroid Build Coastguard Worker         return nextImpl(uchars_, uchar);
203*0e209d39SAndroid Build Coastguard Worker     }
204*0e209d39SAndroid Build Coastguard Worker 
205*0e209d39SAndroid Build Coastguard Worker     /**
206*0e209d39SAndroid Build Coastguard Worker      * Traverses the trie from the initial state for the
207*0e209d39SAndroid Build Coastguard Worker      * one or two UTF-16 code units for this input code point.
208*0e209d39SAndroid Build Coastguard Worker      * Equivalent to reset().nextForCodePoint(cp).
209*0e209d39SAndroid Build Coastguard Worker      * @param cp A Unicode code point 0..0x10ffff.
210*0e209d39SAndroid Build Coastguard Worker      * @return The match/value Result.
211*0e209d39SAndroid Build Coastguard Worker      * @stable ICU 4.8
212*0e209d39SAndroid Build Coastguard Worker      */
213*0e209d39SAndroid Build Coastguard Worker     UStringTrieResult firstForCodePoint(UChar32 cp);
214*0e209d39SAndroid Build Coastguard Worker 
215*0e209d39SAndroid Build Coastguard Worker     /**
216*0e209d39SAndroid Build Coastguard Worker      * Traverses the trie from the current state for this input char16_t.
217*0e209d39SAndroid Build Coastguard Worker      * @param uchar Input char value. Values below 0 and above 0xffff will never match.
218*0e209d39SAndroid Build Coastguard Worker      * @return The match/value Result.
219*0e209d39SAndroid Build Coastguard Worker      * @stable ICU 4.8
220*0e209d39SAndroid Build Coastguard Worker      */
221*0e209d39SAndroid Build Coastguard Worker     UStringTrieResult next(int32_t uchar);
222*0e209d39SAndroid Build Coastguard Worker 
223*0e209d39SAndroid Build Coastguard Worker     /**
224*0e209d39SAndroid Build Coastguard Worker      * Traverses the trie from the current state for the
225*0e209d39SAndroid Build Coastguard Worker      * one or two UTF-16 code units for this input code point.
226*0e209d39SAndroid Build Coastguard Worker      * @param cp A Unicode code point 0..0x10ffff.
227*0e209d39SAndroid Build Coastguard Worker      * @return The match/value Result.
228*0e209d39SAndroid Build Coastguard Worker      * @stable ICU 4.8
229*0e209d39SAndroid Build Coastguard Worker      */
230*0e209d39SAndroid Build Coastguard Worker     UStringTrieResult nextForCodePoint(UChar32 cp);
231*0e209d39SAndroid Build Coastguard Worker 
/**
 * Traverses the trie from the current state for this string.
 * Equivalent to
 * \code
 * Result result=current();
 * for(each c in s) {
 *   if(!USTRINGTRIE_HAS_NEXT(result)) return USTRINGTRIE_NO_MATCH;
 *   result=next(c);
 * }
 * return result;
 * \endcode
 * @param s A string. Can be nullptr if length is 0.
 * @param length The length of the string. Can be -1 if NUL-terminated.
 * @return The match/value Result.
 * @stable ICU 4.8
 */
247*0e209d39SAndroid Build Coastguard Worker     UStringTrieResult next(ConstChar16Ptr s, int32_t length);
248*0e209d39SAndroid Build Coastguard Worker 
249*0e209d39SAndroid Build Coastguard Worker     /**
250*0e209d39SAndroid Build Coastguard Worker      * Returns a matching string's value if called immediately after
251*0e209d39SAndroid Build Coastguard Worker      * current()/first()/next() returned USTRINGTRIE_INTERMEDIATE_VALUE or USTRINGTRIE_FINAL_VALUE.
252*0e209d39SAndroid Build Coastguard Worker      * getValue() can be called multiple times.
253*0e209d39SAndroid Build Coastguard Worker      *
254*0e209d39SAndroid Build Coastguard Worker      * Do not call getValue() after USTRINGTRIE_NO_MATCH or USTRINGTRIE_NO_VALUE!
255*0e209d39SAndroid Build Coastguard Worker      * @return The value for the string so far.
256*0e209d39SAndroid Build Coastguard Worker      * @stable ICU 4.8
257*0e209d39SAndroid Build Coastguard Worker      */
getValue()258*0e209d39SAndroid Build Coastguard Worker     inline int32_t getValue() const {
259*0e209d39SAndroid Build Coastguard Worker         const char16_t *pos=pos_;
260*0e209d39SAndroid Build Coastguard Worker         int32_t leadUnit=*pos++;
261*0e209d39SAndroid Build Coastguard Worker         // U_ASSERT(leadUnit>=kMinValueLead);
262*0e209d39SAndroid Build Coastguard Worker         return leadUnit&kValueIsFinal ?
263*0e209d39SAndroid Build Coastguard Worker             readValue(pos, leadUnit&0x7fff) : readNodeValue(pos, leadUnit);
264*0e209d39SAndroid Build Coastguard Worker     }
265*0e209d39SAndroid Build Coastguard Worker 
266*0e209d39SAndroid Build Coastguard Worker     /**
267*0e209d39SAndroid Build Coastguard Worker      * Determines whether all strings reachable from the current state
268*0e209d39SAndroid Build Coastguard Worker      * map to the same value.
269*0e209d39SAndroid Build Coastguard Worker      * @param uniqueValue Receives the unique value, if this function returns true.
270*0e209d39SAndroid Build Coastguard Worker      *                    (output-only)
271*0e209d39SAndroid Build Coastguard Worker      * @return true if all strings reachable from the current state
272*0e209d39SAndroid Build Coastguard Worker      *         map to the same value.
273*0e209d39SAndroid Build Coastguard Worker      * @stable ICU 4.8
274*0e209d39SAndroid Build Coastguard Worker      */
hasUniqueValue(int32_t & uniqueValue)275*0e209d39SAndroid Build Coastguard Worker     inline UBool hasUniqueValue(int32_t &uniqueValue) const {
276*0e209d39SAndroid Build Coastguard Worker         const char16_t *pos=pos_;
277*0e209d39SAndroid Build Coastguard Worker         // Skip the rest of a pending linear-match node.
278*0e209d39SAndroid Build Coastguard Worker         return pos!=nullptr && findUniqueValue(pos+remainingMatchLength_+1, false, uniqueValue);
279*0e209d39SAndroid Build Coastguard Worker     }
280*0e209d39SAndroid Build Coastguard Worker 
281*0e209d39SAndroid Build Coastguard Worker     /**
282*0e209d39SAndroid Build Coastguard Worker      * Finds each char16_t which continues the string from the current state.
283*0e209d39SAndroid Build Coastguard Worker      * That is, each char16_t c for which it would be next(c)!=USTRINGTRIE_NO_MATCH now.
284*0e209d39SAndroid Build Coastguard Worker      * @param out Each next char16_t is appended to this object.
285*0e209d39SAndroid Build Coastguard Worker      * @return the number of char16_ts which continue the string from here
286*0e209d39SAndroid Build Coastguard Worker      * @stable ICU 4.8
287*0e209d39SAndroid Build Coastguard Worker      */
288*0e209d39SAndroid Build Coastguard Worker     int32_t getNextUChars(Appendable &out) const;
289*0e209d39SAndroid Build Coastguard Worker 
290*0e209d39SAndroid Build Coastguard Worker     /**
291*0e209d39SAndroid Build Coastguard Worker      * Iterator for all of the (string, value) pairs in a UCharsTrie.
292*0e209d39SAndroid Build Coastguard Worker      * @stable ICU 4.8
293*0e209d39SAndroid Build Coastguard Worker      */
294*0e209d39SAndroid Build Coastguard Worker     class U_COMMON_API Iterator : public UMemory {
295*0e209d39SAndroid Build Coastguard Worker     public:
296*0e209d39SAndroid Build Coastguard Worker         /**
297*0e209d39SAndroid Build Coastguard Worker          * Iterates from the root of a char16_t-serialized UCharsTrie.
298*0e209d39SAndroid Build Coastguard Worker          * @param trieUChars The trie char16_ts.
299*0e209d39SAndroid Build Coastguard Worker          * @param maxStringLength If 0, the iterator returns full strings.
300*0e209d39SAndroid Build Coastguard Worker          *                        Otherwise, the iterator returns strings with this maximum length.
301*0e209d39SAndroid Build Coastguard Worker          * @param errorCode Standard ICU error code. Its input value must
302*0e209d39SAndroid Build Coastguard Worker          *                  pass the U_SUCCESS() test, or else the function returns
303*0e209d39SAndroid Build Coastguard Worker          *                  immediately. Check for U_FAILURE() on output or use with
304*0e209d39SAndroid Build Coastguard Worker          *                  function chaining. (See User Guide for details.)
305*0e209d39SAndroid Build Coastguard Worker          * @stable ICU 4.8
306*0e209d39SAndroid Build Coastguard Worker          */
307*0e209d39SAndroid Build Coastguard Worker         Iterator(ConstChar16Ptr trieUChars, int32_t maxStringLength, UErrorCode &errorCode);
308*0e209d39SAndroid Build Coastguard Worker 
309*0e209d39SAndroid Build Coastguard Worker         /**
310*0e209d39SAndroid Build Coastguard Worker          * Iterates from the current state of the specified UCharsTrie.
311*0e209d39SAndroid Build Coastguard Worker          * @param trie The trie whose state will be copied for iteration.
312*0e209d39SAndroid Build Coastguard Worker          * @param maxStringLength If 0, the iterator returns full strings.
313*0e209d39SAndroid Build Coastguard Worker          *                        Otherwise, the iterator returns strings with this maximum length.
314*0e209d39SAndroid Build Coastguard Worker          * @param errorCode Standard ICU error code. Its input value must
315*0e209d39SAndroid Build Coastguard Worker          *                  pass the U_SUCCESS() test, or else the function returns
316*0e209d39SAndroid Build Coastguard Worker          *                  immediately. Check for U_FAILURE() on output or use with
317*0e209d39SAndroid Build Coastguard Worker          *                  function chaining. (See User Guide for details.)
318*0e209d39SAndroid Build Coastguard Worker          * @stable ICU 4.8
319*0e209d39SAndroid Build Coastguard Worker          */
320*0e209d39SAndroid Build Coastguard Worker         Iterator(const UCharsTrie &trie, int32_t maxStringLength, UErrorCode &errorCode);
321*0e209d39SAndroid Build Coastguard Worker 
322*0e209d39SAndroid Build Coastguard Worker         /**
323*0e209d39SAndroid Build Coastguard Worker          * Destructor.
324*0e209d39SAndroid Build Coastguard Worker          * @stable ICU 4.8
325*0e209d39SAndroid Build Coastguard Worker          */
326*0e209d39SAndroid Build Coastguard Worker         ~Iterator();
327*0e209d39SAndroid Build Coastguard Worker 
328*0e209d39SAndroid Build Coastguard Worker         /**
329*0e209d39SAndroid Build Coastguard Worker          * Resets this iterator to its initial state.
330*0e209d39SAndroid Build Coastguard Worker          * @return *this
331*0e209d39SAndroid Build Coastguard Worker          * @stable ICU 4.8
332*0e209d39SAndroid Build Coastguard Worker          */
333*0e209d39SAndroid Build Coastguard Worker         Iterator &reset();
334*0e209d39SAndroid Build Coastguard Worker 
335*0e209d39SAndroid Build Coastguard Worker         /**
336*0e209d39SAndroid Build Coastguard Worker          * @return true if there are more elements.
337*0e209d39SAndroid Build Coastguard Worker          * @stable ICU 4.8
338*0e209d39SAndroid Build Coastguard Worker          */
339*0e209d39SAndroid Build Coastguard Worker         UBool hasNext() const;
340*0e209d39SAndroid Build Coastguard Worker 
341*0e209d39SAndroid Build Coastguard Worker         /**
342*0e209d39SAndroid Build Coastguard Worker          * Finds the next (string, value) pair if there is one.
343*0e209d39SAndroid Build Coastguard Worker          *
344*0e209d39SAndroid Build Coastguard Worker          * If the string is truncated to the maximum length and does not
345*0e209d39SAndroid Build Coastguard Worker          * have a real value, then the value is set to -1.
346*0e209d39SAndroid Build Coastguard Worker          * In this case, this "not a real value" is indistinguishable from
347*0e209d39SAndroid Build Coastguard Worker          * a real value of -1.
348*0e209d39SAndroid Build Coastguard Worker          * @param errorCode Standard ICU error code. Its input value must
349*0e209d39SAndroid Build Coastguard Worker          *                  pass the U_SUCCESS() test, or else the function returns
350*0e209d39SAndroid Build Coastguard Worker          *                  immediately. Check for U_FAILURE() on output or use with
351*0e209d39SAndroid Build Coastguard Worker          *                  function chaining. (See User Guide for details.)
352*0e209d39SAndroid Build Coastguard Worker          * @return true if there is another element.
353*0e209d39SAndroid Build Coastguard Worker          * @stable ICU 4.8
354*0e209d39SAndroid Build Coastguard Worker          */
355*0e209d39SAndroid Build Coastguard Worker         UBool next(UErrorCode &errorCode);
356*0e209d39SAndroid Build Coastguard Worker 
357*0e209d39SAndroid Build Coastguard Worker         /**
358*0e209d39SAndroid Build Coastguard Worker          * @return The string for the last successful next().
359*0e209d39SAndroid Build Coastguard Worker          * @stable ICU 4.8
360*0e209d39SAndroid Build Coastguard Worker          */
getString()361*0e209d39SAndroid Build Coastguard Worker         const UnicodeString &getString() const { return str_; }
362*0e209d39SAndroid Build Coastguard Worker         /**
363*0e209d39SAndroid Build Coastguard Worker          * @return The value for the last successful next().
364*0e209d39SAndroid Build Coastguard Worker          * @stable ICU 4.8
365*0e209d39SAndroid Build Coastguard Worker          */
getValue()366*0e209d39SAndroid Build Coastguard Worker         int32_t getValue() const { return value_; }
367*0e209d39SAndroid Build Coastguard Worker 
368*0e209d39SAndroid Build Coastguard Worker     private:
truncateAndStop()369*0e209d39SAndroid Build Coastguard Worker         UBool truncateAndStop() {
370*0e209d39SAndroid Build Coastguard Worker             pos_=nullptr;
371*0e209d39SAndroid Build Coastguard Worker             value_=-1;  // no real value for str
372*0e209d39SAndroid Build Coastguard Worker             return true;
373*0e209d39SAndroid Build Coastguard Worker         }
374*0e209d39SAndroid Build Coastguard Worker 
375*0e209d39SAndroid Build Coastguard Worker         const char16_t *branchNext(const char16_t *pos, int32_t length, UErrorCode &errorCode);
376*0e209d39SAndroid Build Coastguard Worker 
377*0e209d39SAndroid Build Coastguard Worker         const char16_t *uchars_;
378*0e209d39SAndroid Build Coastguard Worker         const char16_t *pos_;
379*0e209d39SAndroid Build Coastguard Worker         const char16_t *initialPos_;
380*0e209d39SAndroid Build Coastguard Worker         int32_t remainingMatchLength_;
381*0e209d39SAndroid Build Coastguard Worker         int32_t initialRemainingMatchLength_;
382*0e209d39SAndroid Build Coastguard Worker         UBool skipValue_;  // Skip intermediate value which was already delivered.
383*0e209d39SAndroid Build Coastguard Worker 
384*0e209d39SAndroid Build Coastguard Worker         UnicodeString str_;
385*0e209d39SAndroid Build Coastguard Worker         int32_t maxLength_;
386*0e209d39SAndroid Build Coastguard Worker         int32_t value_;
387*0e209d39SAndroid Build Coastguard Worker 
388*0e209d39SAndroid Build Coastguard Worker         // The stack stores pairs of integers for backtracking to another
389*0e209d39SAndroid Build Coastguard Worker         // outbound edge of a branch node.
390*0e209d39SAndroid Build Coastguard Worker         // The first integer is an offset from uchars_.
391*0e209d39SAndroid Build Coastguard Worker         // The second integer has the str_.length() from before the node in bits 15..0,
392*0e209d39SAndroid Build Coastguard Worker         // and the remaining branch length in bits 31..16.
393*0e209d39SAndroid Build Coastguard Worker         // (We could store the remaining branch length minus 1 in bits 30..16 and not use the sign bit,
394*0e209d39SAndroid Build Coastguard Worker         // but the code looks more confusing that way.)
395*0e209d39SAndroid Build Coastguard Worker         UVector32 *stack_;
396*0e209d39SAndroid Build Coastguard Worker     };
397*0e209d39SAndroid Build Coastguard Worker 
398*0e209d39SAndroid Build Coastguard Worker private:
399*0e209d39SAndroid Build Coastguard Worker     friend class UCharsTrieBuilder;
400*0e209d39SAndroid Build Coastguard Worker 
401*0e209d39SAndroid Build Coastguard Worker     /**
402*0e209d39SAndroid Build Coastguard Worker      * Constructs a UCharsTrie reader instance.
403*0e209d39SAndroid Build Coastguard Worker      * Unlike the public constructor which just aliases an array,
404*0e209d39SAndroid Build Coastguard Worker      * this constructor adopts the builder's array.
405*0e209d39SAndroid Build Coastguard Worker      * This constructor is only called by the builder.
406*0e209d39SAndroid Build Coastguard Worker      */
UCharsTrie(char16_t * adoptUChars,const char16_t * trieUChars)407*0e209d39SAndroid Build Coastguard Worker     UCharsTrie(char16_t *adoptUChars, const char16_t *trieUChars)
408*0e209d39SAndroid Build Coastguard Worker             : ownedArray_(adoptUChars), uchars_(trieUChars),
409*0e209d39SAndroid Build Coastguard Worker               pos_(uchars_), remainingMatchLength_(-1) {}
410*0e209d39SAndroid Build Coastguard Worker 
411*0e209d39SAndroid Build Coastguard Worker     // No assignment operator.
412*0e209d39SAndroid Build Coastguard Worker     UCharsTrie &operator=(const UCharsTrie &other) = delete;
413*0e209d39SAndroid Build Coastguard Worker 
stop()414*0e209d39SAndroid Build Coastguard Worker     inline void stop() {
415*0e209d39SAndroid Build Coastguard Worker         pos_=nullptr;
416*0e209d39SAndroid Build Coastguard Worker     }
417*0e209d39SAndroid Build Coastguard Worker 
418*0e209d39SAndroid Build Coastguard Worker     // Reads a compact 32-bit integer.
419*0e209d39SAndroid Build Coastguard Worker     // pos is already after the leadUnit, and the lead unit has bit 15 reset.
readValue(const char16_t * pos,int32_t leadUnit)420*0e209d39SAndroid Build Coastguard Worker     static inline int32_t readValue(const char16_t *pos, int32_t leadUnit) {
421*0e209d39SAndroid Build Coastguard Worker         int32_t value;
422*0e209d39SAndroid Build Coastguard Worker         if(leadUnit<kMinTwoUnitValueLead) {
423*0e209d39SAndroid Build Coastguard Worker             value=leadUnit;
424*0e209d39SAndroid Build Coastguard Worker         } else if(leadUnit<kThreeUnitValueLead) {
425*0e209d39SAndroid Build Coastguard Worker             value=((leadUnit-kMinTwoUnitValueLead)<<16)|*pos;
426*0e209d39SAndroid Build Coastguard Worker         } else {
427*0e209d39SAndroid Build Coastguard Worker             value=(pos[0]<<16)|pos[1];
428*0e209d39SAndroid Build Coastguard Worker         }
429*0e209d39SAndroid Build Coastguard Worker         return value;
430*0e209d39SAndroid Build Coastguard Worker     }
skipValue(const char16_t * pos,int32_t leadUnit)431*0e209d39SAndroid Build Coastguard Worker     static inline const char16_t *skipValue(const char16_t *pos, int32_t leadUnit) {
432*0e209d39SAndroid Build Coastguard Worker         if(leadUnit>=kMinTwoUnitValueLead) {
433*0e209d39SAndroid Build Coastguard Worker             if(leadUnit<kThreeUnitValueLead) {
434*0e209d39SAndroid Build Coastguard Worker                 ++pos;
435*0e209d39SAndroid Build Coastguard Worker             } else {
436*0e209d39SAndroid Build Coastguard Worker                 pos+=2;
437*0e209d39SAndroid Build Coastguard Worker             }
438*0e209d39SAndroid Build Coastguard Worker         }
439*0e209d39SAndroid Build Coastguard Worker         return pos;
440*0e209d39SAndroid Build Coastguard Worker     }
skipValue(const char16_t * pos)441*0e209d39SAndroid Build Coastguard Worker     static inline const char16_t *skipValue(const char16_t *pos) {
442*0e209d39SAndroid Build Coastguard Worker         int32_t leadUnit=*pos++;
443*0e209d39SAndroid Build Coastguard Worker         return skipValue(pos, leadUnit&0x7fff);
444*0e209d39SAndroid Build Coastguard Worker     }
445*0e209d39SAndroid Build Coastguard Worker 
readNodeValue(const char16_t * pos,int32_t leadUnit)446*0e209d39SAndroid Build Coastguard Worker     static inline int32_t readNodeValue(const char16_t *pos, int32_t leadUnit) {
447*0e209d39SAndroid Build Coastguard Worker         // U_ASSERT(kMinValueLead<=leadUnit && leadUnit<kValueIsFinal);
448*0e209d39SAndroid Build Coastguard Worker         int32_t value;
449*0e209d39SAndroid Build Coastguard Worker         if(leadUnit<kMinTwoUnitNodeValueLead) {
450*0e209d39SAndroid Build Coastguard Worker             value=(leadUnit>>6)-1;
451*0e209d39SAndroid Build Coastguard Worker         } else if(leadUnit<kThreeUnitNodeValueLead) {
452*0e209d39SAndroid Build Coastguard Worker             value=(((leadUnit&0x7fc0)-kMinTwoUnitNodeValueLead)<<10)|*pos;
453*0e209d39SAndroid Build Coastguard Worker         } else {
454*0e209d39SAndroid Build Coastguard Worker             value=(pos[0]<<16)|pos[1];
455*0e209d39SAndroid Build Coastguard Worker         }
456*0e209d39SAndroid Build Coastguard Worker         return value;
457*0e209d39SAndroid Build Coastguard Worker     }
skipNodeValue(const char16_t * pos,int32_t leadUnit)458*0e209d39SAndroid Build Coastguard Worker     static inline const char16_t *skipNodeValue(const char16_t *pos, int32_t leadUnit) {
459*0e209d39SAndroid Build Coastguard Worker         // U_ASSERT(kMinValueLead<=leadUnit && leadUnit<kValueIsFinal);
460*0e209d39SAndroid Build Coastguard Worker         if(leadUnit>=kMinTwoUnitNodeValueLead) {
461*0e209d39SAndroid Build Coastguard Worker             if(leadUnit<kThreeUnitNodeValueLead) {
462*0e209d39SAndroid Build Coastguard Worker                 ++pos;
463*0e209d39SAndroid Build Coastguard Worker             } else {
464*0e209d39SAndroid Build Coastguard Worker                 pos+=2;
465*0e209d39SAndroid Build Coastguard Worker             }
466*0e209d39SAndroid Build Coastguard Worker         }
467*0e209d39SAndroid Build Coastguard Worker         return pos;
468*0e209d39SAndroid Build Coastguard Worker     }
469*0e209d39SAndroid Build Coastguard Worker 
jumpByDelta(const char16_t * pos)470*0e209d39SAndroid Build Coastguard Worker     static inline const char16_t *jumpByDelta(const char16_t *pos) {
471*0e209d39SAndroid Build Coastguard Worker         int32_t delta=*pos++;
472*0e209d39SAndroid Build Coastguard Worker         if(delta>=kMinTwoUnitDeltaLead) {
473*0e209d39SAndroid Build Coastguard Worker             if(delta==kThreeUnitDeltaLead) {
474*0e209d39SAndroid Build Coastguard Worker                 delta=(pos[0]<<16)|pos[1];
475*0e209d39SAndroid Build Coastguard Worker                 pos+=2;
476*0e209d39SAndroid Build Coastguard Worker             } else {
477*0e209d39SAndroid Build Coastguard Worker                 delta=((delta-kMinTwoUnitDeltaLead)<<16)|*pos++;
478*0e209d39SAndroid Build Coastguard Worker             }
479*0e209d39SAndroid Build Coastguard Worker         }
480*0e209d39SAndroid Build Coastguard Worker         return pos+delta;
481*0e209d39SAndroid Build Coastguard Worker     }
482*0e209d39SAndroid Build Coastguard Worker 
skipDelta(const char16_t * pos)483*0e209d39SAndroid Build Coastguard Worker     static const char16_t *skipDelta(const char16_t *pos) {
484*0e209d39SAndroid Build Coastguard Worker         int32_t delta=*pos++;
485*0e209d39SAndroid Build Coastguard Worker         if(delta>=kMinTwoUnitDeltaLead) {
486*0e209d39SAndroid Build Coastguard Worker             if(delta==kThreeUnitDeltaLead) {
487*0e209d39SAndroid Build Coastguard Worker                 pos+=2;
488*0e209d39SAndroid Build Coastguard Worker             } else {
489*0e209d39SAndroid Build Coastguard Worker                 ++pos;
490*0e209d39SAndroid Build Coastguard Worker             }
491*0e209d39SAndroid Build Coastguard Worker         }
492*0e209d39SAndroid Build Coastguard Worker         return pos;
493*0e209d39SAndroid Build Coastguard Worker     }
494*0e209d39SAndroid Build Coastguard Worker 
valueResult(int32_t node)495*0e209d39SAndroid Build Coastguard Worker     static inline UStringTrieResult valueResult(int32_t node) {
496*0e209d39SAndroid Build Coastguard Worker         return (UStringTrieResult)(USTRINGTRIE_INTERMEDIATE_VALUE-(node>>15));
497*0e209d39SAndroid Build Coastguard Worker     }
498*0e209d39SAndroid Build Coastguard Worker 
499*0e209d39SAndroid Build Coastguard Worker     // Handles a branch node for both next(uchar) and next(string).
500*0e209d39SAndroid Build Coastguard Worker     UStringTrieResult branchNext(const char16_t *pos, int32_t length, int32_t uchar);
501*0e209d39SAndroid Build Coastguard Worker 
502*0e209d39SAndroid Build Coastguard Worker     // Requires remainingMatchLength_<0.
503*0e209d39SAndroid Build Coastguard Worker     UStringTrieResult nextImpl(const char16_t *pos, int32_t uchar);
504*0e209d39SAndroid Build Coastguard Worker 
505*0e209d39SAndroid Build Coastguard Worker     // Helper functions for hasUniqueValue().
506*0e209d39SAndroid Build Coastguard Worker     // Recursively finds a unique value (or whether there is not a unique one)
507*0e209d39SAndroid Build Coastguard Worker     // from a branch.
508*0e209d39SAndroid Build Coastguard Worker     static const char16_t *findUniqueValueFromBranch(const char16_t *pos, int32_t length,
509*0e209d39SAndroid Build Coastguard Worker                                                   UBool haveUniqueValue, int32_t &uniqueValue);
510*0e209d39SAndroid Build Coastguard Worker     // Recursively finds a unique value (or whether there is not a unique one)
511*0e209d39SAndroid Build Coastguard Worker     // starting from a position on a node lead unit.
512*0e209d39SAndroid Build Coastguard Worker     static UBool findUniqueValue(const char16_t *pos, UBool haveUniqueValue, int32_t &uniqueValue);
513*0e209d39SAndroid Build Coastguard Worker 
514*0e209d39SAndroid Build Coastguard Worker     // Helper functions for getNextUChars().
515*0e209d39SAndroid Build Coastguard Worker     // getNextUChars() when pos is on a branch node.
516*0e209d39SAndroid Build Coastguard Worker     static void getNextBranchUChars(const char16_t *pos, int32_t length, Appendable &out);
517*0e209d39SAndroid Build Coastguard Worker 
518*0e209d39SAndroid Build Coastguard Worker     // UCharsTrie data structure
519*0e209d39SAndroid Build Coastguard Worker     //
520*0e209d39SAndroid Build Coastguard Worker     // The trie consists of a series of char16_t-serialized nodes for incremental
521*0e209d39SAndroid Build Coastguard Worker     // Unicode string/char16_t sequence matching. (char16_t=16-bit unsigned integer)
522*0e209d39SAndroid Build Coastguard Worker     // The root node is at the beginning of the trie data.
523*0e209d39SAndroid Build Coastguard Worker     //
524*0e209d39SAndroid Build Coastguard Worker     // Types of nodes are distinguished by their node lead unit ranges.
525*0e209d39SAndroid Build Coastguard Worker     // After each node, except a final-value node, another node follows to
526*0e209d39SAndroid Build Coastguard Worker     // encode match values or continue matching further units.
527*0e209d39SAndroid Build Coastguard Worker     //
528*0e209d39SAndroid Build Coastguard Worker     // Node types:
529*0e209d39SAndroid Build Coastguard Worker     //  - Final-value node: Stores a 32-bit integer in a compact, variable-length format.
530*0e209d39SAndroid Build Coastguard Worker     //    The value is for the string/char16_t sequence so far.
531*0e209d39SAndroid Build Coastguard Worker     //  - Match node, optionally with an intermediate value in a different compact format.
532*0e209d39SAndroid Build Coastguard Worker     //    The value, if present, is for the string/char16_t sequence so far.
533*0e209d39SAndroid Build Coastguard Worker     //
534*0e209d39SAndroid Build Coastguard Worker     //  Aside from the value, which uses the node lead unit's high bits:
535*0e209d39SAndroid Build Coastguard Worker     //
536*0e209d39SAndroid Build Coastguard Worker     //  - Linear-match node: Matches a number of units.
537*0e209d39SAndroid Build Coastguard Worker     //  - Branch node: Branches to other nodes according to the current input unit.
538*0e209d39SAndroid Build Coastguard Worker     //    The node unit is the length of the branch (number of units to select from)
539*0e209d39SAndroid Build Coastguard Worker     //    minus 1. It is followed by a sub-node:
540*0e209d39SAndroid Build Coastguard Worker     //    - If the length is at most kMaxBranchLinearSubNodeLength, then
541*0e209d39SAndroid Build Coastguard Worker     //      there are length-1 (key, value) pairs and then one more comparison unit.
542*0e209d39SAndroid Build Coastguard Worker     //      If one of the key units matches, then the value is either a final value for
543*0e209d39SAndroid Build Coastguard Worker     //      the string so far, or a "jump" delta to the next node.
544*0e209d39SAndroid Build Coastguard Worker     //      If the last unit matches, then matching continues with the next node.
545*0e209d39SAndroid Build Coastguard Worker     //      (Values have the same encoding as final-value nodes.)
546*0e209d39SAndroid Build Coastguard Worker     //    - If the length is greater than kMaxBranchLinearSubNodeLength, then
547*0e209d39SAndroid Build Coastguard Worker     //      there is one unit and one "jump" delta.
548*0e209d39SAndroid Build Coastguard Worker     //      If the input unit is less than the sub-node unit, then "jump" by delta to
549*0e209d39SAndroid Build Coastguard Worker     //      the next sub-node which will have a length of length/2.
550*0e209d39SAndroid Build Coastguard Worker     //      (The delta has its own compact encoding.)
551*0e209d39SAndroid Build Coastguard Worker     //      Otherwise, skip the "jump" delta to the next sub-node
552*0e209d39SAndroid Build Coastguard Worker     //      which will have a length of length-length/2.
553*0e209d39SAndroid Build Coastguard Worker 
554*0e209d39SAndroid Build Coastguard Worker     // Match-node lead unit values, after masking off intermediate-value bits:
555*0e209d39SAndroid Build Coastguard Worker 
556*0e209d39SAndroid Build Coastguard Worker     // 0000..002f: Branch node. If node!=0 then the length is node+1, otherwise
557*0e209d39SAndroid Build Coastguard Worker     // the length is one more than the next unit.
558*0e209d39SAndroid Build Coastguard Worker 
559*0e209d39SAndroid Build Coastguard Worker     // For a branch sub-node with at most this many entries, we drop down
560*0e209d39SAndroid Build Coastguard Worker     // to a linear search.
561*0e209d39SAndroid Build Coastguard Worker     static const int32_t kMaxBranchLinearSubNodeLength=5;
562*0e209d39SAndroid Build Coastguard Worker 
563*0e209d39SAndroid Build Coastguard Worker     // 0030..003f: Linear-match node, match 1..16 units and continue reading the next node.
564*0e209d39SAndroid Build Coastguard Worker     static const int32_t kMinLinearMatch=0x30;
565*0e209d39SAndroid Build Coastguard Worker     static const int32_t kMaxLinearMatchLength=0x10;
566*0e209d39SAndroid Build Coastguard Worker 
567*0e209d39SAndroid Build Coastguard Worker     // Match-node lead unit bits 14..6 for the optional intermediate value.
568*0e209d39SAndroid Build Coastguard Worker     // If these bits are 0, then there is no intermediate value.
569*0e209d39SAndroid Build Coastguard Worker     // Otherwise, see the *NodeValue* constants below.
570*0e209d39SAndroid Build Coastguard Worker     static const int32_t kMinValueLead=kMinLinearMatch+kMaxLinearMatchLength;  // 0x0040
571*0e209d39SAndroid Build Coastguard Worker     static const int32_t kNodeTypeMask=kMinValueLead-1;  // 0x003f
572*0e209d39SAndroid Build Coastguard Worker 
573*0e209d39SAndroid Build Coastguard Worker     // A final-value node has bit 15 set.
574*0e209d39SAndroid Build Coastguard Worker     static const int32_t kValueIsFinal=0x8000;
575*0e209d39SAndroid Build Coastguard Worker 
576*0e209d39SAndroid Build Coastguard Worker     // Compact value: After testing and masking off bit 15, use the following thresholds.
    // Lead units up to kMaxOneUnitValue are the value itself (see readValue()).
577*0e209d39SAndroid Build Coastguard Worker     static const int32_t kMaxOneUnitValue=0x3fff;
578*0e209d39SAndroid Build Coastguard Worker 
    // Lead units in [kMinTwoUnitValueLead, kThreeUnitValueLead) are followed by one more unit;
    // the lead unit kThreeUnitValueLead is followed by two more units.
579*0e209d39SAndroid Build Coastguard Worker     static const int32_t kMinTwoUnitValueLead=kMaxOneUnitValue+1;  // 0x4000
580*0e209d39SAndroid Build Coastguard Worker     static const int32_t kThreeUnitValueLead=0x7fff;
581*0e209d39SAndroid Build Coastguard Worker 
    // Largest value that the two-unit form decoded by readValue() can produce.
582*0e209d39SAndroid Build Coastguard Worker     static const int32_t kMaxTwoUnitValue=((kThreeUnitValueLead-kMinTwoUnitValueLead)<<16)-1;  // 0x3ffeffff
583*0e209d39SAndroid Build Coastguard Worker 
584*0e209d39SAndroid Build Coastguard Worker     // Compact intermediate-value integer, lead unit shared with a branch or linear-match node.
    // The thresholds below are the ones tested by readNodeValue() and skipNodeValue().
585*0e209d39SAndroid Build Coastguard Worker     static const int32_t kMaxOneUnitNodeValue=0xff;
586*0e209d39SAndroid Build Coastguard Worker     static const int32_t kMinTwoUnitNodeValueLead=kMinValueLead+((kMaxOneUnitNodeValue+1)<<6);  // 0x4040
587*0e209d39SAndroid Build Coastguard Worker     static const int32_t kThreeUnitNodeValueLead=0x7fc0;
588*0e209d39SAndroid Build Coastguard Worker 
    // Largest value that the two-unit form decoded by readNodeValue() can produce.
589*0e209d39SAndroid Build Coastguard Worker     static const int32_t kMaxTwoUnitNodeValue=
590*0e209d39SAndroid Build Coastguard Worker         ((kThreeUnitNodeValueLead-kMinTwoUnitNodeValueLead)<<10)-1;  // 0xfdffff
591*0e209d39SAndroid Build Coastguard Worker 
592*0e209d39SAndroid Build Coastguard Worker     // Compact delta integers.
    // The thresholds below are the ones tested by jumpByDelta() and skipDelta().
593*0e209d39SAndroid Build Coastguard Worker     static const int32_t kMaxOneUnitDelta=0xfbff;
594*0e209d39SAndroid Build Coastguard Worker     static const int32_t kMinTwoUnitDeltaLead=kMaxOneUnitDelta+1;  // 0xfc00
595*0e209d39SAndroid Build Coastguard Worker     static const int32_t kThreeUnitDeltaLead=0xffff;
596*0e209d39SAndroid Build Coastguard Worker 
    // Largest delta that the two-unit form decoded by jumpByDelta() can produce.
597*0e209d39SAndroid Build Coastguard Worker     static const int32_t kMaxTwoUnitDelta=((kThreeUnitDeltaLead-kMinTwoUnitDeltaLead)<<16)-1;  // 0x03feffff
598*0e209d39SAndroid Build Coastguard Worker 
599*0e209d39SAndroid Build Coastguard Worker     // For getState64():
600*0e209d39SAndroid Build Coastguard Worker     // The remainingMatchLength_ ranges over -1..14, where 14=kMaxLinearMatchLength(0x10)-2,
601*0e209d39SAndroid Build Coastguard Worker     // so we need at least 5 bits for that.
602*0e209d39SAndroid Build Coastguard Worker     // We add 2 to store it as a positive value 1..16=kMaxLinearMatchLength.
    // A shift of 59 leaves the top 5 bits of a 64-bit state above the mask;
    // kState64PosMask covers the low 59 bits.
    // NOTE(review): presumably the low bits hold the read position - confirm in getState64().
603*0e209d39SAndroid Build Coastguard Worker     static constexpr int32_t kState64RemainingShift = 59;
604*0e209d39SAndroid Build Coastguard Worker     static constexpr uint64_t kState64PosMask = (UINT64_C(1) << kState64RemainingShift) - 1;
605*0e209d39SAndroid Build Coastguard Worker 
    // Array adopted from the builder by the private constructor.
    // NOTE(review): presumably null when the trie only aliases caller data, and
    // released by the destructor - both are outside this view, confirm.
606*0e209d39SAndroid Build Coastguard Worker     char16_t *ownedArray_;
607*0e209d39SAndroid Build Coastguard Worker 
608*0e209d39SAndroid Build Coastguard Worker     // Fixed value referencing the UCharsTrie words.
609*0e209d39SAndroid Build Coastguard Worker     const char16_t *uchars_;
610*0e209d39SAndroid Build Coastguard Worker 
611*0e209d39SAndroid Build Coastguard Worker     // Iterator variables.
612*0e209d39SAndroid Build Coastguard Worker 
613*0e209d39SAndroid Build Coastguard Worker     // Pointer to next trie unit to read. nullptr if no more matches (set by stop()).
614*0e209d39SAndroid Build Coastguard Worker     const char16_t *pos_;
615*0e209d39SAndroid Build Coastguard Worker     // Remaining length of a linear-match node, minus 1. Negative if not in such a node.
    // The private constructor initializes it to -1.
616*0e209d39SAndroid Build Coastguard Worker     int32_t remainingMatchLength_;
617*0e209d39SAndroid Build Coastguard Worker };
618*0e209d39SAndroid Build Coastguard Worker 
619*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_END
620*0e209d39SAndroid Build Coastguard Worker 
621*0e209d39SAndroid Build Coastguard Worker #endif /* U_SHOW_CPLUSPLUS_API */
622*0e209d39SAndroid Build Coastguard Worker 
623*0e209d39SAndroid Build Coastguard Worker #endif  // __UCHARSTRIE_H__
624