xref: /aosp_15_r20/external/icu/libicu/cts_headers/ruleiter.h (revision 0e209d3975ff4a8c132096b14b0e9364a753506e)
1*0e209d39SAndroid Build Coastguard Worker // © 2016 and later: Unicode, Inc. and others.
2*0e209d39SAndroid Build Coastguard Worker // License & terms of use: http://www.unicode.org/copyright.html
3*0e209d39SAndroid Build Coastguard Worker /*
4*0e209d39SAndroid Build Coastguard Worker **********************************************************************
5*0e209d39SAndroid Build Coastguard Worker * Copyright (c) 2003-2011, International Business Machines
6*0e209d39SAndroid Build Coastguard Worker * Corporation and others.  All Rights Reserved.
7*0e209d39SAndroid Build Coastguard Worker **********************************************************************
8*0e209d39SAndroid Build Coastguard Worker * Author: Alan Liu
9*0e209d39SAndroid Build Coastguard Worker * Created: September 24 2003
10*0e209d39SAndroid Build Coastguard Worker * Since: ICU 2.8
11*0e209d39SAndroid Build Coastguard Worker **********************************************************************
12*0e209d39SAndroid Build Coastguard Worker */
13*0e209d39SAndroid Build Coastguard Worker #ifndef _RULEITER_H_
14*0e209d39SAndroid Build Coastguard Worker #define _RULEITER_H_
15*0e209d39SAndroid Build Coastguard Worker 
16*0e209d39SAndroid Build Coastguard Worker #include "unicode/uobject.h"
17*0e209d39SAndroid Build Coastguard Worker 
18*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_BEGIN
19*0e209d39SAndroid Build Coastguard Worker 
20*0e209d39SAndroid Build Coastguard Worker class UnicodeString;  // used in this header only by reference or pointer
21*0e209d39SAndroid Build Coastguard Worker class ParsePosition;  // used in this header only by reference
22*0e209d39SAndroid Build Coastguard Worker class SymbolTable;  // used in this header only through a const pointer
23*0e209d39SAndroid Build Coastguard Worker 
24*0e209d39SAndroid Build Coastguard Worker /**
25*0e209d39SAndroid Build Coastguard Worker  * An iterator that returns 32-bit code points.  This class is deliberately
26*0e209d39SAndroid Build Coastguard Worker  * <em>not</em> related to any of the ICU character iterator classes
27*0e209d39SAndroid Build Coastguard Worker  * in order to minimize complexity.  The text and the ParsePosition are held by reference, not copied.
28*0e209d39SAndroid Build Coastguard Worker  * @author Alan Liu
29*0e209d39SAndroid Build Coastguard Worker  * @since ICU 2.8
30*0e209d39SAndroid Build Coastguard Worker  */
31*0e209d39SAndroid Build Coastguard Worker class RuleCharacterIterator : public UMemory {
32*0e209d39SAndroid Build Coastguard Worker 
33*0e209d39SAndroid Build Coastguard Worker     // TODO: Ideas for later.  (Do not implement if not needed, lest the
34*0e209d39SAndroid Build Coastguard Worker     // code coverage numbers go down due to unused methods.)
35*0e209d39SAndroid Build Coastguard Worker     // 1. Add a copy constructor, operator==() method.
36*0e209d39SAndroid Build Coastguard Worker     // 2. Rather than return DONE, throw an exception if the end
37*0e209d39SAndroid Build Coastguard Worker     // is reached -- this is an alternate usage model, probably not useful.
38*0e209d39SAndroid Build Coastguard Worker 
39*0e209d39SAndroid Build Coastguard Worker private:
40*0e209d39SAndroid Build Coastguard Worker     /**
41*0e209d39SAndroid Build Coastguard Worker      * Text being iterated (held by reference, not copied).
42*0e209d39SAndroid Build Coastguard Worker      */
43*0e209d39SAndroid Build Coastguard Worker     const UnicodeString& text;
44*0e209d39SAndroid Build Coastguard Worker 
45*0e209d39SAndroid Build Coastguard Worker     /**
46*0e209d39SAndroid Build Coastguard Worker      * Position of iterator (held by reference, not copied).
47*0e209d39SAndroid Build Coastguard Worker      */
48*0e209d39SAndroid Build Coastguard Worker     ParsePosition& pos;
49*0e209d39SAndroid Build Coastguard Worker 
50*0e209d39SAndroid Build Coastguard Worker     /**
51*0e209d39SAndroid Build Coastguard Worker      * Symbol table used to parse and dereference variables.  May be null.
52*0e209d39SAndroid Build Coastguard Worker      */
53*0e209d39SAndroid Build Coastguard Worker     const SymbolTable* sym;
54*0e209d39SAndroid Build Coastguard Worker 
55*0e209d39SAndroid Build Coastguard Worker     /**
56*0e209d39SAndroid Build Coastguard Worker      * Current variable expansion, or null if none.
57*0e209d39SAndroid Build Coastguard Worker      */
58*0e209d39SAndroid Build Coastguard Worker     const UnicodeString* buf;
59*0e209d39SAndroid Build Coastguard Worker 
60*0e209d39SAndroid Build Coastguard Worker     /**
61*0e209d39SAndroid Build Coastguard Worker      * Position within buf.  Meaningless if buf is null.
62*0e209d39SAndroid Build Coastguard Worker      */
63*0e209d39SAndroid Build Coastguard Worker     int32_t bufPos;
64*0e209d39SAndroid Build Coastguard Worker 
65*0e209d39SAndroid Build Coastguard Worker public:
66*0e209d39SAndroid Build Coastguard Worker     /**
67*0e209d39SAndroid Build Coastguard Worker      * Value returned when there are no more characters to iterate.
68*0e209d39SAndroid Build Coastguard Worker      */
69*0e209d39SAndroid Build Coastguard Worker     static constexpr int32_t DONE = -1;
70*0e209d39SAndroid Build Coastguard Worker 
71*0e209d39SAndroid Build Coastguard Worker     /**
72*0e209d39SAndroid Build Coastguard Worker      * Bitmask option to enable parsing of variable names.  If (options &
73*0e209d39SAndroid Build Coastguard Worker      * PARSE_VARIABLES) != 0, then an embedded variable will be expanded to
74*0e209d39SAndroid Build Coastguard Worker      * its value.  Variables are parsed using the SymbolTable API.
75*0e209d39SAndroid Build Coastguard Worker      */
76*0e209d39SAndroid Build Coastguard Worker     static constexpr int32_t PARSE_VARIABLES = 1;
77*0e209d39SAndroid Build Coastguard Worker 
78*0e209d39SAndroid Build Coastguard Worker     /**
79*0e209d39SAndroid Build Coastguard Worker      * Bitmask option to enable parsing of escape sequences.  If (options &
80*0e209d39SAndroid Build Coastguard Worker      * PARSE_ESCAPES) != 0, then an embedded escape sequence will be expanded
81*0e209d39SAndroid Build Coastguard Worker      * to its value.  Escapes are parsed using Utility.unescapeAt().
82*0e209d39SAndroid Build Coastguard Worker      */
83*0e209d39SAndroid Build Coastguard Worker     static constexpr int32_t PARSE_ESCAPES   = 2;
84*0e209d39SAndroid Build Coastguard Worker 
85*0e209d39SAndroid Build Coastguard Worker     /**
86*0e209d39SAndroid Build Coastguard Worker      * Bitmask option to enable skipping of whitespace.  If (options &
87*0e209d39SAndroid Build Coastguard Worker      * SKIP_WHITESPACE) != 0, then Pattern_White_Space characters will be silently
88*0e209d39SAndroid Build Coastguard Worker      * skipped, as if they were not present in the input.
89*0e209d39SAndroid Build Coastguard Worker      */
90*0e209d39SAndroid Build Coastguard Worker     static constexpr int32_t SKIP_WHITESPACE = 4;
91*0e209d39SAndroid Build Coastguard Worker 
92*0e209d39SAndroid Build Coastguard Worker     /**
93*0e209d39SAndroid Build Coastguard Worker      * Constructs an iterator over the given text, starting at the given
94*0e209d39SAndroid Build Coastguard Worker      * position.
95*0e209d39SAndroid Build Coastguard Worker      * @param text the text to be iterated
96*0e209d39SAndroid Build Coastguard Worker      * @param sym the symbol table, or null if there is none.  If sym is null,
97*0e209d39SAndroid Build Coastguard Worker      * then variables will not be dereferenced, even if the PARSE_VARIABLES
98*0e209d39SAndroid Build Coastguard Worker      * option is set.
99*0e209d39SAndroid Build Coastguard Worker      * @param pos upon input, the index of the next character to return.  If a
100*0e209d39SAndroid Build Coastguard Worker      * variable has been dereferenced, then pos will <em>not</em> increment as
101*0e209d39SAndroid Build Coastguard Worker      * characters of the variable value are iterated.
102*0e209d39SAndroid Build Coastguard Worker      */
103*0e209d39SAndroid Build Coastguard Worker     RuleCharacterIterator(const UnicodeString& text, const SymbolTable* sym,
104*0e209d39SAndroid Build Coastguard Worker                           ParsePosition& pos);
105*0e209d39SAndroid Build Coastguard Worker 
106*0e209d39SAndroid Build Coastguard Worker     /**
107*0e209d39SAndroid Build Coastguard Worker      * Returns true if this iterator has no more characters to return.
108*0e209d39SAndroid Build Coastguard Worker      */
109*0e209d39SAndroid Build Coastguard Worker     UBool atEnd() const;
110*0e209d39SAndroid Build Coastguard Worker 
111*0e209d39SAndroid Build Coastguard Worker     /**
112*0e209d39SAndroid Build Coastguard Worker      * Returns the next character using the given options, or DONE if there
113*0e209d39SAndroid Build Coastguard Worker      * are no more characters, and advance the position to the next
114*0e209d39SAndroid Build Coastguard Worker      * character.
115*0e209d39SAndroid Build Coastguard Worker      * @param options one or more of the following options, bitwise-OR-ed
116*0e209d39SAndroid Build Coastguard Worker      * together: PARSE_VARIABLES, PARSE_ESCAPES, SKIP_WHITESPACE.
117*0e209d39SAndroid Build Coastguard Worker      * @param isEscaped output parameter set to true if the character
118*0e209d39SAndroid Build Coastguard Worker      * was escaped
119*0e209d39SAndroid Build Coastguard Worker      * @param ec input-output error code.  An error will only be set by
120*0e209d39SAndroid Build Coastguard Worker      * this routine if options includes PARSE_VARIABLES and an unknown
121*0e209d39SAndroid Build Coastguard Worker      * variable name is seen, or if options includes PARSE_ESCAPES and
122*0e209d39SAndroid Build Coastguard Worker      * an invalid escape sequence is seen.
123*0e209d39SAndroid Build Coastguard Worker      * @return the current 32-bit code point, or DONE
124*0e209d39SAndroid Build Coastguard Worker      */
125*0e209d39SAndroid Build Coastguard Worker     UChar32 next(int32_t options, UBool& isEscaped, UErrorCode& ec);
126*0e209d39SAndroid Build Coastguard Worker 
127*0e209d39SAndroid Build Coastguard Worker     /**
128*0e209d39SAndroid Build Coastguard Worker      * Returns true if this iterator is currently within a variable expansion.
129*0e209d39SAndroid Build Coastguard Worker      */
130*0e209d39SAndroid Build Coastguard Worker     inline UBool inVariable() const;
131*0e209d39SAndroid Build Coastguard Worker 
132*0e209d39SAndroid Build Coastguard Worker     /**
133*0e209d39SAndroid Build Coastguard Worker      * An opaque object representing the position of a RuleCharacterIterator.
134*0e209d39SAndroid Build Coastguard Worker      */
135*0e209d39SAndroid Build Coastguard Worker     struct Pos : public UMemory {
136*0e209d39SAndroid Build Coastguard Worker     private:
137*0e209d39SAndroid Build Coastguard Worker         const UnicodeString* buf;  // presumably a snapshot of the iterator's buf -- TODO confirm in the implementation
138*0e209d39SAndroid Build Coastguard Worker         int32_t pos;  // presumably the saved index of the iterator's ParsePosition -- TODO confirm in the implementation
139*0e209d39SAndroid Build Coastguard Worker         int32_t bufPos;  // presumably a snapshot of the iterator's bufPos -- TODO confirm in the implementation
140*0e209d39SAndroid Build Coastguard Worker         friend class RuleCharacterIterator;  // grants the iterator access to the private fields above
141*0e209d39SAndroid Build Coastguard Worker     };
142*0e209d39SAndroid Build Coastguard Worker 
143*0e209d39SAndroid Build Coastguard Worker     /**
144*0e209d39SAndroid Build Coastguard Worker      * Sets an object which, when later passed to setPos(), will
145*0e209d39SAndroid Build Coastguard Worker      * restore this iterator's position.  Usage idiom:
146*0e209d39SAndroid Build Coastguard Worker      *
147*0e209d39SAndroid Build Coastguard Worker      * RuleCharacterIterator iterator = ...;
148*0e209d39SAndroid Build Coastguard Worker      * RuleCharacterIterator::Pos pos;
149*0e209d39SAndroid Build Coastguard Worker      * iterator.getPos(pos);
150*0e209d39SAndroid Build Coastguard Worker      * for (;;) {
151*0e209d39SAndroid Build Coastguard Worker      *   iterator.getPos(pos);
152*0e209d39SAndroid Build Coastguard Worker      *   int c = iterator.next(...);
153*0e209d39SAndroid Build Coastguard Worker      *   ...
154*0e209d39SAndroid Build Coastguard Worker      * }
155*0e209d39SAndroid Build Coastguard Worker      * iterator.setPos(pos);
156*0e209d39SAndroid Build Coastguard Worker      *
157*0e209d39SAndroid Build Coastguard Worker      * @param p a position object to be set to this iterator's
158*0e209d39SAndroid Build Coastguard Worker      * current position.
159*0e209d39SAndroid Build Coastguard Worker      */
160*0e209d39SAndroid Build Coastguard Worker     void getPos(Pos& p) const;
161*0e209d39SAndroid Build Coastguard Worker 
162*0e209d39SAndroid Build Coastguard Worker     /**
163*0e209d39SAndroid Build Coastguard Worker      * Restores this iterator to the position it had when getPos()
164*0e209d39SAndroid Build Coastguard Worker      * set the given object.
165*0e209d39SAndroid Build Coastguard Worker      * @param p a position object previously set by getPos()
166*0e209d39SAndroid Build Coastguard Worker      */
167*0e209d39SAndroid Build Coastguard Worker     void setPos(const Pos& p);
168*0e209d39SAndroid Build Coastguard Worker 
169*0e209d39SAndroid Build Coastguard Worker     /**
170*0e209d39SAndroid Build Coastguard Worker      * Skips ahead past any ignored characters, as indicated by the given
171*0e209d39SAndroid Build Coastguard Worker      * options.  This is useful in conjunction with the lookahead() method.
172*0e209d39SAndroid Build Coastguard Worker      *
173*0e209d39SAndroid Build Coastguard Worker      * Currently, this only has an effect for SKIP_WHITESPACE.
174*0e209d39SAndroid Build Coastguard Worker      * @param options one or more of the following options, bitwise-OR-ed
175*0e209d39SAndroid Build Coastguard Worker      * together: PARSE_VARIABLES, PARSE_ESCAPES, SKIP_WHITESPACE.
176*0e209d39SAndroid Build Coastguard Worker      */
177*0e209d39SAndroid Build Coastguard Worker     void skipIgnored(int32_t options);
178*0e209d39SAndroid Build Coastguard Worker 
179*0e209d39SAndroid Build Coastguard Worker     /**
180*0e209d39SAndroid Build Coastguard Worker      * Returns a string containing the remainder of the characters to be
181*0e209d39SAndroid Build Coastguard Worker      * returned by this iterator, without any option processing.  If the
182*0e209d39SAndroid Build Coastguard Worker      * iterator is currently within a variable expansion, this will only
183*0e209d39SAndroid Build Coastguard Worker      * extend to the end of the variable expansion.  This method is provided
184*0e209d39SAndroid Build Coastguard Worker      * so that iterators may interoperate with string-based APIs.  The typical
185*0e209d39SAndroid Build Coastguard Worker      * sequence of calls is to call skipIgnored(), then call lookahead(), then
186*0e209d39SAndroid Build Coastguard Worker      * parse the string returned by lookahead(), then call jumpahead() to
187*0e209d39SAndroid Build Coastguard Worker      * resynchronize the iterator.
188*0e209d39SAndroid Build Coastguard Worker      * @param result a string to receive the characters to be returned
189*0e209d39SAndroid Build Coastguard Worker      * by future calls to next()
190*0e209d39SAndroid Build Coastguard Worker      * @param maxLookAhead The maximum to copy into the result.  Defaults to -1 (presumably meaning "no limit" -- TODO confirm against the implementation).
191*0e209d39SAndroid Build Coastguard Worker      * @return a reference to result
192*0e209d39SAndroid Build Coastguard Worker      */
193*0e209d39SAndroid Build Coastguard Worker     UnicodeString& lookahead(UnicodeString& result, int32_t maxLookAhead = -1) const;
194*0e209d39SAndroid Build Coastguard Worker 
195*0e209d39SAndroid Build Coastguard Worker     /**
196*0e209d39SAndroid Build Coastguard Worker      * Advances the position by the given number of 16-bit code units.
197*0e209d39SAndroid Build Coastguard Worker      * This is useful in conjunction with the lookahead() method.
198*0e209d39SAndroid Build Coastguard Worker      * @param count the number of 16-bit code units to jump over
199*0e209d39SAndroid Build Coastguard Worker      */
200*0e209d39SAndroid Build Coastguard Worker     void jumpahead(int32_t count);
201*0e209d39SAndroid Build Coastguard Worker 
202*0e209d39SAndroid Build Coastguard Worker     /**
203*0e209d39SAndroid Build Coastguard Worker      * Returns a string representation of this object, consisting of the
204*0e209d39SAndroid Build Coastguard Worker      * characters being iterated, with a '|' marking the current position.
205*0e209d39SAndroid Build Coastguard Worker      * Position within an expanded variable is <em>not</em> indicated.  NOTE: the declaration below is currently commented out.
206*0e209d39SAndroid Build Coastguard Worker      * @param result output parameter to receive a string
207*0e209d39SAndroid Build Coastguard Worker      * representation of this object
208*0e209d39SAndroid Build Coastguard Worker      */
209*0e209d39SAndroid Build Coastguard Worker //    UnicodeString& toString(UnicodeString& result) const;
210*0e209d39SAndroid Build Coastguard Worker 
211*0e209d39SAndroid Build Coastguard Worker private:
212*0e209d39SAndroid Build Coastguard Worker     /**
213*0e209d39SAndroid Build Coastguard Worker      * Returns the current 32-bit code point without parsing escapes, parsing
214*0e209d39SAndroid Build Coastguard Worker      * variables, or skipping whitespace.
215*0e209d39SAndroid Build Coastguard Worker      * @return the current 32-bit code point
216*0e209d39SAndroid Build Coastguard Worker      */
217*0e209d39SAndroid Build Coastguard Worker     UChar32 _current() const;
218*0e209d39SAndroid Build Coastguard Worker 
219*0e209d39SAndroid Build Coastguard Worker     /**
220*0e209d39SAndroid Build Coastguard Worker      * Advances the position by the given amount.
221*0e209d39SAndroid Build Coastguard Worker      * @param count the number of 16-bit code units to advance past
222*0e209d39SAndroid Build Coastguard Worker      */
223*0e209d39SAndroid Build Coastguard Worker     void _advance(int32_t count);
224*0e209d39SAndroid Build Coastguard Worker };
225*0e209d39SAndroid Build Coastguard Worker 
inVariable()226*0e209d39SAndroid Build Coastguard Worker inline UBool RuleCharacterIterator::inVariable() const {
227*0e209d39SAndroid Build Coastguard Worker     return buf != nullptr;  // buf points at the current variable expansion, or is null when there is none
228*0e209d39SAndroid Build Coastguard Worker }
229*0e209d39SAndroid Build Coastguard Worker 
230*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_END
231*0e209d39SAndroid Build Coastguard Worker 
232*0e209d39SAndroid Build Coastguard Worker #endif // _RULEITER_H_
233*0e209d39SAndroid Build Coastguard Worker //eof
234