// xref: /aosp_15_r20/external/icu/libicu/cts_headers/util.h (revision 0e209d3975ff4a8c132096b14b0e9364a753506e)
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
 **********************************************************************
 *   Copyright (c) 2001-2011, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 **********************************************************************
 *   Date        Name        Description
 *   11/19/2001  aliu        Creation.
 **********************************************************************
 */

13*0e209d39SAndroid Build Coastguard Worker #ifndef ICU_UTIL_H
14*0e209d39SAndroid Build Coastguard Worker #define ICU_UTIL_H
15*0e209d39SAndroid Build Coastguard Worker 
16*0e209d39SAndroid Build Coastguard Worker #include "charstr.h"
17*0e209d39SAndroid Build Coastguard Worker #include "unicode/unistr.h"
18*0e209d39SAndroid Build Coastguard Worker #include "unicode/uobject.h"
19*0e209d39SAndroid Build Coastguard Worker #include "unicode/utypes.h"
20*0e209d39SAndroid Build Coastguard Worker //--------------------------------------------------------------------
21*0e209d39SAndroid Build Coastguard Worker // class ICU_Utility
22*0e209d39SAndroid Build Coastguard Worker // i18n utility functions, scoped into the class ICU_Utility.
23*0e209d39SAndroid Build Coastguard Worker //--------------------------------------------------------------------
24*0e209d39SAndroid Build Coastguard Worker 
25*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_BEGIN
26*0e209d39SAndroid Build Coastguard Worker 
27*0e209d39SAndroid Build Coastguard Worker class UnicodeMatcher;
28*0e209d39SAndroid Build Coastguard Worker 
29*0e209d39SAndroid Build Coastguard Worker class U_COMMON_API ICU_Utility /* not : public UObject because all methods are static */ {
30*0e209d39SAndroid Build Coastguard Worker  public:
31*0e209d39SAndroid Build Coastguard Worker 
32*0e209d39SAndroid Build Coastguard Worker     /**
33*0e209d39SAndroid Build Coastguard Worker      * Append a number to the given UnicodeString in the given radix.
34*0e209d39SAndroid Build Coastguard Worker      * Standard digits '0'-'9' are used and letters 'A'-'Z' for
35*0e209d39SAndroid Build Coastguard Worker      * radices 11 through 36.
36*0e209d39SAndroid Build Coastguard Worker      * @param result the digits of the number are appended here
37*0e209d39SAndroid Build Coastguard Worker      * @param n the number to be converted to digits; may be negative.
38*0e209d39SAndroid Build Coastguard Worker      * If negative, a '-' is prepended to the digits.
39*0e209d39SAndroid Build Coastguard Worker      * @param radix a radix from 2 to 36 inclusive.
40*0e209d39SAndroid Build Coastguard Worker      * @param minDigits the minimum number of digits, not including
41*0e209d39SAndroid Build Coastguard Worker      * any '-', to produce.  Values less than 2 have no effect.  One
42*0e209d39SAndroid Build Coastguard Worker      * digit is always emitted regardless of this parameter.
43*0e209d39SAndroid Build Coastguard Worker      * @return a reference to result
44*0e209d39SAndroid Build Coastguard Worker      */
45*0e209d39SAndroid Build Coastguard Worker     static UnicodeString& appendNumber(UnicodeString& result, int32_t n,
46*0e209d39SAndroid Build Coastguard Worker                                        int32_t radix = 10,
47*0e209d39SAndroid Build Coastguard Worker                                        int32_t minDigits = 1);
48*0e209d39SAndroid Build Coastguard Worker 
49*0e209d39SAndroid Build Coastguard Worker     /** Returns a bogus UnicodeString by value. */
makeBogusString()50*0e209d39SAndroid Build Coastguard Worker     static inline UnicodeString makeBogusString() {
51*0e209d39SAndroid Build Coastguard Worker         UnicodeString result;
52*0e209d39SAndroid Build Coastguard Worker         result.setToBogus();
53*0e209d39SAndroid Build Coastguard Worker         return result;
54*0e209d39SAndroid Build Coastguard Worker     }
55*0e209d39SAndroid Build Coastguard Worker 
56*0e209d39SAndroid Build Coastguard Worker     /**
57*0e209d39SAndroid Build Coastguard Worker      * Return true if the character is NOT printable ASCII.
58*0e209d39SAndroid Build Coastguard Worker      * The tab, newline and linefeed characters are considered unprintable.
59*0e209d39SAndroid Build Coastguard Worker      */
60*0e209d39SAndroid Build Coastguard Worker     static UBool isUnprintable(UChar32 c);
61*0e209d39SAndroid Build Coastguard Worker 
62*0e209d39SAndroid Build Coastguard Worker     /**
63*0e209d39SAndroid Build Coastguard Worker      * @return true for control codes and for surrogate and noncharacter code points
64*0e209d39SAndroid Build Coastguard Worker      */
65*0e209d39SAndroid Build Coastguard Worker     static UBool shouldAlwaysBeEscaped(UChar32 c);
66*0e209d39SAndroid Build Coastguard Worker 
67*0e209d39SAndroid Build Coastguard Worker     /**
68*0e209d39SAndroid Build Coastguard Worker      * Escapes one unprintable code point using \uxxxx notation for U+0000 to
69*0e209d39SAndroid Build Coastguard Worker      * U+FFFF and \Uxxxxxxxx for U+10000 and above.  If the character is
70*0e209d39SAndroid Build Coastguard Worker      * printable ASCII, then do nothing and return false.  Otherwise,
71*0e209d39SAndroid Build Coastguard Worker      * append the escaped notation and return true.
72*0e209d39SAndroid Build Coastguard Worker      */
73*0e209d39SAndroid Build Coastguard Worker     static UBool escapeUnprintable(UnicodeString& result, UChar32 c);
74*0e209d39SAndroid Build Coastguard Worker 
75*0e209d39SAndroid Build Coastguard Worker     /**
76*0e209d39SAndroid Build Coastguard Worker      * Escapes one code point using \uxxxx notation
77*0e209d39SAndroid Build Coastguard Worker      * for U+0000 to U+FFFF and \Uxxxxxxxx for U+10000 and above.
78*0e209d39SAndroid Build Coastguard Worker      * @return result
79*0e209d39SAndroid Build Coastguard Worker      */
80*0e209d39SAndroid Build Coastguard Worker     static UnicodeString &escape(UnicodeString& result, UChar32 c);
81*0e209d39SAndroid Build Coastguard Worker 
82*0e209d39SAndroid Build Coastguard Worker     /**
83*0e209d39SAndroid Build Coastguard Worker      * Returns the index of a character, ignoring quoted text.
84*0e209d39SAndroid Build Coastguard Worker      * For example, in the string "abc'hide'h", the 'h' in "hide" will not be
85*0e209d39SAndroid Build Coastguard Worker      * found by a search for 'h'.
86*0e209d39SAndroid Build Coastguard Worker      * @param text text to be searched
87*0e209d39SAndroid Build Coastguard Worker      * @param start the beginning index, inclusive; <code>0 <= start
88*0e209d39SAndroid Build Coastguard Worker      * <= limit</code>.
89*0e209d39SAndroid Build Coastguard Worker      * @param limit the ending index, exclusive; <code>start <= limit
90*0e209d39SAndroid Build Coastguard Worker      * <= text.length()</code>.
91*0e209d39SAndroid Build Coastguard Worker      * @param c character to search for
92*0e209d39SAndroid Build Coastguard Worker      * @return Offset of the first instance of c, or -1 if not found.
93*0e209d39SAndroid Build Coastguard Worker      */
94*0e209d39SAndroid Build Coastguard Worker //?FOR FUTURE USE.  DISABLE FOR NOW for coverage reasons.
95*0e209d39SAndroid Build Coastguard Worker //    static int32_t quotedIndexOf(const UnicodeString& text,
96*0e209d39SAndroid Build Coastguard Worker //                                 int32_t start, int32_t limit,
97*0e209d39SAndroid Build Coastguard Worker //                                 char16_t c);
98*0e209d39SAndroid Build Coastguard Worker 
99*0e209d39SAndroid Build Coastguard Worker     /**
100*0e209d39SAndroid Build Coastguard Worker      * Skip over a sequence of zero or more white space characters at pos.
101*0e209d39SAndroid Build Coastguard Worker      * @param advance if true, advance pos to the first non-white-space
102*0e209d39SAndroid Build Coastguard Worker      * character at or after pos, or str.length(), if there is none.
103*0e209d39SAndroid Build Coastguard Worker      * Otherwise leave pos unchanged.
104*0e209d39SAndroid Build Coastguard Worker      * @return the index of the first non-white-space character at or
105*0e209d39SAndroid Build Coastguard Worker      * after pos, or str.length(), if there is none.
106*0e209d39SAndroid Build Coastguard Worker      */
107*0e209d39SAndroid Build Coastguard Worker     static int32_t skipWhitespace(const UnicodeString& str, int32_t& pos,
108*0e209d39SAndroid Build Coastguard Worker                                   UBool advance = false);
109*0e209d39SAndroid Build Coastguard Worker 
110*0e209d39SAndroid Build Coastguard Worker     /**
111*0e209d39SAndroid Build Coastguard Worker      * Skip over Pattern_White_Space in a Replaceable.
112*0e209d39SAndroid Build Coastguard Worker      * Skipping may be done in the forward or
113*0e209d39SAndroid Build Coastguard Worker      * reverse direction.  In either case, the leftmost index will be
114*0e209d39SAndroid Build Coastguard Worker      * inclusive, and the rightmost index will be exclusive.  That is,
115*0e209d39SAndroid Build Coastguard Worker      * given a range defined as [start, limit), the call
116*0e209d39SAndroid Build Coastguard Worker      * skipWhitespace(text, start, limit) will advance start past leading
117*0e209d39SAndroid Build Coastguard Worker      * whitespace, whereas the call skipWhitespace(text, limit, start),
118*0e209d39SAndroid Build Coastguard Worker      * will back up limit past trailing whitespace.
119*0e209d39SAndroid Build Coastguard Worker      * @param text the text to be analyzed
120*0e209d39SAndroid Build Coastguard Worker      * @param pos either the start or limit of a range of 'text', to skip
121*0e209d39SAndroid Build Coastguard Worker      * leading or trailing whitespace, respectively
122*0e209d39SAndroid Build Coastguard Worker      * @param stop either the limit or start of a range of 'text', to skip
123*0e209d39SAndroid Build Coastguard Worker      * leading or trailing whitespace, respectively
124*0e209d39SAndroid Build Coastguard Worker      * @return the new start or limit, depending on what was passed in to
125*0e209d39SAndroid Build Coastguard Worker      * 'pos'
126*0e209d39SAndroid Build Coastguard Worker      */
127*0e209d39SAndroid Build Coastguard Worker //?FOR FUTURE USE.  DISABLE FOR NOW for coverage reasons.
128*0e209d39SAndroid Build Coastguard Worker //?    static int32_t skipWhitespace(const Replaceable& text,
129*0e209d39SAndroid Build Coastguard Worker //?                                  int32_t pos, int32_t stop);
130*0e209d39SAndroid Build Coastguard Worker 
131*0e209d39SAndroid Build Coastguard Worker     /**
132*0e209d39SAndroid Build Coastguard Worker      * Parse a single non-whitespace character 'ch', optionally
133*0e209d39SAndroid Build Coastguard Worker      * preceded by whitespace.
134*0e209d39SAndroid Build Coastguard Worker      * @param id the string to be parsed
135*0e209d39SAndroid Build Coastguard Worker      * @param pos INPUT-OUTPUT parameter.  On input, pos[0] is the
136*0e209d39SAndroid Build Coastguard Worker      * offset of the first character to be parsed.  On output, pos[0]
137*0e209d39SAndroid Build Coastguard Worker      * is the index after the last parsed character.  If the parse
138*0e209d39SAndroid Build Coastguard Worker      * fails, pos[0] will be unchanged.
139*0e209d39SAndroid Build Coastguard Worker      * @param ch the non-whitespace character to be parsed.
140*0e209d39SAndroid Build Coastguard Worker      * @return true if 'ch' is seen preceded by zero or more
141*0e209d39SAndroid Build Coastguard Worker      * whitespace characters.
142*0e209d39SAndroid Build Coastguard Worker      */
143*0e209d39SAndroid Build Coastguard Worker     static UBool parseChar(const UnicodeString& id, int32_t& pos, char16_t ch);
144*0e209d39SAndroid Build Coastguard Worker 
145*0e209d39SAndroid Build Coastguard Worker     /**
146*0e209d39SAndroid Build Coastguard Worker      * Parse a pattern string starting at offset pos.  Keywords are
147*0e209d39SAndroid Build Coastguard Worker      * matched case-insensitively.  Spaces may be skipped and may be
148*0e209d39SAndroid Build Coastguard Worker      * optional or required.  Integer values may be parsed, and if
149*0e209d39SAndroid Build Coastguard Worker      * they are, they will be returned in the given array.  If
150*0e209d39SAndroid Build Coastguard Worker      * successful, the offset of the next non-space character is
151*0e209d39SAndroid Build Coastguard Worker      * returned.  On failure, -1 is returned.
152*0e209d39SAndroid Build Coastguard Worker      * @param pattern must only contain lowercase characters, which
153*0e209d39SAndroid Build Coastguard Worker      * will match their uppercase equivalents as well.  A space
154*0e209d39SAndroid Build Coastguard Worker      * character matches one or more required spaces.  A '~' character
155*0e209d39SAndroid Build Coastguard Worker      * matches zero or more optional spaces.  A '#' character matches
156*0e209d39SAndroid Build Coastguard Worker      * an integer and stores it in parsedInts, which the caller must
157*0e209d39SAndroid Build Coastguard Worker      * ensure has enough capacity.
158*0e209d39SAndroid Build Coastguard Worker      * @param parsedInts array to receive parsed integers.  Caller
159*0e209d39SAndroid Build Coastguard Worker      * must ensure that parsedInts.length is >= the number of '#'
160*0e209d39SAndroid Build Coastguard Worker      * signs in 'pattern'.
161*0e209d39SAndroid Build Coastguard Worker      * @return the position after the last character parsed, or -1 if
162*0e209d39SAndroid Build Coastguard Worker      * the parse failed
163*0e209d39SAndroid Build Coastguard Worker      */
164*0e209d39SAndroid Build Coastguard Worker     static int32_t parsePattern(const UnicodeString& rule, int32_t pos, int32_t limit,
165*0e209d39SAndroid Build Coastguard Worker                                 const UnicodeString& pattern, int32_t* parsedInts);
166*0e209d39SAndroid Build Coastguard Worker 
167*0e209d39SAndroid Build Coastguard Worker     /**
168*0e209d39SAndroid Build Coastguard Worker      * Parse a pattern string within the given Replaceable and a parsing
169*0e209d39SAndroid Build Coastguard Worker      * pattern.  Characters are matched literally and case-sensitively
170*0e209d39SAndroid Build Coastguard Worker      * except for the following special characters:
171*0e209d39SAndroid Build Coastguard Worker      *
172*0e209d39SAndroid Build Coastguard Worker      * ~  zero or more Pattern_White_Space chars
173*0e209d39SAndroid Build Coastguard Worker      *
174*0e209d39SAndroid Build Coastguard Worker      * If end of pattern is reached with all matches along the way,
175*0e209d39SAndroid Build Coastguard Worker      * pos is advanced to the first unparsed index and returned.
176*0e209d39SAndroid Build Coastguard Worker      * Otherwise -1 is returned.
177*0e209d39SAndroid Build Coastguard Worker      * @param pat pattern that controls parsing
178*0e209d39SAndroid Build Coastguard Worker      * @param text text to be parsed, starting at index
179*0e209d39SAndroid Build Coastguard Worker      * @param index offset to first character to parse
180*0e209d39SAndroid Build Coastguard Worker      * @param limit offset after last character to parse
181*0e209d39SAndroid Build Coastguard Worker      * @return index after last parsed character, or -1 on parse failure.
182*0e209d39SAndroid Build Coastguard Worker      */
183*0e209d39SAndroid Build Coastguard Worker     static int32_t parsePattern(const UnicodeString& pat,
184*0e209d39SAndroid Build Coastguard Worker                                 const Replaceable& text,
185*0e209d39SAndroid Build Coastguard Worker                                 int32_t index,
186*0e209d39SAndroid Build Coastguard Worker                                 int32_t limit);
187*0e209d39SAndroid Build Coastguard Worker 
188*0e209d39SAndroid Build Coastguard Worker     /**
189*0e209d39SAndroid Build Coastguard Worker      * Parse an integer at pos, either of the form \d+ or of the form
190*0e209d39SAndroid Build Coastguard Worker      * 0x[0-9A-Fa-f]+ or 0[0-7]+, that is, in standard decimal, hex,
191*0e209d39SAndroid Build Coastguard Worker      * or octal format.
192*0e209d39SAndroid Build Coastguard Worker      * @param pos INPUT-OUTPUT parameter.  On input, the index of the first
193*0e209d39SAndroid Build Coastguard Worker      * character to parse.  On output, the index of the character after the
194*0e209d39SAndroid Build Coastguard Worker      * last parsed character.
195*0e209d39SAndroid Build Coastguard Worker      */
196*0e209d39SAndroid Build Coastguard Worker     static int32_t parseInteger(const UnicodeString& rule, int32_t& pos, int32_t limit);
197*0e209d39SAndroid Build Coastguard Worker 
198*0e209d39SAndroid Build Coastguard Worker     /**
199*0e209d39SAndroid Build Coastguard Worker      * Parse an integer at pos using only ASCII digits.
200*0e209d39SAndroid Build Coastguard Worker      * Base 10 only.
201*0e209d39SAndroid Build Coastguard Worker      * @param pos INPUT-OUTPUT parameter.  On input, the index of the first
202*0e209d39SAndroid Build Coastguard Worker      * character to parse.  On output, the index of the character after the
203*0e209d39SAndroid Build Coastguard Worker      * last parsed character.
204*0e209d39SAndroid Build Coastguard Worker      */
205*0e209d39SAndroid Build Coastguard Worker     static int32_t parseAsciiInteger(const UnicodeString& str, int32_t& pos);
206*0e209d39SAndroid Build Coastguard Worker 
207*0e209d39SAndroid Build Coastguard Worker     /**
208*0e209d39SAndroid Build Coastguard Worker      * Parse a Unicode identifier from the given string at the given
209*0e209d39SAndroid Build Coastguard Worker      * position.  Return the identifier, or an empty string if there
210*0e209d39SAndroid Build Coastguard Worker      * is no identifier.
211*0e209d39SAndroid Build Coastguard Worker      * @param str the string to parse
212*0e209d39SAndroid Build Coastguard Worker      * @param pos INPUT-OUTPUT parameter.  On INPUT, pos is the
213*0e209d39SAndroid Build Coastguard Worker      * first character to examine.  It must be less than str.length(),
214*0e209d39SAndroid Build Coastguard Worker      * and it must not point to a whitespace character.  That is, must
215*0e209d39SAndroid Build Coastguard Worker      * have pos < str.length() and
216*0e209d39SAndroid Build Coastguard Worker      * !UCharacter::isWhitespace(str.char32At(pos)).  On
217*0e209d39SAndroid Build Coastguard Worker      * OUTPUT, the position after the last parsed character.
218*0e209d39SAndroid Build Coastguard Worker      * @return the Unicode identifier, or an empty string if there is
219*0e209d39SAndroid Build Coastguard Worker      * no valid identifier at pos.
220*0e209d39SAndroid Build Coastguard Worker      */
221*0e209d39SAndroid Build Coastguard Worker     static UnicodeString parseUnicodeIdentifier(const UnicodeString& str, int32_t& pos);
222*0e209d39SAndroid Build Coastguard Worker 
223*0e209d39SAndroid Build Coastguard Worker     /**
224*0e209d39SAndroid Build Coastguard Worker      * Parse an unsigned 31-bit integer at the given offset.  Use
225*0e209d39SAndroid Build Coastguard Worker      * UCharacter.digit() to parse individual characters into digits.
226*0e209d39SAndroid Build Coastguard Worker      * @param text the text to be parsed
227*0e209d39SAndroid Build Coastguard Worker      * @param pos INPUT-OUTPUT parameter.  On entry, pos is the
228*0e209d39SAndroid Build Coastguard Worker      * offset within text at which to start parsing; it should point
229*0e209d39SAndroid Build Coastguard Worker      * to a valid digit.  On exit, pos is the offset after the last
230*0e209d39SAndroid Build Coastguard Worker      * parsed character.  If the parse failed, it will be unchanged on
231*0e209d39SAndroid Build Coastguard Worker      * exit.  Must be >= 0 on entry.
232*0e209d39SAndroid Build Coastguard Worker      * @param radix the radix in which to parse; must be >= 2 and <=
233*0e209d39SAndroid Build Coastguard Worker      * 36.
234*0e209d39SAndroid Build Coastguard Worker      * @return a non-negative parsed number, or -1 upon parse failure.
235*0e209d39SAndroid Build Coastguard Worker      * Parse fails if there are no digits, that is, if pos does not
236*0e209d39SAndroid Build Coastguard Worker      * point to a valid digit on entry, or if the number to be parsed
237*0e209d39SAndroid Build Coastguard Worker      * does not fit into a 31-bit unsigned integer.
238*0e209d39SAndroid Build Coastguard Worker      */
239*0e209d39SAndroid Build Coastguard Worker     static int32_t parseNumber(const UnicodeString& text,
240*0e209d39SAndroid Build Coastguard Worker                                int32_t& pos, int8_t radix);
241*0e209d39SAndroid Build Coastguard Worker 
242*0e209d39SAndroid Build Coastguard Worker     static void appendToRule(UnicodeString& rule,
243*0e209d39SAndroid Build Coastguard Worker                              UChar32 c,
244*0e209d39SAndroid Build Coastguard Worker                              UBool isLiteral,
245*0e209d39SAndroid Build Coastguard Worker                              UBool escapeUnprintable,
246*0e209d39SAndroid Build Coastguard Worker                              UnicodeString& quoteBuf);
247*0e209d39SAndroid Build Coastguard Worker 
248*0e209d39SAndroid Build Coastguard Worker     static void appendToRule(UnicodeString& rule,
249*0e209d39SAndroid Build Coastguard Worker                              const UnicodeString& text,
250*0e209d39SAndroid Build Coastguard Worker                              UBool isLiteral,
251*0e209d39SAndroid Build Coastguard Worker                              UBool escapeUnprintable,
252*0e209d39SAndroid Build Coastguard Worker                              UnicodeString& quoteBuf);
253*0e209d39SAndroid Build Coastguard Worker 
254*0e209d39SAndroid Build Coastguard Worker     static void appendToRule(UnicodeString& rule,
255*0e209d39SAndroid Build Coastguard Worker                              const UnicodeMatcher* matcher,
256*0e209d39SAndroid Build Coastguard Worker                              UBool escapeUnprintable,
257*0e209d39SAndroid Build Coastguard Worker                              UnicodeString& quoteBuf);
258*0e209d39SAndroid Build Coastguard Worker 
259*0e209d39SAndroid Build Coastguard Worker private:
260*0e209d39SAndroid Build Coastguard Worker     // do not instantiate
261*0e209d39SAndroid Build Coastguard Worker     ICU_Utility() = delete;
262*0e209d39SAndroid Build Coastguard Worker };
263*0e209d39SAndroid Build Coastguard Worker 
264*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_END
265*0e209d39SAndroid Build Coastguard Worker 
266*0e209d39SAndroid Build Coastguard Worker #endif
267*0e209d39SAndroid Build Coastguard Worker //eof
268