xref: /aosp_15_r20/external/icu/libicu/cts_headers/unicode/regex.h (revision 0e209d3975ff4a8c132096b14b0e9364a753506e)
1*0e209d39SAndroid Build Coastguard Worker // © 2016 and later: Unicode, Inc. and others.
2*0e209d39SAndroid Build Coastguard Worker // License & terms of use: http://www.unicode.org/copyright.html
3*0e209d39SAndroid Build Coastguard Worker /*
4*0e209d39SAndroid Build Coastguard Worker **********************************************************************
5*0e209d39SAndroid Build Coastguard Worker *   Copyright (C) 2002-2016, International Business Machines
6*0e209d39SAndroid Build Coastguard Worker *   Corporation and others.  All Rights Reserved.
7*0e209d39SAndroid Build Coastguard Worker **********************************************************************
8*0e209d39SAndroid Build Coastguard Worker *   file name:  regex.h
9*0e209d39SAndroid Build Coastguard Worker *   encoding:   UTF-8
10*0e209d39SAndroid Build Coastguard Worker *   indentation:4
11*0e209d39SAndroid Build Coastguard Worker *
12*0e209d39SAndroid Build Coastguard Worker *   created on: 2002oct22
13*0e209d39SAndroid Build Coastguard Worker *   created by: Andy Heninger
14*0e209d39SAndroid Build Coastguard Worker *
15*0e209d39SAndroid Build Coastguard Worker *   ICU Regular Expressions, API for C++
16*0e209d39SAndroid Build Coastguard Worker */
17*0e209d39SAndroid Build Coastguard Worker 
18*0e209d39SAndroid Build Coastguard Worker #ifndef REGEX_H
19*0e209d39SAndroid Build Coastguard Worker #define REGEX_H
20*0e209d39SAndroid Build Coastguard Worker 
21*0e209d39SAndroid Build Coastguard Worker //#define REGEX_DEBUG
22*0e209d39SAndroid Build Coastguard Worker 
23*0e209d39SAndroid Build Coastguard Worker /**
24*0e209d39SAndroid Build Coastguard Worker  * \file
25*0e209d39SAndroid Build Coastguard Worker  * \brief C++ API: Regular Expressions
26*0e209d39SAndroid Build Coastguard Worker  *
27*0e209d39SAndroid Build Coastguard Worker  * The ICU API for processing regular expressions consists of two classes,
28*0e209d39SAndroid Build Coastguard Worker  *  `RegexPattern` and `RegexMatcher`.
29*0e209d39SAndroid Build Coastguard Worker  *  `RegexPattern` objects represent a pre-processed, or compiled
30*0e209d39SAndroid Build Coastguard Worker  *  regular expression.  They are created from a regular expression pattern string,
31*0e209d39SAndroid Build Coastguard Worker  *  and can be used to create `RegexMatcher` objects for the pattern.
32*0e209d39SAndroid Build Coastguard Worker  *
33*0e209d39SAndroid Build Coastguard Worker  * Class `RegexMatcher` bundles together a regular expression
34*0e209d39SAndroid Build Coastguard Worker  *  pattern and a target string to which the search pattern will be applied.
35*0e209d39SAndroid Build Coastguard Worker  *  `RegexMatcher` includes API for doing plain find or search
36*0e209d39SAndroid Build Coastguard Worker  *  operations, for search and replace operations, and for obtaining detailed
37*0e209d39SAndroid Build Coastguard Worker  *  information about bounds of a match.
38*0e209d39SAndroid Build Coastguard Worker  *
39*0e209d39SAndroid Build Coastguard Worker  * Note that by constructing `RegexMatcher` objects directly from regular
40*0e209d39SAndroid Build Coastguard Worker  * expression pattern strings application code can be simplified and the explicit
41*0e209d39SAndroid Build Coastguard Worker  * need for `RegexPattern` objects can usually be eliminated.
42*0e209d39SAndroid Build Coastguard Worker  *
43*0e209d39SAndroid Build Coastguard Worker  */
44*0e209d39SAndroid Build Coastguard Worker 
45*0e209d39SAndroid Build Coastguard Worker #include "unicode/utypes.h"
46*0e209d39SAndroid Build Coastguard Worker 
47*0e209d39SAndroid Build Coastguard Worker #if U_SHOW_CPLUSPLUS_API
48*0e209d39SAndroid Build Coastguard Worker 
49*0e209d39SAndroid Build Coastguard Worker #if !UCONFIG_NO_REGULAR_EXPRESSIONS
50*0e209d39SAndroid Build Coastguard Worker 
51*0e209d39SAndroid Build Coastguard Worker #include "unicode/uobject.h"
52*0e209d39SAndroid Build Coastguard Worker #include "unicode/unistr.h"
53*0e209d39SAndroid Build Coastguard Worker #include "unicode/utext.h"
54*0e209d39SAndroid Build Coastguard Worker #include "unicode/parseerr.h"
55*0e209d39SAndroid Build Coastguard Worker 
56*0e209d39SAndroid Build Coastguard Worker #include "unicode/uregex.h"
57*0e209d39SAndroid Build Coastguard Worker 
58*0e209d39SAndroid Build Coastguard Worker // Forward Declarations
59*0e209d39SAndroid Build Coastguard Worker 
60*0e209d39SAndroid Build Coastguard Worker struct UHashtable;
61*0e209d39SAndroid Build Coastguard Worker 
62*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_BEGIN
63*0e209d39SAndroid Build Coastguard Worker 
64*0e209d39SAndroid Build Coastguard Worker struct Regex8BitSet;
65*0e209d39SAndroid Build Coastguard Worker class  RegexCImpl;
66*0e209d39SAndroid Build Coastguard Worker class  RegexMatcher;
67*0e209d39SAndroid Build Coastguard Worker class  RegexPattern;
68*0e209d39SAndroid Build Coastguard Worker struct REStackFrame;
69*0e209d39SAndroid Build Coastguard Worker class  BreakIterator;
70*0e209d39SAndroid Build Coastguard Worker class  UnicodeSet;
71*0e209d39SAndroid Build Coastguard Worker class  UVector;
72*0e209d39SAndroid Build Coastguard Worker class  UVector32;
73*0e209d39SAndroid Build Coastguard Worker class  UVector64;
74*0e209d39SAndroid Build Coastguard Worker 
75*0e209d39SAndroid Build Coastguard Worker 
76*0e209d39SAndroid Build Coastguard Worker /**
77*0e209d39SAndroid Build Coastguard Worker   * Class `RegexPattern` represents a compiled regular expression.  It includes
78*0e209d39SAndroid Build Coastguard Worker   * factory methods for creating a RegexPattern object from the source (string) form
79*0e209d39SAndroid Build Coastguard Worker   * of a regular expression, methods for creating RegexMatchers that allow the pattern
80*0e209d39SAndroid Build Coastguard Worker   * to be applied to input text, and a few convenience methods for simple common
81*0e209d39SAndroid Build Coastguard Worker   * uses of regular expressions.
82*0e209d39SAndroid Build Coastguard Worker   *
83*0e209d39SAndroid Build Coastguard Worker   * Class RegexPattern is not intended to be subclassed.
84*0e209d39SAndroid Build Coastguard Worker   *
85*0e209d39SAndroid Build Coastguard Worker   * @stable ICU 2.4
86*0e209d39SAndroid Build Coastguard Worker   */
87*0e209d39SAndroid Build Coastguard Worker class U_I18N_API RegexPattern final : public UObject {
88*0e209d39SAndroid Build Coastguard Worker public:
89*0e209d39SAndroid Build Coastguard Worker 
90*0e209d39SAndroid Build Coastguard Worker     /**
91*0e209d39SAndroid Build Coastguard Worker      * Default constructor.  Create a RegexPattern object that refers to no actual
92*0e209d39SAndroid Build Coastguard Worker      *   pattern.  Not normally needed; RegexPattern objects are usually
93*0e209d39SAndroid Build Coastguard Worker      *   created using the factory method `compile()`.
94*0e209d39SAndroid Build Coastguard Worker      *
95*0e209d39SAndroid Build Coastguard Worker      * @stable ICU 2.4
96*0e209d39SAndroid Build Coastguard Worker      */
97*0e209d39SAndroid Build Coastguard Worker     RegexPattern();
98*0e209d39SAndroid Build Coastguard Worker 
99*0e209d39SAndroid Build Coastguard Worker     /**
100*0e209d39SAndroid Build Coastguard Worker      * Copy Constructor.  Create a new RegexPattern object that is equivalent
101*0e209d39SAndroid Build Coastguard Worker      *                    to the source object.
102*0e209d39SAndroid Build Coastguard Worker      * @param source the pattern object to be copied.
103*0e209d39SAndroid Build Coastguard Worker      * @stable ICU 2.4
104*0e209d39SAndroid Build Coastguard Worker      */
105*0e209d39SAndroid Build Coastguard Worker     RegexPattern(const RegexPattern &source);
106*0e209d39SAndroid Build Coastguard Worker 
107*0e209d39SAndroid Build Coastguard Worker     /**
108*0e209d39SAndroid Build Coastguard Worker      * Destructor.  Note that a RegexPattern object must persist so long as any
109*0e209d39SAndroid Build Coastguard Worker      *  RegexMatcher objects that were created from the RegexPattern are active.
110*0e209d39SAndroid Build Coastguard Worker      * @stable ICU 2.4
111*0e209d39SAndroid Build Coastguard Worker      */
112*0e209d39SAndroid Build Coastguard Worker     virtual ~RegexPattern();
113*0e209d39SAndroid Build Coastguard Worker 
114*0e209d39SAndroid Build Coastguard Worker     /**
115*0e209d39SAndroid Build Coastguard Worker      * Comparison operator.  Two RegexPattern objects are considered equal if they
116*0e209d39SAndroid Build Coastguard Worker      * were constructed from identical source patterns using the same #URegexpFlag
117*0e209d39SAndroid Build Coastguard Worker      * settings.
118*0e209d39SAndroid Build Coastguard Worker      * @param that a RegexPattern object to compare with "this".
119*0e209d39SAndroid Build Coastguard Worker      * @return true if the objects are equivalent.
120*0e209d39SAndroid Build Coastguard Worker      * @stable ICU 2.4
121*0e209d39SAndroid Build Coastguard Worker      */
122*0e209d39SAndroid Build Coastguard Worker     bool            operator==(const RegexPattern& that) const;
123*0e209d39SAndroid Build Coastguard Worker 
124*0e209d39SAndroid Build Coastguard Worker     /**
125*0e209d39SAndroid Build Coastguard Worker      * Inequality operator.  Two RegexPattern objects are considered equal if they
126*0e209d39SAndroid Build Coastguard Worker      * were constructed from identical source patterns using the same #URegexpFlag
127*0e209d39SAndroid Build Coastguard Worker      * settings.
128*0e209d39SAndroid Build Coastguard Worker      * @param that a RegexPattern object to compare with "this".
129*0e209d39SAndroid Build Coastguard Worker      * @return true if the objects are different.
130*0e209d39SAndroid Build Coastguard Worker      * @stable ICU 2.4
131*0e209d39SAndroid Build Coastguard Worker      */
132*0e209d39SAndroid Build Coastguard Worker     inline bool     operator!=(const RegexPattern& that) const {return ! operator ==(that);}
133*0e209d39SAndroid Build Coastguard Worker 
134*0e209d39SAndroid Build Coastguard Worker     /**
135*0e209d39SAndroid Build Coastguard Worker      * Assignment operator.  After assignment, this RegexPattern will behave identically
136*0e209d39SAndroid Build Coastguard Worker      *     to the source object.
137*0e209d39SAndroid Build Coastguard Worker      * @stable ICU 2.4
138*0e209d39SAndroid Build Coastguard Worker      */
139*0e209d39SAndroid Build Coastguard Worker     RegexPattern  &operator =(const RegexPattern &source);
140*0e209d39SAndroid Build Coastguard Worker 
141*0e209d39SAndroid Build Coastguard Worker     /**
142*0e209d39SAndroid Build Coastguard Worker      * Create an exact copy of this RegexPattern object.  Since RegexPattern is not
143*0e209d39SAndroid Build Coastguard Worker      * intended to be subclassed, <code>clone()</code> and the copy construction are
144*0e209d39SAndroid Build Coastguard Worker      * equivalent operations.
145*0e209d39SAndroid Build Coastguard Worker      * @return the copy of this RegexPattern
146*0e209d39SAndroid Build Coastguard Worker      * @stable ICU 2.4
147*0e209d39SAndroid Build Coastguard Worker      */
148*0e209d39SAndroid Build Coastguard Worker     virtual RegexPattern  *clone() const;
149*0e209d39SAndroid Build Coastguard Worker 
150*0e209d39SAndroid Build Coastguard Worker 
151*0e209d39SAndroid Build Coastguard Worker    /**
152*0e209d39SAndroid Build Coastguard Worker     * Compiles the regular expression in string form into a RegexPattern
153*0e209d39SAndroid Build Coastguard Worker     * object.  These compile methods, rather than the constructors, are the usual
154*0e209d39SAndroid Build Coastguard Worker     * way that RegexPattern objects are created.
155*0e209d39SAndroid Build Coastguard Worker     *
156*0e209d39SAndroid Build Coastguard Worker     * Note that RegexPattern objects must not be deleted while RegexMatcher
157*0e209d39SAndroid Build Coastguard Worker     * objects created from the pattern are active.  RegexMatchers keep a pointer
158*0e209d39SAndroid Build Coastguard Worker     * back to their pattern, so premature deletion of the pattern is a
159*0e209d39SAndroid Build Coastguard Worker     * catastrophic error.
160*0e209d39SAndroid Build Coastguard Worker     *
161*0e209d39SAndroid Build Coastguard Worker     * All #URegexpFlag pattern match mode flags are set to their default values.
162*0e209d39SAndroid Build Coastguard Worker     *
163*0e209d39SAndroid Build Coastguard Worker     * Note that it is often more convenient to construct a RegexMatcher directly
164*0e209d39SAndroid Build Coastguard Worker     *    from a pattern string rather than separately compiling the pattern and
165*0e209d39SAndroid Build Coastguard Worker     *    then creating a RegexMatcher object from the pattern.
166*0e209d39SAndroid Build Coastguard Worker     *
167*0e209d39SAndroid Build Coastguard Worker     * @param regex The regular expression to be compiled.
168*0e209d39SAndroid Build Coastguard Worker     * @param pe    Receives the position (line and column numbers) of any error
169*0e209d39SAndroid Build Coastguard Worker     *              within the regular expression.
170*0e209d39SAndroid Build Coastguard Worker     * @param status A reference to a UErrorCode to receive any errors.
171*0e209d39SAndroid Build Coastguard Worker     * @return      A RegexPattern object for the compiled pattern.
172*0e209d39SAndroid Build Coastguard Worker     *
173*0e209d39SAndroid Build Coastguard Worker     * @stable ICU 2.4
174*0e209d39SAndroid Build Coastguard Worker     */
175*0e209d39SAndroid Build Coastguard Worker     static RegexPattern * U_EXPORT2 compile( const UnicodeString &regex,
176*0e209d39SAndroid Build Coastguard Worker         UParseError          &pe,
177*0e209d39SAndroid Build Coastguard Worker         UErrorCode           &status);
178*0e209d39SAndroid Build Coastguard Worker 
179*0e209d39SAndroid Build Coastguard Worker    /**
180*0e209d39SAndroid Build Coastguard Worker     * Compiles the regular expression in string form into a RegexPattern
181*0e209d39SAndroid Build Coastguard Worker     * object.  These compile methods, rather than the constructors, are the usual
182*0e209d39SAndroid Build Coastguard Worker     * way that RegexPattern objects are created.
183*0e209d39SAndroid Build Coastguard Worker     *
184*0e209d39SAndroid Build Coastguard Worker     * Note that RegexPattern objects must not be deleted while RegexMatcher
185*0e209d39SAndroid Build Coastguard Worker     * objects created from the pattern are active.  RegexMatchers keep a pointer
186*0e209d39SAndroid Build Coastguard Worker     * back to their pattern, so premature deletion of the pattern is a
187*0e209d39SAndroid Build Coastguard Worker     * catastrophic error.
188*0e209d39SAndroid Build Coastguard Worker     *
189*0e209d39SAndroid Build Coastguard Worker     * All #URegexpFlag pattern match mode flags are set to their default values.
190*0e209d39SAndroid Build Coastguard Worker     *
191*0e209d39SAndroid Build Coastguard Worker     * Note that it is often more convenient to construct a RegexMatcher directly
192*0e209d39SAndroid Build Coastguard Worker     *    from a pattern string rather than separately compiling the pattern and
193*0e209d39SAndroid Build Coastguard Worker     *    then creating a RegexMatcher object from the pattern.
194*0e209d39SAndroid Build Coastguard Worker     *
195*0e209d39SAndroid Build Coastguard Worker     * @param regex The regular expression to be compiled. Note, the text referred
196*0e209d39SAndroid Build Coastguard Worker     *              to by this UText must not be deleted during the lifetime of the
197*0e209d39SAndroid Build Coastguard Worker     *              RegexPattern object or any RegexMatcher object created from it.
198*0e209d39SAndroid Build Coastguard Worker     * @param pe    Receives the position (line and column numbers) of any error
199*0e209d39SAndroid Build Coastguard Worker     *              within the regular expression.
200*0e209d39SAndroid Build Coastguard Worker     * @param status A reference to a UErrorCode to receive any errors.
201*0e209d39SAndroid Build Coastguard Worker     * @return      A RegexPattern object for the compiled pattern.
202*0e209d39SAndroid Build Coastguard Worker     *
203*0e209d39SAndroid Build Coastguard Worker     * @stable ICU 4.6
204*0e209d39SAndroid Build Coastguard Worker     */
205*0e209d39SAndroid Build Coastguard Worker     static RegexPattern * U_EXPORT2 compile( UText *regex,
206*0e209d39SAndroid Build Coastguard Worker         UParseError          &pe,
207*0e209d39SAndroid Build Coastguard Worker         UErrorCode           &status);
208*0e209d39SAndroid Build Coastguard Worker 
209*0e209d39SAndroid Build Coastguard Worker    /**
210*0e209d39SAndroid Build Coastguard Worker     * Compiles the regular expression in string form into a RegexPattern
211*0e209d39SAndroid Build Coastguard Worker     * object using the specified #URegexpFlag match mode flags.  These compile methods,
212*0e209d39SAndroid Build Coastguard Worker     * rather than the constructors, are the usual way that RegexPattern objects
213*0e209d39SAndroid Build Coastguard Worker     * are created.
214*0e209d39SAndroid Build Coastguard Worker     *
215*0e209d39SAndroid Build Coastguard Worker     * Note that RegexPattern objects must not be deleted while RegexMatcher
216*0e209d39SAndroid Build Coastguard Worker     * objects created from the pattern are active.  RegexMatchers keep a pointer
217*0e209d39SAndroid Build Coastguard Worker     * back to their pattern, so premature deletion of the pattern is a
218*0e209d39SAndroid Build Coastguard Worker     * catastrophic error.
219*0e209d39SAndroid Build Coastguard Worker     *
220*0e209d39SAndroid Build Coastguard Worker     * Note that it is often more convenient to construct a RegexMatcher directly
221*0e209d39SAndroid Build Coastguard Worker     *    from a pattern string rather than separately compiling the pattern and
222*0e209d39SAndroid Build Coastguard Worker     *    then creating a RegexMatcher object from the pattern.
223*0e209d39SAndroid Build Coastguard Worker     *
224*0e209d39SAndroid Build Coastguard Worker     * @param regex The regular expression to be compiled.
225*0e209d39SAndroid Build Coastguard Worker     * @param flags The #URegexpFlag match mode flags to be used, e.g. #UREGEX_CASE_INSENSITIVE.
226*0e209d39SAndroid Build Coastguard Worker     * @param pe    Receives the position (line and column numbers) of any error
227*0e209d39SAndroid Build Coastguard Worker     *              within the regular expression.
228*0e209d39SAndroid Build Coastguard Worker     * @param status   A reference to a UErrorCode to receive any errors.
229*0e209d39SAndroid Build Coastguard Worker     * @return      A RegexPattern object for the compiled pattern.
230*0e209d39SAndroid Build Coastguard Worker     *
231*0e209d39SAndroid Build Coastguard Worker     * @stable ICU 2.4
232*0e209d39SAndroid Build Coastguard Worker     */
233*0e209d39SAndroid Build Coastguard Worker     static RegexPattern * U_EXPORT2 compile( const UnicodeString &regex,
234*0e209d39SAndroid Build Coastguard Worker         uint32_t             flags,
235*0e209d39SAndroid Build Coastguard Worker         UParseError          &pe,
236*0e209d39SAndroid Build Coastguard Worker         UErrorCode           &status);
237*0e209d39SAndroid Build Coastguard Worker 
238*0e209d39SAndroid Build Coastguard Worker    /**
239*0e209d39SAndroid Build Coastguard Worker     * Compiles the regular expression in string form into a RegexPattern
240*0e209d39SAndroid Build Coastguard Worker     * object using the specified #URegexpFlag match mode flags.  These compile methods,
241*0e209d39SAndroid Build Coastguard Worker     * rather than the constructors, are the usual way that RegexPattern objects
242*0e209d39SAndroid Build Coastguard Worker     * are created.
243*0e209d39SAndroid Build Coastguard Worker     *
244*0e209d39SAndroid Build Coastguard Worker     * Note that RegexPattern objects must not be deleted while RegexMatcher
245*0e209d39SAndroid Build Coastguard Worker     * objects created from the pattern are active.  RegexMatchers keep a pointer
246*0e209d39SAndroid Build Coastguard Worker     * back to their pattern, so premature deletion of the pattern is a
247*0e209d39SAndroid Build Coastguard Worker     * catastrophic error.
248*0e209d39SAndroid Build Coastguard Worker     *
249*0e209d39SAndroid Build Coastguard Worker     * Note that it is often more convenient to construct a RegexMatcher directly
250*0e209d39SAndroid Build Coastguard Worker     *    from a pattern string rather than separately compiling the pattern and
251*0e209d39SAndroid Build Coastguard Worker     *    then creating a RegexMatcher object from the pattern.
252*0e209d39SAndroid Build Coastguard Worker     *
253*0e209d39SAndroid Build Coastguard Worker     * @param regex The regular expression to be compiled. Note, the text referred
254*0e209d39SAndroid Build Coastguard Worker     *              to by this UText must not be deleted during the lifetime of the
255*0e209d39SAndroid Build Coastguard Worker     *              RegexPattern object or any RegexMatcher object created from it.
256*0e209d39SAndroid Build Coastguard Worker     * @param flags The #URegexpFlag match mode flags to be used, e.g. #UREGEX_CASE_INSENSITIVE.
257*0e209d39SAndroid Build Coastguard Worker     * @param pe    Receives the position (line and column numbers) of any error
258*0e209d39SAndroid Build Coastguard Worker     *              within the regular expression.
259*0e209d39SAndroid Build Coastguard Worker     * @param status   A reference to a UErrorCode to receive any errors.
260*0e209d39SAndroid Build Coastguard Worker     * @return      A RegexPattern object for the compiled pattern.
261*0e209d39SAndroid Build Coastguard Worker     *
262*0e209d39SAndroid Build Coastguard Worker     * @stable ICU 4.6
263*0e209d39SAndroid Build Coastguard Worker     */
264*0e209d39SAndroid Build Coastguard Worker     static RegexPattern * U_EXPORT2 compile( UText *regex,
265*0e209d39SAndroid Build Coastguard Worker         uint32_t             flags,
266*0e209d39SAndroid Build Coastguard Worker         UParseError          &pe,
267*0e209d39SAndroid Build Coastguard Worker         UErrorCode           &status);
268*0e209d39SAndroid Build Coastguard Worker 
269*0e209d39SAndroid Build Coastguard Worker    /**
270*0e209d39SAndroid Build Coastguard Worker     * Compiles the regular expression in string form into a RegexPattern
271*0e209d39SAndroid Build Coastguard Worker     * object using the specified #URegexpFlag match mode flags.  These compile methods,
272*0e209d39SAndroid Build Coastguard Worker     * rather than the constructors, are the usual way that RegexPattern objects
273*0e209d39SAndroid Build Coastguard Worker     * are created.
274*0e209d39SAndroid Build Coastguard Worker     *
275*0e209d39SAndroid Build Coastguard Worker     * Note that RegexPattern objects must not be deleted while RegexMatcher
276*0e209d39SAndroid Build Coastguard Worker     * objects created from the pattern are active.  RegexMatchers keep a pointer
277*0e209d39SAndroid Build Coastguard Worker     * back to their pattern, so premature deletion of the pattern is a
278*0e209d39SAndroid Build Coastguard Worker     * catastrophic error.
279*0e209d39SAndroid Build Coastguard Worker     *
280*0e209d39SAndroid Build Coastguard Worker     * Note that it is often more convenient to construct a RegexMatcher directly
281*0e209d39SAndroid Build Coastguard Worker     *    from a pattern string rather than separately compiling the pattern and
282*0e209d39SAndroid Build Coastguard Worker     *    then creating a RegexMatcher object from the pattern.
283*0e209d39SAndroid Build Coastguard Worker     *
284*0e209d39SAndroid Build Coastguard Worker     * @param regex The regular expression to be compiled.
285*0e209d39SAndroid Build Coastguard Worker     * @param flags The #URegexpFlag match mode flags to be used, e.g. #UREGEX_CASE_INSENSITIVE.
286*0e209d39SAndroid Build Coastguard Worker     * @param status   A reference to a UErrorCode to receive any errors.
287*0e209d39SAndroid Build Coastguard Worker     * @return      A RegexPattern object for the compiled pattern.
288*0e209d39SAndroid Build Coastguard Worker     *
289*0e209d39SAndroid Build Coastguard Worker     * @stable ICU 2.6
290*0e209d39SAndroid Build Coastguard Worker     */
291*0e209d39SAndroid Build Coastguard Worker     static RegexPattern * U_EXPORT2 compile( const UnicodeString &regex,
292*0e209d39SAndroid Build Coastguard Worker         uint32_t             flags,
293*0e209d39SAndroid Build Coastguard Worker         UErrorCode           &status);
294*0e209d39SAndroid Build Coastguard Worker 
295*0e209d39SAndroid Build Coastguard Worker    /**
296*0e209d39SAndroid Build Coastguard Worker     * Compiles the regular expression in string form into a RegexPattern
297*0e209d39SAndroid Build Coastguard Worker     * object using the specified #URegexpFlag match mode flags.  These compile methods,
298*0e209d39SAndroid Build Coastguard Worker     * rather than the constructors, are the usual way that RegexPattern objects
299*0e209d39SAndroid Build Coastguard Worker     * are created.
300*0e209d39SAndroid Build Coastguard Worker     *
301*0e209d39SAndroid Build Coastguard Worker     * Note that RegexPattern objects must not be deleted while RegexMatcher
302*0e209d39SAndroid Build Coastguard Worker     * objects created from the pattern are active.  RegexMatchers keep a pointer
303*0e209d39SAndroid Build Coastguard Worker     * back to their pattern, so premature deletion of the pattern is a
304*0e209d39SAndroid Build Coastguard Worker     * catastrophic error.
305*0e209d39SAndroid Build Coastguard Worker     *
306*0e209d39SAndroid Build Coastguard Worker     * Note that it is often more convenient to construct a RegexMatcher directly
307*0e209d39SAndroid Build Coastguard Worker     *    from a pattern string rather than separately compiling the pattern and
308*0e209d39SAndroid Build Coastguard Worker     *    then creating a RegexMatcher object from the pattern.
309*0e209d39SAndroid Build Coastguard Worker     *
310*0e209d39SAndroid Build Coastguard Worker     * @param regex The regular expression to be compiled. Note, the text referred
311*0e209d39SAndroid Build Coastguard Worker     *              to by this UText must not be deleted during the lifetime of the
312*0e209d39SAndroid Build Coastguard Worker     *              RegexPattern object or any RegexMatcher object created from it.
313*0e209d39SAndroid Build Coastguard Worker     * @param flags The #URegexpFlag match mode flags to be used, e.g. #UREGEX_CASE_INSENSITIVE.
314*0e209d39SAndroid Build Coastguard Worker     * @param status   A reference to a UErrorCode to receive any errors.
315*0e209d39SAndroid Build Coastguard Worker     * @return      A RegexPattern object for the compiled pattern.
316*0e209d39SAndroid Build Coastguard Worker     *
317*0e209d39SAndroid Build Coastguard Worker     * @stable ICU 4.6
318*0e209d39SAndroid Build Coastguard Worker     */
319*0e209d39SAndroid Build Coastguard Worker     static RegexPattern * U_EXPORT2 compile( UText *regex,
320*0e209d39SAndroid Build Coastguard Worker         uint32_t             flags,
321*0e209d39SAndroid Build Coastguard Worker         UErrorCode           &status);
322*0e209d39SAndroid Build Coastguard Worker 
323*0e209d39SAndroid Build Coastguard Worker    /**
324*0e209d39SAndroid Build Coastguard Worker     * Get the #URegexpFlag match mode flags that were used when compiling this pattern.
325*0e209d39SAndroid Build Coastguard Worker     * @return  the #URegexpFlag match mode flags
326*0e209d39SAndroid Build Coastguard Worker     * @stable ICU 2.4
327*0e209d39SAndroid Build Coastguard Worker     */
328*0e209d39SAndroid Build Coastguard Worker     virtual uint32_t flags() const;
329*0e209d39SAndroid Build Coastguard Worker 
330*0e209d39SAndroid Build Coastguard Worker    /**
331*0e209d39SAndroid Build Coastguard Worker     * Creates a RegexMatcher that will match the given input against this pattern.  The
332*0e209d39SAndroid Build Coastguard Worker     * RegexMatcher can then be used to perform match, find or replace operations
333*0e209d39SAndroid Build Coastguard Worker     * on the input.  Note that a RegexPattern object must not be deleted while
334*0e209d39SAndroid Build Coastguard Worker     * RegexMatchers created from it still exist and might possibly be used again.
335*0e209d39SAndroid Build Coastguard Worker     *
336*0e209d39SAndroid Build Coastguard Worker     * The matcher will retain a reference to the supplied input string, and all regexp
337*0e209d39SAndroid Build Coastguard Worker     * pattern matching operations happen directly on this original string.  It is
338*0e209d39SAndroid Build Coastguard Worker     * critical that the string not be altered or deleted before use by the regular
339*0e209d39SAndroid Build Coastguard Worker     * expression operations is complete.
340*0e209d39SAndroid Build Coastguard Worker     *
341*0e209d39SAndroid Build Coastguard Worker     * @param input    The input string to which the regular expression will be applied.
342*0e209d39SAndroid Build Coastguard Worker     * @param status   A reference to a UErrorCode to receive any errors.
343*0e209d39SAndroid Build Coastguard Worker     * @return         A RegexMatcher object for this pattern and input.
344*0e209d39SAndroid Build Coastguard Worker     *
345*0e209d39SAndroid Build Coastguard Worker     * @stable ICU 2.4
346*0e209d39SAndroid Build Coastguard Worker     */
347*0e209d39SAndroid Build Coastguard Worker     virtual RegexMatcher *matcher(const UnicodeString &input,
348*0e209d39SAndroid Build Coastguard Worker         UErrorCode          &status) const;
349*0e209d39SAndroid Build Coastguard Worker 
350*0e209d39SAndroid Build Coastguard Worker private:
351*0e209d39SAndroid Build Coastguard Worker     /**
352*0e209d39SAndroid Build Coastguard Worker      * Cause a compilation error if an application accidentally attempts to
353*0e209d39SAndroid Build Coastguard Worker      *   create a matcher with a (char16_t *) string as input rather than
354*0e209d39SAndroid Build Coastguard Worker      *   a UnicodeString.  Avoids a dangling reference to a temporary string.
355*0e209d39SAndroid Build Coastguard Worker      *
356*0e209d39SAndroid Build Coastguard Worker      * To efficiently work with char16_t * strings, wrap the data in a UnicodeString
357*0e209d39SAndroid Build Coastguard Worker      * using one of the aliasing constructors, such as
358*0e209d39SAndroid Build Coastguard Worker      * `UnicodeString(UBool isTerminated, const char16_t *text, int32_t textLength);`
359*0e209d39SAndroid Build Coastguard Worker      * or in a UText, using
360*0e209d39SAndroid Build Coastguard Worker      * `utext_openUChars(UText *ut, const char16_t *text, int64_t textLength, UErrorCode *status);`
361*0e209d39SAndroid Build Coastguard Worker      *
362*0e209d39SAndroid Build Coastguard Worker      */
363*0e209d39SAndroid Build Coastguard Worker     RegexMatcher *matcher(const char16_t *input,
364*0e209d39SAndroid Build Coastguard Worker         UErrorCode          &status) const = delete;
365*0e209d39SAndroid Build Coastguard Worker public:
366*0e209d39SAndroid Build Coastguard Worker 
367*0e209d39SAndroid Build Coastguard Worker 
368*0e209d39SAndroid Build Coastguard Worker    /**
369*0e209d39SAndroid Build Coastguard Worker     * Creates a RegexMatcher that will match against this pattern.  The
370*0e209d39SAndroid Build Coastguard Worker     * RegexMatcher can be used to perform match, find or replace operations.
371*0e209d39SAndroid Build Coastguard Worker     * Note that a RegexPattern object must not be deleted while
372*0e209d39SAndroid Build Coastguard Worker     * RegexMatchers created from it still exist and might possibly be used again.
373*0e209d39SAndroid Build Coastguard Worker     *
374*0e209d39SAndroid Build Coastguard Worker     * @param status   A reference to a UErrorCode to receive any errors.
375*0e209d39SAndroid Build Coastguard Worker     * @return      A RegexMatcher object for this pattern.  This overload takes no input text.
376*0e209d39SAndroid Build Coastguard Worker     *
377*0e209d39SAndroid Build Coastguard Worker     * @stable ICU 2.6
378*0e209d39SAndroid Build Coastguard Worker     */
379*0e209d39SAndroid Build Coastguard Worker     virtual RegexMatcher *matcher(UErrorCode  &status) const;
380*0e209d39SAndroid Build Coastguard Worker 
381*0e209d39SAndroid Build Coastguard Worker 
382*0e209d39SAndroid Build Coastguard Worker    /**
383*0e209d39SAndroid Build Coastguard Worker     * Test whether a string matches a regular expression.  This convenience function
384*0e209d39SAndroid Build Coastguard Worker     * both compiles the regular expression and applies it in a single operation.
385*0e209d39SAndroid Build Coastguard Worker     * Note that if the same pattern needs to be applied repeatedly, this method will be
386*0e209d39SAndroid Build Coastguard Worker     * less efficient than creating and reusing a RegexMatcher object.
387*0e209d39SAndroid Build Coastguard Worker     *
388*0e209d39SAndroid Build Coastguard Worker     * @param regex The regular expression, as a UnicodeString.
389*0e209d39SAndroid Build Coastguard Worker     * @param input The string data to be matched, as a UnicodeString.
390*0e209d39SAndroid Build Coastguard Worker     * @param pe Receives the position of any syntax errors within the regular expression.
391*0e209d39SAndroid Build Coastguard Worker     * @param status A reference to a UErrorCode to receive any errors.
392*0e209d39SAndroid Build Coastguard Worker     * @return True if the regular expression exactly matches the full input string.
393*0e209d39SAndroid Build Coastguard Worker     *
394*0e209d39SAndroid Build Coastguard Worker     * @stable ICU 2.4
395*0e209d39SAndroid Build Coastguard Worker     */
396*0e209d39SAndroid Build Coastguard Worker     static UBool U_EXPORT2 matches(const UnicodeString   &regex,
397*0e209d39SAndroid Build Coastguard Worker         const UnicodeString   &input,
398*0e209d39SAndroid Build Coastguard Worker               UParseError     &pe,
399*0e209d39SAndroid Build Coastguard Worker               UErrorCode      &status);
400*0e209d39SAndroid Build Coastguard Worker 
401*0e209d39SAndroid Build Coastguard Worker    /**
402*0e209d39SAndroid Build Coastguard Worker     * Test whether a UText matches a regular expression.  This convenience function
403*0e209d39SAndroid Build Coastguard Worker     * both compiles the regular expression and applies it in a single operation.
404*0e209d39SAndroid Build Coastguard Worker     * Note that if the same pattern needs to be applied repeatedly, this method will be
405*0e209d39SAndroid Build Coastguard Worker     * less efficient than creating and reusing a RegexMatcher object.
406*0e209d39SAndroid Build Coastguard Worker     *
407*0e209d39SAndroid Build Coastguard Worker     * @param regex The regular expression, as a UText.
408*0e209d39SAndroid Build Coastguard Worker     * @param input The text data to be matched, as a UText.
409*0e209d39SAndroid Build Coastguard Worker     * @param pe Receives the position of any syntax errors within the regular expression.
410*0e209d39SAndroid Build Coastguard Worker     * @param status A reference to a UErrorCode to receive any errors.
411*0e209d39SAndroid Build Coastguard Worker     * @return True if the regular expression exactly matches the full input text.
412*0e209d39SAndroid Build Coastguard Worker     *
413*0e209d39SAndroid Build Coastguard Worker     * @stable ICU 4.6
414*0e209d39SAndroid Build Coastguard Worker     */
415*0e209d39SAndroid Build Coastguard Worker     static UBool U_EXPORT2 matches(UText *regex,
416*0e209d39SAndroid Build Coastguard Worker         UText           *input,
417*0e209d39SAndroid Build Coastguard Worker         UParseError     &pe,
418*0e209d39SAndroid Build Coastguard Worker         UErrorCode      &status);
419*0e209d39SAndroid Build Coastguard Worker 
420*0e209d39SAndroid Build Coastguard Worker    /**
421*0e209d39SAndroid Build Coastguard Worker     * Returns, as a UnicodeString, the regular expression from which this pattern was compiled.
422*0e209d39SAndroid Build Coastguard Worker     * This method will work even if the pattern was compiled from a UText.
423*0e209d39SAndroid Build Coastguard Worker     *
424*0e209d39SAndroid Build Coastguard Worker     * Note: If the pattern was originally compiled from a UText, and that UText was modified,
425*0e209d39SAndroid Build Coastguard Worker     * the returned string may no longer reflect the RegexPattern object.
426*0e209d39SAndroid Build Coastguard Worker     * @stable ICU 2.4
427*0e209d39SAndroid Build Coastguard Worker     */
428*0e209d39SAndroid Build Coastguard Worker     virtual UnicodeString pattern() const;
429*0e209d39SAndroid Build Coastguard Worker 
430*0e209d39SAndroid Build Coastguard Worker 
431*0e209d39SAndroid Build Coastguard Worker    /**
432*0e209d39SAndroid Build Coastguard Worker     * Returns, as a UText, the regular expression from which this pattern was compiled.
433*0e209d39SAndroid Build Coastguard Worker     * This method will work even if the pattern was compiled from a UnicodeString.
434*0e209d39SAndroid Build Coastguard Worker     *
435*0e209d39SAndroid Build Coastguard Worker     * Note: This is the original input, not a clone. If the pattern was originally compiled from a
436*0e209d39SAndroid Build Coastguard Worker     * UText, and that UText was modified, the returned UText may no longer reflect the RegexPattern
437*0e209d39SAndroid Build Coastguard Worker     * object.
438*0e209d39SAndroid Build Coastguard Worker     * @param status A reference to a UErrorCode to receive any errors.
439*0e209d39SAndroid Build Coastguard Worker     * @stable ICU 4.6
440*0e209d39SAndroid Build Coastguard Worker     */
441*0e209d39SAndroid Build Coastguard Worker     virtual UText *patternText(UErrorCode      &status) const;
442*0e209d39SAndroid Build Coastguard Worker 
443*0e209d39SAndroid Build Coastguard Worker 
444*0e209d39SAndroid Build Coastguard Worker     /**
445*0e209d39SAndroid Build Coastguard Worker      * Get the group number corresponding to a named capture group.
446*0e209d39SAndroid Build Coastguard Worker      * The returned number can be used with any function that accesses
447*0e209d39SAndroid Build Coastguard Worker      * capture groups by number.
448*0e209d39SAndroid Build Coastguard Worker      *
449*0e209d39SAndroid Build Coastguard Worker      * The function returns an error status if the specified name does not
450*0e209d39SAndroid Build Coastguard Worker      * appear in the pattern.
451*0e209d39SAndroid Build Coastguard Worker      *
452*0e209d39SAndroid Build Coastguard Worker      * @param  groupName   The capture group name.
453*0e209d39SAndroid Build Coastguard Worker      * @param  status      A UErrorCode to receive any errors.
454*0e209d39SAndroid Build Coastguard Worker      * @return             The group number corresponding to the named capture group.
455*0e209d39SAndroid Build Coastguard Worker      * @stable ICU 55
456*0e209d39SAndroid Build Coastguard Worker      */
457*0e209d39SAndroid Build Coastguard Worker     virtual int32_t groupNumberFromName(const UnicodeString &groupName, UErrorCode &status) const;
458*0e209d39SAndroid Build Coastguard Worker 
459*0e209d39SAndroid Build Coastguard Worker 
460*0e209d39SAndroid Build Coastguard Worker     /**
461*0e209d39SAndroid Build Coastguard Worker      * Get the group number corresponding to a named capture group.
462*0e209d39SAndroid Build Coastguard Worker      * The returned number can be used with any function that accesses
463*0e209d39SAndroid Build Coastguard Worker      * capture groups by number.
464*0e209d39SAndroid Build Coastguard Worker      *
465*0e209d39SAndroid Build Coastguard Worker      * The function returns an error status if the specified name does not
466*0e209d39SAndroid Build Coastguard Worker      * appear in the pattern.
467*0e209d39SAndroid Build Coastguard Worker      *
468*0e209d39SAndroid Build Coastguard Worker      * @param  groupName   The capture group name,
469*0e209d39SAndroid Build Coastguard Worker      *                     platform invariant characters only.
470*0e209d39SAndroid Build Coastguard Worker      * @param  nameLength  The length of the name, or -1 if the name is
471*0e209d39SAndroid Build Coastguard Worker      *                     nul-terminated.
472*0e209d39SAndroid Build Coastguard Worker      * @param  status      A UErrorCode to receive any errors.
473*0e209d39SAndroid Build Coastguard Worker      * @return             The group number corresponding to the named capture group.
474*0e209d39SAndroid Build Coastguard Worker      * @stable ICU 55
475*0e209d39SAndroid Build Coastguard Worker      */
476*0e209d39SAndroid Build Coastguard Worker     virtual int32_t groupNumberFromName(const char *groupName, int32_t nameLength, UErrorCode &status) const;
477*0e209d39SAndroid Build Coastguard Worker 
478*0e209d39SAndroid Build Coastguard Worker 
479*0e209d39SAndroid Build Coastguard Worker     /**
480*0e209d39SAndroid Build Coastguard Worker      * Split a string into fields.  Somewhat like split() from Perl or Java.
481*0e209d39SAndroid Build Coastguard Worker      * Pattern matches identify delimiters that separate the input
482*0e209d39SAndroid Build Coastguard Worker      * into fields.  The input data between the delimiters becomes the
483*0e209d39SAndroid Build Coastguard Worker      * fields themselves.
484*0e209d39SAndroid Build Coastguard Worker      *
485*0e209d39SAndroid Build Coastguard Worker      * If the delimiter pattern includes capture groups, the captured text will
486*0e209d39SAndroid Build Coastguard Worker      * also appear in the destination array of output strings, interspersed
487*0e209d39SAndroid Build Coastguard Worker      * with the fields.  This is similar to Perl, but differs from Java,
488*0e209d39SAndroid Build Coastguard Worker      * which ignores the presence of capture groups in the pattern.
489*0e209d39SAndroid Build Coastguard Worker      *
490*0e209d39SAndroid Build Coastguard Worker      * Trailing empty fields will always be returned, assuming sufficient
491*0e209d39SAndroid Build Coastguard Worker      * destination capacity.  This differs from the default behavior for Java
492*0e209d39SAndroid Build Coastguard Worker      * and Perl where trailing empty fields are not returned.
493*0e209d39SAndroid Build Coastguard Worker      *
494*0e209d39SAndroid Build Coastguard Worker      * The number of strings produced by the split operation is returned.
495*0e209d39SAndroid Build Coastguard Worker      * This count includes the strings from capture groups in the delimiter pattern.
496*0e209d39SAndroid Build Coastguard Worker      * This behavior differs from Java, which ignores capture groups.
497*0e209d39SAndroid Build Coastguard Worker      *
498*0e209d39SAndroid Build Coastguard Worker      * For the best performance on split() operations,
499*0e209d39SAndroid Build Coastguard Worker      * <code>RegexMatcher::split</code> is preferable to this function.
500*0e209d39SAndroid Build Coastguard Worker      *
501*0e209d39SAndroid Build Coastguard Worker      * @param input   The string to be split into fields.  The field delimiters
502*0e209d39SAndroid Build Coastguard Worker      *                match the pattern (in the "this" object).
503*0e209d39SAndroid Build Coastguard Worker      * @param dest    An array of UnicodeStrings to receive the results of the split.
504*0e209d39SAndroid Build Coastguard Worker      *                This is an array of actual UnicodeString objects, not an
505*0e209d39SAndroid Build Coastguard Worker      *                array of pointers to strings.  Local (stack based) arrays can
506*0e209d39SAndroid Build Coastguard Worker      *                work well here.
507*0e209d39SAndroid Build Coastguard Worker      * @param destCapacity  The number of elements in the destination array.
508*0e209d39SAndroid Build Coastguard Worker      *                If the number of fields found is less than destCapacity, the
509*0e209d39SAndroid Build Coastguard Worker      *                extra strings in the destination array are not altered.
510*0e209d39SAndroid Build Coastguard Worker      *                If the number of destination strings is less than the number
511*0e209d39SAndroid Build Coastguard Worker      *                of fields, the trailing part of the input string, including any
512*0e209d39SAndroid Build Coastguard Worker      *                field delimiters, is placed in the last destination string.
513*0e209d39SAndroid Build Coastguard Worker      * @param status  A reference to a UErrorCode to receive any errors.
514*0e209d39SAndroid Build Coastguard Worker      * @return        The number of strings produced, including any from capture groups.
515*0e209d39SAndroid Build Coastguard Worker      * @stable ICU 2.4
516*0e209d39SAndroid Build Coastguard Worker      */
517*0e209d39SAndroid Build Coastguard Worker     virtual int32_t  split(const UnicodeString &input,
518*0e209d39SAndroid Build Coastguard Worker         UnicodeString    dest[],
519*0e209d39SAndroid Build Coastguard Worker         int32_t          destCapacity,
520*0e209d39SAndroid Build Coastguard Worker         UErrorCode       &status) const;
521*0e209d39SAndroid Build Coastguard Worker 
522*0e209d39SAndroid Build Coastguard Worker 
523*0e209d39SAndroid Build Coastguard Worker     /**
524*0e209d39SAndroid Build Coastguard Worker      * Split a string into fields.  Somewhat like %split() from Perl or Java.
525*0e209d39SAndroid Build Coastguard Worker      * Pattern matches identify delimiters that separate the input
526*0e209d39SAndroid Build Coastguard Worker      * into fields.  The input data between the delimiters becomes the
527*0e209d39SAndroid Build Coastguard Worker      * fields themselves.
528*0e209d39SAndroid Build Coastguard Worker      *
529*0e209d39SAndroid Build Coastguard Worker      * If the delimiter pattern includes capture groups, the captured text will
530*0e209d39SAndroid Build Coastguard Worker      * also appear in the destination array of output strings, interspersed
531*0e209d39SAndroid Build Coastguard Worker      * with the fields.  This is similar to Perl, but differs from Java,
532*0e209d39SAndroid Build Coastguard Worker      * which ignores the presence of capture groups in the pattern.
533*0e209d39SAndroid Build Coastguard Worker      *
534*0e209d39SAndroid Build Coastguard Worker      * Trailing empty fields will always be returned, assuming sufficient
535*0e209d39SAndroid Build Coastguard Worker      * destination capacity.  This differs from the default behavior for Java
536*0e209d39SAndroid Build Coastguard Worker      * and Perl where trailing empty fields are not returned.
537*0e209d39SAndroid Build Coastguard Worker      *
538*0e209d39SAndroid Build Coastguard Worker      * The number of strings produced by the split operation is returned.
539*0e209d39SAndroid Build Coastguard Worker      * This count includes the strings from capture groups in the delimiter pattern.
540*0e209d39SAndroid Build Coastguard Worker      * This behavior differs from Java, which ignores capture groups.
541*0e209d39SAndroid Build Coastguard Worker      *
542*0e209d39SAndroid Build Coastguard Worker      * For the best performance on split() operations,
543*0e209d39SAndroid Build Coastguard Worker      * `RegexMatcher::split()` is preferable to this function.
544*0e209d39SAndroid Build Coastguard Worker      *
545*0e209d39SAndroid Build Coastguard Worker      * @param input   The string to be split into fields.  The field delimiters
546*0e209d39SAndroid Build Coastguard Worker      *                match the pattern (in the "this" object).
547*0e209d39SAndroid Build Coastguard Worker      * @param dest    An array of mutable UText structs to receive the results of the split.
548*0e209d39SAndroid Build Coastguard Worker      *                If a field is nullptr, a new UText is allocated to contain the results for
549*0e209d39SAndroid Build Coastguard Worker      *                that field. This new UText is not guaranteed to be mutable.
550*0e209d39SAndroid Build Coastguard Worker      * @param destCapacity  The number of elements in the destination array.
551*0e209d39SAndroid Build Coastguard Worker      *                If the number of fields found is less than destCapacity, the
552*0e209d39SAndroid Build Coastguard Worker      *                extra UText elements in the destination array are not altered.
553*0e209d39SAndroid Build Coastguard Worker      *                If the number of destination elements is less than the number
554*0e209d39SAndroid Build Coastguard Worker      *                of fields, the trailing part of the input string, including any
555*0e209d39SAndroid Build Coastguard Worker      *                field delimiters, is placed in the last destination element.
556*0e209d39SAndroid Build Coastguard Worker      * @param status  A reference to a UErrorCode to receive any errors.
557*0e209d39SAndroid Build Coastguard Worker      * @return        The number of destination strings used.
558*0e209d39SAndroid Build Coastguard Worker      *
559*0e209d39SAndroid Build Coastguard Worker      * @stable ICU 4.6
560*0e209d39SAndroid Build Coastguard Worker      */
561*0e209d39SAndroid Build Coastguard Worker     virtual int32_t  split(UText *input,
562*0e209d39SAndroid Build Coastguard Worker         UText            *dest[],
563*0e209d39SAndroid Build Coastguard Worker         int32_t          destCapacity,
564*0e209d39SAndroid Build Coastguard Worker         UErrorCode       &status) const;
565*0e209d39SAndroid Build Coastguard Worker 
566*0e209d39SAndroid Build Coastguard Worker 
567*0e209d39SAndroid Build Coastguard Worker     /**
568*0e209d39SAndroid Build Coastguard Worker      * ICU "poor man's RTTI": returns a UClassID for the actual class of this object.
569*0e209d39SAndroid Build Coastguard Worker      *
570*0e209d39SAndroid Build Coastguard Worker      * @stable ICU 2.4
571*0e209d39SAndroid Build Coastguard Worker      */
572*0e209d39SAndroid Build Coastguard Worker     virtual UClassID getDynamicClassID() const override;
573*0e209d39SAndroid Build Coastguard Worker 
574*0e209d39SAndroid Build Coastguard Worker     /**
575*0e209d39SAndroid Build Coastguard Worker      * ICU "poor man's RTTI": returns a UClassID for this class (static; no object needed).
576*0e209d39SAndroid Build Coastguard Worker      *
577*0e209d39SAndroid Build Coastguard Worker      * @stable ICU 2.4
578*0e209d39SAndroid Build Coastguard Worker      */
579*0e209d39SAndroid Build Coastguard Worker     static UClassID U_EXPORT2 getStaticClassID();
580*0e209d39SAndroid Build Coastguard Worker 
581*0e209d39SAndroid Build Coastguard Worker private:
582*0e209d39SAndroid Build Coastguard Worker     //
583*0e209d39SAndroid Build Coastguard Worker     //  Implementation Data
584*0e209d39SAndroid Build Coastguard Worker     //
585*0e209d39SAndroid Build Coastguard Worker     UText          *fPattern;      // The original pattern text, as a UText.
586*0e209d39SAndroid Build Coastguard Worker     UnicodeString  *fPatternString; // The original pattern UnicodeString, if relevant.
587*0e209d39SAndroid Build Coastguard Worker     uint32_t        fFlags;        // The flags used when compiling the pattern.
588*0e209d39SAndroid Build Coastguard Worker                                    //
589*0e209d39SAndroid Build Coastguard Worker     UVector64       *fCompiledPat; // The compiled pattern p-code.
590*0e209d39SAndroid Build Coastguard Worker     UnicodeString   fLiteralText;  // Any literal string data from the pattern,
591*0e209d39SAndroid Build Coastguard Worker                                    //   after un-escaping, for use during the match.
592*0e209d39SAndroid Build Coastguard Worker 
593*0e209d39SAndroid Build Coastguard Worker     UVector         *fSets;        // Any UnicodeSets referenced from the pattern.
594*0e209d39SAndroid Build Coastguard Worker     Regex8BitSet    *fSets8;       //      (and fast sets for latin-1 range.)
595*0e209d39SAndroid Build Coastguard Worker 
596*0e209d39SAndroid Build Coastguard Worker 
597*0e209d39SAndroid Build Coastguard Worker     UErrorCode      fDeferredStatus; // status if some prior error has left this
598*0e209d39SAndroid Build Coastguard Worker                                    //  RegexPattern in an unusable state.
599*0e209d39SAndroid Build Coastguard Worker 
600*0e209d39SAndroid Build Coastguard Worker     int32_t         fMinMatchLen;  // Minimum Match Length.  All matches will have length
601*0e209d39SAndroid Build Coastguard Worker                                    //   >= this value.  For some patterns, this calculated
602*0e209d39SAndroid Build Coastguard Worker                                    //   value may be less than the true shortest
603*0e209d39SAndroid Build Coastguard Worker                                    //   possible match.
604*0e209d39SAndroid Build Coastguard Worker 
605*0e209d39SAndroid Build Coastguard Worker     int32_t         fFrameSize;    // Size of a state stack frame in the
606*0e209d39SAndroid Build Coastguard Worker                                    //   execution engine.
607*0e209d39SAndroid Build Coastguard Worker 
608*0e209d39SAndroid Build Coastguard Worker     int32_t         fDataSize;     // The size of the data needed by the pattern that
609*0e209d39SAndroid Build Coastguard Worker                                    //   does not go on the state stack, but has just
610*0e209d39SAndroid Build Coastguard Worker                                    //   a single copy per matcher.
611*0e209d39SAndroid Build Coastguard Worker 
612*0e209d39SAndroid Build Coastguard Worker     UVector32       *fGroupMap;    // Map from capture group number to position of
613*0e209d39SAndroid Build Coastguard Worker                                    //   the group's variables in the matcher stack frame.
614*0e209d39SAndroid Build Coastguard Worker 
615*0e209d39SAndroid Build Coastguard Worker     int32_t         fStartType;    // Info on how a match must start.
616*0e209d39SAndroid Build Coastguard Worker     int32_t         fInitialStringIdx;     //
617*0e209d39SAndroid Build Coastguard Worker     int32_t         fInitialStringLen;
618*0e209d39SAndroid Build Coastguard Worker     UnicodeSet     *fInitialChars;
619*0e209d39SAndroid Build Coastguard Worker     UChar32         fInitialChar;
620*0e209d39SAndroid Build Coastguard Worker     Regex8BitSet   *fInitialChars8;
621*0e209d39SAndroid Build Coastguard Worker     UBool           fNeedsAltInput;
622*0e209d39SAndroid Build Coastguard Worker 
623*0e209d39SAndroid Build Coastguard Worker     UHashtable     *fNamedCaptureMap;  // Map from capture group names to numbers.
624*0e209d39SAndroid Build Coastguard Worker 
625*0e209d39SAndroid Build Coastguard Worker     friend class RegexCompile;
626*0e209d39SAndroid Build Coastguard Worker     friend class RegexMatcher;
627*0e209d39SAndroid Build Coastguard Worker     friend class RegexCImpl;
628*0e209d39SAndroid Build Coastguard Worker 
629*0e209d39SAndroid Build Coastguard Worker     //
630*0e209d39SAndroid Build Coastguard Worker     //  Implementation Methods (private helpers)
631*0e209d39SAndroid Build Coastguard Worker     //
632*0e209d39SAndroid Build Coastguard Worker     void        init();                 // Common initialization, for use by constructors.
633*0e209d39SAndroid Build Coastguard Worker     bool        initNamedCaptureMap();  // Lazy init for fNamedCaptureMap.
634*0e209d39SAndroid Build Coastguard Worker     void        zap();                  // Common cleanup.
635*0e209d39SAndroid Build Coastguard Worker 
636*0e209d39SAndroid Build Coastguard Worker     void        dumpOp(int32_t index) const;  // NOTE(review): presumably a debug dump of one compiled op - confirm.
637*0e209d39SAndroid Build Coastguard Worker 
638*0e209d39SAndroid Build Coastguard Worker   public:
639*0e209d39SAndroid Build Coastguard Worker #ifndef U_HIDE_INTERNAL_API
640*0e209d39SAndroid Build Coastguard Worker     /**
641*0e209d39SAndroid Build Coastguard Worker       * Dump a compiled pattern. Internal debug function; not declared when U_HIDE_INTERNAL_API is defined.
642*0e209d39SAndroid Build Coastguard Worker       * @internal
643*0e209d39SAndroid Build Coastguard Worker       */
644*0e209d39SAndroid Build Coastguard Worker     void        dumpPattern() const;
645*0e209d39SAndroid Build Coastguard Worker #endif  /* U_HIDE_INTERNAL_API */
646*0e209d39SAndroid Build Coastguard Worker };
647*0e209d39SAndroid Build Coastguard Worker 
648*0e209d39SAndroid Build Coastguard Worker 
649*0e209d39SAndroid Build Coastguard Worker 
650*0e209d39SAndroid Build Coastguard Worker /**
651*0e209d39SAndroid Build Coastguard Worker  *  class RegexMatcher bundles together a regular expression pattern and
652*0e209d39SAndroid Build Coastguard Worker  *  input text to which the expression can be applied.  It includes methods
653*0e209d39SAndroid Build Coastguard Worker  *  for testing for matches, and for find and replace operations.
654*0e209d39SAndroid Build Coastguard Worker  *
655*0e209d39SAndroid Build Coastguard Worker  * <p>Class RegexMatcher is not intended to be subclassed.</p>
656*0e209d39SAndroid Build Coastguard Worker  *
657*0e209d39SAndroid Build Coastguard Worker  * @stable ICU 2.4
658*0e209d39SAndroid Build Coastguard Worker  */
659*0e209d39SAndroid Build Coastguard Worker class U_I18N_API RegexMatcher final : public UObject {
660*0e209d39SAndroid Build Coastguard Worker public:
661*0e209d39SAndroid Build Coastguard Worker 
662*0e209d39SAndroid Build Coastguard Worker     /**
663*0e209d39SAndroid Build Coastguard Worker       * Construct a RegexMatcher for a regular expression.
664*0e209d39SAndroid Build Coastguard Worker       * This is a convenience constructor that avoids the need to explicitly create
665*0e209d39SAndroid Build Coastguard Worker       * a RegexPattern object.  Note that if several RegexMatchers need to be
666*0e209d39SAndroid Build Coastguard Worker       * created for the same expression, it will be more efficient to
667*0e209d39SAndroid Build Coastguard Worker       * separately create and cache a RegexPattern object, and use
668*0e209d39SAndroid Build Coastguard Worker       * its matcher() method to create the RegexMatcher objects.
669*0e209d39SAndroid Build Coastguard Worker       *
670*0e209d39SAndroid Build Coastguard Worker       *  @param regexp The regular expression to be compiled, as a UnicodeString.
671*0e209d39SAndroid Build Coastguard Worker       *  @param flags  #URegexpFlag options, such as #UREGEX_CASE_INSENSITIVE.
672*0e209d39SAndroid Build Coastguard Worker       *  @param status Any errors are reported by setting this UErrorCode variable.
673*0e209d39SAndroid Build Coastguard Worker       *  @stable ICU 2.6
674*0e209d39SAndroid Build Coastguard Worker       */
675*0e209d39SAndroid Build Coastguard Worker     RegexMatcher(const UnicodeString &regexp, uint32_t flags, UErrorCode &status);
676*0e209d39SAndroid Build Coastguard Worker 
677*0e209d39SAndroid Build Coastguard Worker     /**
678*0e209d39SAndroid Build Coastguard Worker       * Construct a RegexMatcher for a regular expression.
679*0e209d39SAndroid Build Coastguard Worker       * This is a convenience constructor that avoids the need to explicitly create
680*0e209d39SAndroid Build Coastguard Worker       * a RegexPattern object.  Note that if several RegexMatchers need to be
681*0e209d39SAndroid Build Coastguard Worker       * created for the same expression, it will be more efficient to
682*0e209d39SAndroid Build Coastguard Worker       * separately create and cache a RegexPattern object, and use
683*0e209d39SAndroid Build Coastguard Worker       * its matcher() method to create the RegexMatcher objects.
684*0e209d39SAndroid Build Coastguard Worker       *
685*0e209d39SAndroid Build Coastguard Worker       *  @param regexp The regular expression to be compiled, as a UText.
686*0e209d39SAndroid Build Coastguard Worker       *  @param flags  #URegexpFlag options, such as #UREGEX_CASE_INSENSITIVE.
687*0e209d39SAndroid Build Coastguard Worker       *  @param status Any errors are reported by setting this UErrorCode variable.
688*0e209d39SAndroid Build Coastguard Worker       *
689*0e209d39SAndroid Build Coastguard Worker       *  @stable ICU 4.6
690*0e209d39SAndroid Build Coastguard Worker       */
691*0e209d39SAndroid Build Coastguard Worker     RegexMatcher(UText *regexp, uint32_t flags, UErrorCode &status);
692*0e209d39SAndroid Build Coastguard Worker 
693*0e209d39SAndroid Build Coastguard Worker     /**
694*0e209d39SAndroid Build Coastguard Worker       * Construct a RegexMatcher for a regular expression.
695*0e209d39SAndroid Build Coastguard Worker       * This is a convenience constructor that avoids the need to explicitly create
696*0e209d39SAndroid Build Coastguard Worker       * a RegexPattern object.  Note that if several RegexMatchers need to be
697*0e209d39SAndroid Build Coastguard Worker       * created for the same expression, it will be more efficient to
698*0e209d39SAndroid Build Coastguard Worker       * separately create and cache a RegexPattern object, and use
699*0e209d39SAndroid Build Coastguard Worker       * its matcher() method to create the RegexMatcher objects.
700*0e209d39SAndroid Build Coastguard Worker       *
701*0e209d39SAndroid Build Coastguard Worker       * The matcher will retain a reference to the supplied input string, and all regexp
702*0e209d39SAndroid Build Coastguard Worker       * pattern matching operations happen directly on the original string.  It is
703*0e209d39SAndroid Build Coastguard Worker       * critical that the string not be altered or deleted before use by the regular
704*0e209d39SAndroid Build Coastguard Worker       * expression operations is complete.
705*0e209d39SAndroid Build Coastguard Worker       *
706*0e209d39SAndroid Build Coastguard Worker       *  @param regexp The regular expression to be compiled.
707*0e209d39SAndroid Build Coastguard Worker       *  @param input  The string to match.  The matcher retains a reference to the
708*0e209d39SAndroid Build Coastguard Worker       *                caller's string; no copy is made.
709*0e209d39SAndroid Build Coastguard Worker       *  @param flags  #URegexpFlag options, such as #UREGEX_CASE_INSENSITIVE.
710*0e209d39SAndroid Build Coastguard Worker       *  @param status Any errors are reported by setting this UErrorCode variable.
711*0e209d39SAndroid Build Coastguard Worker       *  @stable ICU 2.6
712*0e209d39SAndroid Build Coastguard Worker       */
713*0e209d39SAndroid Build Coastguard Worker     RegexMatcher(const UnicodeString &regexp, const UnicodeString &input,
714*0e209d39SAndroid Build Coastguard Worker         uint32_t flags, UErrorCode &status);
715*0e209d39SAndroid Build Coastguard Worker 
716*0e209d39SAndroid Build Coastguard Worker     /**
717*0e209d39SAndroid Build Coastguard Worker       * Construct a RegexMatcher for a regular expression.
718*0e209d39SAndroid Build Coastguard Worker       * This is a convenience constructor that avoids the need to explicitly create
719*0e209d39SAndroid Build Coastguard Worker       * a RegexPattern object.  Note that if several RegexMatchers need to be
720*0e209d39SAndroid Build Coastguard Worker       * created for the same expression, it will be more efficient to
721*0e209d39SAndroid Build Coastguard Worker       * separately create and cache a RegexPattern object, and use
722*0e209d39SAndroid Build Coastguard Worker       * its matcher() method to create the RegexMatcher objects.
723*0e209d39SAndroid Build Coastguard Worker       *
724*0e209d39SAndroid Build Coastguard Worker       * The matcher will make a shallow clone of the supplied input text, and all regexp
725*0e209d39SAndroid Build Coastguard Worker       * pattern matching operations happen on this clone.  While read-only operations on
726*0e209d39SAndroid Build Coastguard Worker       * the supplied text are permitted, it is critical that the underlying string not be
727*0e209d39SAndroid Build Coastguard Worker       * altered or deleted before use by the regular expression operations is complete.
728*0e209d39SAndroid Build Coastguard Worker       *
729*0e209d39SAndroid Build Coastguard Worker       *  @param regexp The regular expression to be compiled, as a UText.
730*0e209d39SAndroid Build Coastguard Worker       *  @param input  The text to match.  The matcher retains a shallow clone of the text.
731*0e209d39SAndroid Build Coastguard Worker       *  @param flags  #URegexpFlag options, such as #UREGEX_CASE_INSENSITIVE.
732*0e209d39SAndroid Build Coastguard Worker       *  @param status Any errors are reported by setting this UErrorCode variable.
733*0e209d39SAndroid Build Coastguard Worker       *
734*0e209d39SAndroid Build Coastguard Worker       *  @stable ICU 4.6
735*0e209d39SAndroid Build Coastguard Worker       */
736*0e209d39SAndroid Build Coastguard Worker     RegexMatcher(UText *regexp, UText *input,
737*0e209d39SAndroid Build Coastguard Worker         uint32_t flags, UErrorCode &status);
738*0e209d39SAndroid Build Coastguard Worker 
739*0e209d39SAndroid Build Coastguard Worker private:
740*0e209d39SAndroid Build Coastguard Worker     /**
741*0e209d39SAndroid Build Coastguard Worker      * Cause a compilation error if an application accidentally attempts to
742*0e209d39SAndroid Build Coastguard Worker      *   create a matcher with a (char16_t *) string as input rather than
743*0e209d39SAndroid Build Coastguard Worker      *   a UnicodeString.    Avoids a dangling reference to a temporary string.
744*0e209d39SAndroid Build Coastguard Worker      *
745*0e209d39SAndroid Build Coastguard Worker      * To efficiently work with char16_t *strings, wrap the data in a UnicodeString
746*0e209d39SAndroid Build Coastguard Worker      * using one of the aliasing constructors, such as
747*0e209d39SAndroid Build Coastguard Worker      * `UnicodeString(UBool isTerminated, const char16_t *text, int32_t textLength);`
748*0e209d39SAndroid Build Coastguard Worker      * or in a UText, using
749*0e209d39SAndroid Build Coastguard Worker      * `utext_openUChars(UText *ut, const char16_t *text, int64_t textLength, UErrorCode *status);`
750*0e209d39SAndroid Build Coastguard Worker      */
751*0e209d39SAndroid Build Coastguard Worker     RegexMatcher(const UnicodeString &regexp, const char16_t *input,
752*0e209d39SAndroid Build Coastguard Worker         uint32_t flags, UErrorCode &status) = delete;
753*0e209d39SAndroid Build Coastguard Worker public:
754*0e209d39SAndroid Build Coastguard Worker 
755*0e209d39SAndroid Build Coastguard Worker 
756*0e209d39SAndroid Build Coastguard Worker    /**
757*0e209d39SAndroid Build Coastguard Worker     *   Destructor.
758*0e209d39SAndroid Build Coastguard Worker     *
759*0e209d39SAndroid Build Coastguard Worker     *  @stable ICU 2.4
760*0e209d39SAndroid Build Coastguard Worker     */
761*0e209d39SAndroid Build Coastguard Worker     virtual ~RegexMatcher();
762*0e209d39SAndroid Build Coastguard Worker 
763*0e209d39SAndroid Build Coastguard Worker 
764*0e209d39SAndroid Build Coastguard Worker    /**
765*0e209d39SAndroid Build Coastguard Worker     *   Attempts to match the entire input region against the pattern.
766*0e209d39SAndroid Build Coastguard Worker     *    @param   status     A reference to a UErrorCode to receive any errors.
767*0e209d39SAndroid Build Coastguard Worker     *    @return true if there is a match
768*0e209d39SAndroid Build Coastguard Worker     *    @stable ICU 2.4
769*0e209d39SAndroid Build Coastguard Worker     */
770*0e209d39SAndroid Build Coastguard Worker     virtual UBool matches(UErrorCode &status);
771*0e209d39SAndroid Build Coastguard Worker 
772*0e209d39SAndroid Build Coastguard Worker 
773*0e209d39SAndroid Build Coastguard Worker    /**
774*0e209d39SAndroid Build Coastguard Worker     *   Resets the matcher, then attempts to match the input beginning
775*0e209d39SAndroid Build Coastguard Worker     *   at the specified startIndex, and extending to the end of the input.
776*0e209d39SAndroid Build Coastguard Worker     *   The input region is reset to include the entire input string.
777*0e209d39SAndroid Build Coastguard Worker     *   A successful match must extend to the end of the input.
778*0e209d39SAndroid Build Coastguard Worker     *    @param   startIndex The input string (native) index at which to begin matching.
779*0e209d39SAndroid Build Coastguard Worker     *    @param   status     A reference to a UErrorCode to receive any errors.
780*0e209d39SAndroid Build Coastguard Worker     *    @return true if there is a match
781*0e209d39SAndroid Build Coastguard Worker     *    @stable ICU 2.8
782*0e209d39SAndroid Build Coastguard Worker     */
783*0e209d39SAndroid Build Coastguard Worker     virtual UBool matches(int64_t startIndex, UErrorCode &status);
784*0e209d39SAndroid Build Coastguard Worker 
785*0e209d39SAndroid Build Coastguard Worker 
786*0e209d39SAndroid Build Coastguard Worker    /**
787*0e209d39SAndroid Build Coastguard Worker     *   Attempts to match the input string, starting from the beginning of the region,
788*0e209d39SAndroid Build Coastguard Worker     *   against the pattern.  Like the matches() method, this function
789*0e209d39SAndroid Build Coastguard Worker     *   always starts at the beginning of the input region;
790*0e209d39SAndroid Build Coastguard Worker     *   unlike that function, it does not require that the entire region be matched.
791*0e209d39SAndroid Build Coastguard Worker     *
792*0e209d39SAndroid Build Coastguard Worker     *   If the match succeeds then more information can be obtained via the start(),
793*0e209d39SAndroid Build Coastguard Worker     *   end(), and group() functions.
794*0e209d39SAndroid Build Coastguard Worker     *
795*0e209d39SAndroid Build Coastguard Worker     *    @param   status     A reference to a UErrorCode to receive any errors.
796*0e209d39SAndroid Build Coastguard Worker     *    @return  true if there is a match at the start of the input string.
797*0e209d39SAndroid Build Coastguard Worker     *    @stable ICU 2.4
798*0e209d39SAndroid Build Coastguard Worker     */
799*0e209d39SAndroid Build Coastguard Worker     virtual UBool lookingAt(UErrorCode &status);
800*0e209d39SAndroid Build Coastguard Worker 
801*0e209d39SAndroid Build Coastguard Worker 
802*0e209d39SAndroid Build Coastguard Worker   /**
803*0e209d39SAndroid Build Coastguard Worker     *   Attempts to match the input string, starting from the specified index, against the pattern.
804*0e209d39SAndroid Build Coastguard Worker     *   The match may be of any length, and is not required to extend to the end
805*0e209d39SAndroid Build Coastguard Worker     *   of the input string.  Contrast with matches().
806*0e209d39SAndroid Build Coastguard Worker     *
807*0e209d39SAndroid Build Coastguard Worker     *   If the match succeeds then more information can be obtained via the start(),
808*0e209d39SAndroid Build Coastguard Worker     *   end(), and group() functions.
809*0e209d39SAndroid Build Coastguard Worker     *
810*0e209d39SAndroid Build Coastguard Worker     *    @param   startIndex The input string (native) index at which to begin matching.
811*0e209d39SAndroid Build Coastguard Worker     *    @param   status     A reference to a UErrorCode to receive any errors.
812*0e209d39SAndroid Build Coastguard Worker     *    @return  true if there is a match.
813*0e209d39SAndroid Build Coastguard Worker     *    @stable ICU 2.8
814*0e209d39SAndroid Build Coastguard Worker     */
815*0e209d39SAndroid Build Coastguard Worker     virtual UBool lookingAt(int64_t startIndex, UErrorCode &status);
816*0e209d39SAndroid Build Coastguard Worker 
817*0e209d39SAndroid Build Coastguard Worker 
818*0e209d39SAndroid Build Coastguard Worker    /**
819*0e209d39SAndroid Build Coastguard Worker     *  Find the next pattern match in the input string.
820*0e209d39SAndroid Build Coastguard Worker     *  The find begins searching the input at the location following the end of
821*0e209d39SAndroid Build Coastguard Worker     *  the previous match, or at the start of the string if there is no previous match.
822*0e209d39SAndroid Build Coastguard Worker     *  If a match is found, `start()`, `end()` and `group()`
823*0e209d39SAndroid Build Coastguard Worker     *  will provide more information regarding the match.
824*0e209d39SAndroid Build Coastguard Worker     *  Note that if the input string is changed by the application,
825*0e209d39SAndroid Build Coastguard Worker     *     use find(startPos, status) instead of find(), because the saved starting
826*0e209d39SAndroid Build Coastguard Worker     *     position may not be valid with the altered input string.
827*0e209d39SAndroid Build Coastguard Worker     *  @return  true if a match is found.
828*0e209d39SAndroid Build Coastguard Worker     *  @stable ICU 2.4
829*0e209d39SAndroid Build Coastguard Worker     */
830*0e209d39SAndroid Build Coastguard Worker     virtual UBool find();
831*0e209d39SAndroid Build Coastguard Worker 
832*0e209d39SAndroid Build Coastguard Worker 
833*0e209d39SAndroid Build Coastguard Worker    /**
834*0e209d39SAndroid Build Coastguard Worker     *  Find the next pattern match in the input string.
835*0e209d39SAndroid Build Coastguard Worker     *  The find begins searching the input at the location following the end of
836*0e209d39SAndroid Build Coastguard Worker     *  the previous match, or at the start of the string if there is no previous match.
837*0e209d39SAndroid Build Coastguard Worker     *  If a match is found, `start()`, `end()` and `group()`
838*0e209d39SAndroid Build Coastguard Worker     *  will provide more information regarding the match.
839*0e209d39SAndroid Build Coastguard Worker     *
840*0e209d39SAndroid Build Coastguard Worker     *  Note that if the input string is changed by the application,
841*0e209d39SAndroid Build Coastguard Worker     *  use find(startPos, status) instead of find(), because the saved starting
842*0e209d39SAndroid Build Coastguard Worker     *  position may not be valid with the altered input string.
843*0e209d39SAndroid Build Coastguard Worker     *  @param   status  A reference to a UErrorCode to receive any errors.
844*0e209d39SAndroid Build Coastguard Worker     *  @return  true if a match is found.
845*0e209d39SAndroid Build Coastguard Worker     * @stable ICU 55
846*0e209d39SAndroid Build Coastguard Worker     */
847*0e209d39SAndroid Build Coastguard Worker     virtual UBool find(UErrorCode &status);
848*0e209d39SAndroid Build Coastguard Worker 
849*0e209d39SAndroid Build Coastguard Worker    /**
850*0e209d39SAndroid Build Coastguard Worker     *   Resets this RegexMatcher and then attempts to find the next substring of the
851*0e209d39SAndroid Build Coastguard Worker     *   input string that matches the pattern, starting at the specified index.
852*0e209d39SAndroid Build Coastguard Worker     *
853*0e209d39SAndroid Build Coastguard Worker     *   @param   start     The (native) index in the input string to begin the search.
854*0e209d39SAndroid Build Coastguard Worker     *   @param   status    A reference to a UErrorCode to receive any errors.
855*0e209d39SAndroid Build Coastguard Worker     *   @return  true if a match is found.
856*0e209d39SAndroid Build Coastguard Worker     *   @stable ICU 2.4
857*0e209d39SAndroid Build Coastguard Worker     */
858*0e209d39SAndroid Build Coastguard Worker     virtual UBool find(int64_t start, UErrorCode &status);
859*0e209d39SAndroid Build Coastguard Worker 
860*0e209d39SAndroid Build Coastguard Worker 
861*0e209d39SAndroid Build Coastguard Worker    /**
862*0e209d39SAndroid Build Coastguard Worker     *   Returns a string containing the text matched by the previous match.
863*0e209d39SAndroid Build Coastguard Worker     *   If the pattern can match an empty string, an empty string may be returned.
864*0e209d39SAndroid Build Coastguard Worker     *   @param   status      A reference to a UErrorCode to receive any errors.
865*0e209d39SAndroid Build Coastguard Worker     *                        Possible errors are  U_REGEX_INVALID_STATE if no match
866*0e209d39SAndroid Build Coastguard Worker     *                        has been attempted or the last match failed.
867*0e209d39SAndroid Build Coastguard Worker     *   @return  a string containing the matched input text.
868*0e209d39SAndroid Build Coastguard Worker     *   @stable ICU 2.4
869*0e209d39SAndroid Build Coastguard Worker     */
870*0e209d39SAndroid Build Coastguard Worker     virtual UnicodeString group(UErrorCode &status) const;
871*0e209d39SAndroid Build Coastguard Worker 
872*0e209d39SAndroid Build Coastguard Worker 
873*0e209d39SAndroid Build Coastguard Worker    /**
874*0e209d39SAndroid Build Coastguard Worker     *    Returns a string containing the text captured by the given group
875*0e209d39SAndroid Build Coastguard Worker     *    during the previous match operation.  Group(0) is the entire match.
876*0e209d39SAndroid Build Coastguard Worker     *
877*0e209d39SAndroid Build Coastguard Worker     *    A zero length string is returned both for capture groups that did not
878*0e209d39SAndroid Build Coastguard Worker     *    participate in the match and for actual zero length matches.
879*0e209d39SAndroid Build Coastguard Worker     *    To distinguish between these two cases use the function start(),
880*0e209d39SAndroid Build Coastguard Worker     *    which returns -1 for non-participating groups.
881*0e209d39SAndroid Build Coastguard Worker     *
882*0e209d39SAndroid Build Coastguard Worker     *    @param groupNum the capture group number
883*0e209d39SAndroid Build Coastguard Worker     *    @param   status     A reference to a UErrorCode to receive any errors.
884*0e209d39SAndroid Build Coastguard Worker     *                        Possible errors are  U_REGEX_INVALID_STATE if no match
885*0e209d39SAndroid Build Coastguard Worker     *                        has been attempted or the last match failed and
886*0e209d39SAndroid Build Coastguard Worker     *                        U_INDEX_OUTOFBOUNDS_ERROR for a bad capture group number.
887*0e209d39SAndroid Build Coastguard Worker     *    @return the captured text
888*0e209d39SAndroid Build Coastguard Worker     *    @stable ICU 2.4
889*0e209d39SAndroid Build Coastguard Worker     */
890*0e209d39SAndroid Build Coastguard Worker     virtual UnicodeString group(int32_t groupNum, UErrorCode &status) const;
891*0e209d39SAndroid Build Coastguard Worker 
892*0e209d39SAndroid Build Coastguard Worker    /**
893*0e209d39SAndroid Build Coastguard Worker     *   Returns the number of capturing groups in this matcher's pattern.
894*0e209d39SAndroid Build Coastguard Worker     *   @return the number of capture groups
895*0e209d39SAndroid Build Coastguard Worker     *   @stable ICU 2.4
896*0e209d39SAndroid Build Coastguard Worker     */
897*0e209d39SAndroid Build Coastguard Worker     virtual int32_t groupCount() const;
898*0e209d39SAndroid Build Coastguard Worker 
899*0e209d39SAndroid Build Coastguard Worker 
900*0e209d39SAndroid Build Coastguard Worker    /**
901*0e209d39SAndroid Build Coastguard Worker     *   Returns a shallow clone of the entire live input string with the UText current native index
902*0e209d39SAndroid Build Coastguard Worker     *   set to the beginning of the requested group.
903*0e209d39SAndroid Build Coastguard Worker     *
904*0e209d39SAndroid Build Coastguard Worker     *   @param   dest        The UText into which the input should be cloned, or nullptr to create a new UText
905*0e209d39SAndroid Build Coastguard Worker     *   @param   group_len   A reference to receive the length of the desired capture group
906*0e209d39SAndroid Build Coastguard Worker     *   @param   status      A reference to a UErrorCode to receive any errors.
907*0e209d39SAndroid Build Coastguard Worker     *                        Possible errors are  U_REGEX_INVALID_STATE if no match
908*0e209d39SAndroid Build Coastguard Worker     *                        has been attempted or the last match failed and
909*0e209d39SAndroid Build Coastguard Worker     *                        U_INDEX_OUTOFBOUNDS_ERROR for a bad capture group number.
910*0e209d39SAndroid Build Coastguard Worker     *   @return dest if non-nullptr, a shallow copy of the input text otherwise
911*0e209d39SAndroid Build Coastguard Worker     *
912*0e209d39SAndroid Build Coastguard Worker     *   @stable ICU 4.6
913*0e209d39SAndroid Build Coastguard Worker     */
914*0e209d39SAndroid Build Coastguard Worker     virtual UText *group(UText *dest, int64_t &group_len, UErrorCode &status) const;
915*0e209d39SAndroid Build Coastguard Worker 
916*0e209d39SAndroid Build Coastguard Worker    /**
917*0e209d39SAndroid Build Coastguard Worker     *   Returns a shallow clone of the entire live input string with the UText current native index
918*0e209d39SAndroid Build Coastguard Worker     *   set to the beginning of the requested group.
919*0e209d39SAndroid Build Coastguard Worker     *
920*0e209d39SAndroid Build Coastguard Worker     *   A group length of zero is returned both for capture groups that did not
921*0e209d39SAndroid Build Coastguard Worker     *   participate in the match and for actual zero length matches.
922*0e209d39SAndroid Build Coastguard Worker     *   To distinguish between these two cases use the function start(),
923*0e209d39SAndroid Build Coastguard Worker     *   which returns -1 for non-participating groups.
924*0e209d39SAndroid Build Coastguard Worker     *
925*0e209d39SAndroid Build Coastguard Worker     *   @param   groupNum   The capture group number.
926*0e209d39SAndroid Build Coastguard Worker     *   @param   dest        The UText into which the input should be cloned, or nullptr to create a new UText.
927*0e209d39SAndroid Build Coastguard Worker     *   @param   group_len   A reference to receive the length of the desired capture group
928*0e209d39SAndroid Build Coastguard Worker     *   @param   status      A reference to a UErrorCode to receive any errors.
929*0e209d39SAndroid Build Coastguard Worker     *                        Possible errors are  U_REGEX_INVALID_STATE if no match
930*0e209d39SAndroid Build Coastguard Worker     *                        has been attempted or the last match failed and
931*0e209d39SAndroid Build Coastguard Worker     *                        U_INDEX_OUTOFBOUNDS_ERROR for a bad capture group number.
932*0e209d39SAndroid Build Coastguard Worker     *   @return dest if non-nullptr, a shallow copy of the input text otherwise
933*0e209d39SAndroid Build Coastguard Worker     *
934*0e209d39SAndroid Build Coastguard Worker     *   @stable ICU 4.6
935*0e209d39SAndroid Build Coastguard Worker     */
936*0e209d39SAndroid Build Coastguard Worker     virtual UText *group(int32_t groupNum, UText *dest, int64_t &group_len, UErrorCode &status) const;
937*0e209d39SAndroid Build Coastguard Worker 
938*0e209d39SAndroid Build Coastguard Worker    /**
939*0e209d39SAndroid Build Coastguard Worker     *   Returns the index in the input string of the start of the text matched
940*0e209d39SAndroid Build Coastguard Worker     *   during the previous match operation.
941*0e209d39SAndroid Build Coastguard Worker     *    @param   status      a reference to a UErrorCode to receive any errors.
942*0e209d39SAndroid Build Coastguard Worker     *    @return              The (native) position in the input string of the start of the last match.
943*0e209d39SAndroid Build Coastguard Worker     *    @stable ICU 2.4
944*0e209d39SAndroid Build Coastguard Worker     */
945*0e209d39SAndroid Build Coastguard Worker     virtual int32_t start(UErrorCode &status) const;
946*0e209d39SAndroid Build Coastguard Worker 
947*0e209d39SAndroid Build Coastguard Worker    /**
948*0e209d39SAndroid Build Coastguard Worker     *   Returns the index in the input string of the start of the text matched
949*0e209d39SAndroid Build Coastguard Worker     *   during the previous match operation.
950*0e209d39SAndroid Build Coastguard Worker     *    @param   status      a reference to a UErrorCode to receive any errors.
951*0e209d39SAndroid Build Coastguard Worker     *    @return              The (native) position in the input string of the start of the last match.
952*0e209d39SAndroid Build Coastguard Worker     *   @stable ICU 4.6
953*0e209d39SAndroid Build Coastguard Worker     */
954*0e209d39SAndroid Build Coastguard Worker     virtual int64_t start64(UErrorCode &status) const;
955*0e209d39SAndroid Build Coastguard Worker 
956*0e209d39SAndroid Build Coastguard Worker 
957*0e209d39SAndroid Build Coastguard Worker    /**
958*0e209d39SAndroid Build Coastguard Worker     *   Returns the index in the input string of the start of the text matched by the
959*0e209d39SAndroid Build Coastguard Worker     *    specified capture group during the previous match operation.  Return -1 if
960*0e209d39SAndroid Build Coastguard Worker     *    the capture group exists in the pattern, but was not part of the last match.
961*0e209d39SAndroid Build Coastguard Worker     *
962*0e209d39SAndroid Build Coastguard Worker     *    @param  group       the capture group number
963*0e209d39SAndroid Build Coastguard Worker     *    @param  status      A reference to a UErrorCode to receive any errors.  Possible
964*0e209d39SAndroid Build Coastguard Worker     *                        errors are  U_REGEX_INVALID_STATE if no match has been
965*0e209d39SAndroid Build Coastguard Worker     *                        attempted or the last match failed, and
966*0e209d39SAndroid Build Coastguard Worker     *                        U_INDEX_OUTOFBOUNDS_ERROR for a bad capture group number
967*0e209d39SAndroid Build Coastguard Worker     *    @return the (native) start position of substring matched by the specified group.
968*0e209d39SAndroid Build Coastguard Worker     *    @stable ICU 2.4
969*0e209d39SAndroid Build Coastguard Worker     */
970*0e209d39SAndroid Build Coastguard Worker     virtual int32_t start(int32_t group, UErrorCode &status) const;
971*0e209d39SAndroid Build Coastguard Worker 
972*0e209d39SAndroid Build Coastguard Worker    /**
973*0e209d39SAndroid Build Coastguard Worker     *   Returns the index in the input string of the start of the text matched by the
974*0e209d39SAndroid Build Coastguard Worker     *    specified capture group during the previous match operation.  Return -1 if
975*0e209d39SAndroid Build Coastguard Worker     *    the capture group exists in the pattern, but was not part of the last match.
976*0e209d39SAndroid Build Coastguard Worker     *
977*0e209d39SAndroid Build Coastguard Worker     *    @param  group       the capture group number.
978*0e209d39SAndroid Build Coastguard Worker     *    @param  status      A reference to a UErrorCode to receive any errors.  Possible
979*0e209d39SAndroid Build Coastguard Worker     *                        errors are  U_REGEX_INVALID_STATE if no match has been
980*0e209d39SAndroid Build Coastguard Worker     *                        attempted or the last match failed, and
981*0e209d39SAndroid Build Coastguard Worker     *                        U_INDEX_OUTOFBOUNDS_ERROR for a bad capture group number.
982*0e209d39SAndroid Build Coastguard Worker     *    @return the (native) start position of substring matched by the specified group.
983*0e209d39SAndroid Build Coastguard Worker     *    @stable ICU 4.6
984*0e209d39SAndroid Build Coastguard Worker     */
985*0e209d39SAndroid Build Coastguard Worker     virtual int64_t start64(int32_t group, UErrorCode &status) const;
986*0e209d39SAndroid Build Coastguard Worker 
987*0e209d39SAndroid Build Coastguard Worker    /**
988*0e209d39SAndroid Build Coastguard Worker     *    Returns the index in the input string of the first character following the
989*0e209d39SAndroid Build Coastguard Worker     *    text matched during the previous match operation.
990*0e209d39SAndroid Build Coastguard Worker     *
991*0e209d39SAndroid Build Coastguard Worker     *   @param   status      A reference to a UErrorCode to receive any errors.  Possible
992*0e209d39SAndroid Build Coastguard Worker     *                        errors are  U_REGEX_INVALID_STATE if no match has been
993*0e209d39SAndroid Build Coastguard Worker     *                        attempted or the last match failed.
994*0e209d39SAndroid Build Coastguard Worker     *    @return the index of the last character matched, plus one.
995*0e209d39SAndroid Build Coastguard Worker     *                        The index value returned is a native index, corresponding to
996*0e209d39SAndroid Build Coastguard Worker     *                        code units for the underlying encoding type, for example,
997*0e209d39SAndroid Build Coastguard Worker     *                        a byte index for UTF-8.
998*0e209d39SAndroid Build Coastguard Worker     *   @stable ICU 2.4
999*0e209d39SAndroid Build Coastguard Worker     */
1000*0e209d39SAndroid Build Coastguard Worker     virtual int32_t end(UErrorCode &status) const;
1001*0e209d39SAndroid Build Coastguard Worker 
1002*0e209d39SAndroid Build Coastguard Worker    /**
1003*0e209d39SAndroid Build Coastguard Worker     *    Returns the index in the input string of the first character following the
1004*0e209d39SAndroid Build Coastguard Worker     *    text matched during the previous match operation.
1005*0e209d39SAndroid Build Coastguard Worker     *
1006*0e209d39SAndroid Build Coastguard Worker     *   @param   status      A reference to a UErrorCode to receive any errors.  Possible
1007*0e209d39SAndroid Build Coastguard Worker     *                        errors are  U_REGEX_INVALID_STATE if no match has been
1008*0e209d39SAndroid Build Coastguard Worker     *                        attempted or the last match failed.
1009*0e209d39SAndroid Build Coastguard Worker     *    @return the index of the last character matched, plus one.
1010*0e209d39SAndroid Build Coastguard Worker     *                        The index value returned is a native index, corresponding to
1011*0e209d39SAndroid Build Coastguard Worker     *                        code units for the underlying encoding type, for example,
1012*0e209d39SAndroid Build Coastguard Worker     *                        a byte index for UTF-8.
1013*0e209d39SAndroid Build Coastguard Worker     *   @stable ICU 4.6
1014*0e209d39SAndroid Build Coastguard Worker     */
1015*0e209d39SAndroid Build Coastguard Worker     virtual int64_t end64(UErrorCode &status) const;
1016*0e209d39SAndroid Build Coastguard Worker 
1017*0e209d39SAndroid Build Coastguard Worker 
1018*0e209d39SAndroid Build Coastguard Worker    /**
1019*0e209d39SAndroid Build Coastguard Worker     *    Returns the index in the input string of the character following the
1020*0e209d39SAndroid Build Coastguard Worker     *    text matched by the specified capture group during the previous match operation.
1021*0e209d39SAndroid Build Coastguard Worker     *
1022*0e209d39SAndroid Build Coastguard Worker     *    @param group  the capture group number
1023*0e209d39SAndroid Build Coastguard Worker     *    @param   status      A reference to a UErrorCode to receive any errors.  Possible
1024*0e209d39SAndroid Build Coastguard Worker     *                        errors are  U_REGEX_INVALID_STATE if no match has been
1025*0e209d39SAndroid Build Coastguard Worker     *                        attempted or the last match failed and
1026*0e209d39SAndroid Build Coastguard Worker     *                        U_INDEX_OUTOFBOUNDS_ERROR for a bad capture group number
1027*0e209d39SAndroid Build Coastguard Worker     *    @return  the index of the first character following the text
1028*0e209d39SAndroid Build Coastguard Worker     *              captured by the specified group during the previous match operation.
1029*0e209d39SAndroid Build Coastguard Worker     *              Return -1 if the capture group exists in the pattern but was not part of the match.
1030*0e209d39SAndroid Build Coastguard Worker     *              The index value returned is a native index, corresponding to
1031*0e209d39SAndroid Build Coastguard Worker     *              code units for the underlying encoding type, for example,
1032*0e209d39SAndroid Build Coastguard Worker     *              a byte index for UTF-8.
1033*0e209d39SAndroid Build Coastguard Worker     *    @stable ICU 2.4
1034*0e209d39SAndroid Build Coastguard Worker     */
1035*0e209d39SAndroid Build Coastguard Worker     virtual int32_t end(int32_t group, UErrorCode &status) const;
1036*0e209d39SAndroid Build Coastguard Worker 
1037*0e209d39SAndroid Build Coastguard Worker    /**
1038*0e209d39SAndroid Build Coastguard Worker     *    Returns the index in the input string of the character following the
1039*0e209d39SAndroid Build Coastguard Worker     *    text matched by the specified capture group during the previous match operation.
1040*0e209d39SAndroid Build Coastguard Worker     *
1041*0e209d39SAndroid Build Coastguard Worker     *    @param group  the capture group number
1042*0e209d39SAndroid Build Coastguard Worker     *    @param   status      A reference to a UErrorCode to receive any errors.  Possible
1043*0e209d39SAndroid Build Coastguard Worker     *                        errors are  U_REGEX_INVALID_STATE if no match has been
1044*0e209d39SAndroid Build Coastguard Worker     *                        attempted or the last match failed and
1045*0e209d39SAndroid Build Coastguard Worker     *                        U_INDEX_OUTOFBOUNDS_ERROR for a bad capture group number
1046*0e209d39SAndroid Build Coastguard Worker     *    @return  the index of the first character following the text
1047*0e209d39SAndroid Build Coastguard Worker     *              captured by the specified group during the previous match operation.
1048*0e209d39SAndroid Build Coastguard Worker     *              Return -1 if the capture group exists in the pattern but was not part of the match.
1049*0e209d39SAndroid Build Coastguard Worker     *              The index value returned is a native index, corresponding to
1050*0e209d39SAndroid Build Coastguard Worker     *              code units for the underlying encoding type, for example,
1051*0e209d39SAndroid Build Coastguard Worker     *              a byte index for UTF-8.
1052*0e209d39SAndroid Build Coastguard Worker     *   @stable ICU 4.6
1053*0e209d39SAndroid Build Coastguard Worker     */
1054*0e209d39SAndroid Build Coastguard Worker     virtual int64_t end64(int32_t group, UErrorCode &status) const;
1055*0e209d39SAndroid Build Coastguard Worker 
1056*0e209d39SAndroid Build Coastguard Worker    /**
1057*0e209d39SAndroid Build Coastguard Worker     *   Resets this matcher.  The effect is to remove any memory of previous matches,
1058*0e209d39SAndroid Build Coastguard Worker     *       and to cause subsequent find() operations to begin at the beginning of
1059*0e209d39SAndroid Build Coastguard Worker     *       the input string.
1060*0e209d39SAndroid Build Coastguard Worker     *
1061*0e209d39SAndroid Build Coastguard Worker     *   @return this RegexMatcher.
1062*0e209d39SAndroid Build Coastguard Worker     *   @stable ICU 2.4
1063*0e209d39SAndroid Build Coastguard Worker     */
1064*0e209d39SAndroid Build Coastguard Worker     virtual RegexMatcher &reset();
1065*0e209d39SAndroid Build Coastguard Worker 
1066*0e209d39SAndroid Build Coastguard Worker 
1067*0e209d39SAndroid Build Coastguard Worker    /**
1068*0e209d39SAndroid Build Coastguard Worker     *   Resets this matcher, and sets the current input position.
1069*0e209d39SAndroid Build Coastguard Worker     *   The effect is to remove any memory of previous matches,
1070*0e209d39SAndroid Build Coastguard Worker     *       and to cause subsequent find() operations to begin at
1071*0e209d39SAndroid Build Coastguard Worker     *       the specified (native) position in the input string.
1072*0e209d39SAndroid Build Coastguard Worker     *
1073*0e209d39SAndroid Build Coastguard Worker     *   The matcher's region is reset to its default, which is the entire
1074*0e209d39SAndroid Build Coastguard Worker     *   input string.
1075*0e209d39SAndroid Build Coastguard Worker     *
1076*0e209d39SAndroid Build Coastguard Worker     *   An alternative to this function is to set a match region
1077*0e209d39SAndroid Build Coastguard Worker     *   beginning at the desired index.
1078*0e209d39SAndroid Build Coastguard Worker     *
1079*0e209d39SAndroid Build Coastguard Worker     *   @return this RegexMatcher.
1080*0e209d39SAndroid Build Coastguard Worker     *   @stable ICU 2.8
1081*0e209d39SAndroid Build Coastguard Worker     */
1082*0e209d39SAndroid Build Coastguard Worker     virtual RegexMatcher &reset(int64_t index, UErrorCode &status);
1083*0e209d39SAndroid Build Coastguard Worker 
1084*0e209d39SAndroid Build Coastguard Worker 
1085*0e209d39SAndroid Build Coastguard Worker    /**
1086*0e209d39SAndroid Build Coastguard Worker     *   Resets this matcher with a new input string.  This allows instances of RegexMatcher
1087*0e209d39SAndroid Build Coastguard Worker     *     to be reused, which is more efficient than creating a new RegexMatcher for
1088*0e209d39SAndroid Build Coastguard Worker     *     each input string to be processed.
1089*0e209d39SAndroid Build Coastguard Worker     *   @param input The new string on which subsequent pattern matches will operate.
1090*0e209d39SAndroid Build Coastguard Worker     *                The matcher retains a reference to the caller's string, and operates
1091*0e209d39SAndroid Build Coastguard Worker     *                directly on that.  Ownership of the string remains with the caller.
1092*0e209d39SAndroid Build Coastguard Worker     *                Because no copy of the string is made, it is essential that the
1093*0e209d39SAndroid Build Coastguard Worker     *                caller not delete the string until after regexp operations on it
1094*0e209d39SAndroid Build Coastguard Worker     *                are done.
1095*0e209d39SAndroid Build Coastguard Worker     *                Note that while a reset on the matcher with an input string that is then
1096*0e209d39SAndroid Build Coastguard Worker     *                modified across/during matcher operations may be supported currently for UnicodeString,
1097*0e209d39SAndroid Build Coastguard Worker     *                this was not originally intended behavior, and support for this is not guaranteed
1098*0e209d39SAndroid Build Coastguard Worker     *                in upcoming versions of ICU.
1099*0e209d39SAndroid Build Coastguard Worker     *   @return this RegexMatcher.
1100*0e209d39SAndroid Build Coastguard Worker     *   @stable ICU 2.4
1101*0e209d39SAndroid Build Coastguard Worker     */
1102*0e209d39SAndroid Build Coastguard Worker     virtual RegexMatcher &reset(const UnicodeString &input);
1103*0e209d39SAndroid Build Coastguard Worker 
1104*0e209d39SAndroid Build Coastguard Worker 
1105*0e209d39SAndroid Build Coastguard Worker    /**
1106*0e209d39SAndroid Build Coastguard Worker     *   Resets this matcher with a new input text.  This allows instances of RegexMatcher
1107*0e209d39SAndroid Build Coastguard Worker     *     to be reused, which is more efficient than creating a new RegexMatcher for
1108*0e209d39SAndroid Build Coastguard Worker     *     each input text to be processed.
1109*0e209d39SAndroid Build Coastguard Worker     *   @param input The new text on which subsequent pattern matches will operate.
1110*0e209d39SAndroid Build Coastguard Worker     *                The matcher makes a shallow clone of the given text; ownership of the
1111*0e209d39SAndroid Build Coastguard Worker     *                original string remains with the caller. Because no deep copy of the
1112*0e209d39SAndroid Build Coastguard Worker     *                text is made, it is essential that the caller not modify the string
1113*0e209d39SAndroid Build Coastguard Worker     *                until after regexp operations on it are done.
1114*0e209d39SAndroid Build Coastguard Worker     *   @return this RegexMatcher.
1115*0e209d39SAndroid Build Coastguard Worker     *
1116*0e209d39SAndroid Build Coastguard Worker     *   @stable ICU 4.6
1117*0e209d39SAndroid Build Coastguard Worker     */
1118*0e209d39SAndroid Build Coastguard Worker     virtual RegexMatcher &reset(UText *input);
1119*0e209d39SAndroid Build Coastguard Worker 
1120*0e209d39SAndroid Build Coastguard Worker 
1121*0e209d39SAndroid Build Coastguard Worker   /**
1122*0e209d39SAndroid Build Coastguard Worker     *  Set the subject text string upon which the regular expression is looking for matches
1123*0e209d39SAndroid Build Coastguard Worker     *  without changing any other aspect of the matching state.
1124*0e209d39SAndroid Build Coastguard Worker     *  The new and previous text strings must have the same content.
1125*0e209d39SAndroid Build Coastguard Worker     *
1126*0e209d39SAndroid Build Coastguard Worker     *  This function is intended for use in environments where ICU is operating on
1127*0e209d39SAndroid Build Coastguard Worker     *  strings that may move around in memory.  It provides a mechanism for notifying
1128*0e209d39SAndroid Build Coastguard Worker     *  ICU that the string has been relocated, and providing a new UText to access the
1129*0e209d39SAndroid Build Coastguard Worker     *  string in its new position.
1130*0e209d39SAndroid Build Coastguard Worker     *
1131*0e209d39SAndroid Build Coastguard Worker     *  Note that the regular expression implementation never copies the underlying text
1132*0e209d39SAndroid Build Coastguard Worker     *  of a string being matched, but always operates directly on the original text
1133*0e209d39SAndroid Build Coastguard Worker     *  provided by the user. Refreshing simply drops the references to the old text
1134*0e209d39SAndroid Build Coastguard Worker     *  and replaces them with references to the new.
1135*0e209d39SAndroid Build Coastguard Worker     *
1136*0e209d39SAndroid Build Coastguard Worker     *  Caution:  this function is normally used only by very specialized, system-level code.
1137*0e209d39SAndroid Build Coastguard Worker     *  One example use case is with garbage collection that moves the text in memory.
1138*0e209d39SAndroid Build Coastguard Worker     *
1139*0e209d39SAndroid Build Coastguard Worker     * @param input      The new (moved) text string.
1140*0e209d39SAndroid Build Coastguard Worker     * @param status     Receives errors detected by this function.
1141*0e209d39SAndroid Build Coastguard Worker     * @return           this RegexMatcher.
1142*0e209d39SAndroid Build Coastguard Worker     *
1143*0e209d39SAndroid Build Coastguard Worker     * @stable ICU 4.8
1144*0e209d39SAndroid Build Coastguard Worker     */
1145*0e209d39SAndroid Build Coastguard Worker     virtual RegexMatcher &refreshInputText(UText *input, UErrorCode &status);
1146*0e209d39SAndroid Build Coastguard Worker 
1147*0e209d39SAndroid Build Coastguard Worker private:
1148*0e209d39SAndroid Build Coastguard Worker     /**
1149*0e209d39SAndroid Build Coastguard Worker      * Deleted overload: causes a compilation error if an application accidentally
1150*0e209d39SAndroid Build Coastguard Worker      *   attempts to reset a matcher with a (char16_t *) string as input rather than
1151*0e209d39SAndroid Build Coastguard Worker      *   a UnicodeString.    Avoids a dangling reference to a temporary string.
1152*0e209d39SAndroid Build Coastguard Worker      *
1153*0e209d39SAndroid Build Coastguard Worker      * To efficiently work with char16_t * strings, wrap the data in a UnicodeString
1154*0e209d39SAndroid Build Coastguard Worker      * using one of the aliasing constructors, such as
1155*0e209d39SAndroid Build Coastguard Worker      * `UnicodeString(UBool isTerminated, const char16_t *text, int32_t textLength);`
1156*0e209d39SAndroid Build Coastguard Worker      * or in a UText, using
1157*0e209d39SAndroid Build Coastguard Worker      * `utext_openUChars(UText *ut, const char16_t *text, int64_t textLength, UErrorCode *status);`
1158*0e209d39SAndroid Build Coastguard Worker      *
1159*0e209d39SAndroid Build Coastguard Worker      */
1160*0e209d39SAndroid Build Coastguard Worker     RegexMatcher &reset(const char16_t *input) = delete;
1161*0e209d39SAndroid Build Coastguard Worker public:
1162*0e209d39SAndroid Build Coastguard Worker 
1163*0e209d39SAndroid Build Coastguard Worker    /**
1164*0e209d39SAndroid Build Coastguard Worker     *   Returns the input string being matched.  Ownership of the string belongs to
1165*0e209d39SAndroid Build Coastguard Worker     *   the matcher; it should not be altered or deleted. This method will work even if the input
1166*0e209d39SAndroid Build Coastguard Worker     *   was originally supplied as a UText.
1167*0e209d39SAndroid Build Coastguard Worker     *   @return a const reference to the input string
1168*0e209d39SAndroid Build Coastguard Worker     *   @stable ICU 2.4
1169*0e209d39SAndroid Build Coastguard Worker     */
1170*0e209d39SAndroid Build Coastguard Worker     virtual const UnicodeString &input() const;
1171*0e209d39SAndroid Build Coastguard Worker 
1172*0e209d39SAndroid Build Coastguard Worker    /**
1173*0e209d39SAndroid Build Coastguard Worker     *   Returns the input text being matched.  This is the live input text; it should not be
1174*0e209d39SAndroid Build Coastguard Worker     *   altered or deleted. This method will work even if the input was originally supplied as
1175*0e209d39SAndroid Build Coastguard Worker     *   a UnicodeString.
1176*0e209d39SAndroid Build Coastguard Worker     *   @return a pointer to the live input text
1177*0e209d39SAndroid Build Coastguard Worker     *
1178*0e209d39SAndroid Build Coastguard Worker     *   @stable ICU 4.6
1179*0e209d39SAndroid Build Coastguard Worker     */
1180*0e209d39SAndroid Build Coastguard Worker     virtual UText *inputText() const;
1181*0e209d39SAndroid Build Coastguard Worker 
1182*0e209d39SAndroid Build Coastguard Worker    /**
1183*0e209d39SAndroid Build Coastguard Worker     *   Returns the input string being matched, either by copying it into the provided
1184*0e209d39SAndroid Build Coastguard Worker     *   UText parameter or by returning a shallow clone of the live input. Note that copying
1185*0e209d39SAndroid Build Coastguard Worker     *   the entire input may cause significant performance and memory issues.
1186*0e209d39SAndroid Build Coastguard Worker     *   @param dest The UText into which the input should be copied, or nullptr to create a new UText
1187*0e209d39SAndroid Build Coastguard Worker     *   @param status A reference to a UErrorCode to receive any errors.
1188*0e209d39SAndroid Build Coastguard Worker     *   @return dest if non-nullptr, a shallow copy of the input text otherwise
1189*0e209d39SAndroid Build Coastguard Worker     *
1190*0e209d39SAndroid Build Coastguard Worker     *   @stable ICU 4.6
1191*0e209d39SAndroid Build Coastguard Worker     */
1192*0e209d39SAndroid Build Coastguard Worker     virtual UText *getInput(UText *dest, UErrorCode &status) const;
1193*0e209d39SAndroid Build Coastguard Worker 
1194*0e209d39SAndroid Build Coastguard Worker 
1195*0e209d39SAndroid Build Coastguard Worker    /** Sets the limits of this matcher's region.
1196*0e209d39SAndroid Build Coastguard Worker      * The region is the part of the input string that will be searched to find a match.
1197*0e209d39SAndroid Build Coastguard Worker      * Invoking this method resets the matcher, and then sets the region to start
1198*0e209d39SAndroid Build Coastguard Worker      * at the index specified by the start parameter and end at the index specified
1199*0e209d39SAndroid Build Coastguard Worker      * by the limit parameter.
1200*0e209d39SAndroid Build Coastguard Worker      *
1201*0e209d39SAndroid Build Coastguard Worker      * Depending on the transparency and anchoring being used (see useTransparentBounds
1202*0e209d39SAndroid Build Coastguard Worker      * and useAnchoringBounds), certain constructs such as anchors may behave differently
1203*0e209d39SAndroid Build Coastguard Worker      * at or around the boundaries of the region.
1204*0e209d39SAndroid Build Coastguard Worker      *
1205*0e209d39SAndroid Build Coastguard Worker      * The function will fail if start is greater than limit, or if either index
1206*0e209d39SAndroid Build Coastguard Worker      *  is less than zero or greater than the length of the string being matched.
1207*0e209d39SAndroid Build Coastguard Worker      *
1208*0e209d39SAndroid Build Coastguard Worker      * @param start  The (native) index to begin searches at.
1209*0e209d39SAndroid Build Coastguard Worker      * @param limit  The index to end searches at (exclusive).
1210*0e209d39SAndroid Build Coastguard Worker      * @param status A reference to a UErrorCode to receive any errors.
1211*0e209d39SAndroid Build Coastguard Worker      * @stable ICU 4.0
1212*0e209d39SAndroid Build Coastguard Worker      */
1213*0e209d39SAndroid Build Coastguard Worker      virtual RegexMatcher &region(int64_t start, int64_t limit, UErrorCode &status);
1214*0e209d39SAndroid Build Coastguard Worker 
1215*0e209d39SAndroid Build Coastguard Worker    /**
1216*0e209d39SAndroid Build Coastguard Worker      * Identical to region(start, limit, status) but also allows a start position without
1217*0e209d39SAndroid Build Coastguard Worker      *  resetting the region state.
1218*0e209d39SAndroid Build Coastguard Worker      * @param regionStart The index at which the region starts.
1219*0e209d39SAndroid Build Coastguard Worker      * @param regionLimit The limit (exclusive) of the region.
1220*0e209d39SAndroid Build Coastguard Worker      * @param startIndex  The (native) index within the region bounds at which to begin searches.
1221*0e209d39SAndroid Build Coastguard Worker      * @param status A reference to a UErrorCode to receive any errors.
1222*0e209d39SAndroid Build Coastguard Worker      *                If startIndex is not within the specified region bounds,
1223*0e209d39SAndroid Build Coastguard Worker      *                U_INDEX_OUTOFBOUNDS_ERROR is returned.
1224*0e209d39SAndroid Build Coastguard Worker      * @stable ICU 4.6
1225*0e209d39SAndroid Build Coastguard Worker      */
1226*0e209d39SAndroid Build Coastguard Worker      virtual RegexMatcher &region(int64_t regionStart, int64_t regionLimit, int64_t startIndex, UErrorCode &status);
1227*0e209d39SAndroid Build Coastguard Worker 
1228*0e209d39SAndroid Build Coastguard Worker    /**
1229*0e209d39SAndroid Build Coastguard Worker      * Reports the start index of this matcher's region. The searches this matcher
1230*0e209d39SAndroid Build Coastguard Worker      * conducts are limited to finding matches within regionStart (inclusive) and
1231*0e209d39SAndroid Build Coastguard Worker      * regionEnd (exclusive).
1232*0e209d39SAndroid Build Coastguard Worker      *
1233*0e209d39SAndroid Build Coastguard Worker      * @return The starting (native) index of this matcher's region, as a 32-bit value.
1234*0e209d39SAndroid Build Coastguard Worker      * @stable ICU 4.0
1235*0e209d39SAndroid Build Coastguard Worker      */
1236*0e209d39SAndroid Build Coastguard Worker      virtual int32_t regionStart() const;
1237*0e209d39SAndroid Build Coastguard Worker 
1238*0e209d39SAndroid Build Coastguard Worker    /**
1239*0e209d39SAndroid Build Coastguard Worker      * Reports the start index of this matcher's region. The searches this matcher
1240*0e209d39SAndroid Build Coastguard Worker      * conducts are limited to finding matches within regionStart (inclusive) and
1241*0e209d39SAndroid Build Coastguard Worker      * regionEnd (exclusive).
1242*0e209d39SAndroid Build Coastguard Worker      *
1243*0e209d39SAndroid Build Coastguard Worker      * @return The starting (native) index of this matcher's region, as a 64-bit value.
1244*0e209d39SAndroid Build Coastguard Worker      * @stable ICU 4.6
1245*0e209d39SAndroid Build Coastguard Worker      */
1246*0e209d39SAndroid Build Coastguard Worker      virtual int64_t regionStart64() const;
1247*0e209d39SAndroid Build Coastguard Worker 
1248*0e209d39SAndroid Build Coastguard Worker 
1249*0e209d39SAndroid Build Coastguard Worker     /**
1250*0e209d39SAndroid Build Coastguard Worker       * Reports the end (limit) index (exclusive) of this matcher's region. The searches
1251*0e209d39SAndroid Build Coastguard Worker       * this matcher conducts are limited to finding matches within regionStart
1252*0e209d39SAndroid Build Coastguard Worker       * (inclusive) and regionEnd (exclusive).
1253*0e209d39SAndroid Build Coastguard Worker       *
1254*0e209d39SAndroid Build Coastguard Worker       * @return The ending point (native) of this matcher's region, as a 32-bit value.
1255*0e209d39SAndroid Build Coastguard Worker       * @stable ICU 4.0
1256*0e209d39SAndroid Build Coastguard Worker       */
1257*0e209d39SAndroid Build Coastguard Worker       virtual int32_t regionEnd() const;
1258*0e209d39SAndroid Build Coastguard Worker 
1259*0e209d39SAndroid Build Coastguard Worker    /**
1260*0e209d39SAndroid Build Coastguard Worker      * Reports the end (limit) index (exclusive) of this matcher's region. The searches
1261*0e209d39SAndroid Build Coastguard Worker      * this matcher conducts are limited to finding matches within regionStart
1262*0e209d39SAndroid Build Coastguard Worker      * (inclusive) and regionEnd (exclusive).
1263*0e209d39SAndroid Build Coastguard Worker      *
1264*0e209d39SAndroid Build Coastguard Worker      * @return The ending point (native) of this matcher's region, as a 64-bit value.
1265*0e209d39SAndroid Build Coastguard Worker      * @stable ICU 4.6
1266*0e209d39SAndroid Build Coastguard Worker      */
1267*0e209d39SAndroid Build Coastguard Worker       virtual int64_t regionEnd64() const;
1268*0e209d39SAndroid Build Coastguard Worker 
1269*0e209d39SAndroid Build Coastguard Worker     /**
1270*0e209d39SAndroid Build Coastguard Worker       * Queries the transparency of region bounds for this matcher.
1271*0e209d39SAndroid Build Coastguard Worker       * See useTransparentBounds for a description of transparent and opaque bounds.
1272*0e209d39SAndroid Build Coastguard Worker       * By default, a matcher uses opaque region boundaries.
1273*0e209d39SAndroid Build Coastguard Worker       *
1274*0e209d39SAndroid Build Coastguard Worker       * @return true if this matcher is using transparent bounds, false if it is using opaque bounds.
1275*0e209d39SAndroid Build Coastguard Worker       * @stable ICU 4.0
1276*0e209d39SAndroid Build Coastguard Worker       */
1277*0e209d39SAndroid Build Coastguard Worker       virtual UBool hasTransparentBounds() const;
1278*0e209d39SAndroid Build Coastguard Worker 
1279*0e209d39SAndroid Build Coastguard Worker     /**
1280*0e209d39SAndroid Build Coastguard Worker       * Sets the transparency of region bounds for this matcher.
1281*0e209d39SAndroid Build Coastguard Worker       * Invoking this function with an argument of true will set this matcher to use transparent bounds.
1282*0e209d39SAndroid Build Coastguard Worker       * If the boolean argument is false, then opaque bounds will be used.
1283*0e209d39SAndroid Build Coastguard Worker       *
1284*0e209d39SAndroid Build Coastguard Worker       * Using transparent bounds, the boundaries of this matcher's region are transparent
1285*0e209d39SAndroid Build Coastguard Worker       * to lookahead, lookbehind, and boundary matching constructs. Those constructs can
1286*0e209d39SAndroid Build Coastguard Worker       * see text beyond the boundaries of the region while checking for a match.
1287*0e209d39SAndroid Build Coastguard Worker       *
1288*0e209d39SAndroid Build Coastguard Worker       * With opaque bounds, no text outside of the matcher's region is visible to lookahead,
1289*0e209d39SAndroid Build Coastguard Worker       * lookbehind, and boundary matching constructs.
1290*0e209d39SAndroid Build Coastguard Worker       *
1291*0e209d39SAndroid Build Coastguard Worker       * By default, a matcher uses opaque bounds.
1292*0e209d39SAndroid Build Coastguard Worker       *
1293*0e209d39SAndroid Build Coastguard Worker       * @param   b true for transparent bounds; false for opaque bounds
1294*0e209d39SAndroid Build Coastguard Worker       * @return  This Matcher
1295*0e209d39SAndroid Build Coastguard Worker       * @stable ICU 4.0
1296*0e209d39SAndroid Build Coastguard Worker       **/
1297*0e209d39SAndroid Build Coastguard Worker       virtual RegexMatcher &useTransparentBounds(UBool b);
1298*0e209d39SAndroid Build Coastguard Worker 
1299*0e209d39SAndroid Build Coastguard Worker 
1300*0e209d39SAndroid Build Coastguard Worker     /**
1301*0e209d39SAndroid Build Coastguard Worker       * Return true if this matcher is using anchoring bounds.
1302*0e209d39SAndroid Build Coastguard Worker       * By default, matchers use anchoring region bounds.
1303*0e209d39SAndroid Build Coastguard Worker       *
1304*0e209d39SAndroid Build Coastguard Worker       * @return true if this matcher is using anchoring bounds, false if it is not.
1305*0e209d39SAndroid Build Coastguard Worker       * @stable ICU 4.0
1306*0e209d39SAndroid Build Coastguard Worker       */
1307*0e209d39SAndroid Build Coastguard Worker       virtual UBool hasAnchoringBounds() const;
1308*0e209d39SAndroid Build Coastguard Worker 
1309*0e209d39SAndroid Build Coastguard Worker 
1310*0e209d39SAndroid Build Coastguard Worker     /**
1311*0e209d39SAndroid Build Coastguard Worker       * Set whether this matcher is using Anchoring Bounds for its region.
1312*0e209d39SAndroid Build Coastguard Worker       * With anchoring bounds, pattern anchors such as ^ and $ will match at the start
1313*0e209d39SAndroid Build Coastguard Worker       * and end of the region.  Without Anchoring Bounds, anchors will only match at
1314*0e209d39SAndroid Build Coastguard Worker       * the positions they would in the complete text.
1315*0e209d39SAndroid Build Coastguard Worker       *
1316*0e209d39SAndroid Build Coastguard Worker       * Anchoring Bounds are the default for regions.
1317*0e209d39SAndroid Build Coastguard Worker       *
1318*0e209d39SAndroid Build Coastguard Worker       * @param b true to enable anchoring bounds; false to disable them.
1319*0e209d39SAndroid Build Coastguard Worker       * @return  This Matcher
1320*0e209d39SAndroid Build Coastguard Worker       * @stable ICU 4.0
1321*0e209d39SAndroid Build Coastguard Worker       */
1322*0e209d39SAndroid Build Coastguard Worker       virtual RegexMatcher &useAnchoringBounds(UBool b);
1323*0e209d39SAndroid Build Coastguard Worker 
1324*0e209d39SAndroid Build Coastguard Worker 
1325*0e209d39SAndroid Build Coastguard Worker     /**
1326*0e209d39SAndroid Build Coastguard Worker       * Return true if the most recent matching operation attempted to access
1327*0e209d39SAndroid Build Coastguard Worker       *  additional input beyond the available input text.
1328*0e209d39SAndroid Build Coastguard Worker       *  In this case, additional input text could change the results of the match.
1329*0e209d39SAndroid Build Coastguard Worker       *
1330*0e209d39SAndroid Build Coastguard Worker       *  hitEnd() is defined for both successful and unsuccessful matches.
1331*0e209d39SAndroid Build Coastguard Worker       *  In either case hitEnd() will return true if the end of the text was
1332*0e209d39SAndroid Build Coastguard Worker       *  reached at any point during the matching process.
1333*0e209d39SAndroid Build Coastguard Worker       *
1334*0e209d39SAndroid Build Coastguard Worker       *  @return  true if the most recent match hit the end of input
1335*0e209d39SAndroid Build Coastguard Worker       *  @stable ICU 4.0
1336*0e209d39SAndroid Build Coastguard Worker       */
1337*0e209d39SAndroid Build Coastguard Worker       virtual UBool hitEnd() const;
1338*0e209d39SAndroid Build Coastguard Worker 
1339*0e209d39SAndroid Build Coastguard Worker     /**
1340*0e209d39SAndroid Build Coastguard Worker       * Return true if the most recent match succeeded and additional input could cause
1341*0e209d39SAndroid Build Coastguard Worker       * it to fail. If this method returns false and a match was found, then more input
1342*0e209d39SAndroid Build Coastguard Worker       * might change the match but the match won't be lost. If a match was not found,
1343*0e209d39SAndroid Build Coastguard Worker       * then requireEnd() has no meaning.
1344*0e209d39SAndroid Build Coastguard Worker       *
1345*0e209d39SAndroid Build Coastguard Worker       * @return true if more input could cause the most recent match to no longer match.
1346*0e209d39SAndroid Build Coastguard Worker       * @stable ICU 4.0
1347*0e209d39SAndroid Build Coastguard Worker       */
1348*0e209d39SAndroid Build Coastguard Worker       virtual UBool requireEnd() const;
1349*0e209d39SAndroid Build Coastguard Worker 
1350*0e209d39SAndroid Build Coastguard Worker 
1351*0e209d39SAndroid Build Coastguard Worker    /**
1352*0e209d39SAndroid Build Coastguard Worker     *    Returns the pattern that is interpreted by this matcher.
1353*0e209d39SAndroid Build Coastguard Worker     *    @return  a const reference to the RegexPattern for this RegexMatcher
1354*0e209d39SAndroid Build Coastguard Worker     *    @stable ICU 2.4
1355*0e209d39SAndroid Build Coastguard Worker     */
1356*0e209d39SAndroid Build Coastguard Worker     virtual const RegexPattern &pattern() const;
1357*0e209d39SAndroid Build Coastguard Worker 
1358*0e209d39SAndroid Build Coastguard Worker 
1359*0e209d39SAndroid Build Coastguard Worker    /**
1360*0e209d39SAndroid Build Coastguard Worker     *    Replaces every substring of the input that matches the pattern
1361*0e209d39SAndroid Build Coastguard Worker     *    with the given replacement string.  This is a convenience function that
1362*0e209d39SAndroid Build Coastguard Worker     *    provides a complete find-and-replace-all operation.
1363*0e209d39SAndroid Build Coastguard Worker     *
1364*0e209d39SAndroid Build Coastguard Worker     *    This method first resets this matcher. It then scans the input string
1365*0e209d39SAndroid Build Coastguard Worker     *    looking for matches of the pattern. Input that is not part of any
1366*0e209d39SAndroid Build Coastguard Worker     *    match is left unchanged; each match is replaced in the result by the
1367*0e209d39SAndroid Build Coastguard Worker     *    replacement string. The replacement string may contain references to
1368*0e209d39SAndroid Build Coastguard Worker     *    capture groups.
1369*0e209d39SAndroid Build Coastguard Worker     *
1370*0e209d39SAndroid Build Coastguard Worker     *    @param   replacement a string containing the replacement text.
1371*0e209d39SAndroid Build Coastguard Worker     *    @param   status      a reference to a UErrorCode to receive any errors.
1372*0e209d39SAndroid Build Coastguard Worker     *    @return              a new string containing the results of the find and replace.
1373*0e209d39SAndroid Build Coastguard Worker     *    @stable ICU 2.4
1374*0e209d39SAndroid Build Coastguard Worker     */
1375*0e209d39SAndroid Build Coastguard Worker     virtual UnicodeString replaceAll(const UnicodeString &replacement, UErrorCode &status);
1376*0e209d39SAndroid Build Coastguard Worker 
1377*0e209d39SAndroid Build Coastguard Worker 
1378*0e209d39SAndroid Build Coastguard Worker    /**
1379*0e209d39SAndroid Build Coastguard Worker     *    Replaces every substring of the input that matches the pattern
1380*0e209d39SAndroid Build Coastguard Worker     *    with the given replacement string.  This is a convenience function that
1381*0e209d39SAndroid Build Coastguard Worker     *    provides a complete find-and-replace-all operation.
1382*0e209d39SAndroid Build Coastguard Worker     *
1383*0e209d39SAndroid Build Coastguard Worker     *    This method first resets this matcher. It then scans the input string
1384*0e209d39SAndroid Build Coastguard Worker     *    looking for matches of the pattern. Input that is not part of any
1385*0e209d39SAndroid Build Coastguard Worker     *    match is left unchanged; each match is replaced in the result by the
1386*0e209d39SAndroid Build Coastguard Worker     *    replacement string. The replacement string may contain references to
1387*0e209d39SAndroid Build Coastguard Worker     *    capture groups.
1388*0e209d39SAndroid Build Coastguard Worker     *
1389*0e209d39SAndroid Build Coastguard Worker     *    @param   replacement a UText containing the replacement text.
1390*0e209d39SAndroid Build Coastguard Worker     *    @param   dest        a mutable UText in which the results are placed.
1391*0e209d39SAndroid Build Coastguard Worker     *                          If nullptr, a new UText will be created (which may not be mutable).
1392*0e209d39SAndroid Build Coastguard Worker     *    @param   status      a reference to a UErrorCode to receive any errors.
1393*0e209d39SAndroid Build Coastguard Worker     *    @return              a UText containing the results of the find and replace.
1394*0e209d39SAndroid Build Coastguard Worker     *                          If a pre-allocated UText was provided, it will always be used and returned.
1395*0e209d39SAndroid Build Coastguard Worker     *
1396*0e209d39SAndroid Build Coastguard Worker     *    @stable ICU 4.6
1397*0e209d39SAndroid Build Coastguard Worker     */
1398*0e209d39SAndroid Build Coastguard Worker     virtual UText *replaceAll(UText *replacement, UText *dest, UErrorCode &status);
1399*0e209d39SAndroid Build Coastguard Worker 
1400*0e209d39SAndroid Build Coastguard Worker 
1401*0e209d39SAndroid Build Coastguard Worker    /**
1402*0e209d39SAndroid Build Coastguard Worker     * Replaces the first substring of the input that matches
1403*0e209d39SAndroid Build Coastguard Worker     * the pattern with the replacement string.   This is a convenience
1404*0e209d39SAndroid Build Coastguard Worker     * function that provides a complete find-and-replace operation.
1405*0e209d39SAndroid Build Coastguard Worker     *
1406*0e209d39SAndroid Build Coastguard Worker     * This function first resets this RegexMatcher. It then scans the input string
1407*0e209d39SAndroid Build Coastguard Worker     * looking for a match of the pattern. Input that is not part
1408*0e209d39SAndroid Build Coastguard Worker     * of the match is appended directly to the result string; the match is replaced
1409*0e209d39SAndroid Build Coastguard Worker     * in the result by the replacement string. The replacement string may contain
1410*0e209d39SAndroid Build Coastguard Worker     * references to captured groups.
1411*0e209d39SAndroid Build Coastguard Worker     *
1412*0e209d39SAndroid Build Coastguard Worker     * The state of the matcher (the position at which a subsequent find()
1413*0e209d39SAndroid Build Coastguard Worker     *    would begin) after completing a replaceFirst() is not specified.  The
1414*0e209d39SAndroid Build Coastguard Worker     *    RegexMatcher should be reset before doing additional find() operations.
1415*0e209d39SAndroid Build Coastguard Worker     *
1416*0e209d39SAndroid Build Coastguard Worker     *    @param   replacement a string containing the replacement text.
1417*0e209d39SAndroid Build Coastguard Worker     *    @param   status      a reference to a UErrorCode to receive any errors.
1418*0e209d39SAndroid Build Coastguard Worker     *    @return              a new string containing the results of the find and replace.
1419*0e209d39SAndroid Build Coastguard Worker     *    @stable ICU 2.4
1420*0e209d39SAndroid Build Coastguard Worker     */
1421*0e209d39SAndroid Build Coastguard Worker     virtual UnicodeString replaceFirst(const UnicodeString &replacement, UErrorCode &status);
1422*0e209d39SAndroid Build Coastguard Worker 
1423*0e209d39SAndroid Build Coastguard Worker 
1424*0e209d39SAndroid Build Coastguard Worker    /**
1425*0e209d39SAndroid Build Coastguard Worker     * Replaces the first substring of the input that matches
1426*0e209d39SAndroid Build Coastguard Worker     * the pattern with the replacement text.  This is a convenience
1427*0e209d39SAndroid Build Coastguard Worker     * function that provides a complete find-and-replace operation.
1428*0e209d39SAndroid Build Coastguard Worker     *
1429*0e209d39SAndroid Build Coastguard Worker     * This function first resets this RegexMatcher. It then scans the input
1430*0e209d39SAndroid Build Coastguard Worker     * looking for a match of the pattern. Input that is not part
1431*0e209d39SAndroid Build Coastguard Worker     * of the match is appended directly to the result; the match is replaced
1432*0e209d39SAndroid Build Coastguard Worker     * in the result by the replacement text. The replacement text may contain
1433*0e209d39SAndroid Build Coastguard Worker     * references to captured groups.
1434*0e209d39SAndroid Build Coastguard Worker     *
1435*0e209d39SAndroid Build Coastguard Worker     * The state of the matcher (the position at which a subsequent find()
1436*0e209d39SAndroid Build Coastguard Worker     *    would begin) after completing a replaceFirst() is not specified.  The
1437*0e209d39SAndroid Build Coastguard Worker     *    RegexMatcher should be reset before doing additional find() operations.
1438*0e209d39SAndroid Build Coastguard Worker     *
1439*0e209d39SAndroid Build Coastguard Worker     *    @param   replacement a UText containing the replacement text.
1440*0e209d39SAndroid Build Coastguard Worker     *    @param   dest        a mutable UText in which the results are placed.
1441*0e209d39SAndroid Build Coastguard Worker     *                          If nullptr, a new UText will be created (which may not be mutable).
1442*0e209d39SAndroid Build Coastguard Worker     *    @param   status      a reference to a UErrorCode to receive any errors.
1443*0e209d39SAndroid Build Coastguard Worker     *    @return              a UText containing the results of the find and replace.
1444*0e209d39SAndroid Build Coastguard Worker     *                          If a pre-allocated UText was provided, it will always be used and returned.
1445*0e209d39SAndroid Build Coastguard Worker     *
1446*0e209d39SAndroid Build Coastguard Worker     *    @stable ICU 4.6
1447*0e209d39SAndroid Build Coastguard Worker     */
1448*0e209d39SAndroid Build Coastguard Worker     virtual UText *replaceFirst(UText *replacement, UText *dest, UErrorCode &status);
1449*0e209d39SAndroid Build Coastguard Worker 
1450*0e209d39SAndroid Build Coastguard Worker 
1451*0e209d39SAndroid Build Coastguard Worker    /**
1452*0e209d39SAndroid Build Coastguard Worker     *   Implements a replace operation intended to be used as part of an
1453*0e209d39SAndroid Build Coastguard Worker     *   incremental find-and-replace.
1454*0e209d39SAndroid Build Coastguard Worker     *
1455*0e209d39SAndroid Build Coastguard Worker     *   The input string, starting from the end of the previous replacement and ending at
1456*0e209d39SAndroid Build Coastguard Worker     *   the start of the current match, is appended to the destination string.  Then the
1457*0e209d39SAndroid Build Coastguard Worker     *   replacement string is appended to the destination string,
1458*0e209d39SAndroid Build Coastguard Worker     *   with any references to captured text substituted.
1459*0e209d39SAndroid Build Coastguard Worker     *
1460*0e209d39SAndroid Build Coastguard Worker     *   For simple, prepackaged, non-incremental find-and-replace
1461*0e209d39SAndroid Build Coastguard Worker     *   operations, see replaceFirst() or replaceAll().
1462*0e209d39SAndroid Build Coastguard Worker     *
1463*0e209d39SAndroid Build Coastguard Worker     *   @param   dest        A UnicodeString to which the results of the find-and-replace are appended.
1464*0e209d39SAndroid Build Coastguard Worker     *   @param   replacement A UnicodeString that provides the text to be substituted for
1465*0e209d39SAndroid Build Coastguard Worker     *                        the input text that matched the regexp pattern.  The replacement
1466*0e209d39SAndroid Build Coastguard Worker     *                        text may contain references to captured text from the
1467*0e209d39SAndroid Build Coastguard Worker     *                        input.
1468*0e209d39SAndroid Build Coastguard Worker     *   @param   status      A reference to a UErrorCode to receive any errors.  Possible
1469*0e209d39SAndroid Build Coastguard Worker     *                        errors are U_REGEX_INVALID_STATE if no match has been
1470*0e209d39SAndroid Build Coastguard Worker     *                        attempted or the last match failed, and U_INDEX_OUTOFBOUNDS_ERROR
1471*0e209d39SAndroid Build Coastguard Worker     *                        if the replacement text specifies a capture group that
1472*0e209d39SAndroid Build Coastguard Worker     *                        does not exist in the pattern.
1473*0e209d39SAndroid Build Coastguard Worker     *
1474*0e209d39SAndroid Build Coastguard Worker     *   @return  A reference to this RegexMatcher.
1475*0e209d39SAndroid Build Coastguard Worker     *   @stable ICU 2.4
1476*0e209d39SAndroid Build Coastguard Worker     *
1477*0e209d39SAndroid Build Coastguard Worker     */
1478*0e209d39SAndroid Build Coastguard Worker     virtual RegexMatcher &appendReplacement(UnicodeString &dest,
1479*0e209d39SAndroid Build Coastguard Worker         const UnicodeString &replacement, UErrorCode &status);
1480*0e209d39SAndroid Build Coastguard Worker 
1481*0e209d39SAndroid Build Coastguard Worker 
1482*0e209d39SAndroid Build Coastguard Worker    /**
1483*0e209d39SAndroid Build Coastguard Worker     *   Implements a replace operation intended to be used as part of an
1484*0e209d39SAndroid Build Coastguard Worker     *   incremental find-and-replace.
1485*0e209d39SAndroid Build Coastguard Worker     *
1486*0e209d39SAndroid Build Coastguard Worker     *   The input text, starting from the end of the previous replacement and ending at
1487*0e209d39SAndroid Build Coastguard Worker     *   the start of the current match, is appended to the destination text.  Then the
1488*0e209d39SAndroid Build Coastguard Worker     *   replacement text is appended to the destination text,
1489*0e209d39SAndroid Build Coastguard Worker     *   with any references to captured text substituted.
1490*0e209d39SAndroid Build Coastguard Worker     *
1491*0e209d39SAndroid Build Coastguard Worker     *   For simple, prepackaged, non-incremental find-and-replace
1492*0e209d39SAndroid Build Coastguard Worker     *   operations, see replaceFirst() or replaceAll().
1493*0e209d39SAndroid Build Coastguard Worker     *
1494*0e209d39SAndroid Build Coastguard Worker     *   @param   dest        A mutable UText to which the results of the find-and-replace are appended.
1495*0e209d39SAndroid Build Coastguard Worker     *                         Must not be nullptr.
1496*0e209d39SAndroid Build Coastguard Worker     *   @param   replacement A UText that provides the text to be substituted for
1497*0e209d39SAndroid Build Coastguard Worker     *                        the input text that matched the regexp pattern.  The replacement
1498*0e209d39SAndroid Build Coastguard Worker     *                        text may contain references to captured text from the input.
1499*0e209d39SAndroid Build Coastguard Worker     *   @param   status      A reference to a UErrorCode to receive any errors.  Possible
1500*0e209d39SAndroid Build Coastguard Worker     *                        errors are U_REGEX_INVALID_STATE if no match has been
1501*0e209d39SAndroid Build Coastguard Worker     *                        attempted or the last match failed, and U_INDEX_OUTOFBOUNDS_ERROR
1502*0e209d39SAndroid Build Coastguard Worker     *                        if the replacement text specifies a capture group that
1503*0e209d39SAndroid Build Coastguard Worker     *                        does not exist in the pattern.
1504*0e209d39SAndroid Build Coastguard Worker     *
1505*0e209d39SAndroid Build Coastguard Worker     *   @return  A reference to this RegexMatcher.
1506*0e209d39SAndroid Build Coastguard Worker     *
1507*0e209d39SAndroid Build Coastguard Worker     *   @stable ICU 4.6
1508*0e209d39SAndroid Build Coastguard Worker     */
1509*0e209d39SAndroid Build Coastguard Worker     virtual RegexMatcher &appendReplacement(UText *dest,
1510*0e209d39SAndroid Build Coastguard Worker         UText *replacement, UErrorCode &status);
1511*0e209d39SAndroid Build Coastguard Worker 
1512*0e209d39SAndroid Build Coastguard Worker 
1513*0e209d39SAndroid Build Coastguard Worker    /**
1514*0e209d39SAndroid Build Coastguard Worker     * As the final step in a find-and-replace operation, append the remainder
1515*0e209d39SAndroid Build Coastguard Worker     * of the input string, starting at the position following the last appendReplacement(),
1516*0e209d39SAndroid Build Coastguard Worker     * to the destination string. `appendTail()` is intended to be invoked after one
1517*0e209d39SAndroid Build Coastguard Worker     * or more invocations of `RegexMatcher::appendReplacement()`.
1518*0e209d39SAndroid Build Coastguard Worker     *
1519*0e209d39SAndroid Build Coastguard Worker     *  @param dest A UnicodeString to which the results of the find-and-replace are appended.
1520*0e209d39SAndroid Build Coastguard Worker     *  @return  the destination string.
1521*0e209d39SAndroid Build Coastguard Worker     *  @stable ICU 2.4
1522*0e209d39SAndroid Build Coastguard Worker     */
1523*0e209d39SAndroid Build Coastguard Worker     virtual UnicodeString &appendTail(UnicodeString &dest);
1524*0e209d39SAndroid Build Coastguard Worker 
1525*0e209d39SAndroid Build Coastguard Worker 
1526*0e209d39SAndroid Build Coastguard Worker    /**
1527*0e209d39SAndroid Build Coastguard Worker     * As the final step in a find-and-replace operation, append the remainder
1528*0e209d39SAndroid Build Coastguard Worker     * of the input text, starting at the position following the last appendReplacement(),
1529*0e209d39SAndroid Build Coastguard Worker     * to the destination text. `appendTail()` is intended to be invoked after one
1530*0e209d39SAndroid Build Coastguard Worker     * or more invocations of `RegexMatcher::appendReplacement()`.
1531*0e209d39SAndroid Build Coastguard Worker     *
1532*0e209d39SAndroid Build Coastguard Worker     *  @param dest A mutable UText to which the results of the find-and-replace are appended.
1533*0e209d39SAndroid Build Coastguard Worker     *               Must not be nullptr.
1534*0e209d39SAndroid Build Coastguard Worker     *  @param status A reference to a UErrorCode to receive any errors.
1535*0e209d39SAndroid Build Coastguard Worker     *  @return  the destination UText.
1536*0e209d39SAndroid Build Coastguard Worker     *
1537*0e209d39SAndroid Build Coastguard Worker     *  @stable ICU 4.6
1538*0e209d39SAndroid Build Coastguard Worker     */
1539*0e209d39SAndroid Build Coastguard Worker     virtual UText *appendTail(UText *dest, UErrorCode &status);
1540*0e209d39SAndroid Build Coastguard Worker 
1541*0e209d39SAndroid Build Coastguard Worker 
1542*0e209d39SAndroid Build Coastguard Worker     /**
1543*0e209d39SAndroid Build Coastguard Worker      * Split a string into fields.  Somewhat like %split() from Perl.
1544*0e209d39SAndroid Build Coastguard Worker      * Matches of the pattern identify the delimiters that separate the
1545*0e209d39SAndroid Build Coastguard Worker      * input into fields; the input text between the matches becomes the
1546*0e209d39SAndroid Build Coastguard Worker      * fields themselves.
1547*0e209d39SAndroid Build Coastguard Worker      *
1548*0e209d39SAndroid Build Coastguard Worker      * @param input   The string to be split into fields.  The field delimiters
1549*0e209d39SAndroid Build Coastguard Worker      *                match the pattern (in the "this" object).  This matcher
1550*0e209d39SAndroid Build Coastguard Worker      *                will be reset to this input string.
1551*0e209d39SAndroid Build Coastguard Worker      * @param dest    An array of UnicodeStrings to receive the results of the split.
1552*0e209d39SAndroid Build Coastguard Worker      *                This is an array of actual UnicodeString objects, not an
1553*0e209d39SAndroid Build Coastguard Worker      *                array of pointers to strings.  Local (stack based) arrays can
1554*0e209d39SAndroid Build Coastguard Worker      *                work well here.
1555*0e209d39SAndroid Build Coastguard Worker      * @param destCapacity  The number of elements in the destination array.
1556*0e209d39SAndroid Build Coastguard Worker      *                If the number of fields found is less than destCapacity, the
1557*0e209d39SAndroid Build Coastguard Worker      *                extra strings in the destination array are not altered.
1558*0e209d39SAndroid Build Coastguard Worker      *                If the number of destination strings is less than the number
1559*0e209d39SAndroid Build Coastguard Worker      *                of fields, the trailing part of the input string, including any
1560*0e209d39SAndroid Build Coastguard Worker      *                field delimiters, is placed in the last destination string.
1561*0e209d39SAndroid Build Coastguard Worker      * @param status  A reference to a UErrorCode to receive any errors.
1562*0e209d39SAndroid Build Coastguard Worker      * @return        The number of fields into which the input string was split.
1563*0e209d39SAndroid Build Coastguard Worker      * @stable ICU 2.6
1564*0e209d39SAndroid Build Coastguard Worker      */
1565*0e209d39SAndroid Build Coastguard Worker     virtual int32_t  split(const UnicodeString &input,
1566*0e209d39SAndroid Build Coastguard Worker         UnicodeString    dest[],
1567*0e209d39SAndroid Build Coastguard Worker         int32_t          destCapacity,
1568*0e209d39SAndroid Build Coastguard Worker         UErrorCode       &status);
1569*0e209d39SAndroid Build Coastguard Worker 
1570*0e209d39SAndroid Build Coastguard Worker 
1571*0e209d39SAndroid Build Coastguard Worker     /**
1572*0e209d39SAndroid Build Coastguard Worker      * Split a text into fields.  Somewhat like %split() from Perl.
1573*0e209d39SAndroid Build Coastguard Worker      * Matches of the pattern identify the delimiters that separate the
1574*0e209d39SAndroid Build Coastguard Worker      * input into fields; the input text between the matches becomes the
1575*0e209d39SAndroid Build Coastguard Worker      * fields themselves.
1576*0e209d39SAndroid Build Coastguard Worker      *
1577*0e209d39SAndroid Build Coastguard Worker      * @param input   The text to be split into fields.  The field delimiters
1578*0e209d39SAndroid Build Coastguard Worker      *                match the pattern (in the "this" object).  This matcher
1579*0e209d39SAndroid Build Coastguard Worker      *                will be reset to this input text.
1580*0e209d39SAndroid Build Coastguard Worker      * @param dest    An array of mutable UText structs to receive the results of the split.
1581*0e209d39SAndroid Build Coastguard Worker      *                If an element is nullptr, a new UText is allocated to contain the results for
1582*0e209d39SAndroid Build Coastguard Worker      *                that field. This new UText is not guaranteed to be mutable.
1583*0e209d39SAndroid Build Coastguard Worker      * @param destCapacity  The number of elements in the destination array.
1584*0e209d39SAndroid Build Coastguard Worker      *                If the number of fields found is less than destCapacity, the
1585*0e209d39SAndroid Build Coastguard Worker      *                extra elements in the destination array are not altered.
1586*0e209d39SAndroid Build Coastguard Worker      *                If the number of destination elements is less than the number
1587*0e209d39SAndroid Build Coastguard Worker      *                of fields, the trailing part of the input text, including any
1588*0e209d39SAndroid Build Coastguard Worker      *                field delimiters, is placed in the last destination element.
1589*0e209d39SAndroid Build Coastguard Worker      * @param status  A reference to a UErrorCode to receive any errors.
1590*0e209d39SAndroid Build Coastguard Worker      * @return        The number of fields into which the input text was split.
1591*0e209d39SAndroid Build Coastguard Worker      *
1592*0e209d39SAndroid Build Coastguard Worker      * @stable ICU 4.6
1593*0e209d39SAndroid Build Coastguard Worker      */
1594*0e209d39SAndroid Build Coastguard Worker     virtual int32_t  split(UText *input,
1595*0e209d39SAndroid Build Coastguard Worker         UText           *dest[],
1596*0e209d39SAndroid Build Coastguard Worker         int32_t          destCapacity,
1597*0e209d39SAndroid Build Coastguard Worker         UErrorCode       &status);
1598*0e209d39SAndroid Build Coastguard Worker 
1599*0e209d39SAndroid Build Coastguard Worker   /**
1600*0e209d39SAndroid Build Coastguard Worker     *   Set a processing time limit for match operations with this Matcher.
1601*0e209d39SAndroid Build Coastguard Worker     *
1602*0e209d39SAndroid Build Coastguard Worker     *   Some patterns, when matching certain strings, can run in exponential time.
1603*0e209d39SAndroid Build Coastguard Worker     *   For practical purposes, the match operation may appear to be in an
1604*0e209d39SAndroid Build Coastguard Worker     *   infinite loop.
1605*0e209d39SAndroid Build Coastguard Worker     *   When a limit is set, a match operation will fail with an error if the
1606*0e209d39SAndroid Build Coastguard Worker     *   limit is exceeded.
1607*0e209d39SAndroid Build Coastguard Worker     *
1608*0e209d39SAndroid Build Coastguard Worker     *   The units of the limit are steps of the match engine.
1609*0e209d39SAndroid Build Coastguard Worker     *   Correspondence with actual processor time will depend on the speed
1610*0e209d39SAndroid Build Coastguard Worker     *   of the processor and the details of the specific pattern, but will
1611*0e209d39SAndroid Build Coastguard Worker     *   typically be on the order of milliseconds.
1612*0e209d39SAndroid Build Coastguard Worker     *
1613*0e209d39SAndroid Build Coastguard Worker     *   By default, the matching time is not limited.
1614*0e209d39SAndroid Build Coastguard Worker     *
1615*0e209d39SAndroid Build Coastguard Worker     *
1616*0e209d39SAndroid Build Coastguard Worker     *   @param   limit       The limit value, or 0 for no limit.
1617*0e209d39SAndroid Build Coastguard Worker     *   @param   status      A reference to a UErrorCode to receive any errors.
1618*0e209d39SAndroid Build Coastguard Worker     *   @stable ICU 4.0
1619*0e209d39SAndroid Build Coastguard Worker     */
1620*0e209d39SAndroid Build Coastguard Worker     virtual void setTimeLimit(int32_t limit, UErrorCode &status);
1621*0e209d39SAndroid Build Coastguard Worker 
1622*0e209d39SAndroid Build Coastguard Worker   /**
1623*0e209d39SAndroid Build Coastguard Worker     * Get the time limit, if any, for match operations made with this Matcher.
1624*0e209d39SAndroid Build Coastguard Worker     *
1625*0e209d39SAndroid Build Coastguard Worker     *   @return the maximum allowed time for a match, in units of match engine steps (see setTimeLimit()).
1626*0e209d39SAndroid Build Coastguard Worker     *   @stable ICU 4.0
1627*0e209d39SAndroid Build Coastguard Worker     */
1628*0e209d39SAndroid Build Coastguard Worker     virtual int32_t getTimeLimit() const;
1629*0e209d39SAndroid Build Coastguard Worker 
1630*0e209d39SAndroid Build Coastguard Worker   /**
1631*0e209d39SAndroid Build Coastguard Worker     *  Set the amount of heap storage available for use by the match backtracking stack.
1632*0e209d39SAndroid Build Coastguard Worker     *  The matcher is also reset, discarding any results from previous matches.
1633*0e209d39SAndroid Build Coastguard Worker     *
1634*0e209d39SAndroid Build Coastguard Worker     *  ICU uses a backtracking regular expression engine, with the backtrack stack
1635*0e209d39SAndroid Build Coastguard Worker     *  maintained on the heap.  This function sets the limit on the amount of memory
1636*0e209d39SAndroid Build Coastguard Worker     *  that can be used for this purpose.  A backtracking stack overflow will
1637*0e209d39SAndroid Build Coastguard Worker     *  result in an error from the match operation that caused it.
1638*0e209d39SAndroid Build Coastguard Worker     *
1639*0e209d39SAndroid Build Coastguard Worker     *  A limit is desirable because a malicious or poorly designed pattern can use
1640*0e209d39SAndroid Build Coastguard Worker     *  excessive memory, potentially crashing the process.  A limit is enabled
1641*0e209d39SAndroid Build Coastguard Worker     *  by default.
1642*0e209d39SAndroid Build Coastguard Worker     *
1643*0e209d39SAndroid Build Coastguard Worker     *  @param limit  The maximum size, in bytes, of the matching backtrack stack.
1644*0e209d39SAndroid Build Coastguard Worker     *                A value of zero means no limit.
1645*0e209d39SAndroid Build Coastguard Worker     *                The limit must be greater than or equal to zero.
1646*0e209d39SAndroid Build Coastguard Worker     *
1647*0e209d39SAndroid Build Coastguard Worker     *  @param status   A reference to a UErrorCode to receive any errors.
1648*0e209d39SAndroid Build Coastguard Worker     *
1649*0e209d39SAndroid Build Coastguard Worker     *  @stable ICU 4.0
1650*0e209d39SAndroid Build Coastguard Worker     */
1651*0e209d39SAndroid Build Coastguard Worker     virtual void setStackLimit(int32_t  limit, UErrorCode &status);
1652*0e209d39SAndroid Build Coastguard Worker 
1653*0e209d39SAndroid Build Coastguard Worker   /**
1654*0e209d39SAndroid Build Coastguard Worker     *  Get the size of the heap storage available for use by the backtracking stack.
1655*0e209d39SAndroid Build Coastguard Worker     *
1656*0e209d39SAndroid Build Coastguard Worker     *  @return  the maximum backtracking stack size, in bytes, or zero if the
1657*0e209d39SAndroid Build Coastguard Worker     *           stack size is unlimited.
1658*0e209d39SAndroid Build Coastguard Worker     *  @stable ICU 4.0
1659*0e209d39SAndroid Build Coastguard Worker     */
1660*0e209d39SAndroid Build Coastguard Worker     virtual int32_t  getStackLimit() const;
1661*0e209d39SAndroid Build Coastguard Worker 
1662*0e209d39SAndroid Build Coastguard Worker 
1663*0e209d39SAndroid Build Coastguard Worker   /**
1664*0e209d39SAndroid Build Coastguard Worker     * Set a callback function for use with this Matcher.
1665*0e209d39SAndroid Build Coastguard Worker     * During matching operations the function will be called periodically,
1666*0e209d39SAndroid Build Coastguard Worker     * giving the application the opportunity to terminate a long-running
1667*0e209d39SAndroid Build Coastguard Worker     * match.
1668*0e209d39SAndroid Build Coastguard Worker     *
1669*0e209d39SAndroid Build Coastguard Worker     *    @param   callback    A pointer to the user-supplied callback function.
1670*0e209d39SAndroid Build Coastguard Worker     *    @param   context     User context pointer.  The value supplied at the
1671*0e209d39SAndroid Build Coastguard Worker     *                         time the callback function is set will be saved
1672*0e209d39SAndroid Build Coastguard Worker     *                         and passed to the callback each time that it is called.
1673*0e209d39SAndroid Build Coastguard Worker     *    @param   status      A reference to a UErrorCode to receive any errors.
1674*0e209d39SAndroid Build Coastguard Worker     *    @stable ICU 4.0
1675*0e209d39SAndroid Build Coastguard Worker     */
1676*0e209d39SAndroid Build Coastguard Worker     virtual void setMatchCallback(URegexMatchCallback     *callback,
1677*0e209d39SAndroid Build Coastguard Worker                                   const void              *context,
1678*0e209d39SAndroid Build Coastguard Worker                                   UErrorCode              &status);
1679*0e209d39SAndroid Build Coastguard Worker 
1680*0e209d39SAndroid Build Coastguard Worker 
1681*0e209d39SAndroid Build Coastguard Worker   /**
1682*0e209d39SAndroid Build Coastguard Worker     *  Get the match callback function for this Matcher.
1683*0e209d39SAndroid Build Coastguard Worker     *
1684*0e209d39SAndroid Build Coastguard Worker     *    @param   callback    Out parameter, receives a pointer to the user-supplied
1685*0e209d39SAndroid Build Coastguard Worker     *                         callback function.
1686*0e209d39SAndroid Build Coastguard Worker     *    @param   context     Out parameter, receives the user context pointer that
1687*0e209d39SAndroid Build Coastguard Worker     *                         was set when setMatchCallback() was called.
1688*0e209d39SAndroid Build Coastguard Worker     *    @param   status      A reference to a UErrorCode to receive any errors.
1689*0e209d39SAndroid Build Coastguard Worker     *    @stable ICU 4.0
1690*0e209d39SAndroid Build Coastguard Worker     */
1691*0e209d39SAndroid Build Coastguard Worker     virtual void getMatchCallback(URegexMatchCallback     *&callback,
1692*0e209d39SAndroid Build Coastguard Worker                                   const void              *&context,
1693*0e209d39SAndroid Build Coastguard Worker                                   UErrorCode              &status);
1694*0e209d39SAndroid Build Coastguard Worker 
1695*0e209d39SAndroid Build Coastguard Worker 
1696*0e209d39SAndroid Build Coastguard Worker   /**
1697*0e209d39SAndroid Build Coastguard Worker     * Set a progress callback function for use with find operations on this Matcher.
1698*0e209d39SAndroid Build Coastguard Worker     * During find operations, the callback will be invoked after each return from a
1699*0e209d39SAndroid Build Coastguard Worker     * match attempt, giving the application the opportunity to terminate a long-running
1700*0e209d39SAndroid Build Coastguard Worker     * find operation.  The values set here can be read back with getFindProgressCallback().
1701*0e209d39SAndroid Build Coastguard Worker     *
1702*0e209d39SAndroid Build Coastguard Worker     *    @param   callback    A pointer to the user-supplied callback function.
1703*0e209d39SAndroid Build Coastguard Worker     *    @param   context     User context pointer.  The value supplied at the
1704*0e209d39SAndroid Build Coastguard Worker     *                         time the callback function is set will be saved
1705*0e209d39SAndroid Build Coastguard Worker     *                         and passed to the callback each time that it is called.
1706*0e209d39SAndroid Build Coastguard Worker     *    @param   status      A reference to a UErrorCode to receive any errors.
1707*0e209d39SAndroid Build Coastguard Worker     *    @stable ICU 4.6
1708*0e209d39SAndroid Build Coastguard Worker     */
1709*0e209d39SAndroid Build Coastguard Worker     virtual void setFindProgressCallback(URegexFindProgressCallback      *callback,
1710*0e209d39SAndroid Build Coastguard Worker                                               const void                              *context,
1711*0e209d39SAndroid Build Coastguard Worker                                               UErrorCode                              &status);
1712*0e209d39SAndroid Build Coastguard Worker 
1713*0e209d39SAndroid Build Coastguard Worker 
1714*0e209d39SAndroid Build Coastguard Worker   /**
1715*0e209d39SAndroid Build Coastguard Worker     *  Get the find progress callback function for this Matcher.
1716*0e209d39SAndroid Build Coastguard Worker     *
1717*0e209d39SAndroid Build Coastguard Worker     *    @param   callback    Out parameter, receives a pointer to the user-supplied
1718*0e209d39SAndroid Build Coastguard Worker     *                         callback function.
1719*0e209d39SAndroid Build Coastguard Worker     *    @param   context     Out parameter, receives the user context pointer that
1720*0e209d39SAndroid Build Coastguard Worker     *                         was set when setFindProgressCallback() was called.
1721*0e209d39SAndroid Build Coastguard Worker     *    @param   status      A reference to a UErrorCode to receive any errors.
1722*0e209d39SAndroid Build Coastguard Worker     *    @stable ICU 4.6
1723*0e209d39SAndroid Build Coastguard Worker     */
1724*0e209d39SAndroid Build Coastguard Worker     virtual void getFindProgressCallback(URegexFindProgressCallback      *&callback,
1725*0e209d39SAndroid Build Coastguard Worker                                               const void                      *&context,
1726*0e209d39SAndroid Build Coastguard Worker                                               UErrorCode                      &status);
1727*0e209d39SAndroid Build Coastguard Worker 
1728*0e209d39SAndroid Build Coastguard Worker #ifndef U_HIDE_INTERNAL_API
1729*0e209d39SAndroid Build Coastguard Worker    /**
1730*0e209d39SAndroid Build Coastguard Worker      *   setTrace   Debug function, enable/disable tracing of the matching engine.
1731*0e209d39SAndroid Build Coastguard Worker      *              For internal ICU development use only.  DO NOT USE!!!!
1732*0e209d39SAndroid Build Coastguard Worker      *   @internal
1733*0e209d39SAndroid Build Coastguard Worker      */
1734*0e209d39SAndroid Build Coastguard Worker     void setTrace(UBool state);
1735*0e209d39SAndroid Build Coastguard Worker #endif  /* U_HIDE_INTERNAL_API */
1736*0e209d39SAndroid Build Coastguard Worker 
1737*0e209d39SAndroid Build Coastguard Worker     /**
1738*0e209d39SAndroid Build Coastguard Worker      * ICU "poor man's RTTI", returns a UClassID for this class.
1739*0e209d39SAndroid Build Coastguard Worker      *
1740*0e209d39SAndroid Build Coastguard Worker      * @stable ICU 2.2
1741*0e209d39SAndroid Build Coastguard Worker      */
1742*0e209d39SAndroid Build Coastguard Worker     static UClassID U_EXPORT2 getStaticClassID();
1743*0e209d39SAndroid Build Coastguard Worker 
1744*0e209d39SAndroid Build Coastguard Worker     /**
1745*0e209d39SAndroid Build Coastguard Worker      * ICU "poor man's RTTI", returns the UClassID of the object's actual (dynamic) class.
1746*0e209d39SAndroid Build Coastguard Worker      *
1747*0e209d39SAndroid Build Coastguard Worker      * @stable ICU 2.2
1748*0e209d39SAndroid Build Coastguard Worker      */
1749*0e209d39SAndroid Build Coastguard Worker     virtual UClassID getDynamicClassID() const override;
1750*0e209d39SAndroid Build Coastguard Worker 
1751*0e209d39SAndroid Build Coastguard Worker private:
1752*0e209d39SAndroid Build Coastguard Worker     // Constructors and other object boilerplate are private.
1753*0e209d39SAndroid Build Coastguard Worker     // Instances of RegexMatcher cannot be assigned, copied, cloned, etc.
1754*0e209d39SAndroid Build Coastguard Worker     RegexMatcher() = delete;                  // default constructor is deleted
1755*0e209d39SAndroid Build Coastguard Worker     RegexMatcher(const RegexPattern *pat);
1756*0e209d39SAndroid Build Coastguard Worker     RegexMatcher(const RegexMatcher &other) = delete;  // copy construction is deleted
1757*0e209d39SAndroid Build Coastguard Worker     RegexMatcher &operator =(const RegexMatcher &rhs) = delete;  // copy assignment is deleted
1758*0e209d39SAndroid Build Coastguard Worker     void init(UErrorCode &status);                      // Common initialization
1759*0e209d39SAndroid Build Coastguard Worker     void init2(UText *t, UErrorCode &e);  // Common initialization, part 2.
1760*0e209d39SAndroid Build Coastguard Worker 
1761*0e209d39SAndroid Build Coastguard Worker     friend class RegexPattern;
1762*0e209d39SAndroid Build Coastguard Worker     friend class RegexCImpl;
1763*0e209d39SAndroid Build Coastguard Worker public:
1764*0e209d39SAndroid Build Coastguard Worker #ifndef U_HIDE_INTERNAL_API
1765*0e209d39SAndroid Build Coastguard Worker     /** Reset matcher state, but preserve any region. @internal  */
1766*0e209d39SAndroid Build Coastguard Worker     void resetPreserveRegion();  // Reset matcher state, but preserve any region.
1767*0e209d39SAndroid Build Coastguard Worker #endif  /* U_HIDE_INTERNAL_API */
1768*0e209d39SAndroid Build Coastguard Worker private:
1769*0e209d39SAndroid Build Coastguard Worker 
1770*0e209d39SAndroid Build Coastguard Worker     //
1771*0e209d39SAndroid Build Coastguard Worker     //  MatchAt   This is the internal interface to the match engine itself.
1772*0e209d39SAndroid Build Coastguard Worker     //            It returns nothing; match status comes back in matcher member variables.
1773*0e209d39SAndroid Build Coastguard Worker     //
1774*0e209d39SAndroid Build Coastguard Worker     void                 MatchAt(int64_t startIdx, UBool toEnd, UErrorCode &status);
1775*0e209d39SAndroid Build Coastguard Worker     inline void          backTrack(int64_t &inputIdx, int32_t &patIdx);   // both indexes are non-const references; presumably updated in place - confirm
1776*0e209d39SAndroid Build Coastguard Worker     UBool                isWordBoundary(int64_t pos);         // Perform Perl-like \b test.
1777*0e209d39SAndroid Build Coastguard Worker     UBool                isUWordBoundary(int64_t pos, UErrorCode &status);   // Perform RBBI-based \b test.
1778*0e209d39SAndroid Build Coastguard Worker     // Find a grapheme cluster boundary using a break iterator. Used for handling \X in regexes.
1779*0e209d39SAndroid Build Coastguard Worker     int64_t              followingGCBoundary(int64_t pos, UErrorCode &status);
1780*0e209d39SAndroid Build Coastguard Worker     REStackFrame        *resetStack();
1781*0e209d39SAndroid Build Coastguard Worker     inline REStackFrame *StateSave(REStackFrame *fp, int64_t savePatIdx, UErrorCode &status);
1782*0e209d39SAndroid Build Coastguard Worker     void                 IncrementTime(UErrorCode &status);
1783*0e209d39SAndroid Build Coastguard Worker 
1784*0e209d39SAndroid Build Coastguard Worker     // Call the user's find-progress callback function, if set. Returns true if the operation should be interrupted.
1785*0e209d39SAndroid Build Coastguard Worker     inline UBool         findProgressInterrupt(int64_t matchIndex, UErrorCode &status);
1786*0e209d39SAndroid Build Coastguard Worker 
1787*0e209d39SAndroid Build Coastguard Worker     int64_t              appendGroup(int32_t groupNum, UText *dest, UErrorCode &status) const;   // const: does not modify the matcher
1788*0e209d39SAndroid Build Coastguard Worker 
1789*0e209d39SAndroid Build Coastguard Worker     UBool                findUsingChunk(UErrorCode &status);   // NOTE(review): the "Chunk" functions look like counterparts of the functions above - confirm
1790*0e209d39SAndroid Build Coastguard Worker     void                 MatchChunkAt(int32_t startIdx, UBool toEnd, UErrorCode &status);   // note: 32-bit start index, unlike MatchAt (int64_t)
1791*0e209d39SAndroid Build Coastguard Worker     UBool                isChunkWordBoundary(int32_t pos);     // note: 32-bit position, unlike isWordBoundary (int64_t)
1792*0e209d39SAndroid Build Coastguard Worker 
1793*0e209d39SAndroid Build Coastguard Worker     const RegexPattern  *fPattern;         // Presumably the pattern this matcher applies - confirm.  See fPatternOwned for ownership.
1794*0e209d39SAndroid Build Coastguard Worker     RegexPattern        *fPatternOwned;    // Non-nullptr if this matcher owns the pattern, and
1795*0e209d39SAndroid Build Coastguard Worker                                            //   should delete it when through.
1796*0e209d39SAndroid Build Coastguard Worker 
1797*0e209d39SAndroid Build Coastguard Worker     const UnicodeString *fInput;           // The string being matched. Only used for input().
1798*0e209d39SAndroid Build Coastguard Worker     UText               *fInputText;       // The text being matched. Is never nullptr.
1799*0e209d39SAndroid Build Coastguard Worker     UText               *fAltInputText;    // A shallow copy of the text being matched.
1800*0e209d39SAndroid Build Coastguard Worker                                            //   Only created if the pattern contains backreferences.
1801*0e209d39SAndroid Build Coastguard Worker     int64_t              fInputLength;     // Full length of the input text.
1802*0e209d39SAndroid Build Coastguard Worker     int32_t              fFrameSize;       // The size of a frame in the backtrack stack.
1803*0e209d39SAndroid Build Coastguard Worker 
1804*0e209d39SAndroid Build Coastguard Worker     int64_t              fRegionStart;     // Start of the input region, default = 0.
1805*0e209d39SAndroid Build Coastguard Worker     int64_t              fRegionLimit;     // End of the input region, default = input.length.
1806*0e209d39SAndroid Build Coastguard Worker 
1807*0e209d39SAndroid Build Coastguard Worker     int64_t              fAnchorStart;     // Region bounds for anchoring operations (^ or $).
1808*0e209d39SAndroid Build Coastguard Worker     int64_t              fAnchorLimit;     //   See useAnchoringBounds
1809*0e209d39SAndroid Build Coastguard Worker 
1810*0e209d39SAndroid Build Coastguard Worker     int64_t              fLookStart;       // Region bounds for look-ahead/behind and
1811*0e209d39SAndroid Build Coastguard Worker     int64_t              fLookLimit;       //   other boundary tests.  See
1812*0e209d39SAndroid Build Coastguard Worker                                            //   useTransparentBounds
1813*0e209d39SAndroid Build Coastguard Worker 
1814*0e209d39SAndroid Build Coastguard Worker     int64_t              fActiveStart;     // Currently active bounds for matching.
1815*0e209d39SAndroid Build Coastguard Worker     int64_t              fActiveLimit;     //   Usually the same as the region, but
1816*0e209d39SAndroid Build Coastguard Worker                                            //   changed to fLookStart/fLookLimit when
1817*0e209d39SAndroid Build Coastguard Worker                                            //   entering look-around regions.
1818*0e209d39SAndroid Build Coastguard Worker 
1819*0e209d39SAndroid Build Coastguard Worker     UBool                fTransparentBounds;  // True if using transparent bounds.
1820*0e209d39SAndroid Build Coastguard Worker     UBool                fAnchoringBounds; // True if using anchoring bounds.
1821*0e209d39SAndroid Build Coastguard Worker 
1822*0e209d39SAndroid Build Coastguard Worker     UBool                fMatch;           // True if the last attempted match was successful.
1823*0e209d39SAndroid Build Coastguard Worker     int64_t              fMatchStart;      // Position of the start of the most recent match.
1824*0e209d39SAndroid Build Coastguard Worker     int64_t              fMatchEnd;        // First position after the end of the most recent match.
1825*0e209d39SAndroid Build Coastguard Worker                                            //   Zero if no previous match, even when a region
1826*0e209d39SAndroid Build Coastguard Worker                                            //   is active.
1827*0e209d39SAndroid Build Coastguard Worker     int64_t              fLastMatchEnd;    // First position after the end of the previous match,
1828*0e209d39SAndroid Build Coastguard Worker                                            //   or -1 if there was no previous match.
1829*0e209d39SAndroid Build Coastguard Worker     int64_t              fAppendPosition;  // First position after the end of the previous
1830*0e209d39SAndroid Build Coastguard Worker                                            //   appendReplacement().  As described by the
1831*0e209d39SAndroid Build Coastguard Worker                                            //   JavaDoc for Java Matcher, where it is called
1832*0e209d39SAndroid Build Coastguard Worker                                            //   the "append position".
1833*0e209d39SAndroid Build Coastguard Worker     UBool                fHitEnd;          // True if the last match touched the end of input.
1834*0e209d39SAndroid Build Coastguard Worker     UBool                fRequireEnd;      // True if the last match required end-of-input
1835*0e209d39SAndroid Build Coastguard Worker                                            //    (matched $ or Z)
1836*0e209d39SAndroid Build Coastguard Worker 
1837*0e209d39SAndroid Build Coastguard Worker     UVector64           *fStack;           // Presumably the backtrack stack (see fFrameSize, fStackLimit) - confirm.
1838*0e209d39SAndroid Build Coastguard Worker     REStackFrame        *fFrame;           // After finding a match, the last active stack frame,
1839*0e209d39SAndroid Build Coastguard Worker                                            //   which will contain the capture group results.
1840*0e209d39SAndroid Build Coastguard Worker                                            //   NOT valid while the match engine is running.
1841*0e209d39SAndroid Build Coastguard Worker 
1842*0e209d39SAndroid Build Coastguard Worker     int64_t             *fData;            // Data area for use by the compiled pattern.
1843*0e209d39SAndroid Build Coastguard Worker     int64_t             fSmallData[8];     //   Fixed 8-element buffer; used for the data area if it's big enough.
1844*0e209d39SAndroid Build Coastguard Worker 
1845*0e209d39SAndroid Build Coastguard Worker     int32_t             fTimeLimit;        // Max time (in arbitrary steps) to let the
1846*0e209d39SAndroid Build Coastguard Worker                                            //   match engine run.  Zero for unlimited.
1847*0e209d39SAndroid Build Coastguard Worker 
1848*0e209d39SAndroid Build Coastguard Worker     int32_t             fTime;             // Match time, accumulates while matching.  Presumably checked against fTimeLimit - confirm.
1849*0e209d39SAndroid Build Coastguard Worker     int32_t             fTickCounter;      // Low bits counter for time.  Counts down StateSaves.
1850*0e209d39SAndroid Build Coastguard Worker                                            //   Kept separately from fTime to keep as much
1851*0e209d39SAndroid Build Coastguard Worker                                            //   code as possible out of the inline
1852*0e209d39SAndroid Build Coastguard Worker                                            //   StateSave function.
1853*0e209d39SAndroid Build Coastguard Worker 
1854*0e209d39SAndroid Build Coastguard Worker     int32_t             fStackLimit;       // Maximum memory size to use for the backtrack
1855*0e209d39SAndroid Build Coastguard Worker                                            //   stack, in bytes.  Zero for unlimited.
1856*0e209d39SAndroid Build Coastguard Worker 
1857*0e209d39SAndroid Build Coastguard Worker     URegexMatchCallback *fCallbackFn;       // Pointer to the match progress callback function.
1858*0e209d39SAndroid Build Coastguard Worker                                            //   nullptr if there is no callback.
1859*0e209d39SAndroid Build Coastguard Worker     const void         *fCallbackContext;  // User context pointer for the match callback function.
1860*0e209d39SAndroid Build Coastguard Worker 
1861*0e209d39SAndroid Build Coastguard Worker     URegexFindProgressCallback  *fFindProgressCallbackFn;  // Pointer to the find progress callback function.
1862*0e209d39SAndroid Build Coastguard Worker                                                            //   nullptr if there is no callback.
1863*0e209d39SAndroid Build Coastguard Worker     const void         *fFindProgressCallbackContext;      // User context pointer for the find progress callback function.
1864*0e209d39SAndroid Build Coastguard Worker 
1865*0e209d39SAndroid Build Coastguard Worker 
1866*0e209d39SAndroid Build Coastguard Worker     UBool               fInputUniStrMaybeMutable;  // Set when fInputText wraps a UnicodeString that may be mutable (kept for compatibility).
1867*0e209d39SAndroid Build Coastguard Worker 
1868*0e209d39SAndroid Build Coastguard Worker     UBool               fTraceDebug;       // Set true for debug tracing of the match engine.
1869*0e209d39SAndroid Build Coastguard Worker 
1870*0e209d39SAndroid Build Coastguard Worker     UErrorCode          fDeferredStatus;   // Saved error state that cannot be immediately
1871*0e209d39SAndroid Build Coastguard Worker                                            //   reported, or that permanently disables this matcher.
1872*0e209d39SAndroid Build Coastguard Worker 
1873*0e209d39SAndroid Build Coastguard Worker     BreakIterator       *fWordBreakItr;    // Presumably the word break iterator behind isUWordBoundary() - confirm.
1874*0e209d39SAndroid Build Coastguard Worker     BreakIterator       *fGCBreakItr;      // Presumably the grapheme cluster break iterator behind followingGCBoundary() - confirm.
1875*0e209d39SAndroid Build Coastguard Worker };
1876*0e209d39SAndroid Build Coastguard Worker 
1877*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_END
1878*0e209d39SAndroid Build Coastguard Worker #endif  // UCONFIG_NO_REGULAR_EXPRESSIONS
1879*0e209d39SAndroid Build Coastguard Worker 
1880*0e209d39SAndroid Build Coastguard Worker #endif /* U_SHOW_CPLUSPLUS_API */
1881*0e209d39SAndroid Build Coastguard Worker 
1882*0e209d39SAndroid Build Coastguard Worker #endif
1883