1*0e209d39SAndroid Build Coastguard Worker // © 2016 and later: Unicode, Inc. and others. 2*0e209d39SAndroid Build Coastguard Worker // License & terms of use: http://www.unicode.org/copyright.html 3*0e209d39SAndroid Build Coastguard Worker /* 4*0e209d39SAndroid Build Coastguard Worker ********************************************************************** 5*0e209d39SAndroid Build Coastguard Worker * Copyright (C) 2002-2016, International Business Machines 6*0e209d39SAndroid Build Coastguard Worker * Corporation and others. All Rights Reserved. 7*0e209d39SAndroid Build Coastguard Worker ********************************************************************** 8*0e209d39SAndroid Build Coastguard Worker * file name: regex.h 9*0e209d39SAndroid Build Coastguard Worker * encoding: UTF-8 10*0e209d39SAndroid Build Coastguard Worker * indentation:4 11*0e209d39SAndroid Build Coastguard Worker * 12*0e209d39SAndroid Build Coastguard Worker * created on: 2002oct22 13*0e209d39SAndroid Build Coastguard Worker * created by: Andy Heninger 14*0e209d39SAndroid Build Coastguard Worker * 15*0e209d39SAndroid Build Coastguard Worker * ICU Regular Expressions, API for C++ 16*0e209d39SAndroid Build Coastguard Worker */ 17*0e209d39SAndroid Build Coastguard Worker 18*0e209d39SAndroid Build Coastguard Worker #ifndef REGEX_H 19*0e209d39SAndroid Build Coastguard Worker #define REGEX_H 20*0e209d39SAndroid Build Coastguard Worker 21*0e209d39SAndroid Build Coastguard Worker //#define REGEX_DEBUG 22*0e209d39SAndroid Build Coastguard Worker 23*0e209d39SAndroid Build Coastguard Worker /** 24*0e209d39SAndroid Build Coastguard Worker * \file 25*0e209d39SAndroid Build Coastguard Worker * \brief C++ API: Regular Expressions 26*0e209d39SAndroid Build Coastguard Worker * 27*0e209d39SAndroid Build Coastguard Worker * The ICU API for processing regular expressions consists of two classes, 28*0e209d39SAndroid Build Coastguard Worker * `RegexPattern` and `RegexMatcher`. 
29*0e209d39SAndroid Build Coastguard Worker * `RegexPattern` objects represent a pre-processed, or compiled 30*0e209d39SAndroid Build Coastguard Worker * regular expression. They are created from a regular expression pattern string, 31*0e209d39SAndroid Build Coastguard Worker * and can be used to create `RegexMatcher` objects for the pattern. 32*0e209d39SAndroid Build Coastguard Worker * 33*0e209d39SAndroid Build Coastguard Worker * Class `RegexMatcher` bundles together a regular expression 34*0e209d39SAndroid Build Coastguard Worker * pattern and a target string to which the search pattern will be applied. 35*0e209d39SAndroid Build Coastguard Worker * `RegexMatcher` includes API for doing plain find or search 36*0e209d39SAndroid Build Coastguard Worker * operations, for search and replace operations, and for obtaining detailed 37*0e209d39SAndroid Build Coastguard Worker * information about bounds of a match. 38*0e209d39SAndroid Build Coastguard Worker * 39*0e209d39SAndroid Build Coastguard Worker * Note that by constructing `RegexMatcher` objects directly from regular 40*0e209d39SAndroid Build Coastguard Worker * expression pattern strings application code can be simplified and the explicit 41*0e209d39SAndroid Build Coastguard Worker * need for `RegexPattern` objects can usually be eliminated. 
42*0e209d39SAndroid Build Coastguard Worker * 43*0e209d39SAndroid Build Coastguard Worker */ 44*0e209d39SAndroid Build Coastguard Worker 45*0e209d39SAndroid Build Coastguard Worker #include "unicode/utypes.h" 46*0e209d39SAndroid Build Coastguard Worker 47*0e209d39SAndroid Build Coastguard Worker #if U_SHOW_CPLUSPLUS_API 48*0e209d39SAndroid Build Coastguard Worker 49*0e209d39SAndroid Build Coastguard Worker #if !UCONFIG_NO_REGULAR_EXPRESSIONS 50*0e209d39SAndroid Build Coastguard Worker 51*0e209d39SAndroid Build Coastguard Worker #include "unicode/uobject.h" 52*0e209d39SAndroid Build Coastguard Worker #include "unicode/unistr.h" 53*0e209d39SAndroid Build Coastguard Worker #include "unicode/utext.h" 54*0e209d39SAndroid Build Coastguard Worker #include "unicode/parseerr.h" 55*0e209d39SAndroid Build Coastguard Worker 56*0e209d39SAndroid Build Coastguard Worker #include "unicode/uregex.h" 57*0e209d39SAndroid Build Coastguard Worker 58*0e209d39SAndroid Build Coastguard Worker // Forward Declarations 59*0e209d39SAndroid Build Coastguard Worker 60*0e209d39SAndroid Build Coastguard Worker struct UHashtable; 61*0e209d39SAndroid Build Coastguard Worker 62*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_BEGIN 63*0e209d39SAndroid Build Coastguard Worker 64*0e209d39SAndroid Build Coastguard Worker struct Regex8BitSet; 65*0e209d39SAndroid Build Coastguard Worker class RegexCImpl; 66*0e209d39SAndroid Build Coastguard Worker class RegexMatcher; 67*0e209d39SAndroid Build Coastguard Worker class RegexPattern; 68*0e209d39SAndroid Build Coastguard Worker struct REStackFrame; 69*0e209d39SAndroid Build Coastguard Worker class BreakIterator; 70*0e209d39SAndroid Build Coastguard Worker class UnicodeSet; 71*0e209d39SAndroid Build Coastguard Worker class UVector; 72*0e209d39SAndroid Build Coastguard Worker class UVector32; 73*0e209d39SAndroid Build Coastguard Worker class UVector64; 74*0e209d39SAndroid Build Coastguard Worker 75*0e209d39SAndroid Build Coastguard Worker 
76*0e209d39SAndroid Build Coastguard Worker /** 77*0e209d39SAndroid Build Coastguard Worker * Class `RegexPattern` represents a compiled regular expression. It includes 78*0e209d39SAndroid Build Coastguard Worker * factory methods for creating a RegexPattern object from the source (string) form 79*0e209d39SAndroid Build Coastguard Worker * of a regular expression, methods for creating RegexMatchers that allow the pattern 80*0e209d39SAndroid Build Coastguard Worker * to be applied to input text, and a few convenience methods for simple common 81*0e209d39SAndroid Build Coastguard Worker * uses of regular expressions. 82*0e209d39SAndroid Build Coastguard Worker * 83*0e209d39SAndroid Build Coastguard Worker * Class RegexPattern is not intended to be subclassed. 84*0e209d39SAndroid Build Coastguard Worker * 85*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.4 86*0e209d39SAndroid Build Coastguard Worker */ 87*0e209d39SAndroid Build Coastguard Worker class U_I18N_API RegexPattern final : public UObject { 88*0e209d39SAndroid Build Coastguard Worker public: 89*0e209d39SAndroid Build Coastguard Worker 90*0e209d39SAndroid Build Coastguard Worker /** 91*0e209d39SAndroid Build Coastguard Worker * default constructor. Create a RegexPattern object that refers to no actual 92*0e209d39SAndroid Build Coastguard Worker * pattern. Not normally needed; RegexPattern objects are usually 93*0e209d39SAndroid Build Coastguard Worker * created using the factory method `compile()`. 94*0e209d39SAndroid Build Coastguard Worker * 95*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.4 96*0e209d39SAndroid Build Coastguard Worker */ 97*0e209d39SAndroid Build Coastguard Worker RegexPattern(); 98*0e209d39SAndroid Build Coastguard Worker 99*0e209d39SAndroid Build Coastguard Worker /** 100*0e209d39SAndroid Build Coastguard Worker * Copy Constructor. Create a new RegexPattern object that is equivalent 101*0e209d39SAndroid Build Coastguard Worker * to the source object. 
102*0e209d39SAndroid Build Coastguard Worker * @param source the pattern object to be copied. 103*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.4 104*0e209d39SAndroid Build Coastguard Worker */ 105*0e209d39SAndroid Build Coastguard Worker RegexPattern(const RegexPattern &source); 106*0e209d39SAndroid Build Coastguard Worker 107*0e209d39SAndroid Build Coastguard Worker /** 108*0e209d39SAndroid Build Coastguard Worker * Destructor. Note that a RegexPattern object must persist so long as any 109*0e209d39SAndroid Build Coastguard Worker * RegexMatcher objects that were created from the RegexPattern are active. 110*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.4 111*0e209d39SAndroid Build Coastguard Worker */ 112*0e209d39SAndroid Build Coastguard Worker virtual ~RegexPattern(); 113*0e209d39SAndroid Build Coastguard Worker 114*0e209d39SAndroid Build Coastguard Worker /** 115*0e209d39SAndroid Build Coastguard Worker * Comparison operator. Two RegexPattern objects are considered equal if they 116*0e209d39SAndroid Build Coastguard Worker * were constructed from identical source patterns using the same #URegexpFlag 117*0e209d39SAndroid Build Coastguard Worker * settings. 118*0e209d39SAndroid Build Coastguard Worker * @param that a RegexPattern object to compare with "this". 119*0e209d39SAndroid Build Coastguard Worker * @return true if the objects are equivalent. 120*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.4 121*0e209d39SAndroid Build Coastguard Worker */ 122*0e209d39SAndroid Build Coastguard Worker bool operator==(const RegexPattern& that) const; 123*0e209d39SAndroid Build Coastguard Worker 124*0e209d39SAndroid Build Coastguard Worker /** 125*0e209d39SAndroid Build Coastguard Worker * Comparison operator. Two RegexPattern objects are considered equal if they 126*0e209d39SAndroid Build Coastguard Worker * were constructed from identical source patterns using the same #URegexpFlag 127*0e209d39SAndroid Build Coastguard Worker * settings. 
128*0e209d39SAndroid Build Coastguard Worker * @param that a RegexPattern object to compare with "this". 129*0e209d39SAndroid Build Coastguard Worker * @return true if the objects are different. 130*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.4 131*0e209d39SAndroid Build Coastguard Worker */ 132*0e209d39SAndroid Build Coastguard Worker inline bool operator!=(const RegexPattern& that) const {return ! operator ==(that);} 133*0e209d39SAndroid Build Coastguard Worker 134*0e209d39SAndroid Build Coastguard Worker /** 135*0e209d39SAndroid Build Coastguard Worker * Assignment operator. After assignment, this RegexPattern will behave identically 136*0e209d39SAndroid Build Coastguard Worker * to the source object. 137*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.4 138*0e209d39SAndroid Build Coastguard Worker */ 139*0e209d39SAndroid Build Coastguard Worker RegexPattern &operator =(const RegexPattern &source); 140*0e209d39SAndroid Build Coastguard Worker 141*0e209d39SAndroid Build Coastguard Worker /** 142*0e209d39SAndroid Build Coastguard Worker * Create an exact copy of this RegexPattern object. Since RegexPattern is not 143*0e209d39SAndroid Build Coastguard Worker * intended to be subclassed, <code>clone()</code> and the copy construction are 144*0e209d39SAndroid Build Coastguard Worker * equivalent operations. 145*0e209d39SAndroid Build Coastguard Worker * @return the copy of this RegexPattern 146*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.4 147*0e209d39SAndroid Build Coastguard Worker */ 148*0e209d39SAndroid Build Coastguard Worker virtual RegexPattern *clone() const; 149*0e209d39SAndroid Build Coastguard Worker 150*0e209d39SAndroid Build Coastguard Worker 151*0e209d39SAndroid Build Coastguard Worker /** 152*0e209d39SAndroid Build Coastguard Worker * Compiles the regular expression in string form into a RegexPattern 153*0e209d39SAndroid Build Coastguard Worker * object. 
These compile methods, rather than the constructors, are the usual 154*0e209d39SAndroid Build Coastguard Worker * way that RegexPattern objects are created. 155*0e209d39SAndroid Build Coastguard Worker * 156*0e209d39SAndroid Build Coastguard Worker * Note that RegexPattern objects must not be deleted while RegexMatcher 157*0e209d39SAndroid Build Coastguard Worker * objects created from the pattern are active. RegexMatchers keep a pointer 158*0e209d39SAndroid Build Coastguard Worker * back to their pattern, so premature deletion of the pattern is a 159*0e209d39SAndroid Build Coastguard Worker * catastrophic error. 160*0e209d39SAndroid Build Coastguard Worker * 161*0e209d39SAndroid Build Coastguard Worker * All #URegexpFlag pattern match mode flags are set to their default values. 162*0e209d39SAndroid Build Coastguard Worker * 163*0e209d39SAndroid Build Coastguard Worker * Note that it is often more convenient to construct a RegexMatcher directly 164*0e209d39SAndroid Build Coastguard Worker * from a pattern string rather than separately compiling the pattern and 165*0e209d39SAndroid Build Coastguard Worker * then creating a RegexMatcher object from the pattern. 166*0e209d39SAndroid Build Coastguard Worker * 167*0e209d39SAndroid Build Coastguard Worker * @param regex The regular expression to be compiled. 168*0e209d39SAndroid Build Coastguard Worker * @param pe Receives the position (line and column nubers) of any error 169*0e209d39SAndroid Build Coastguard Worker * within the regular expression.) 170*0e209d39SAndroid Build Coastguard Worker * @param status A reference to a UErrorCode to receive any errors. 171*0e209d39SAndroid Build Coastguard Worker * @return A regexPattern object for the compiled pattern. 
172*0e209d39SAndroid Build Coastguard Worker * 173*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.4 174*0e209d39SAndroid Build Coastguard Worker */ 175*0e209d39SAndroid Build Coastguard Worker static RegexPattern * U_EXPORT2 compile( const UnicodeString ®ex, 176*0e209d39SAndroid Build Coastguard Worker UParseError &pe, 177*0e209d39SAndroid Build Coastguard Worker UErrorCode &status); 178*0e209d39SAndroid Build Coastguard Worker 179*0e209d39SAndroid Build Coastguard Worker /** 180*0e209d39SAndroid Build Coastguard Worker * Compiles the regular expression in string form into a RegexPattern 181*0e209d39SAndroid Build Coastguard Worker * object. These compile methods, rather than the constructors, are the usual 182*0e209d39SAndroid Build Coastguard Worker * way that RegexPattern objects are created. 183*0e209d39SAndroid Build Coastguard Worker * 184*0e209d39SAndroid Build Coastguard Worker * Note that RegexPattern objects must not be deleted while RegexMatcher 185*0e209d39SAndroid Build Coastguard Worker * objects created from the pattern are active. RegexMatchers keep a pointer 186*0e209d39SAndroid Build Coastguard Worker * back to their pattern, so premature deletion of the pattern is a 187*0e209d39SAndroid Build Coastguard Worker * catastrophic error. 188*0e209d39SAndroid Build Coastguard Worker * 189*0e209d39SAndroid Build Coastguard Worker * All #URegexpFlag pattern match mode flags are set to their default values. 190*0e209d39SAndroid Build Coastguard Worker * 191*0e209d39SAndroid Build Coastguard Worker * Note that it is often more convenient to construct a RegexMatcher directly 192*0e209d39SAndroid Build Coastguard Worker * from a pattern string rather than separately compiling the pattern and 193*0e209d39SAndroid Build Coastguard Worker * then creating a RegexMatcher object from the pattern. 194*0e209d39SAndroid Build Coastguard Worker * 195*0e209d39SAndroid Build Coastguard Worker * @param regex The regular expression to be compiled. 
Note, the text referred 196*0e209d39SAndroid Build Coastguard Worker * to by this UText must not be deleted during the lifetime of the 197*0e209d39SAndroid Build Coastguard Worker * RegexPattern object or any RegexMatcher object created from it. 198*0e209d39SAndroid Build Coastguard Worker * @param pe Receives the position (line and column numbers) of any error 199*0e209d39SAndroid Build Coastguard Worker * within the regular expression.) 200*0e209d39SAndroid Build Coastguard Worker * @param status A reference to a UErrorCode to receive any errors. 201*0e209d39SAndroid Build Coastguard Worker * @return A regexPattern object for the compiled pattern. 202*0e209d39SAndroid Build Coastguard Worker * 203*0e209d39SAndroid Build Coastguard Worker * @stable ICU 4.6 204*0e209d39SAndroid Build Coastguard Worker */ 205*0e209d39SAndroid Build Coastguard Worker static RegexPattern * U_EXPORT2 compile( UText *regex, 206*0e209d39SAndroid Build Coastguard Worker UParseError &pe, 207*0e209d39SAndroid Build Coastguard Worker UErrorCode &status); 208*0e209d39SAndroid Build Coastguard Worker 209*0e209d39SAndroid Build Coastguard Worker /** 210*0e209d39SAndroid Build Coastguard Worker * Compiles the regular expression in string form into a RegexPattern 211*0e209d39SAndroid Build Coastguard Worker * object using the specified #URegexpFlag match mode flags. These compile methods, 212*0e209d39SAndroid Build Coastguard Worker * rather than the constructors, are the usual way that RegexPattern objects 213*0e209d39SAndroid Build Coastguard Worker * are created. 214*0e209d39SAndroid Build Coastguard Worker * 215*0e209d39SAndroid Build Coastguard Worker * Note that RegexPattern objects must not be deleted while RegexMatcher 216*0e209d39SAndroid Build Coastguard Worker * objects created from the pattern are active. 
RegexMatchers keep a pointer 217*0e209d39SAndroid Build Coastguard Worker * back to their pattern, so premature deletion of the pattern is a 218*0e209d39SAndroid Build Coastguard Worker * catastrophic error. 219*0e209d39SAndroid Build Coastguard Worker * 220*0e209d39SAndroid Build Coastguard Worker * Note that it is often more convenient to construct a RegexMatcher directly 221*0e209d39SAndroid Build Coastguard Worker * from a pattern string instead of than separately compiling the pattern and 222*0e209d39SAndroid Build Coastguard Worker * then creating a RegexMatcher object from the pattern. 223*0e209d39SAndroid Build Coastguard Worker * 224*0e209d39SAndroid Build Coastguard Worker * @param regex The regular expression to be compiled. 225*0e209d39SAndroid Build Coastguard Worker * @param flags The #URegexpFlag match mode flags to be used, e.g. #UREGEX_CASE_INSENSITIVE. 226*0e209d39SAndroid Build Coastguard Worker * @param pe Receives the position (line and column numbers) of any error 227*0e209d39SAndroid Build Coastguard Worker * within the regular expression.) 228*0e209d39SAndroid Build Coastguard Worker * @param status A reference to a UErrorCode to receive any errors. 229*0e209d39SAndroid Build Coastguard Worker * @return A regexPattern object for the compiled pattern. 
230*0e209d39SAndroid Build Coastguard Worker * 231*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.4 232*0e209d39SAndroid Build Coastguard Worker */ 233*0e209d39SAndroid Build Coastguard Worker static RegexPattern * U_EXPORT2 compile( const UnicodeString ®ex, 234*0e209d39SAndroid Build Coastguard Worker uint32_t flags, 235*0e209d39SAndroid Build Coastguard Worker UParseError &pe, 236*0e209d39SAndroid Build Coastguard Worker UErrorCode &status); 237*0e209d39SAndroid Build Coastguard Worker 238*0e209d39SAndroid Build Coastguard Worker /** 239*0e209d39SAndroid Build Coastguard Worker * Compiles the regular expression in string form into a RegexPattern 240*0e209d39SAndroid Build Coastguard Worker * object using the specified #URegexpFlag match mode flags. These compile methods, 241*0e209d39SAndroid Build Coastguard Worker * rather than the constructors, are the usual way that RegexPattern objects 242*0e209d39SAndroid Build Coastguard Worker * are created. 243*0e209d39SAndroid Build Coastguard Worker * 244*0e209d39SAndroid Build Coastguard Worker * Note that RegexPattern objects must not be deleted while RegexMatcher 245*0e209d39SAndroid Build Coastguard Worker * objects created from the pattern are active. RegexMatchers keep a pointer 246*0e209d39SAndroid Build Coastguard Worker * back to their pattern, so premature deletion of the pattern is a 247*0e209d39SAndroid Build Coastguard Worker * catastrophic error. 248*0e209d39SAndroid Build Coastguard Worker * 249*0e209d39SAndroid Build Coastguard Worker * Note that it is often more convenient to construct a RegexMatcher directly 250*0e209d39SAndroid Build Coastguard Worker * from a pattern string instead of than separately compiling the pattern and 251*0e209d39SAndroid Build Coastguard Worker * then creating a RegexMatcher object from the pattern. 252*0e209d39SAndroid Build Coastguard Worker * 253*0e209d39SAndroid Build Coastguard Worker * @param regex The regular expression to be compiled. 
Note, the text referred 254*0e209d39SAndroid Build Coastguard Worker * to by this UText must not be deleted during the lifetime of the 255*0e209d39SAndroid Build Coastguard Worker * RegexPattern object or any RegexMatcher object created from it. 256*0e209d39SAndroid Build Coastguard Worker * @param flags The #URegexpFlag match mode flags to be used, e.g. #UREGEX_CASE_INSENSITIVE. 257*0e209d39SAndroid Build Coastguard Worker * @param pe Receives the position (line and column numbers) of any error 258*0e209d39SAndroid Build Coastguard Worker * within the regular expression.) 259*0e209d39SAndroid Build Coastguard Worker * @param status A reference to a UErrorCode to receive any errors. 260*0e209d39SAndroid Build Coastguard Worker * @return A regexPattern object for the compiled pattern. 261*0e209d39SAndroid Build Coastguard Worker * 262*0e209d39SAndroid Build Coastguard Worker * @stable ICU 4.6 263*0e209d39SAndroid Build Coastguard Worker */ 264*0e209d39SAndroid Build Coastguard Worker static RegexPattern * U_EXPORT2 compile( UText *regex, 265*0e209d39SAndroid Build Coastguard Worker uint32_t flags, 266*0e209d39SAndroid Build Coastguard Worker UParseError &pe, 267*0e209d39SAndroid Build Coastguard Worker UErrorCode &status); 268*0e209d39SAndroid Build Coastguard Worker 269*0e209d39SAndroid Build Coastguard Worker /** 270*0e209d39SAndroid Build Coastguard Worker * Compiles the regular expression in string form into a RegexPattern 271*0e209d39SAndroid Build Coastguard Worker * object using the specified #URegexpFlag match mode flags. These compile methods, 272*0e209d39SAndroid Build Coastguard Worker * rather than the constructors, are the usual way that RegexPattern objects 273*0e209d39SAndroid Build Coastguard Worker * are created. 
274*0e209d39SAndroid Build Coastguard Worker * 275*0e209d39SAndroid Build Coastguard Worker * Note that RegexPattern objects must not be deleted while RegexMatcher 276*0e209d39SAndroid Build Coastguard Worker * objects created from the pattern are active. RegexMatchers keep a pointer 277*0e209d39SAndroid Build Coastguard Worker * back to their pattern, so premature deletion of the pattern is a 278*0e209d39SAndroid Build Coastguard Worker * catastrophic error. 279*0e209d39SAndroid Build Coastguard Worker * 280*0e209d39SAndroid Build Coastguard Worker * Note that it is often more convenient to construct a RegexMatcher directly 281*0e209d39SAndroid Build Coastguard Worker * from a pattern string instead of than separately compiling the pattern and 282*0e209d39SAndroid Build Coastguard Worker * then creating a RegexMatcher object from the pattern. 283*0e209d39SAndroid Build Coastguard Worker * 284*0e209d39SAndroid Build Coastguard Worker * @param regex The regular expression to be compiled. 285*0e209d39SAndroid Build Coastguard Worker * @param flags The #URegexpFlag match mode flags to be used, e.g. #UREGEX_CASE_INSENSITIVE. 286*0e209d39SAndroid Build Coastguard Worker * @param status A reference to a UErrorCode to receive any errors. 287*0e209d39SAndroid Build Coastguard Worker * @return A regexPattern object for the compiled pattern. 
288*0e209d39SAndroid Build Coastguard Worker * 289*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.6 290*0e209d39SAndroid Build Coastguard Worker */ 291*0e209d39SAndroid Build Coastguard Worker static RegexPattern * U_EXPORT2 compile( const UnicodeString ®ex, 292*0e209d39SAndroid Build Coastguard Worker uint32_t flags, 293*0e209d39SAndroid Build Coastguard Worker UErrorCode &status); 294*0e209d39SAndroid Build Coastguard Worker 295*0e209d39SAndroid Build Coastguard Worker /** 296*0e209d39SAndroid Build Coastguard Worker * Compiles the regular expression in string form into a RegexPattern 297*0e209d39SAndroid Build Coastguard Worker * object using the specified #URegexpFlag match mode flags. These compile methods, 298*0e209d39SAndroid Build Coastguard Worker * rather than the constructors, are the usual way that RegexPattern objects 299*0e209d39SAndroid Build Coastguard Worker * are created. 300*0e209d39SAndroid Build Coastguard Worker * 301*0e209d39SAndroid Build Coastguard Worker * Note that RegexPattern objects must not be deleted while RegexMatcher 302*0e209d39SAndroid Build Coastguard Worker * objects created from the pattern are active. RegexMatchers keep a pointer 303*0e209d39SAndroid Build Coastguard Worker * back to their pattern, so premature deletion of the pattern is a 304*0e209d39SAndroid Build Coastguard Worker * catastrophic error. 305*0e209d39SAndroid Build Coastguard Worker * 306*0e209d39SAndroid Build Coastguard Worker * Note that it is often more convenient to construct a RegexMatcher directly 307*0e209d39SAndroid Build Coastguard Worker * from a pattern string instead of than separately compiling the pattern and 308*0e209d39SAndroid Build Coastguard Worker * then creating a RegexMatcher object from the pattern. 309*0e209d39SAndroid Build Coastguard Worker * 310*0e209d39SAndroid Build Coastguard Worker * @param regex The regular expression to be compiled. 
Note, the text referred 311*0e209d39SAndroid Build Coastguard Worker * to by this UText must not be deleted during the lifetime of the 312*0e209d39SAndroid Build Coastguard Worker * RegexPattern object or any RegexMatcher object created from it. 313*0e209d39SAndroid Build Coastguard Worker * @param flags The #URegexpFlag match mode flags to be used, e.g. #UREGEX_CASE_INSENSITIVE. 314*0e209d39SAndroid Build Coastguard Worker * @param status A reference to a UErrorCode to receive any errors. 315*0e209d39SAndroid Build Coastguard Worker * @return A regexPattern object for the compiled pattern. 316*0e209d39SAndroid Build Coastguard Worker * 317*0e209d39SAndroid Build Coastguard Worker * @stable ICU 4.6 318*0e209d39SAndroid Build Coastguard Worker */ 319*0e209d39SAndroid Build Coastguard Worker static RegexPattern * U_EXPORT2 compile( UText *regex, 320*0e209d39SAndroid Build Coastguard Worker uint32_t flags, 321*0e209d39SAndroid Build Coastguard Worker UErrorCode &status); 322*0e209d39SAndroid Build Coastguard Worker 323*0e209d39SAndroid Build Coastguard Worker /** 324*0e209d39SAndroid Build Coastguard Worker * Get the #URegexpFlag match mode flags that were used when compiling this pattern. 325*0e209d39SAndroid Build Coastguard Worker * @return the #URegexpFlag match mode flags 326*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.4 327*0e209d39SAndroid Build Coastguard Worker */ 328*0e209d39SAndroid Build Coastguard Worker virtual uint32_t flags() const; 329*0e209d39SAndroid Build Coastguard Worker 330*0e209d39SAndroid Build Coastguard Worker /** 331*0e209d39SAndroid Build Coastguard Worker * Creates a RegexMatcher that will match the given input against this pattern. The 332*0e209d39SAndroid Build Coastguard Worker * RegexMatcher can then be used to perform match, find or replace operations 333*0e209d39SAndroid Build Coastguard Worker * on the input. 
Note that a RegexPattern object must not be deleted while 334*0e209d39SAndroid Build Coastguard Worker * RegexMatchers created from it still exist and might possibly be used again. 335*0e209d39SAndroid Build Coastguard Worker * 336*0e209d39SAndroid Build Coastguard Worker * The matcher will retain a reference to the supplied input string, and all regexp 337*0e209d39SAndroid Build Coastguard Worker * pattern matching operations happen directly on this original string. It is 338*0e209d39SAndroid Build Coastguard Worker * critical that the string not be altered or deleted before use by the regular 339*0e209d39SAndroid Build Coastguard Worker * expression operations is complete. 340*0e209d39SAndroid Build Coastguard Worker * 341*0e209d39SAndroid Build Coastguard Worker * @param input The input string to which the regular expression will be applied. 342*0e209d39SAndroid Build Coastguard Worker * @param status A reference to a UErrorCode to receive any errors. 343*0e209d39SAndroid Build Coastguard Worker * @return A RegexMatcher object for this pattern and input. 344*0e209d39SAndroid Build Coastguard Worker * 345*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.4 346*0e209d39SAndroid Build Coastguard Worker */ 347*0e209d39SAndroid Build Coastguard Worker virtual RegexMatcher *matcher(const UnicodeString &input, 348*0e209d39SAndroid Build Coastguard Worker UErrorCode &status) const; 349*0e209d39SAndroid Build Coastguard Worker 350*0e209d39SAndroid Build Coastguard Worker private: 351*0e209d39SAndroid Build Coastguard Worker /** 352*0e209d39SAndroid Build Coastguard Worker * Cause a compilation error if an application accidentally attempts to 353*0e209d39SAndroid Build Coastguard Worker * create a matcher with a (char16_t *) string as input rather than 354*0e209d39SAndroid Build Coastguard Worker * a UnicodeString. Avoids a dangling reference to a temporary string. 
355*0e209d39SAndroid Build Coastguard Worker * 356*0e209d39SAndroid Build Coastguard Worker * To efficiently work with char16_t *strings, wrap the data in a UnicodeString 357*0e209d39SAndroid Build Coastguard Worker * using one of the aliasing constructors, such as 358*0e209d39SAndroid Build Coastguard Worker * `UnicodeString(UBool isTerminated, const char16_t *text, int32_t textLength);` 359*0e209d39SAndroid Build Coastguard Worker * or in a UText, using 360*0e209d39SAndroid Build Coastguard Worker * `utext_openUChars(UText *ut, const char16_t *text, int64_t textLength, UErrorCode *status);` 361*0e209d39SAndroid Build Coastguard Worker * 362*0e209d39SAndroid Build Coastguard Worker */ 363*0e209d39SAndroid Build Coastguard Worker RegexMatcher *matcher(const char16_t *input, 364*0e209d39SAndroid Build Coastguard Worker UErrorCode &status) const = delete; 365*0e209d39SAndroid Build Coastguard Worker public: 366*0e209d39SAndroid Build Coastguard Worker 367*0e209d39SAndroid Build Coastguard Worker 368*0e209d39SAndroid Build Coastguard Worker /** 369*0e209d39SAndroid Build Coastguard Worker * Creates a RegexMatcher that will match against this pattern. The 370*0e209d39SAndroid Build Coastguard Worker * RegexMatcher can be used to perform match, find or replace operations. 371*0e209d39SAndroid Build Coastguard Worker * Note that a RegexPattern object must not be deleted while 372*0e209d39SAndroid Build Coastguard Worker * RegexMatchers created from it still exist and might possibly be used again. 373*0e209d39SAndroid Build Coastguard Worker * 374*0e209d39SAndroid Build Coastguard Worker * @param status A reference to a UErrorCode to receive any errors. 375*0e209d39SAndroid Build Coastguard Worker * @return A RegexMatcher object for this pattern and input. 
376*0e209d39SAndroid Build Coastguard Worker * 377*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.6 378*0e209d39SAndroid Build Coastguard Worker */ 379*0e209d39SAndroid Build Coastguard Worker virtual RegexMatcher *matcher(UErrorCode &status) const; 380*0e209d39SAndroid Build Coastguard Worker 381*0e209d39SAndroid Build Coastguard Worker 382*0e209d39SAndroid Build Coastguard Worker /** 383*0e209d39SAndroid Build Coastguard Worker * Test whether a string matches a regular expression. This convenience function 384*0e209d39SAndroid Build Coastguard Worker * both compiles the regular expression and applies it in a single operation. 385*0e209d39SAndroid Build Coastguard Worker * Note that if the same pattern needs to be applied repeatedly, this method will be 386*0e209d39SAndroid Build Coastguard Worker * less efficient than creating and reusing a RegexMatcher object. 387*0e209d39SAndroid Build Coastguard Worker * 388*0e209d39SAndroid Build Coastguard Worker * @param regex The regular expression 389*0e209d39SAndroid Build Coastguard Worker * @param input The string data to be matched 390*0e209d39SAndroid Build Coastguard Worker * @param pe Receives the position of any syntax errors within the regular expression 391*0e209d39SAndroid Build Coastguard Worker * @param status A reference to a UErrorCode to receive any errors. 392*0e209d39SAndroid Build Coastguard Worker * @return True if the regular expression exactly matches the full input string. 
393*0e209d39SAndroid Build Coastguard Worker * 394*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.4 395*0e209d39SAndroid Build Coastguard Worker */ 396*0e209d39SAndroid Build Coastguard Worker static UBool U_EXPORT2 matches(const UnicodeString &regex, 397*0e209d39SAndroid Build Coastguard Worker const UnicodeString &input, 398*0e209d39SAndroid Build Coastguard Worker UParseError &pe, 399*0e209d39SAndroid Build Coastguard Worker UErrorCode &status); 400*0e209d39SAndroid Build Coastguard Worker 401*0e209d39SAndroid Build Coastguard Worker /** 402*0e209d39SAndroid Build Coastguard Worker * Test whether a string matches a regular expression. This convenience function 403*0e209d39SAndroid Build Coastguard Worker * both compiles the regular expression and applies it in a single operation. 404*0e209d39SAndroid Build Coastguard Worker * Note that if the same pattern needs to be applied repeatedly, this method will be 405*0e209d39SAndroid Build Coastguard Worker * less efficient than creating and reusing a RegexMatcher object. 406*0e209d39SAndroid Build Coastguard Worker * 407*0e209d39SAndroid Build Coastguard Worker * @param regex The regular expression 408*0e209d39SAndroid Build Coastguard Worker * @param input The string data to be matched 409*0e209d39SAndroid Build Coastguard Worker * @param pe Receives the position of any syntax errors within the regular expression 410*0e209d39SAndroid Build Coastguard Worker * @param status A reference to a UErrorCode to receive any errors. 411*0e209d39SAndroid Build Coastguard Worker * @return True if the regular expression exactly matches the full input string.
412*0e209d39SAndroid Build Coastguard Worker * 413*0e209d39SAndroid Build Coastguard Worker * @stable ICU 4.6 414*0e209d39SAndroid Build Coastguard Worker */ 415*0e209d39SAndroid Build Coastguard Worker static UBool U_EXPORT2 matches(UText *regex, 416*0e209d39SAndroid Build Coastguard Worker UText *input, 417*0e209d39SAndroid Build Coastguard Worker UParseError &pe, 418*0e209d39SAndroid Build Coastguard Worker UErrorCode &status); 419*0e209d39SAndroid Build Coastguard Worker 420*0e209d39SAndroid Build Coastguard Worker /** 421*0e209d39SAndroid Build Coastguard Worker * Returns the regular expression from which this pattern was compiled. This method will work 422*0e209d39SAndroid Build Coastguard Worker * even if the pattern was compiled from a UText. 423*0e209d39SAndroid Build Coastguard Worker * 424*0e209d39SAndroid Build Coastguard Worker * Note: If the pattern was originally compiled from a UText, and that UText was modified, 425*0e209d39SAndroid Build Coastguard Worker * the returned string may no longer reflect the RegexPattern object. * @return The regular expression from which this pattern was compiled, as a UnicodeString. 426*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.4 427*0e209d39SAndroid Build Coastguard Worker */ 428*0e209d39SAndroid Build Coastguard Worker virtual UnicodeString pattern() const; 429*0e209d39SAndroid Build Coastguard Worker 430*0e209d39SAndroid Build Coastguard Worker 431*0e209d39SAndroid Build Coastguard Worker /** 432*0e209d39SAndroid Build Coastguard Worker * Returns the regular expression from which this pattern was compiled. This method will work 433*0e209d39SAndroid Build Coastguard Worker * even if the pattern was compiled from a UnicodeString. 434*0e209d39SAndroid Build Coastguard Worker * 435*0e209d39SAndroid Build Coastguard Worker * Note: This is the original input, not a clone.
If the pattern was originally compiled from a 436*0e209d39SAndroid Build Coastguard Worker * UText, and that UText was modified, the returned UText may no longer reflect the RegexPattern 437*0e209d39SAndroid Build Coastguard Worker * object. * @param status A reference to a UErrorCode to receive any errors. * @return The pattern text as a UText. 438*0e209d39SAndroid Build Coastguard Worker * 439*0e209d39SAndroid Build Coastguard Worker * @stable ICU 4.6 440*0e209d39SAndroid Build Coastguard Worker */ 441*0e209d39SAndroid Build Coastguard Worker virtual UText *patternText(UErrorCode &status) const; 442*0e209d39SAndroid Build Coastguard Worker 443*0e209d39SAndroid Build Coastguard Worker 444*0e209d39SAndroid Build Coastguard Worker /** 445*0e209d39SAndroid Build Coastguard Worker * Get the group number corresponding to a named capture group. 446*0e209d39SAndroid Build Coastguard Worker * The returned number can be used with any function that accesses 447*0e209d39SAndroid Build Coastguard Worker * capture groups by number. 448*0e209d39SAndroid Build Coastguard Worker * 449*0e209d39SAndroid Build Coastguard Worker * The function returns an error status if the specified name does not 450*0e209d39SAndroid Build Coastguard Worker * appear in the pattern. 451*0e209d39SAndroid Build Coastguard Worker * 452*0e209d39SAndroid Build Coastguard Worker * @param groupName The capture group name. 453*0e209d39SAndroid Build Coastguard Worker * @param status A UErrorCode to receive any errors. * @return The group number corresponding to groupName. 454*0e209d39SAndroid Build Coastguard Worker * 455*0e209d39SAndroid Build Coastguard Worker * @stable ICU 55 456*0e209d39SAndroid Build Coastguard Worker */ 457*0e209d39SAndroid Build Coastguard Worker virtual int32_t groupNumberFromName(const UnicodeString &groupName, UErrorCode &status) const; 458*0e209d39SAndroid Build Coastguard Worker 459*0e209d39SAndroid Build Coastguard Worker 460*0e209d39SAndroid Build Coastguard Worker /** 461*0e209d39SAndroid Build Coastguard Worker * Get the group number corresponding to a named capture group.
462*0e209d39SAndroid Build Coastguard Worker * The returned number can be used with any function that accesses 463*0e209d39SAndroid Build Coastguard Worker * capture groups by number. 464*0e209d39SAndroid Build Coastguard Worker * 465*0e209d39SAndroid Build Coastguard Worker * The function returns an error status if the specified name does not 466*0e209d39SAndroid Build Coastguard Worker * appear in the pattern. 467*0e209d39SAndroid Build Coastguard Worker * 468*0e209d39SAndroid Build Coastguard Worker * @param groupName The capture group name, 469*0e209d39SAndroid Build Coastguard Worker * platform invariant characters only. 470*0e209d39SAndroid Build Coastguard Worker * @param nameLength The length of the name, or -1 if the name is 471*0e209d39SAndroid Build Coastguard Worker * nul-terminated. 472*0e209d39SAndroid Build Coastguard Worker * @param status A UErrorCode to receive any errors. * @return The group number corresponding to groupName. 473*0e209d39SAndroid Build Coastguard Worker * 474*0e209d39SAndroid Build Coastguard Worker * @stable ICU 55 475*0e209d39SAndroid Build Coastguard Worker */ 476*0e209d39SAndroid Build Coastguard Worker virtual int32_t groupNumberFromName(const char *groupName, int32_t nameLength, UErrorCode &status) const; 477*0e209d39SAndroid Build Coastguard Worker 478*0e209d39SAndroid Build Coastguard Worker 479*0e209d39SAndroid Build Coastguard Worker /** 480*0e209d39SAndroid Build Coastguard Worker * Split a string into fields. Somewhat like split() from Perl or Java. 481*0e209d39SAndroid Build Coastguard Worker * Pattern matches identify delimiters that separate the input 482*0e209d39SAndroid Build Coastguard Worker * into fields. The input data between the delimiters becomes the 483*0e209d39SAndroid Build Coastguard Worker * fields themselves.
484*0e209d39SAndroid Build Coastguard Worker * 485*0e209d39SAndroid Build Coastguard Worker * If the delimiter pattern includes capture groups, the captured text will 486*0e209d39SAndroid Build Coastguard Worker * also appear in the destination array of output strings, interspersed 487*0e209d39SAndroid Build Coastguard Worker * with the fields. This is similar to Perl, but differs from Java, 488*0e209d39SAndroid Build Coastguard Worker * which ignores the presence of capture groups in the pattern. 489*0e209d39SAndroid Build Coastguard Worker * 490*0e209d39SAndroid Build Coastguard Worker * Trailing empty fields will always be returned, assuming sufficient 491*0e209d39SAndroid Build Coastguard Worker * destination capacity. This differs from the default behavior for Java 492*0e209d39SAndroid Build Coastguard Worker * and Perl where trailing empty fields are not returned. 493*0e209d39SAndroid Build Coastguard Worker * 494*0e209d39SAndroid Build Coastguard Worker * The number of strings produced by the split operation is returned. 495*0e209d39SAndroid Build Coastguard Worker * This count includes the strings from capture groups in the delimiter pattern. 496*0e209d39SAndroid Build Coastguard Worker * This behavior differs from Java, which ignores capture groups. 497*0e209d39SAndroid Build Coastguard Worker * 498*0e209d39SAndroid Build Coastguard Worker * For the best performance on split() operations, 499*0e209d39SAndroid Build Coastguard Worker * <code>RegexMatcher::split</code> is preferable to this function. 500*0e209d39SAndroid Build Coastguard Worker * 501*0e209d39SAndroid Build Coastguard Worker * @param input The string to be split into fields. The field delimiters 502*0e209d39SAndroid Build Coastguard Worker * match the pattern (in the "this" object) 503*0e209d39SAndroid Build Coastguard Worker * @param dest An array of UnicodeStrings to receive the results of the split.
504*0e209d39SAndroid Build Coastguard Worker * This is an array of actual UnicodeString objects, not an 505*0e209d39SAndroid Build Coastguard Worker * array of pointers to strings. Local (stack based) arrays can 506*0e209d39SAndroid Build Coastguard Worker * work well here. 507*0e209d39SAndroid Build Coastguard Worker * @param destCapacity The number of elements in the destination array. 508*0e209d39SAndroid Build Coastguard Worker * If the number of fields found is less than destCapacity, the 509*0e209d39SAndroid Build Coastguard Worker * extra strings in the destination array are not altered. 510*0e209d39SAndroid Build Coastguard Worker * If the number of destination strings is less than the number 511*0e209d39SAndroid Build Coastguard Worker * of fields, the trailing part of the input string, including any 512*0e209d39SAndroid Build Coastguard Worker * field delimiters, is placed in the last destination string. 513*0e209d39SAndroid Build Coastguard Worker * @param status A reference to a UErrorCode to receive any errors. 514*0e209d39SAndroid Build Coastguard Worker * @return The number of fields into which the input string was split. 515*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.4 516*0e209d39SAndroid Build Coastguard Worker */ 517*0e209d39SAndroid Build Coastguard Worker virtual int32_t split(const UnicodeString &input, 518*0e209d39SAndroid Build Coastguard Worker UnicodeString dest[], 519*0e209d39SAndroid Build Coastguard Worker int32_t destCapacity, 520*0e209d39SAndroid Build Coastguard Worker UErrorCode &status) const; 521*0e209d39SAndroid Build Coastguard Worker 522*0e209d39SAndroid Build Coastguard Worker 523*0e209d39SAndroid Build Coastguard Worker /** 524*0e209d39SAndroid Build Coastguard Worker * Split a string into fields. Somewhat like %split() from Perl or Java. 525*0e209d39SAndroid Build Coastguard Worker * Pattern matches identify delimiters that separate the input 526*0e209d39SAndroid Build Coastguard Worker * into fields.
The input data between the delimiters becomes the 527*0e209d39SAndroid Build Coastguard Worker * fields themselves. 528*0e209d39SAndroid Build Coastguard Worker * 529*0e209d39SAndroid Build Coastguard Worker * If the delimiter pattern includes capture groups, the captured text will 530*0e209d39SAndroid Build Coastguard Worker * also appear in the destination array of output strings, interspersed 531*0e209d39SAndroid Build Coastguard Worker * with the fields. This is similar to Perl, but differs from Java, 532*0e209d39SAndroid Build Coastguard Worker * which ignores the presence of capture groups in the pattern. 533*0e209d39SAndroid Build Coastguard Worker * 534*0e209d39SAndroid Build Coastguard Worker * Trailing empty fields will always be returned, assuming sufficient 535*0e209d39SAndroid Build Coastguard Worker * destination capacity. This differs from the default behavior for Java 536*0e209d39SAndroid Build Coastguard Worker * and Perl where trailing empty fields are not returned. 537*0e209d39SAndroid Build Coastguard Worker * 538*0e209d39SAndroid Build Coastguard Worker * The number of strings produced by the split operation is returned. 539*0e209d39SAndroid Build Coastguard Worker * This count includes the strings from capture groups in the delimiter pattern. 540*0e209d39SAndroid Build Coastguard Worker * This behavior differs from Java, which ignores capture groups. 541*0e209d39SAndroid Build Coastguard Worker * 542*0e209d39SAndroid Build Coastguard Worker * For the best performance on split() operations, 543*0e209d39SAndroid Build Coastguard Worker * `RegexMatcher::split()` is preferable to this function. 544*0e209d39SAndroid Build Coastguard Worker * 545*0e209d39SAndroid Build Coastguard Worker * @param input The string to be split into fields.
The field delimiters 546*0e209d39SAndroid Build Coastguard Worker * match the pattern (in the "this" object) 547*0e209d39SAndroid Build Coastguard Worker * @param dest An array of mutable UText structs to receive the results of the split. 548*0e209d39SAndroid Build Coastguard Worker * If a field is nullptr, a new UText is allocated to contain the results for 549*0e209d39SAndroid Build Coastguard Worker * that field. This new UText is not guaranteed to be mutable. 550*0e209d39SAndroid Build Coastguard Worker * @param destCapacity The number of elements in the destination array. 551*0e209d39SAndroid Build Coastguard Worker * If the number of fields found is less than destCapacity, the 552*0e209d39SAndroid Build Coastguard Worker * extra strings in the destination array are not altered. 553*0e209d39SAndroid Build Coastguard Worker * If the number of destination strings is less than the number 554*0e209d39SAndroid Build Coastguard Worker * of fields, the trailing part of the input string, including any 555*0e209d39SAndroid Build Coastguard Worker * field delimiters, is placed in the last destination string. 556*0e209d39SAndroid Build Coastguard Worker * @param status A reference to a UErrorCode to receive any errors. 557*0e209d39SAndroid Build Coastguard Worker * @return The number of destination strings used.
558*0e209d39SAndroid Build Coastguard Worker * 559*0e209d39SAndroid Build Coastguard Worker * @stable ICU 4.6 560*0e209d39SAndroid Build Coastguard Worker */ 561*0e209d39SAndroid Build Coastguard Worker virtual int32_t split(UText *input, 562*0e209d39SAndroid Build Coastguard Worker UText *dest[], 563*0e209d39SAndroid Build Coastguard Worker int32_t destCapacity, 564*0e209d39SAndroid Build Coastguard Worker UErrorCode &status) const; 565*0e209d39SAndroid Build Coastguard Worker 566*0e209d39SAndroid Build Coastguard Worker 567*0e209d39SAndroid Build Coastguard Worker /** 568*0e209d39SAndroid Build Coastguard Worker * ICU "poor man's RTTI", returns a UClassID for the actual class. 569*0e209d39SAndroid Build Coastguard Worker * 570*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.4 571*0e209d39SAndroid Build Coastguard Worker */ 572*0e209d39SAndroid Build Coastguard Worker virtual UClassID getDynamicClassID() const override; 573*0e209d39SAndroid Build Coastguard Worker 574*0e209d39SAndroid Build Coastguard Worker /** 575*0e209d39SAndroid Build Coastguard Worker * ICU "poor man's RTTI", returns a UClassID for this class. 576*0e209d39SAndroid Build Coastguard Worker * 577*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.4 578*0e209d39SAndroid Build Coastguard Worker */ 579*0e209d39SAndroid Build Coastguard Worker static UClassID U_EXPORT2 getStaticClassID(); 580*0e209d39SAndroid Build Coastguard Worker 581*0e209d39SAndroid Build Coastguard Worker private: 582*0e209d39SAndroid Build Coastguard Worker // 583*0e209d39SAndroid Build Coastguard Worker // Implementation Data 584*0e209d39SAndroid Build Coastguard Worker // 585*0e209d39SAndroid Build Coastguard Worker UText *fPattern; // The original pattern string. 586*0e209d39SAndroid Build Coastguard Worker UnicodeString *fPatternString; // The original pattern UnicodeString if relevant 587*0e209d39SAndroid Build Coastguard Worker uint32_t fFlags; // The flags used when compiling the pattern.
588*0e209d39SAndroid Build Coastguard Worker // 589*0e209d39SAndroid Build Coastguard Worker UVector64 *fCompiledPat; // The compiled pattern p-code. 590*0e209d39SAndroid Build Coastguard Worker UnicodeString fLiteralText; // Any literal string data from the pattern, 591*0e209d39SAndroid Build Coastguard Worker // after un-escaping, for use during the match. 592*0e209d39SAndroid Build Coastguard Worker 593*0e209d39SAndroid Build Coastguard Worker UVector *fSets; // Any UnicodeSets referenced from the pattern. 594*0e209d39SAndroid Build Coastguard Worker Regex8BitSet *fSets8; // (and fast sets for latin-1 range.) 595*0e209d39SAndroid Build Coastguard Worker 596*0e209d39SAndroid Build Coastguard Worker 597*0e209d39SAndroid Build Coastguard Worker UErrorCode fDeferredStatus; // status if some prior error has left this 598*0e209d39SAndroid Build Coastguard Worker // RegexPattern in an unusable state. 599*0e209d39SAndroid Build Coastguard Worker 600*0e209d39SAndroid Build Coastguard Worker int32_t fMinMatchLen; // Minimum Match Length. All matches will have length 601*0e209d39SAndroid Build Coastguard Worker // >= this value. For some patterns, this calculated 602*0e209d39SAndroid Build Coastguard Worker // value may be less than the true shortest 603*0e209d39SAndroid Build Coastguard Worker // possible match. 604*0e209d39SAndroid Build Coastguard Worker 605*0e209d39SAndroid Build Coastguard Worker int32_t fFrameSize; // Size of a state stack frame in the 606*0e209d39SAndroid Build Coastguard Worker // execution engine. 607*0e209d39SAndroid Build Coastguard Worker 608*0e209d39SAndroid Build Coastguard Worker int32_t fDataSize; // The size of the data needed by the pattern that 609*0e209d39SAndroid Build Coastguard Worker // does not go on the state stack, but has just 610*0e209d39SAndroid Build Coastguard Worker // a single copy per matcher.
611*0e209d39SAndroid Build Coastguard Worker 612*0e209d39SAndroid Build Coastguard Worker UVector32 *fGroupMap; // Map from capture group number to position of 613*0e209d39SAndroid Build Coastguard Worker // the group's variables in the matcher stack frame. 614*0e209d39SAndroid Build Coastguard Worker 615*0e209d39SAndroid Build Coastguard Worker int32_t fStartType; // Info on how a match must start. 616*0e209d39SAndroid Build Coastguard Worker int32_t fInitialStringIdx; // NOTE(review): presumably the index of a required initial string within fLiteralText, paired with fInitialStringLen - confirm against the compiler. 617*0e209d39SAndroid Build Coastguard Worker int32_t fInitialStringLen; 618*0e209d39SAndroid Build Coastguard Worker UnicodeSet *fInitialChars; 619*0e209d39SAndroid Build Coastguard Worker UChar32 fInitialChar; 620*0e209d39SAndroid Build Coastguard Worker Regex8BitSet *fInitialChars8; 621*0e209d39SAndroid Build Coastguard Worker UBool fNeedsAltInput; 622*0e209d39SAndroid Build Coastguard Worker 623*0e209d39SAndroid Build Coastguard Worker UHashtable *fNamedCaptureMap; // Map from capture group names to numbers. 624*0e209d39SAndroid Build Coastguard Worker 625*0e209d39SAndroid Build Coastguard Worker friend class RegexCompile; 626*0e209d39SAndroid Build Coastguard Worker friend class RegexMatcher; 627*0e209d39SAndroid Build Coastguard Worker friend class RegexCImpl; 628*0e209d39SAndroid Build Coastguard Worker 629*0e209d39SAndroid Build Coastguard Worker // 630*0e209d39SAndroid Build Coastguard Worker // Implementation Methods 631*0e209d39SAndroid Build Coastguard Worker // 632*0e209d39SAndroid Build Coastguard Worker void init(); // Common initialization, for use by constructors. 633*0e209d39SAndroid Build Coastguard Worker bool initNamedCaptureMap(); // Lazy init for fNamedCaptureMap.
634*0e209d39SAndroid Build Coastguard Worker void zap(); // Common cleanup 635*0e209d39SAndroid Build Coastguard Worker 636*0e209d39SAndroid Build Coastguard Worker void dumpOp(int32_t index) const; 637*0e209d39SAndroid Build Coastguard Worker 638*0e209d39SAndroid Build Coastguard Worker public: 639*0e209d39SAndroid Build Coastguard Worker #ifndef U_HIDE_INTERNAL_API 640*0e209d39SAndroid Build Coastguard Worker /** 641*0e209d39SAndroid Build Coastguard Worker * Dump a compiled pattern. Internal debug function. 642*0e209d39SAndroid Build Coastguard Worker * @internal 643*0e209d39SAndroid Build Coastguard Worker */ 644*0e209d39SAndroid Build Coastguard Worker void dumpPattern() const; 645*0e209d39SAndroid Build Coastguard Worker #endif /* U_HIDE_INTERNAL_API */ 646*0e209d39SAndroid Build Coastguard Worker }; 647*0e209d39SAndroid Build Coastguard Worker 648*0e209d39SAndroid Build Coastguard Worker 649*0e209d39SAndroid Build Coastguard Worker 650*0e209d39SAndroid Build Coastguard Worker /** 651*0e209d39SAndroid Build Coastguard Worker * class RegexMatcher bundles together a regular expression pattern and 652*0e209d39SAndroid Build Coastguard Worker * input text to which the expression can be applied. It includes methods 653*0e209d39SAndroid Build Coastguard Worker * for testing for matches, and for find and replace operations.
654*0e209d39SAndroid Build Coastguard Worker * 655*0e209d39SAndroid Build Coastguard Worker * <p>Class RegexMatcher is not intended to be subclassed.</p> 656*0e209d39SAndroid Build Coastguard Worker * 657*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.4 658*0e209d39SAndroid Build Coastguard Worker */ 659*0e209d39SAndroid Build Coastguard Worker class U_I18N_API RegexMatcher final : public UObject { 660*0e209d39SAndroid Build Coastguard Worker public: 661*0e209d39SAndroid Build Coastguard Worker 662*0e209d39SAndroid Build Coastguard Worker /** 663*0e209d39SAndroid Build Coastguard Worker * Construct a RegexMatcher for a regular expression. 664*0e209d39SAndroid Build Coastguard Worker * This is a convenience method that avoids the need to explicitly create 665*0e209d39SAndroid Build Coastguard Worker * a RegexPattern object. Note that if several RegexMatchers need to be 666*0e209d39SAndroid Build Coastguard Worker * created for the same expression, it will be more efficient to 667*0e209d39SAndroid Build Coastguard Worker * separately create and cache a RegexPattern object, and use 668*0e209d39SAndroid Build Coastguard Worker * its matcher() method to create the RegexMatcher objects. 669*0e209d39SAndroid Build Coastguard Worker * 670*0e209d39SAndroid Build Coastguard Worker * @param regexp The Regular Expression to be compiled. 671*0e209d39SAndroid Build Coastguard Worker * @param flags #URegexpFlag options, such as #UREGEX_CASE_INSENSITIVE. 672*0e209d39SAndroid Build Coastguard Worker * @param status Any errors are reported by setting this UErrorCode variable.
673*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.6 674*0e209d39SAndroid Build Coastguard Worker */ 675*0e209d39SAndroid Build Coastguard Worker RegexMatcher(const UnicodeString &regexp, uint32_t flags, UErrorCode &status); 676*0e209d39SAndroid Build Coastguard Worker 677*0e209d39SAndroid Build Coastguard Worker /** 678*0e209d39SAndroid Build Coastguard Worker * Construct a RegexMatcher for a regular expression. 679*0e209d39SAndroid Build Coastguard Worker * This is a convenience method that avoids the need to explicitly create 680*0e209d39SAndroid Build Coastguard Worker * a RegexPattern object. Note that if several RegexMatchers need to be 681*0e209d39SAndroid Build Coastguard Worker * created for the same expression, it will be more efficient to 682*0e209d39SAndroid Build Coastguard Worker * separately create and cache a RegexPattern object, and use 683*0e209d39SAndroid Build Coastguard Worker * its matcher() method to create the RegexMatcher objects. 684*0e209d39SAndroid Build Coastguard Worker * 685*0e209d39SAndroid Build Coastguard Worker * @param regexp The regular expression to be compiled. 686*0e209d39SAndroid Build Coastguard Worker * @param flags #URegexpFlag options, such as #UREGEX_CASE_INSENSITIVE. 687*0e209d39SAndroid Build Coastguard Worker * @param status Any errors are reported by setting this UErrorCode variable. 688*0e209d39SAndroid Build Coastguard Worker * 689*0e209d39SAndroid Build Coastguard Worker * @stable ICU 4.6 690*0e209d39SAndroid Build Coastguard Worker */ 691*0e209d39SAndroid Build Coastguard Worker RegexMatcher(UText *regexp, uint32_t flags, UErrorCode &status); 692*0e209d39SAndroid Build Coastguard Worker 693*0e209d39SAndroid Build Coastguard Worker /** 694*0e209d39SAndroid Build Coastguard Worker * Construct a RegexMatcher for a regular expression.
695*0e209d39SAndroid Build Coastguard Worker * This is a convenience method that avoids the need to explicitly create 696*0e209d39SAndroid Build Coastguard Worker * a RegexPattern object. Note that if several RegexMatchers need to be 697*0e209d39SAndroid Build Coastguard Worker * created for the same expression, it will be more efficient to 698*0e209d39SAndroid Build Coastguard Worker * separately create and cache a RegexPattern object, and use 699*0e209d39SAndroid Build Coastguard Worker * its matcher() method to create the RegexMatcher objects. 700*0e209d39SAndroid Build Coastguard Worker * 701*0e209d39SAndroid Build Coastguard Worker * The matcher will retain a reference to the supplied input string, and all regexp 702*0e209d39SAndroid Build Coastguard Worker * pattern matching operations happen directly on the original string. It is 703*0e209d39SAndroid Build Coastguard Worker * critical that the string not be altered or deleted before use by the regular 704*0e209d39SAndroid Build Coastguard Worker * expression operations is complete. 705*0e209d39SAndroid Build Coastguard Worker * 706*0e209d39SAndroid Build Coastguard Worker * @param regexp The Regular Expression to be compiled. 707*0e209d39SAndroid Build Coastguard Worker * @param input The string to match. The matcher retains a reference to the 708*0e209d39SAndroid Build Coastguard Worker * caller's string; no copy is made. 709*0e209d39SAndroid Build Coastguard Worker * @param flags #URegexpFlag options, such as #UREGEX_CASE_INSENSITIVE. 710*0e209d39SAndroid Build Coastguard Worker * @param status Any errors are reported by setting this UErrorCode variable.
711*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.6 712*0e209d39SAndroid Build Coastguard Worker */ 713*0e209d39SAndroid Build Coastguard Worker RegexMatcher(const UnicodeString ®exp, const UnicodeString &input, 714*0e209d39SAndroid Build Coastguard Worker uint32_t flags, UErrorCode &status); 715*0e209d39SAndroid Build Coastguard Worker 716*0e209d39SAndroid Build Coastguard Worker /** 717*0e209d39SAndroid Build Coastguard Worker * Construct a RegexMatcher for a regular expression. 718*0e209d39SAndroid Build Coastguard Worker * This is a convenience method that avoids the need to explicitly create 719*0e209d39SAndroid Build Coastguard Worker * a RegexPattern object. Note that if several RegexMatchers need to be 720*0e209d39SAndroid Build Coastguard Worker * created for the same expression, it will be more efficient to 721*0e209d39SAndroid Build Coastguard Worker * separately create and cache a RegexPattern object, and use 722*0e209d39SAndroid Build Coastguard Worker * its matcher() method to create the RegexMatcher objects. 723*0e209d39SAndroid Build Coastguard Worker * 724*0e209d39SAndroid Build Coastguard Worker * The matcher will make a shallow clone of the supplied input text, and all regexp 725*0e209d39SAndroid Build Coastguard Worker * pattern matching operations happen on this clone. While read-only operations on 726*0e209d39SAndroid Build Coastguard Worker * the supplied text are permitted, it is critical that the underlying string not be 727*0e209d39SAndroid Build Coastguard Worker * altered or deleted before use by the regular expression operations is complete. 728*0e209d39SAndroid Build Coastguard Worker * 729*0e209d39SAndroid Build Coastguard Worker * @param regexp The Regular Expression to be compiled. 730*0e209d39SAndroid Build Coastguard Worker * @param input The string to match. The matcher retains a shallow clone of the text. 
731*0e209d39SAndroid Build Coastguard Worker * @param flags #URegexpFlag options, such as #UREGEX_CASE_INSENSITIVE. 732*0e209d39SAndroid Build Coastguard Worker * @param status Any errors are reported by setting this UErrorCode variable. 733*0e209d39SAndroid Build Coastguard Worker * 734*0e209d39SAndroid Build Coastguard Worker * @stable ICU 4.6 735*0e209d39SAndroid Build Coastguard Worker */ 736*0e209d39SAndroid Build Coastguard Worker RegexMatcher(UText *regexp, UText *input, 737*0e209d39SAndroid Build Coastguard Worker uint32_t flags, UErrorCode &status); 738*0e209d39SAndroid Build Coastguard Worker 739*0e209d39SAndroid Build Coastguard Worker private: 740*0e209d39SAndroid Build Coastguard Worker /** 741*0e209d39SAndroid Build Coastguard Worker * Cause a compilation error if an application accidentally attempts to 742*0e209d39SAndroid Build Coastguard Worker * create a matcher with a (char16_t *) string as input rather than 743*0e209d39SAndroid Build Coastguard Worker * a UnicodeString. Avoids a dangling reference to a temporary string. 
744*0e209d39SAndroid Build Coastguard Worker * 745*0e209d39SAndroid Build Coastguard Worker * To efficiently work with char16_t *strings, wrap the data in a UnicodeString 746*0e209d39SAndroid Build Coastguard Worker * using one of the aliasing constructors, such as 747*0e209d39SAndroid Build Coastguard Worker * `UnicodeString(UBool isTerminated, const char16_t *text, int32_t textLength);` 748*0e209d39SAndroid Build Coastguard Worker * or in a UText, using 749*0e209d39SAndroid Build Coastguard Worker * `utext_openUChars(UText *ut, const char16_t *text, int64_t textLength, UErrorCode *status);` 750*0e209d39SAndroid Build Coastguard Worker */ 751*0e209d39SAndroid Build Coastguard Worker RegexMatcher(const UnicodeString ®exp, const char16_t *input, 752*0e209d39SAndroid Build Coastguard Worker uint32_t flags, UErrorCode &status) = delete; 753*0e209d39SAndroid Build Coastguard Worker public: 754*0e209d39SAndroid Build Coastguard Worker 755*0e209d39SAndroid Build Coastguard Worker 756*0e209d39SAndroid Build Coastguard Worker /** 757*0e209d39SAndroid Build Coastguard Worker * Destructor. 758*0e209d39SAndroid Build Coastguard Worker * 759*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.4 760*0e209d39SAndroid Build Coastguard Worker */ 761*0e209d39SAndroid Build Coastguard Worker virtual ~RegexMatcher(); 762*0e209d39SAndroid Build Coastguard Worker 763*0e209d39SAndroid Build Coastguard Worker 764*0e209d39SAndroid Build Coastguard Worker /** 765*0e209d39SAndroid Build Coastguard Worker * Attempts to match the entire input region against the pattern. 766*0e209d39SAndroid Build Coastguard Worker * @param status A reference to a UErrorCode to receive any errors. 
767*0e209d39SAndroid Build Coastguard Worker * @return true if there is a match 768*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.4 769*0e209d39SAndroid Build Coastguard Worker */ 770*0e209d39SAndroid Build Coastguard Worker virtual UBool matches(UErrorCode &status); 771*0e209d39SAndroid Build Coastguard Worker 772*0e209d39SAndroid Build Coastguard Worker 773*0e209d39SAndroid Build Coastguard Worker /** 774*0e209d39SAndroid Build Coastguard Worker * Resets the matcher, then attempts to match the input beginning 775*0e209d39SAndroid Build Coastguard Worker * at the specified startIndex, and extending to the end of the input. 776*0e209d39SAndroid Build Coastguard Worker * The input region is reset to include the entire input string. 777*0e209d39SAndroid Build Coastguard Worker * A successful match must extend to the end of the input. 778*0e209d39SAndroid Build Coastguard Worker * @param startIndex The input string (native) index at which to begin matching. 779*0e209d39SAndroid Build Coastguard Worker * @param status A reference to a UErrorCode to receive any errors. 780*0e209d39SAndroid Build Coastguard Worker * @return true if there is a match 781*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.8 782*0e209d39SAndroid Build Coastguard Worker */ 783*0e209d39SAndroid Build Coastguard Worker virtual UBool matches(int64_t startIndex, UErrorCode &status); 784*0e209d39SAndroid Build Coastguard Worker 785*0e209d39SAndroid Build Coastguard Worker 786*0e209d39SAndroid Build Coastguard Worker /** 787*0e209d39SAndroid Build Coastguard Worker * Attempts to match the input string, starting from the beginning of the region, 788*0e209d39SAndroid Build Coastguard Worker * against the pattern. Like the matches() method, this function 789*0e209d39SAndroid Build Coastguard Worker * always starts at the beginning of the input region; 790*0e209d39SAndroid Build Coastguard Worker * unlike that function, it does not require that the entire region be matched. 
791*0e209d39SAndroid Build Coastguard Worker * 792*0e209d39SAndroid Build Coastguard Worker * If the match succeeds then more information can be obtained via the start(), 793*0e209d39SAndroid Build Coastguard Worker * end(), and group() functions. 794*0e209d39SAndroid Build Coastguard Worker * 795*0e209d39SAndroid Build Coastguard Worker * @param status A reference to a UErrorCode to receive any errors. 796*0e209d39SAndroid Build Coastguard Worker * @return true if there is a match at the start of the input string. 797*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.4 798*0e209d39SAndroid Build Coastguard Worker */ 799*0e209d39SAndroid Build Coastguard Worker virtual UBool lookingAt(UErrorCode &status); 800*0e209d39SAndroid Build Coastguard Worker 801*0e209d39SAndroid Build Coastguard Worker 802*0e209d39SAndroid Build Coastguard Worker /** 803*0e209d39SAndroid Build Coastguard Worker * Attempts to match the input string, starting from the specified index, against the pattern. 804*0e209d39SAndroid Build Coastguard Worker * The match may be of any length, and is not required to extend to the end 805*0e209d39SAndroid Build Coastguard Worker * of the input string. Contrast with matches(). 806*0e209d39SAndroid Build Coastguard Worker * 807*0e209d39SAndroid Build Coastguard Worker * If the match succeeds then more information can be obtained via the start(), 808*0e209d39SAndroid Build Coastguard Worker * end(), and group() functions. 809*0e209d39SAndroid Build Coastguard Worker * 810*0e209d39SAndroid Build Coastguard Worker * @param startIndex The input string (native) index at which to begin matching. 811*0e209d39SAndroid Build Coastguard Worker * @param status A reference to a UErrorCode to receive any errors. 812*0e209d39SAndroid Build Coastguard Worker * @return true if there is a match.
813*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.8 814*0e209d39SAndroid Build Coastguard Worker */ 815*0e209d39SAndroid Build Coastguard Worker virtual UBool lookingAt(int64_t startIndex, UErrorCode &status); 816*0e209d39SAndroid Build Coastguard Worker 817*0e209d39SAndroid Build Coastguard Worker 818*0e209d39SAndroid Build Coastguard Worker /** 819*0e209d39SAndroid Build Coastguard Worker * Find the next pattern match in the input string. 820*0e209d39SAndroid Build Coastguard Worker * The find begins searching the input at the location following the end of 821*0e209d39SAndroid Build Coastguard Worker * the previous match, or at the start of the string if there is no previous match. 822*0e209d39SAndroid Build Coastguard Worker * If a match is found, `start()`, `end()` and `group()` 823*0e209d39SAndroid Build Coastguard Worker * will provide more information regarding the match. 824*0e209d39SAndroid Build Coastguard Worker * Note that if the input string is changed by the application, 825*0e209d39SAndroid Build Coastguard Worker * use find(startPos, status) instead of find(), because the saved starting 826*0e209d39SAndroid Build Coastguard Worker * position may not be valid with the altered input string. 827*0e209d39SAndroid Build Coastguard Worker * @return true if a match is found. 828*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.4 829*0e209d39SAndroid Build Coastguard Worker */ 830*0e209d39SAndroid Build Coastguard Worker virtual UBool find(); 831*0e209d39SAndroid Build Coastguard Worker 832*0e209d39SAndroid Build Coastguard Worker 833*0e209d39SAndroid Build Coastguard Worker /** 834*0e209d39SAndroid Build Coastguard Worker * Find the next pattern match in the input string. 835*0e209d39SAndroid Build Coastguard Worker * The find begins searching the input at the location following the end of 836*0e209d39SAndroid Build Coastguard Worker * the previous match, or at the start of the string if there is no previous match. 
837*0e209d39SAndroid Build Coastguard Worker * If a match is found, `start()`, `end()` and `group()` 838*0e209d39SAndroid Build Coastguard Worker * will provide more information regarding the match. 839*0e209d39SAndroid Build Coastguard Worker * 840*0e209d39SAndroid Build Coastguard Worker * Note that if the input string is changed by the application, 841*0e209d39SAndroid Build Coastguard Worker * use find(startPos, status) instead of find(), because the saved starting 842*0e209d39SAndroid Build Coastguard Worker * position may not be valid with the altered input string. 843*0e209d39SAndroid Build Coastguard Worker * @param status A reference to a UErrorCode to receive any errors. 844*0e209d39SAndroid Build Coastguard Worker * @return true if a match is found. 845*0e209d39SAndroid Build Coastguard Worker * @stable ICU 55 846*0e209d39SAndroid Build Coastguard Worker */ 847*0e209d39SAndroid Build Coastguard Worker virtual UBool find(UErrorCode &status); 848*0e209d39SAndroid Build Coastguard Worker 849*0e209d39SAndroid Build Coastguard Worker /** 850*0e209d39SAndroid Build Coastguard Worker * Resets this RegexMatcher and then attempts to find the next substring of the 851*0e209d39SAndroid Build Coastguard Worker * input string that matches the pattern, starting at the specified index. 852*0e209d39SAndroid Build Coastguard Worker * 853*0e209d39SAndroid Build Coastguard Worker * @param start The (native) index in the input string to begin the search. 854*0e209d39SAndroid Build Coastguard Worker * @param status A reference to a UErrorCode to receive any errors. 855*0e209d39SAndroid Build Coastguard Worker * @return true if a match is found. 
856*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.4 857*0e209d39SAndroid Build Coastguard Worker */ 858*0e209d39SAndroid Build Coastguard Worker virtual UBool find(int64_t start, UErrorCode &status); 859*0e209d39SAndroid Build Coastguard Worker 860*0e209d39SAndroid Build Coastguard Worker 861*0e209d39SAndroid Build Coastguard Worker /** 862*0e209d39SAndroid Build Coastguard Worker * Returns a string containing the text matched by the previous match. 863*0e209d39SAndroid Build Coastguard Worker * If the pattern can match an empty string, an empty string may be returned. 864*0e209d39SAndroid Build Coastguard Worker * @param status A reference to a UErrorCode to receive any errors. 865*0e209d39SAndroid Build Coastguard Worker * Possible errors are U_REGEX_INVALID_STATE if no match 866*0e209d39SAndroid Build Coastguard Worker * has been attempted or the last match failed. 867*0e209d39SAndroid Build Coastguard Worker * @return a string containing the matched input text. 868*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.4 869*0e209d39SAndroid Build Coastguard Worker */ 870*0e209d39SAndroid Build Coastguard Worker virtual UnicodeString group(UErrorCode &status) const; 871*0e209d39SAndroid Build Coastguard Worker 872*0e209d39SAndroid Build Coastguard Worker 873*0e209d39SAndroid Build Coastguard Worker /** 874*0e209d39SAndroid Build Coastguard Worker * Returns a string containing the text captured by the given group 875*0e209d39SAndroid Build Coastguard Worker * during the previous match operation. Group(0) is the entire match. 876*0e209d39SAndroid Build Coastguard Worker * 877*0e209d39SAndroid Build Coastguard Worker * A zero length string is returned both for capture groups that did not 878*0e209d39SAndroid Build Coastguard Worker * participate in the match and for actual zero length matches. 
879*0e209d39SAndroid Build Coastguard Worker * To distinguish between these two cases use the function start(), 880*0e209d39SAndroid Build Coastguard Worker * which returns -1 for non-participating groups. 881*0e209d39SAndroid Build Coastguard Worker * 882*0e209d39SAndroid Build Coastguard Worker * @param groupNum the capture group number 883*0e209d39SAndroid Build Coastguard Worker * @param status A reference to a UErrorCode to receive any errors. 884*0e209d39SAndroid Build Coastguard Worker * Possible errors are U_REGEX_INVALID_STATE if no match 885*0e209d39SAndroid Build Coastguard Worker * has been attempted or the last match failed and 886*0e209d39SAndroid Build Coastguard Worker * U_INDEX_OUTOFBOUNDS_ERROR for a bad capture group number. 887*0e209d39SAndroid Build Coastguard Worker * @return the captured text 888*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.4 889*0e209d39SAndroid Build Coastguard Worker */ 890*0e209d39SAndroid Build Coastguard Worker virtual UnicodeString group(int32_t groupNum, UErrorCode &status) const; 891*0e209d39SAndroid Build Coastguard Worker 892*0e209d39SAndroid Build Coastguard Worker /** 893*0e209d39SAndroid Build Coastguard Worker * Returns the number of capturing groups in this matcher's pattern. 894*0e209d39SAndroid Build Coastguard Worker * @return the number of capture groups 895*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.4 896*0e209d39SAndroid Build Coastguard Worker */ 897*0e209d39SAndroid Build Coastguard Worker virtual int32_t groupCount() const; 898*0e209d39SAndroid Build Coastguard Worker 899*0e209d39SAndroid Build Coastguard Worker 900*0e209d39SAndroid Build Coastguard Worker /** 901*0e209d39SAndroid Build Coastguard Worker * Returns a shallow clone of the entire live input string with the UText current native index 902*0e209d39SAndroid Build Coastguard Worker * set to the beginning of the requested group. 
903*0e209d39SAndroid Build Coastguard Worker * 904*0e209d39SAndroid Build Coastguard Worker * @param dest The UText into which the input should be cloned, or nullptr to create a new UText 905*0e209d39SAndroid Build Coastguard Worker * @param group_len A reference to receive the length of the desired capture group 906*0e209d39SAndroid Build Coastguard Worker * @param status A reference to a UErrorCode to receive any errors. 907*0e209d39SAndroid Build Coastguard Worker * Possible errors are U_REGEX_INVALID_STATE if no match 908*0e209d39SAndroid Build Coastguard Worker * has been attempted or the last match failed and 909*0e209d39SAndroid Build Coastguard Worker * U_INDEX_OUTOFBOUNDS_ERROR for a bad capture group number. 910*0e209d39SAndroid Build Coastguard Worker * @return dest if non-nullptr, a shallow copy of the input text otherwise 911*0e209d39SAndroid Build Coastguard Worker * 912*0e209d39SAndroid Build Coastguard Worker * @stable ICU 4.6 913*0e209d39SAndroid Build Coastguard Worker */ 914*0e209d39SAndroid Build Coastguard Worker virtual UText *group(UText *dest, int64_t &group_len, UErrorCode &status) const; 915*0e209d39SAndroid Build Coastguard Worker 916*0e209d39SAndroid Build Coastguard Worker /** 917*0e209d39SAndroid Build Coastguard Worker * Returns a shallow clone of the entire live input string with the UText current native index 918*0e209d39SAndroid Build Coastguard Worker * set to the beginning of the requested group. 919*0e209d39SAndroid Build Coastguard Worker * 920*0e209d39SAndroid Build Coastguard Worker * A group length of zero is returned both for capture groups that did not 921*0e209d39SAndroid Build Coastguard Worker * participate in the match and for actual zero length matches. 922*0e209d39SAndroid Build Coastguard Worker * To distinguish between these two cases use the function start(), 923*0e209d39SAndroid Build Coastguard Worker * which returns -1 for non-participating groups. 
924*0e209d39SAndroid Build Coastguard Worker * 925*0e209d39SAndroid Build Coastguard Worker * @param groupNum The capture group number. 926*0e209d39SAndroid Build Coastguard Worker * @param dest The UText into which the input should be cloned, or nullptr to create a new UText. 927*0e209d39SAndroid Build Coastguard Worker * @param group_len A reference to receive the length of the desired capture group 928*0e209d39SAndroid Build Coastguard Worker * @param status A reference to a UErrorCode to receive any errors. 929*0e209d39SAndroid Build Coastguard Worker * Possible errors are U_REGEX_INVALID_STATE if no match 930*0e209d39SAndroid Build Coastguard Worker * has been attempted or the last match failed and 931*0e209d39SAndroid Build Coastguard Worker * U_INDEX_OUTOFBOUNDS_ERROR for a bad capture group number. 932*0e209d39SAndroid Build Coastguard Worker * @return dest if non-nullptr, a shallow copy of the input text otherwise 933*0e209d39SAndroid Build Coastguard Worker * 934*0e209d39SAndroid Build Coastguard Worker * @stable ICU 4.6 935*0e209d39SAndroid Build Coastguard Worker */ 936*0e209d39SAndroid Build Coastguard Worker virtual UText *group(int32_t groupNum, UText *dest, int64_t &group_len, UErrorCode &status) const; 937*0e209d39SAndroid Build Coastguard Worker 938*0e209d39SAndroid Build Coastguard Worker /** 939*0e209d39SAndroid Build Coastguard Worker * Returns the index in the input string of the start of the text matched 940*0e209d39SAndroid Build Coastguard Worker * during the previous match operation. 941*0e209d39SAndroid Build Coastguard Worker * @param status a reference to a UErrorCode to receive any errors. 942*0e209d39SAndroid Build Coastguard Worker * @return The (native) position in the input string of the start of the last match. 
943*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.4 944*0e209d39SAndroid Build Coastguard Worker */ 945*0e209d39SAndroid Build Coastguard Worker virtual int32_t start(UErrorCode &status) const; 946*0e209d39SAndroid Build Coastguard Worker 947*0e209d39SAndroid Build Coastguard Worker /** 948*0e209d39SAndroid Build Coastguard Worker * Returns the index in the input string of the start of the text matched 949*0e209d39SAndroid Build Coastguard Worker * during the previous match operation. 950*0e209d39SAndroid Build Coastguard Worker * @param status a reference to a UErrorCode to receive any errors. 951*0e209d39SAndroid Build Coastguard Worker * @return The (native) position in the input string of the start of the last match. 952*0e209d39SAndroid Build Coastguard Worker * @stable ICU 4.6 953*0e209d39SAndroid Build Coastguard Worker */ 954*0e209d39SAndroid Build Coastguard Worker virtual int64_t start64(UErrorCode &status) const; 955*0e209d39SAndroid Build Coastguard Worker 956*0e209d39SAndroid Build Coastguard Worker 957*0e209d39SAndroid Build Coastguard Worker /** 958*0e209d39SAndroid Build Coastguard Worker * Returns the index in the input string of the start of the text matched by the 959*0e209d39SAndroid Build Coastguard Worker * specified capture group during the previous match operation. Return -1 if 960*0e209d39SAndroid Build Coastguard Worker * the capture group exists in the pattern, but was not part of the last match. 961*0e209d39SAndroid Build Coastguard Worker * 962*0e209d39SAndroid Build Coastguard Worker * @param group the capture group number 963*0e209d39SAndroid Build Coastguard Worker * @param status A reference to a UErrorCode to receive any errors. 
Possible 964*0e209d39SAndroid Build Coastguard Worker * errors are U_REGEX_INVALID_STATE if no match has been 965*0e209d39SAndroid Build Coastguard Worker * attempted or the last match failed, and 966*0e209d39SAndroid Build Coastguard Worker * U_INDEX_OUTOFBOUNDS_ERROR for a bad capture group number 967*0e209d39SAndroid Build Coastguard Worker * @return the (native) start position of substring matched by the specified group. 968*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.4 969*0e209d39SAndroid Build Coastguard Worker */ 970*0e209d39SAndroid Build Coastguard Worker virtual int32_t start(int32_t group, UErrorCode &status) const; 971*0e209d39SAndroid Build Coastguard Worker 972*0e209d39SAndroid Build Coastguard Worker /** 973*0e209d39SAndroid Build Coastguard Worker * Returns the index in the input string of the start of the text matched by the 974*0e209d39SAndroid Build Coastguard Worker * specified capture group during the previous match operation. Return -1 if 975*0e209d39SAndroid Build Coastguard Worker * the capture group exists in the pattern, but was not part of the last match. 976*0e209d39SAndroid Build Coastguard Worker * 977*0e209d39SAndroid Build Coastguard Worker * @param group the capture group number. 978*0e209d39SAndroid Build Coastguard Worker * @param status A reference to a UErrorCode to receive any errors. Possible 979*0e209d39SAndroid Build Coastguard Worker * errors are U_REGEX_INVALID_STATE if no match has been 980*0e209d39SAndroid Build Coastguard Worker * attempted or the last match failed, and 981*0e209d39SAndroid Build Coastguard Worker * U_INDEX_OUTOFBOUNDS_ERROR for a bad capture group number. 982*0e209d39SAndroid Build Coastguard Worker * @return the (native) start position of substring matched by the specified group. 
983*0e209d39SAndroid Build Coastguard Worker * @stable ICU 4.6 984*0e209d39SAndroid Build Coastguard Worker */ 985*0e209d39SAndroid Build Coastguard Worker virtual int64_t start64(int32_t group, UErrorCode &status) const; 986*0e209d39SAndroid Build Coastguard Worker 987*0e209d39SAndroid Build Coastguard Worker /** 988*0e209d39SAndroid Build Coastguard Worker * Returns the index in the input string of the first character following the 989*0e209d39SAndroid Build Coastguard Worker * text matched during the previous match operation. 990*0e209d39SAndroid Build Coastguard Worker * 991*0e209d39SAndroid Build Coastguard Worker * @param status A reference to a UErrorCode to receive any errors. Possible 992*0e209d39SAndroid Build Coastguard Worker * errors are U_REGEX_INVALID_STATE if no match has been 993*0e209d39SAndroid Build Coastguard Worker * attempted or the last match failed. 994*0e209d39SAndroid Build Coastguard Worker * @return the index of the last character matched, plus one. 995*0e209d39SAndroid Build Coastguard Worker * The index value returned is a native index, corresponding to 996*0e209d39SAndroid Build Coastguard Worker * code units for the underlying encoding type, for example, 997*0e209d39SAndroid Build Coastguard Worker * a byte index for UTF-8. 998*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.4 999*0e209d39SAndroid Build Coastguard Worker */ 1000*0e209d39SAndroid Build Coastguard Worker virtual int32_t end(UErrorCode &status) const; 1001*0e209d39SAndroid Build Coastguard Worker 1002*0e209d39SAndroid Build Coastguard Worker /** 1003*0e209d39SAndroid Build Coastguard Worker * Returns the index in the input string of the first character following the 1004*0e209d39SAndroid Build Coastguard Worker * text matched during the previous match operation. 1005*0e209d39SAndroid Build Coastguard Worker * 1006*0e209d39SAndroid Build Coastguard Worker * @param status A reference to a UErrorCode to receive any errors. 
Possible 1007*0e209d39SAndroid Build Coastguard Worker * errors are U_REGEX_INVALID_STATE if no match has been 1008*0e209d39SAndroid Build Coastguard Worker * attempted or the last match failed. 1009*0e209d39SAndroid Build Coastguard Worker * @return the index of the last character matched, plus one. 1010*0e209d39SAndroid Build Coastguard Worker * The index value returned is a native index, corresponding to 1011*0e209d39SAndroid Build Coastguard Worker * code units for the underlying encoding type, for example, 1012*0e209d39SAndroid Build Coastguard Worker * a byte index for UTF-8. 1013*0e209d39SAndroid Build Coastguard Worker * @stable ICU 4.6 1014*0e209d39SAndroid Build Coastguard Worker */ 1015*0e209d39SAndroid Build Coastguard Worker virtual int64_t end64(UErrorCode &status) const; 1016*0e209d39SAndroid Build Coastguard Worker 1017*0e209d39SAndroid Build Coastguard Worker 1018*0e209d39SAndroid Build Coastguard Worker /** 1019*0e209d39SAndroid Build Coastguard Worker * Returns the index in the input string of the character following the 1020*0e209d39SAndroid Build Coastguard Worker * text matched by the specified capture group during the previous match operation. 1021*0e209d39SAndroid Build Coastguard Worker * 1022*0e209d39SAndroid Build Coastguard Worker * @param group the capture group number 1023*0e209d39SAndroid Build Coastguard Worker * @param status A reference to a UErrorCode to receive any errors. Possible 1024*0e209d39SAndroid Build Coastguard Worker * errors are U_REGEX_INVALID_STATE if no match has been 1025*0e209d39SAndroid Build Coastguard Worker * attempted or the last match failed and 1026*0e209d39SAndroid Build Coastguard Worker * U_INDEX_OUTOFBOUNDS_ERROR for a bad capture group number 1027*0e209d39SAndroid Build Coastguard Worker * @return the index of the first character following the text 1028*0e209d39SAndroid Build Coastguard Worker * captured by the specified group during the previous match operation. 
1029*0e209d39SAndroid Build Coastguard Worker * Return -1 if the capture group exists in the pattern but was not part of the match. 1030*0e209d39SAndroid Build Coastguard Worker * The index value returned is a native index, corresponding to 1031*0e209d39SAndroid Build Coastguard Worker * code units for the underlying encoding type, for example, 1032*0e209d39SAndroid Build Coastguard Worker * a byte index for UTF-8. 1033*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.4 1034*0e209d39SAndroid Build Coastguard Worker */ 1035*0e209d39SAndroid Build Coastguard Worker virtual int32_t end(int32_t group, UErrorCode &status) const; 1036*0e209d39SAndroid Build Coastguard Worker 1037*0e209d39SAndroid Build Coastguard Worker /** 1038*0e209d39SAndroid Build Coastguard Worker * Returns the index in the input string of the character following the 1039*0e209d39SAndroid Build Coastguard Worker * text matched by the specified capture group during the previous match operation. 1040*0e209d39SAndroid Build Coastguard Worker * 1041*0e209d39SAndroid Build Coastguard Worker * @param group the capture group number 1042*0e209d39SAndroid Build Coastguard Worker * @param status A reference to a UErrorCode to receive any errors. Possible 1043*0e209d39SAndroid Build Coastguard Worker * errors are U_REGEX_INVALID_STATE if no match has been 1044*0e209d39SAndroid Build Coastguard Worker * attempted or the last match failed and 1045*0e209d39SAndroid Build Coastguard Worker * U_INDEX_OUTOFBOUNDS_ERROR for a bad capture group number 1046*0e209d39SAndroid Build Coastguard Worker * @return the index of the first character following the text 1047*0e209d39SAndroid Build Coastguard Worker * captured by the specified group during the previous match operation. 1048*0e209d39SAndroid Build Coastguard Worker * Return -1 if the capture group exists in the pattern but was not part of the match.
1049*0e209d39SAndroid Build Coastguard Worker * The index value returned is a native index, corresponding to 1050*0e209d39SAndroid Build Coastguard Worker * code units for the underlying encoding type, for example, 1051*0e209d39SAndroid Build Coastguard Worker * a byte index for UTF-8. 1052*0e209d39SAndroid Build Coastguard Worker * @stable ICU 4.6 1053*0e209d39SAndroid Build Coastguard Worker */ 1054*0e209d39SAndroid Build Coastguard Worker virtual int64_t end64(int32_t group, UErrorCode &status) const; 1055*0e209d39SAndroid Build Coastguard Worker 1056*0e209d39SAndroid Build Coastguard Worker /** 1057*0e209d39SAndroid Build Coastguard Worker * Resets this matcher. The effect is to remove any memory of previous matches, 1058*0e209d39SAndroid Build Coastguard Worker * and to cause subsequent find() operations to begin at the beginning of 1059*0e209d39SAndroid Build Coastguard Worker * the input string. 1060*0e209d39SAndroid Build Coastguard Worker * 1061*0e209d39SAndroid Build Coastguard Worker * @return this RegexMatcher. 1062*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.4 1063*0e209d39SAndroid Build Coastguard Worker */ 1064*0e209d39SAndroid Build Coastguard Worker virtual RegexMatcher &reset(); 1065*0e209d39SAndroid Build Coastguard Worker 1066*0e209d39SAndroid Build Coastguard Worker 1067*0e209d39SAndroid Build Coastguard Worker /** 1068*0e209d39SAndroid Build Coastguard Worker * Resets this matcher, and sets the current input position. 1069*0e209d39SAndroid Build Coastguard Worker * The effect is to remove any memory of previous matches, 1070*0e209d39SAndroid Build Coastguard Worker * and to cause subsequent find() operations to begin at 1071*0e209d39SAndroid Build Coastguard Worker * the specified (native) position in the input string.
1072*0e209d39SAndroid Build Coastguard Worker * 1073*0e209d39SAndroid Build Coastguard Worker * The matcher's region is reset to its default, which is the entire 1074*0e209d39SAndroid Build Coastguard Worker * input string. 1075*0e209d39SAndroid Build Coastguard Worker * 1076*0e209d39SAndroid Build Coastguard Worker * An alternative to this function is to set a match region 1077*0e209d39SAndroid Build Coastguard Worker * beginning at the desired index. 1078*0e209d39SAndroid Build Coastguard Worker * 1079*0e209d39SAndroid Build Coastguard Worker * @return this RegexMatcher. 1080*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.8 1081*0e209d39SAndroid Build Coastguard Worker */ 1082*0e209d39SAndroid Build Coastguard Worker virtual RegexMatcher &reset(int64_t index, UErrorCode &status); 1083*0e209d39SAndroid Build Coastguard Worker 1084*0e209d39SAndroid Build Coastguard Worker 1085*0e209d39SAndroid Build Coastguard Worker /** 1086*0e209d39SAndroid Build Coastguard Worker * Resets this matcher with a new input string. This allows instances of RegexMatcher 1087*0e209d39SAndroid Build Coastguard Worker * to be reused, which is more efficient than creating a new RegexMatcher for 1088*0e209d39SAndroid Build Coastguard Worker * each input string to be processed. 1089*0e209d39SAndroid Build Coastguard Worker * @param input The new string on which subsequent pattern matches will operate. 1090*0e209d39SAndroid Build Coastguard Worker * The matcher retains a reference to the caller's string, and operates 1091*0e209d39SAndroid Build Coastguard Worker * directly on that. Ownership of the string remains with the caller. 1092*0e209d39SAndroid Build Coastguard Worker * Because no copy of the string is made, it is essential that the 1093*0e209d39SAndroid Build Coastguard Worker * caller not delete the string until after regexp operations on it 1094*0e209d39SAndroid Build Coastguard Worker * are done.
1095*0e209d39SAndroid Build Coastguard Worker * Note that while a reset on the matcher with an input string that is then 1096*0e209d39SAndroid Build Coastguard Worker * modified across/during matcher operations may be supported currently for UnicodeString, 1097*0e209d39SAndroid Build Coastguard Worker * this was not originally intended behavior, and support for this is not guaranteed 1098*0e209d39SAndroid Build Coastguard Worker * in upcoming versions of ICU. 1099*0e209d39SAndroid Build Coastguard Worker * @return this RegexMatcher. 1100*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.4 1101*0e209d39SAndroid Build Coastguard Worker */ 1102*0e209d39SAndroid Build Coastguard Worker virtual RegexMatcher &reset(const UnicodeString &input); 1103*0e209d39SAndroid Build Coastguard Worker 1104*0e209d39SAndroid Build Coastguard Worker 1105*0e209d39SAndroid Build Coastguard Worker /** 1106*0e209d39SAndroid Build Coastguard Worker * Resets this matcher with a new input string. This allows instances of RegexMatcher 1107*0e209d39SAndroid Build Coastguard Worker * to be reused, which is more efficient than creating a new RegexMatcher for 1108*0e209d39SAndroid Build Coastguard Worker * each input string to be processed. 1109*0e209d39SAndroid Build Coastguard Worker * @param input The new string on which subsequent pattern matches will operate. 1110*0e209d39SAndroid Build Coastguard Worker * The matcher makes a shallow clone of the given text; ownership of the 1111*0e209d39SAndroid Build Coastguard Worker * original string remains with the caller. Because no deep copy of the 1112*0e209d39SAndroid Build Coastguard Worker * text is made, it is essential that the caller not modify the string 1113*0e209d39SAndroid Build Coastguard Worker * until after regexp operations on it are done. 1114*0e209d39SAndroid Build Coastguard Worker * @return this RegexMatcher. 
1115*0e209d39SAndroid Build Coastguard Worker * 1116*0e209d39SAndroid Build Coastguard Worker * @stable ICU 4.6 1117*0e209d39SAndroid Build Coastguard Worker */ 1118*0e209d39SAndroid Build Coastguard Worker virtual RegexMatcher &reset(UText *input); 1119*0e209d39SAndroid Build Coastguard Worker 1120*0e209d39SAndroid Build Coastguard Worker 1121*0e209d39SAndroid Build Coastguard Worker /** 1122*0e209d39SAndroid Build Coastguard Worker * Set the subject text string upon which the regular expression is looking for matches 1123*0e209d39SAndroid Build Coastguard Worker * without changing any other aspect of the matching state. 1124*0e209d39SAndroid Build Coastguard Worker * The new and previous text strings must have the same content. 1125*0e209d39SAndroid Build Coastguard Worker * 1126*0e209d39SAndroid Build Coastguard Worker * This function is intended for use in environments where ICU is operating on 1127*0e209d39SAndroid Build Coastguard Worker * strings that may move around in memory. It provides a mechanism for notifying 1128*0e209d39SAndroid Build Coastguard Worker * ICU that the string has been relocated, and providing a new UText to access the 1129*0e209d39SAndroid Build Coastguard Worker * string in its new position. 1130*0e209d39SAndroid Build Coastguard Worker * 1131*0e209d39SAndroid Build Coastguard Worker * Note that the regular expression implementation never copies the underlying text 1132*0e209d39SAndroid Build Coastguard Worker * of a string being matched, but always operates directly on the original text 1133*0e209d39SAndroid Build Coastguard Worker * provided by the user. Refreshing simply drops the references to the old text 1134*0e209d39SAndroid Build Coastguard Worker * and replaces them with references to the new. 1135*0e209d39SAndroid Build Coastguard Worker * 1136*0e209d39SAndroid Build Coastguard Worker * Caution: this function is normally used only by very specialized, 1137*0e209d39SAndroid Build Coastguard Worker * system-level code. 
One example use case is with garbage collection that moves 1138*0e209d39SAndroid Build Coastguard Worker * the text in memory. 1139*0e209d39SAndroid Build Coastguard Worker * 1140*0e209d39SAndroid Build Coastguard Worker * @param input The new (moved) text string. 1141*0e209d39SAndroid Build Coastguard Worker * @param status Receives errors detected by this function. 1142*0e209d39SAndroid Build Coastguard Worker * 1143*0e209d39SAndroid Build Coastguard Worker * @stable ICU 4.8 1144*0e209d39SAndroid Build Coastguard Worker */ 1145*0e209d39SAndroid Build Coastguard Worker virtual RegexMatcher &refreshInputText(UText *input, UErrorCode &status); 1146*0e209d39SAndroid Build Coastguard Worker 1147*0e209d39SAndroid Build Coastguard Worker private: 1148*0e209d39SAndroid Build Coastguard Worker /** 1149*0e209d39SAndroid Build Coastguard Worker * Cause a compilation error if an application accidentally attempts to 1150*0e209d39SAndroid Build Coastguard Worker * reset a matcher with a (char16_t *) string as input rather than 1151*0e209d39SAndroid Build Coastguard Worker * a UnicodeString. Avoids a dangling reference to a temporary string. 
1152*0e209d39SAndroid Build Coastguard Worker * 1153*0e209d39SAndroid Build Coastguard Worker * To efficiently work with char16_t *strings, wrap the data in a UnicodeString 1154*0e209d39SAndroid Build Coastguard Worker * using one of the aliasing constructors, such as 1155*0e209d39SAndroid Build Coastguard Worker * `UnicodeString(UBool isTerminated, const char16_t *text, int32_t textLength);` 1156*0e209d39SAndroid Build Coastguard Worker * or in a UText, using 1157*0e209d39SAndroid Build Coastguard Worker * `utext_openUChars(UText *ut, const char16_t *text, int64_t textLength, UErrorCode *status);` 1158*0e209d39SAndroid Build Coastguard Worker * 1159*0e209d39SAndroid Build Coastguard Worker */ 1160*0e209d39SAndroid Build Coastguard Worker RegexMatcher &reset(const char16_t *input) = delete; 1161*0e209d39SAndroid Build Coastguard Worker public: 1162*0e209d39SAndroid Build Coastguard Worker 1163*0e209d39SAndroid Build Coastguard Worker /** 1164*0e209d39SAndroid Build Coastguard Worker * Returns the input string being matched. Ownership of the string belongs to 1165*0e209d39SAndroid Build Coastguard Worker * the matcher; it should not be altered or deleted. This method will work even if the input 1166*0e209d39SAndroid Build Coastguard Worker * was originally supplied as a UText. 1167*0e209d39SAndroid Build Coastguard Worker * @return the input string 1168*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.4 1169*0e209d39SAndroid Build Coastguard Worker */ 1170*0e209d39SAndroid Build Coastguard Worker virtual const UnicodeString &input() const; 1171*0e209d39SAndroid Build Coastguard Worker 1172*0e209d39SAndroid Build Coastguard Worker /** 1173*0e209d39SAndroid Build Coastguard Worker * Returns the input string being matched. This is the live input text; it should not be 1174*0e209d39SAndroid Build Coastguard Worker * altered or deleted. 
This method will work even if the input was originally supplied as 1175*0e209d39SAndroid Build Coastguard Worker * a UnicodeString. 1176*0e209d39SAndroid Build Coastguard Worker * @return the input text 1177*0e209d39SAndroid Build Coastguard Worker * 1178*0e209d39SAndroid Build Coastguard Worker * @stable ICU 4.6 1179*0e209d39SAndroid Build Coastguard Worker */ 1180*0e209d39SAndroid Build Coastguard Worker virtual UText *inputText() const; 1181*0e209d39SAndroid Build Coastguard Worker 1182*0e209d39SAndroid Build Coastguard Worker /** 1183*0e209d39SAndroid Build Coastguard Worker * Returns the input string being matched, either by copying it into the provided 1184*0e209d39SAndroid Build Coastguard Worker * UText parameter or by returning a shallow clone of the live input. Note that copying 1185*0e209d39SAndroid Build Coastguard Worker * the entire input may cause significant performance and memory issues. 1186*0e209d39SAndroid Build Coastguard Worker * @param dest The UText into which the input should be copied, or nullptr to create a new UText 1187*0e209d39SAndroid Build Coastguard Worker * @param status error code 1188*0e209d39SAndroid Build Coastguard Worker * @return dest if non-nullptr, a shallow copy of the input text otherwise 1189*0e209d39SAndroid Build Coastguard Worker * 1190*0e209d39SAndroid Build Coastguard Worker * @stable ICU 4.6 1191*0e209d39SAndroid Build Coastguard Worker */ 1192*0e209d39SAndroid Build Coastguard Worker virtual UText *getInput(UText *dest, UErrorCode &status) const; 1193*0e209d39SAndroid Build Coastguard Worker 1194*0e209d39SAndroid Build Coastguard Worker 1195*0e209d39SAndroid Build Coastguard Worker /** Sets the limits of this matcher's region. 1196*0e209d39SAndroid Build Coastguard Worker * The region is the part of the input string that will be searched to find a match. 
1197*0e209d39SAndroid Build Coastguard Worker * Invoking this method resets the matcher, and then sets the region to start 1198*0e209d39SAndroid Build Coastguard Worker * at the index specified by the start parameter and end at the index specified 1199*0e209d39SAndroid Build Coastguard Worker * by the limit parameter. 1200*0e209d39SAndroid Build Coastguard Worker * 1201*0e209d39SAndroid Build Coastguard Worker * Depending on the transparency and anchoring being used (see useTransparentBounds 1202*0e209d39SAndroid Build Coastguard Worker * and useAnchoringBounds), certain constructs such as anchors may behave differently 1203*0e209d39SAndroid Build Coastguard Worker * at or around the boundaries of the region. 1204*0e209d39SAndroid Build Coastguard Worker * 1205*0e209d39SAndroid Build Coastguard Worker * The function will fail if start is greater than limit, or if either index 1206*0e209d39SAndroid Build Coastguard Worker * is less than zero or greater than the length of the string being matched. 1207*0e209d39SAndroid Build Coastguard Worker * 1208*0e209d39SAndroid Build Coastguard Worker * @param start The (native) index to begin searches at. 1209*0e209d39SAndroid Build Coastguard Worker * @param limit The index to end searches at (exclusive). 1210*0e209d39SAndroid Build Coastguard Worker * @param status A reference to a UErrorCode to receive any errors. 1211*0e209d39SAndroid Build Coastguard Worker * @stable ICU 4.0 1212*0e209d39SAndroid Build Coastguard Worker */ 1213*0e209d39SAndroid Build Coastguard Worker virtual RegexMatcher &region(int64_t start, int64_t limit, UErrorCode &status); 1214*0e209d39SAndroid Build Coastguard Worker 1215*0e209d39SAndroid Build Coastguard Worker /** 1216*0e209d39SAndroid Build Coastguard Worker * Identical to region(start, limit, status) but also allows a start position without 1217*0e209d39SAndroid Build Coastguard Worker * resetting the region state.
1218*0e209d39SAndroid Build Coastguard Worker * @param regionStart The region start 1219*0e209d39SAndroid Build Coastguard Worker * @param regionLimit the limit of the region 1220*0e209d39SAndroid Build Coastguard Worker * @param startIndex The (native) index within the region bounds at which to begin searches. 1221*0e209d39SAndroid Build Coastguard Worker * @param status A reference to a UErrorCode to receive any errors. 1222*0e209d39SAndroid Build Coastguard Worker * If startIndex is not within the specified region bounds, 1223*0e209d39SAndroid Build Coastguard Worker * U_INDEX_OUTOFBOUNDS_ERROR is returned. 1224*0e209d39SAndroid Build Coastguard Worker * @stable ICU 4.6 1225*0e209d39SAndroid Build Coastguard Worker */ 1226*0e209d39SAndroid Build Coastguard Worker virtual RegexMatcher &region(int64_t regionStart, int64_t regionLimit, int64_t startIndex, UErrorCode &status); 1227*0e209d39SAndroid Build Coastguard Worker 1228*0e209d39SAndroid Build Coastguard Worker /** 1229*0e209d39SAndroid Build Coastguard Worker * Reports the start index of this matcher's region. The searches this matcher 1230*0e209d39SAndroid Build Coastguard Worker * conducts are limited to finding matches within regionStart (inclusive) and 1231*0e209d39SAndroid Build Coastguard Worker * regionEnd (exclusive). 1232*0e209d39SAndroid Build Coastguard Worker * 1233*0e209d39SAndroid Build Coastguard Worker * @return The starting (native) index of this matcher's region. 1234*0e209d39SAndroid Build Coastguard Worker * @stable ICU 4.0 1235*0e209d39SAndroid Build Coastguard Worker */ 1236*0e209d39SAndroid Build Coastguard Worker virtual int32_t regionStart() const; 1237*0e209d39SAndroid Build Coastguard Worker 1238*0e209d39SAndroid Build Coastguard Worker /** 1239*0e209d39SAndroid Build Coastguard Worker * Reports the start index of this matcher's region.
The searches this matcher 1240*0e209d39SAndroid Build Coastguard Worker * conducts are limited to finding matches within regionStart (inclusive) and 1241*0e209d39SAndroid Build Coastguard Worker * regionEnd (exclusive). 1242*0e209d39SAndroid Build Coastguard Worker * 1243*0e209d39SAndroid Build Coastguard Worker * @return The starting (native) index of this matcher's region. 1244*0e209d39SAndroid Build Coastguard Worker * @stable ICU 4.6 1245*0e209d39SAndroid Build Coastguard Worker */ 1246*0e209d39SAndroid Build Coastguard Worker virtual int64_t regionStart64() const; 1247*0e209d39SAndroid Build Coastguard Worker 1248*0e209d39SAndroid Build Coastguard Worker 1249*0e209d39SAndroid Build Coastguard Worker /** 1250*0e209d39SAndroid Build Coastguard Worker * Reports the end (limit) index (exclusive) of this matcher's region. The searches 1251*0e209d39SAndroid Build Coastguard Worker * this matcher conducts are limited to finding matches within regionStart 1252*0e209d39SAndroid Build Coastguard Worker * (inclusive) and regionEnd (exclusive). 1253*0e209d39SAndroid Build Coastguard Worker * 1254*0e209d39SAndroid Build Coastguard Worker * @return The ending point (native) of this matcher's region. 1255*0e209d39SAndroid Build Coastguard Worker * @stable ICU 4.0 1256*0e209d39SAndroid Build Coastguard Worker */ 1257*0e209d39SAndroid Build Coastguard Worker virtual int32_t regionEnd() const; 1258*0e209d39SAndroid Build Coastguard Worker 1259*0e209d39SAndroid Build Coastguard Worker /** 1260*0e209d39SAndroid Build Coastguard Worker * Reports the end (limit) index (exclusive) of this matcher's region. The searches 1261*0e209d39SAndroid Build Coastguard Worker * this matcher conducts are limited to finding matches within regionStart 1262*0e209d39SAndroid Build Coastguard Worker * (inclusive) and regionEnd (exclusive). 1263*0e209d39SAndroid Build Coastguard Worker * 1264*0e209d39SAndroid Build Coastguard Worker * @return The ending point (native) of this matcher's region. 
1265*0e209d39SAndroid Build Coastguard Worker * @stable ICU 4.6 1266*0e209d39SAndroid Build Coastguard Worker */ 1267*0e209d39SAndroid Build Coastguard Worker virtual int64_t regionEnd64() const; 1268*0e209d39SAndroid Build Coastguard Worker 1269*0e209d39SAndroid Build Coastguard Worker /** 1270*0e209d39SAndroid Build Coastguard Worker * Queries the transparency of region bounds for this matcher. 1271*0e209d39SAndroid Build Coastguard Worker * See useTransparentBounds for a description of transparent and opaque bounds. 1272*0e209d39SAndroid Build Coastguard Worker * By default, a matcher uses opaque region boundaries. 1273*0e209d39SAndroid Build Coastguard Worker * 1274*0e209d39SAndroid Build Coastguard Worker * @return true if this matcher is using transparent bounds, false if it is using opaque bounds. 1275*0e209d39SAndroid Build Coastguard Worker * @stable ICU 4.0 1276*0e209d39SAndroid Build Coastguard Worker */ 1277*0e209d39SAndroid Build Coastguard Worker virtual UBool hasTransparentBounds() const; 1278*0e209d39SAndroid Build Coastguard Worker 1279*0e209d39SAndroid Build Coastguard Worker /** 1280*0e209d39SAndroid Build Coastguard Worker * Sets the transparency of region bounds for this matcher. 1281*0e209d39SAndroid Build Coastguard Worker * Invoking this function with an argument of true will set this matcher to use transparent bounds. 1282*0e209d39SAndroid Build Coastguard Worker * If the boolean argument is false, then opaque bounds will be used. 1283*0e209d39SAndroid Build Coastguard Worker * 1284*0e209d39SAndroid Build Coastguard Worker * Using transparent bounds, the boundaries of this matcher's region are transparent 1285*0e209d39SAndroid Build Coastguard Worker * to lookahead, lookbehind, and boundary matching constructs. Those constructs can 1286*0e209d39SAndroid Build Coastguard Worker * see text beyond the boundaries of the region while checking for a match.
1287*0e209d39SAndroid Build Coastguard Worker * 1288*0e209d39SAndroid Build Coastguard Worker * With opaque bounds, no text outside of the matcher's region is visible to lookahead, 1289*0e209d39SAndroid Build Coastguard Worker * lookbehind, and boundary matching constructs. 1290*0e209d39SAndroid Build Coastguard Worker * 1291*0e209d39SAndroid Build Coastguard Worker * By default, a matcher uses opaque bounds. 1292*0e209d39SAndroid Build Coastguard Worker * 1293*0e209d39SAndroid Build Coastguard Worker * @param b true for transparent bounds; false for opaque bounds 1294*0e209d39SAndroid Build Coastguard Worker * @return This Matcher 1295*0e209d39SAndroid Build Coastguard Worker * @stable ICU 4.0 1296*0e209d39SAndroid Build Coastguard Worker **/ 1297*0e209d39SAndroid Build Coastguard Worker virtual RegexMatcher &useTransparentBounds(UBool b); 1298*0e209d39SAndroid Build Coastguard Worker 1299*0e209d39SAndroid Build Coastguard Worker 1300*0e209d39SAndroid Build Coastguard Worker /** 1301*0e209d39SAndroid Build Coastguard Worker * Return true if this matcher is using anchoring bounds. 1302*0e209d39SAndroid Build Coastguard Worker * By default, matchers use anchoring region bounds. 1303*0e209d39SAndroid Build Coastguard Worker * 1304*0e209d39SAndroid Build Coastguard Worker * @return true if this matcher is using anchoring bounds. 1305*0e209d39SAndroid Build Coastguard Worker * @stable ICU 4.0 1306*0e209d39SAndroid Build Coastguard Worker */ 1307*0e209d39SAndroid Build Coastguard Worker virtual UBool hasAnchoringBounds() const; 1308*0e209d39SAndroid Build Coastguard Worker 1309*0e209d39SAndroid Build Coastguard Worker 1310*0e209d39SAndroid Build Coastguard Worker /** 1311*0e209d39SAndroid Build Coastguard Worker * Set whether this matcher is using Anchoring Bounds for its region.
1312*0e209d39SAndroid Build Coastguard Worker * With anchoring bounds, pattern anchors such as ^ and $ will match at the start 1313*0e209d39SAndroid Build Coastguard Worker * and end of the region. Without Anchoring Bounds, anchors will only match at 1314*0e209d39SAndroid Build Coastguard Worker * the positions they would in the complete text. 1315*0e209d39SAndroid Build Coastguard Worker * 1316*0e209d39SAndroid Build Coastguard Worker * Anchoring Bounds are the default for regions. 1317*0e209d39SAndroid Build Coastguard Worker * 1318*0e209d39SAndroid Build Coastguard Worker * @param b true to enable anchoring bounds; false to disable them. 1319*0e209d39SAndroid Build Coastguard Worker * @return This Matcher 1320*0e209d39SAndroid Build Coastguard Worker * @stable ICU 4.0 1321*0e209d39SAndroid Build Coastguard Worker */ 1322*0e209d39SAndroid Build Coastguard Worker virtual RegexMatcher &useAnchoringBounds(UBool b); 1323*0e209d39SAndroid Build Coastguard Worker 1324*0e209d39SAndroid Build Coastguard Worker 1325*0e209d39SAndroid Build Coastguard Worker /** 1326*0e209d39SAndroid Build Coastguard Worker * Return true if the most recent matching operation attempted to access 1327*0e209d39SAndroid Build Coastguard Worker * additional input beyond the available input text. 1328*0e209d39SAndroid Build Coastguard Worker * In this case, additional input text could change the results of the match. 1329*0e209d39SAndroid Build Coastguard Worker * 1330*0e209d39SAndroid Build Coastguard Worker * hitEnd() is defined for both successful and unsuccessful matches. 1331*0e209d39SAndroid Build Coastguard Worker * In either case hitEnd() will return true if the end of the text was 1332*0e209d39SAndroid Build Coastguard Worker * reached at any point during the matching process.
1333*0e209d39SAndroid Build Coastguard Worker * 1334*0e209d39SAndroid Build Coastguard Worker * @return true if the most recent match hit the end of input 1335*0e209d39SAndroid Build Coastguard Worker * @stable ICU 4.0 1336*0e209d39SAndroid Build Coastguard Worker */ 1337*0e209d39SAndroid Build Coastguard Worker virtual UBool hitEnd() const; 1338*0e209d39SAndroid Build Coastguard Worker 1339*0e209d39SAndroid Build Coastguard Worker /** 1340*0e209d39SAndroid Build Coastguard Worker * Return true if the most recent match succeeded and additional input could cause 1341*0e209d39SAndroid Build Coastguard Worker * it to fail. If this method returns false and a match was found, then more input 1342*0e209d39SAndroid Build Coastguard Worker * might change the match but the match won't be lost. If a match was not found, 1343*0e209d39SAndroid Build Coastguard Worker * then requireEnd has no meaning. 1344*0e209d39SAndroid Build Coastguard Worker * 1345*0e209d39SAndroid Build Coastguard Worker * @return true if more input could cause the most recent match to no longer match. 1346*0e209d39SAndroid Build Coastguard Worker * @stable ICU 4.0 1347*0e209d39SAndroid Build Coastguard Worker */ 1348*0e209d39SAndroid Build Coastguard Worker virtual UBool requireEnd() const; 1349*0e209d39SAndroid Build Coastguard Worker 1350*0e209d39SAndroid Build Coastguard Worker 1351*0e209d39SAndroid Build Coastguard Worker /** 1352*0e209d39SAndroid Build Coastguard Worker * Returns the pattern that is interpreted by this matcher.
1353*0e209d39SAndroid Build Coastguard Worker * @return the RegexPattern for this RegexMatcher 1354*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.4 1355*0e209d39SAndroid Build Coastguard Worker */ 1356*0e209d39SAndroid Build Coastguard Worker virtual const RegexPattern &pattern() const; 1357*0e209d39SAndroid Build Coastguard Worker 1358*0e209d39SAndroid Build Coastguard Worker 1359*0e209d39SAndroid Build Coastguard Worker /** 1360*0e209d39SAndroid Build Coastguard Worker * Replaces every substring of the input that matches the pattern 1361*0e209d39SAndroid Build Coastguard Worker * with the given replacement string. This is a convenience function that 1362*0e209d39SAndroid Build Coastguard Worker * provides a complete find-and-replace-all operation. 1363*0e209d39SAndroid Build Coastguard Worker * 1364*0e209d39SAndroid Build Coastguard Worker * This method first resets this matcher. It then scans the input string 1365*0e209d39SAndroid Build Coastguard Worker * looking for matches of the pattern. Input that is not part of any 1366*0e209d39SAndroid Build Coastguard Worker * match is left unchanged; each match is replaced in the result by the 1367*0e209d39SAndroid Build Coastguard Worker * replacement string. The replacement string may contain references to 1368*0e209d39SAndroid Build Coastguard Worker * capture groups. 1369*0e209d39SAndroid Build Coastguard Worker * 1370*0e209d39SAndroid Build Coastguard Worker * @param replacement a string containing the replacement text. 1371*0e209d39SAndroid Build Coastguard Worker * @param status a reference to a UErrorCode to receive any errors. 1372*0e209d39SAndroid Build Coastguard Worker * @return a string containing the results of the find and replace. 
1373*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.4 1374*0e209d39SAndroid Build Coastguard Worker */ 1375*0e209d39SAndroid Build Coastguard Worker virtual UnicodeString replaceAll(const UnicodeString &replacement, UErrorCode &status); 1376*0e209d39SAndroid Build Coastguard Worker 1377*0e209d39SAndroid Build Coastguard Worker 1378*0e209d39SAndroid Build Coastguard Worker /** 1379*0e209d39SAndroid Build Coastguard Worker * Replaces every substring of the input that matches the pattern 1380*0e209d39SAndroid Build Coastguard Worker * with the given replacement string. This is a convenience function that 1381*0e209d39SAndroid Build Coastguard Worker * provides a complete find-and-replace-all operation. 1382*0e209d39SAndroid Build Coastguard Worker * 1383*0e209d39SAndroid Build Coastguard Worker * This method first resets this matcher. It then scans the input string 1384*0e209d39SAndroid Build Coastguard Worker * looking for matches of the pattern. Input that is not part of any 1385*0e209d39SAndroid Build Coastguard Worker * match is left unchanged; each match is replaced in the result by the 1386*0e209d39SAndroid Build Coastguard Worker * replacement string. The replacement string may contain references to 1387*0e209d39SAndroid Build Coastguard Worker * capture groups. 1388*0e209d39SAndroid Build Coastguard Worker * 1389*0e209d39SAndroid Build Coastguard Worker * @param replacement a string containing the replacement text. 1390*0e209d39SAndroid Build Coastguard Worker * @param dest a mutable UText in which the results are placed. 1391*0e209d39SAndroid Build Coastguard Worker * If nullptr, a new UText will be created (which may not be mutable). 1392*0e209d39SAndroid Build Coastguard Worker * @param status a reference to a UErrorCode to receive any errors. 1393*0e209d39SAndroid Build Coastguard Worker * @return a string containing the results of the find and replace. 
1394*0e209d39SAndroid Build Coastguard Worker * If a pre-allocated UText was provided, it will always be used and returned. 1395*0e209d39SAndroid Build Coastguard Worker * 1396*0e209d39SAndroid Build Coastguard Worker * @stable ICU 4.6 1397*0e209d39SAndroid Build Coastguard Worker */ 1398*0e209d39SAndroid Build Coastguard Worker virtual UText *replaceAll(UText *replacement, UText *dest, UErrorCode &status); 1399*0e209d39SAndroid Build Coastguard Worker 1400*0e209d39SAndroid Build Coastguard Worker 1401*0e209d39SAndroid Build Coastguard Worker /** 1402*0e209d39SAndroid Build Coastguard Worker * Replaces the first substring of the input that matches 1403*0e209d39SAndroid Build Coastguard Worker * the pattern with the replacement string. This is a convenience 1404*0e209d39SAndroid Build Coastguard Worker * function that provides a complete find-and-replace operation. 1405*0e209d39SAndroid Build Coastguard Worker * 1406*0e209d39SAndroid Build Coastguard Worker * This function first resets this RegexMatcher. It then scans the input string 1407*0e209d39SAndroid Build Coastguard Worker * looking for a match of the pattern. Input that is not part 1408*0e209d39SAndroid Build Coastguard Worker * of the match is appended directly to the result string; the match is replaced 1409*0e209d39SAndroid Build Coastguard Worker * in the result by the replacement string. The replacement string may contain 1410*0e209d39SAndroid Build Coastguard Worker * references to captured groups. 1411*0e209d39SAndroid Build Coastguard Worker * 1412*0e209d39SAndroid Build Coastguard Worker * The state of the matcher (the position at which a subsequent find() 1413*0e209d39SAndroid Build Coastguard Worker * would begin) after completing a replaceFirst() is not specified. The 1414*0e209d39SAndroid Build Coastguard Worker * RegexMatcher should be reset before doing additional find() operations. 
1415*0e209d39SAndroid Build Coastguard Worker * 1416*0e209d39SAndroid Build Coastguard Worker * @param replacement a string containing the replacement text. 1417*0e209d39SAndroid Build Coastguard Worker * @param status a reference to a UErrorCode to receive any errors. 1418*0e209d39SAndroid Build Coastguard Worker * @return a string containing the results of the find and replace. 1419*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.4 1420*0e209d39SAndroid Build Coastguard Worker */ 1421*0e209d39SAndroid Build Coastguard Worker virtual UnicodeString replaceFirst(const UnicodeString &replacement, UErrorCode &status); 1422*0e209d39SAndroid Build Coastguard Worker 1423*0e209d39SAndroid Build Coastguard Worker 1424*0e209d39SAndroid Build Coastguard Worker /** 1425*0e209d39SAndroid Build Coastguard Worker * Replaces the first substring of the input that matches 1426*0e209d39SAndroid Build Coastguard Worker * the pattern with the replacement string. This is a convenience 1427*0e209d39SAndroid Build Coastguard Worker * function that provides a complete find-and-replace operation. 1428*0e209d39SAndroid Build Coastguard Worker * 1429*0e209d39SAndroid Build Coastguard Worker * This function first resets this RegexMatcher. It then scans the input string 1430*0e209d39SAndroid Build Coastguard Worker * looking for a match of the pattern. Input that is not part 1431*0e209d39SAndroid Build Coastguard Worker * of the match is appended directly to the result string; the match is replaced 1432*0e209d39SAndroid Build Coastguard Worker * in the result by the replacement string. The replacement string may contain 1433*0e209d39SAndroid Build Coastguard Worker * references to captured groups. 1434*0e209d39SAndroid Build Coastguard Worker * 1435*0e209d39SAndroid Build Coastguard Worker * The state of the matcher (the position at which a subsequent find() 1436*0e209d39SAndroid Build Coastguard Worker * would begin) after completing a replaceFirst() is not specified. 
The 1437*0e209d39SAndroid Build Coastguard Worker * RegexMatcher should be reset before doing additional find() operations. 1438*0e209d39SAndroid Build Coastguard Worker * 1439*0e209d39SAndroid Build Coastguard Worker * @param replacement a string containing the replacement text. 1440*0e209d39SAndroid Build Coastguard Worker * @param dest a mutable UText in which the results are placed. 1441*0e209d39SAndroid Build Coastguard Worker * If nullptr, a new UText will be created (which may not be mutable). 1442*0e209d39SAndroid Build Coastguard Worker * @param status a reference to a UErrorCode to receive any errors. 1443*0e209d39SAndroid Build Coastguard Worker * @return a string containing the results of the find and replace. 1444*0e209d39SAndroid Build Coastguard Worker * If a pre-allocated UText was provided, it will always be used and returned. 1445*0e209d39SAndroid Build Coastguard Worker * 1446*0e209d39SAndroid Build Coastguard Worker * @stable ICU 4.6 1447*0e209d39SAndroid Build Coastguard Worker */ 1448*0e209d39SAndroid Build Coastguard Worker virtual UText *replaceFirst(UText *replacement, UText *dest, UErrorCode &status); 1449*0e209d39SAndroid Build Coastguard Worker 1450*0e209d39SAndroid Build Coastguard Worker 1451*0e209d39SAndroid Build Coastguard Worker /** 1452*0e209d39SAndroid Build Coastguard Worker * Implements a replace operation intended to be used as part of an 1453*0e209d39SAndroid Build Coastguard Worker * incremental find-and-replace. 1454*0e209d39SAndroid Build Coastguard Worker * 1455*0e209d39SAndroid Build Coastguard Worker * The input string, starting from the end of the previous replacement and ending at 1456*0e209d39SAndroid Build Coastguard Worker * the start of the current match, is appended to the destination string. Then the 1457*0e209d39SAndroid Build Coastguard Worker * replacement string is appended to the output string, 1458*0e209d39SAndroid Build Coastguard Worker * including handling any substitutions of captured text. 
1459*0e209d39SAndroid Build Coastguard Worker * 1460*0e209d39SAndroid Build Coastguard Worker * For simple, prepackaged, non-incremental find-and-replace 1461*0e209d39SAndroid Build Coastguard Worker * operations, see replaceFirst() or replaceAll(). 1462*0e209d39SAndroid Build Coastguard Worker * 1463*0e209d39SAndroid Build Coastguard Worker * @param dest A UnicodeString to which the results of the find-and-replace are appended. 1464*0e209d39SAndroid Build Coastguard Worker * @param replacement A UnicodeString that provides the text to be substituted for 1465*0e209d39SAndroid Build Coastguard Worker * the input text that matched the regexp pattern. The replacement 1466*0e209d39SAndroid Build Coastguard Worker * text may contain references to captured text from the 1467*0e209d39SAndroid Build Coastguard Worker * input. 1468*0e209d39SAndroid Build Coastguard Worker * @param status A reference to a UErrorCode to receive any errors. Possible 1469*0e209d39SAndroid Build Coastguard Worker * errors are U_REGEX_INVALID_STATE if no match has been 1470*0e209d39SAndroid Build Coastguard Worker * attempted or the last match failed, and U_INDEX_OUTOFBOUNDS_ERROR 1471*0e209d39SAndroid Build Coastguard Worker * if the replacement text specifies a capture group that 1472*0e209d39SAndroid Build Coastguard Worker * does not exist in the pattern. 
1473*0e209d39SAndroid Build Coastguard Worker * 1474*0e209d39SAndroid Build Coastguard Worker * @return this RegexMatcher 1475*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.4 1476*0e209d39SAndroid Build Coastguard Worker * 1477*0e209d39SAndroid Build Coastguard Worker */ 1478*0e209d39SAndroid Build Coastguard Worker virtual RegexMatcher &appendReplacement(UnicodeString &dest, 1479*0e209d39SAndroid Build Coastguard Worker const UnicodeString &replacement, UErrorCode &status); 1480*0e209d39SAndroid Build Coastguard Worker 1481*0e209d39SAndroid Build Coastguard Worker 1482*0e209d39SAndroid Build Coastguard Worker /** 1483*0e209d39SAndroid Build Coastguard Worker * Implements a replace operation intended to be used as part of an 1484*0e209d39SAndroid Build Coastguard Worker * incremental find-and-replace. 1485*0e209d39SAndroid Build Coastguard Worker * 1486*0e209d39SAndroid Build Coastguard Worker * The input string, starting from the end of the previous replacement and ending at 1487*0e209d39SAndroid Build Coastguard Worker * the start of the current match, is appended to the destination string. Then the 1488*0e209d39SAndroid Build Coastguard Worker * replacement string is appended to the output string, 1489*0e209d39SAndroid Build Coastguard Worker * including handling any substitutions of captured text. 1490*0e209d39SAndroid Build Coastguard Worker * 1491*0e209d39SAndroid Build Coastguard Worker * For simple, prepackaged, non-incremental find-and-replace 1492*0e209d39SAndroid Build Coastguard Worker * operations, see replaceFirst() or replaceAll(). 1493*0e209d39SAndroid Build Coastguard Worker * 1494*0e209d39SAndroid Build Coastguard Worker * @param dest A mutable UText to which the results of the find-and-replace are appended. 1495*0e209d39SAndroid Build Coastguard Worker * Must not be nullptr. 
1496*0e209d39SAndroid Build Coastguard Worker * @param replacement A UText that provides the text to be substituted for 1497*0e209d39SAndroid Build Coastguard Worker * the input text that matched the regexp pattern. The replacement 1498*0e209d39SAndroid Build Coastguard Worker * text may contain references to captured text from the input. 1499*0e209d39SAndroid Build Coastguard Worker * @param status A reference to a UErrorCode to receive any errors. Possible 1500*0e209d39SAndroid Build Coastguard Worker * errors are U_REGEX_INVALID_STATE if no match has been 1501*0e209d39SAndroid Build Coastguard Worker * attempted or the last match failed, and U_INDEX_OUTOFBOUNDS_ERROR 1502*0e209d39SAndroid Build Coastguard Worker * if the replacement text specifies a capture group that 1503*0e209d39SAndroid Build Coastguard Worker * does not exist in the pattern. 1504*0e209d39SAndroid Build Coastguard Worker * 1505*0e209d39SAndroid Build Coastguard Worker * @return this RegexMatcher 1506*0e209d39SAndroid Build Coastguard Worker * 1507*0e209d39SAndroid Build Coastguard Worker * @stable ICU 4.6 1508*0e209d39SAndroid Build Coastguard Worker */ 1509*0e209d39SAndroid Build Coastguard Worker virtual RegexMatcher &appendReplacement(UText *dest, 1510*0e209d39SAndroid Build Coastguard Worker UText *replacement, UErrorCode &status); 1511*0e209d39SAndroid Build Coastguard Worker 1512*0e209d39SAndroid Build Coastguard Worker 1513*0e209d39SAndroid Build Coastguard Worker /** 1514*0e209d39SAndroid Build Coastguard Worker * As the final step in a find-and-replace operation, append the remainder 1515*0e209d39SAndroid Build Coastguard Worker * of the input string, starting at the position following the last appendReplacement(), 1516*0e209d39SAndroid Build Coastguard Worker * to the destination string. `appendTail()` is intended to be invoked after one 1517*0e209d39SAndroid Build Coastguard Worker * or more invocations of the `RegexMatcher::appendReplacement()`. 
1518*0e209d39SAndroid Build Coastguard Worker * 1519*0e209d39SAndroid Build Coastguard Worker * @param dest A UnicodeString to which the results of the find-and-replace are appended. 1520*0e209d39SAndroid Build Coastguard Worker * @return the destination string. 1521*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.4 1522*0e209d39SAndroid Build Coastguard Worker */ 1523*0e209d39SAndroid Build Coastguard Worker virtual UnicodeString &appendTail(UnicodeString &dest); 1524*0e209d39SAndroid Build Coastguard Worker 1525*0e209d39SAndroid Build Coastguard Worker 1526*0e209d39SAndroid Build Coastguard Worker /** 1527*0e209d39SAndroid Build Coastguard Worker * As the final step in a find-and-replace operation, append the remainder 1528*0e209d39SAndroid Build Coastguard Worker * of the input string, starting at the position following the last appendReplacement(), 1529*0e209d39SAndroid Build Coastguard Worker * to the destination string. `appendTail()` is intended to be invoked after one 1530*0e209d39SAndroid Build Coastguard Worker * or more invocations of the `RegexMatcher::appendReplacement()`. 1531*0e209d39SAndroid Build Coastguard Worker * 1532*0e209d39SAndroid Build Coastguard Worker * @param dest A mutable UText to which the results of the find-and-replace are appended. 1533*0e209d39SAndroid Build Coastguard Worker * Must not be nullptr. 1534*0e209d39SAndroid Build Coastguard Worker * @param status error code 1535*0e209d39SAndroid Build Coastguard Worker * @return the destination string.
1536*0e209d39SAndroid Build Coastguard Worker * 1537*0e209d39SAndroid Build Coastguard Worker * @stable ICU 4.6 1538*0e209d39SAndroid Build Coastguard Worker */ 1539*0e209d39SAndroid Build Coastguard Worker virtual UText *appendTail(UText *dest, UErrorCode &status); 1540*0e209d39SAndroid Build Coastguard Worker 1541*0e209d39SAndroid Build Coastguard Worker 1542*0e209d39SAndroid Build Coastguard Worker /** 1543*0e209d39SAndroid Build Coastguard Worker * Split a string into fields. Somewhat like %split() from Perl. 1544*0e209d39SAndroid Build Coastguard Worker * The pattern matches identify delimiters that separate the input 1545*0e209d39SAndroid Build Coastguard Worker * into fields. The input data between the matches becomes the 1546*0e209d39SAndroid Build Coastguard Worker * fields themselves. 1547*0e209d39SAndroid Build Coastguard Worker * 1548*0e209d39SAndroid Build Coastguard Worker * @param input The string to be split into fields. The field delimiters 1549*0e209d39SAndroid Build Coastguard Worker * match the pattern (in the "this" object). This matcher 1550*0e209d39SAndroid Build Coastguard Worker * will be reset to this input string. 1551*0e209d39SAndroid Build Coastguard Worker * @param dest An array of UnicodeStrings to receive the results of the split. 1552*0e209d39SAndroid Build Coastguard Worker * This is an array of actual UnicodeString objects, not an 1553*0e209d39SAndroid Build Coastguard Worker * array of pointers to strings. Local (stack based) arrays can 1554*0e209d39SAndroid Build Coastguard Worker * work well here. 1555*0e209d39SAndroid Build Coastguard Worker * @param destCapacity The number of elements in the destination array. 1556*0e209d39SAndroid Build Coastguard Worker * If the number of fields found is less than destCapacity, the 1557*0e209d39SAndroid Build Coastguard Worker * extra strings in the destination array are not altered. 
1558*0e209d39SAndroid Build Coastguard Worker * If the number of destination strings is less than the number 1559*0e209d39SAndroid Build Coastguard Worker * of fields, the trailing part of the input string, including any 1560*0e209d39SAndroid Build Coastguard Worker * field delimiters, is placed in the last destination string. 1561*0e209d39SAndroid Build Coastguard Worker * @param status A reference to a UErrorCode to receive any errors. 1562*0e209d39SAndroid Build Coastguard Worker * @return The number of fields into which the input string was split. 1563*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.6 1564*0e209d39SAndroid Build Coastguard Worker */ 1565*0e209d39SAndroid Build Coastguard Worker virtual int32_t split(const UnicodeString &input, 1566*0e209d39SAndroid Build Coastguard Worker UnicodeString dest[], 1567*0e209d39SAndroid Build Coastguard Worker int32_t destCapacity, 1568*0e209d39SAndroid Build Coastguard Worker UErrorCode &status); 1569*0e209d39SAndroid Build Coastguard Worker 1570*0e209d39SAndroid Build Coastguard Worker 1571*0e209d39SAndroid Build Coastguard Worker /** 1572*0e209d39SAndroid Build Coastguard Worker * Split a string into fields. Somewhat like %split() from Perl. 1573*0e209d39SAndroid Build Coastguard Worker * The pattern matches identify delimiters that separate the input 1574*0e209d39SAndroid Build Coastguard Worker * into fields. The input data between the matches becomes the 1575*0e209d39SAndroid Build Coastguard Worker * fields themselves. 1576*0e209d39SAndroid Build Coastguard Worker * 1577*0e209d39SAndroid Build Coastguard Worker * @param input The string to be split into fields. The field delimiters 1578*0e209d39SAndroid Build Coastguard Worker * match the pattern (in the "this" object). This matcher 1579*0e209d39SAndroid Build Coastguard Worker * will be reset to this input string. 1580*0e209d39SAndroid Build Coastguard Worker * @param dest An array of mutable UText structs to receive the results of the split. 
1581*0e209d39SAndroid Build Coastguard Worker * If a field is nullptr, a new UText is allocated to contain the results for 1582*0e209d39SAndroid Build Coastguard Worker * that field. This new UText is not guaranteed to be mutable. 1583*0e209d39SAndroid Build Coastguard Worker * @param destCapacity The number of elements in the destination array. 1584*0e209d39SAndroid Build Coastguard Worker * If the number of fields found is less than destCapacity, the 1585*0e209d39SAndroid Build Coastguard Worker * extra strings in the destination array are not altered. 1586*0e209d39SAndroid Build Coastguard Worker * If the number of destination strings is less than the number 1587*0e209d39SAndroid Build Coastguard Worker * of fields, the trailing part of the input string, including any 1588*0e209d39SAndroid Build Coastguard Worker * field delimiters, is placed in the last destination string. 1589*0e209d39SAndroid Build Coastguard Worker * @param status A reference to a UErrorCode to receive any errors. 1590*0e209d39SAndroid Build Coastguard Worker * @return The number of fields into which the input string was split. 1591*0e209d39SAndroid Build Coastguard Worker * 1592*0e209d39SAndroid Build Coastguard Worker * @stable ICU 4.6 1593*0e209d39SAndroid Build Coastguard Worker */ 1594*0e209d39SAndroid Build Coastguard Worker virtual int32_t split(UText *input, 1595*0e209d39SAndroid Build Coastguard Worker UText *dest[], 1596*0e209d39SAndroid Build Coastguard Worker int32_t destCapacity, 1597*0e209d39SAndroid Build Coastguard Worker UErrorCode &status); 1598*0e209d39SAndroid Build Coastguard Worker 1599*0e209d39SAndroid Build Coastguard Worker /** 1600*0e209d39SAndroid Build Coastguard Worker * Set a processing time limit for match operations with this Matcher. 1601*0e209d39SAndroid Build Coastguard Worker * 1602*0e209d39SAndroid Build Coastguard Worker * Some patterns, when matching certain strings, can run in exponential time. 
1603*0e209d39SAndroid Build Coastguard Worker * For practical purposes, the match operation may appear to be in an 1604*0e209d39SAndroid Build Coastguard Worker * infinite loop. 1605*0e209d39SAndroid Build Coastguard Worker * When a limit is set a match operation will fail with an error if the 1606*0e209d39SAndroid Build Coastguard Worker * limit is exceeded. 1607*0e209d39SAndroid Build Coastguard Worker * 1608*0e209d39SAndroid Build Coastguard Worker * The units of the limit are steps of the match engine. 1609*0e209d39SAndroid Build Coastguard Worker * Correspondence with actual processor time will depend on the speed 1610*0e209d39SAndroid Build Coastguard Worker * of the processor and the details of the specific pattern, but will 1611*0e209d39SAndroid Build Coastguard Worker * typically be on the order of milliseconds. 1612*0e209d39SAndroid Build Coastguard Worker * 1613*0e209d39SAndroid Build Coastguard Worker * By default, the matching time is not limited. 1614*0e209d39SAndroid Build Coastguard Worker * 1615*0e209d39SAndroid Build Coastguard Worker * 1616*0e209d39SAndroid Build Coastguard Worker * @param limit The limit value, or 0 for no limit. 1617*0e209d39SAndroid Build Coastguard Worker * @param status A reference to a UErrorCode to receive any errors. 1618*0e209d39SAndroid Build Coastguard Worker * @stable ICU 4.0 1619*0e209d39SAndroid Build Coastguard Worker */ 1620*0e209d39SAndroid Build Coastguard Worker virtual void setTimeLimit(int32_t limit, UErrorCode &status); 1621*0e209d39SAndroid Build Coastguard Worker 1622*0e209d39SAndroid Build Coastguard Worker /** 1623*0e209d39SAndroid Build Coastguard Worker * Get the time limit, if any, for match operations made with this Matcher. 1624*0e209d39SAndroid Build Coastguard Worker * 1625*0e209d39SAndroid Build Coastguard Worker * @return the maximum allowed time for a match, in units of processing steps. 
1626*0e209d39SAndroid Build Coastguard Worker * @stable ICU 4.0 1627*0e209d39SAndroid Build Coastguard Worker */ 1628*0e209d39SAndroid Build Coastguard Worker virtual int32_t getTimeLimit() const; 1629*0e209d39SAndroid Build Coastguard Worker 1630*0e209d39SAndroid Build Coastguard Worker /** 1631*0e209d39SAndroid Build Coastguard Worker * Set the amount of heap storage available for use by the match backtracking stack. 1632*0e209d39SAndroid Build Coastguard Worker * The matcher is also reset, discarding any results from previous matches. 1633*0e209d39SAndroid Build Coastguard Worker * 1634*0e209d39SAndroid Build Coastguard Worker * ICU uses a backtracking regular expression engine, with the backtrack stack 1635*0e209d39SAndroid Build Coastguard Worker * maintained on the heap. This function sets the limit on the amount of memory 1636*0e209d39SAndroid Build Coastguard Worker * that can be used for this purpose. A backtracking stack overflow will 1637*0e209d39SAndroid Build Coastguard Worker * result in an error from the match operation that caused it. 1638*0e209d39SAndroid Build Coastguard Worker * 1639*0e209d39SAndroid Build Coastguard Worker * A limit is desirable because a malicious or poorly designed pattern can use 1640*0e209d39SAndroid Build Coastguard Worker * excessive memory, potentially crashing the process. A limit is enabled 1641*0e209d39SAndroid Build Coastguard Worker * by default. 1642*0e209d39SAndroid Build Coastguard Worker * 1643*0e209d39SAndroid Build Coastguard Worker * @param limit The maximum size, in bytes, of the matching backtrack stack. 1644*0e209d39SAndroid Build Coastguard Worker * A value of zero means no limit. 1645*0e209d39SAndroid Build Coastguard Worker * The limit must be greater than or equal to zero. 1646*0e209d39SAndroid Build Coastguard Worker * 1647*0e209d39SAndroid Build Coastguard Worker * @param status A reference to a UErrorCode to receive any errors.
1648*0e209d39SAndroid Build Coastguard Worker * 1649*0e209d39SAndroid Build Coastguard Worker * @stable ICU 4.0 1650*0e209d39SAndroid Build Coastguard Worker */ 1651*0e209d39SAndroid Build Coastguard Worker virtual void setStackLimit(int32_t limit, UErrorCode &status); 1652*0e209d39SAndroid Build Coastguard Worker 1653*0e209d39SAndroid Build Coastguard Worker /** 1654*0e209d39SAndroid Build Coastguard Worker * Get the size of the heap storage available for use by the backtracking stack. 1655*0e209d39SAndroid Build Coastguard Worker * 1656*0e209d39SAndroid Build Coastguard Worker * @return the maximum backtracking stack size, in bytes, or zero if the 1657*0e209d39SAndroid Build Coastguard Worker * stack size is unlimited. 1658*0e209d39SAndroid Build Coastguard Worker * @stable ICU 4.0 1659*0e209d39SAndroid Build Coastguard Worker */ 1660*0e209d39SAndroid Build Coastguard Worker virtual int32_t getStackLimit() const; 1661*0e209d39SAndroid Build Coastguard Worker 1662*0e209d39SAndroid Build Coastguard Worker 1663*0e209d39SAndroid Build Coastguard Worker /** 1664*0e209d39SAndroid Build Coastguard Worker * Set a callback function for use with this Matcher. 1665*0e209d39SAndroid Build Coastguard Worker * During matching operations the function will be called periodically, 1666*0e209d39SAndroid Build Coastguard Worker * giving the application the opportunity to terminate a long-running 1667*0e209d39SAndroid Build Coastguard Worker * match. 1668*0e209d39SAndroid Build Coastguard Worker * 1669*0e209d39SAndroid Build Coastguard Worker * @param callback A pointer to the user-supplied callback function. 1670*0e209d39SAndroid Build Coastguard Worker * @param context User context pointer. The value supplied at the 1671*0e209d39SAndroid Build Coastguard Worker * time the callback function is set will be saved 1672*0e209d39SAndroid Build Coastguard Worker * and passed to the callback each time that it is called.
1673*0e209d39SAndroid Build Coastguard Worker * @param status A reference to a UErrorCode to receive any errors. 1674*0e209d39SAndroid Build Coastguard Worker * @stable ICU 4.0 1675*0e209d39SAndroid Build Coastguard Worker */ 1676*0e209d39SAndroid Build Coastguard Worker virtual void setMatchCallback(URegexMatchCallback *callback, 1677*0e209d39SAndroid Build Coastguard Worker const void *context, 1678*0e209d39SAndroid Build Coastguard Worker UErrorCode &status); 1679*0e209d39SAndroid Build Coastguard Worker 1680*0e209d39SAndroid Build Coastguard Worker 1681*0e209d39SAndroid Build Coastguard Worker /** 1682*0e209d39SAndroid Build Coastguard Worker * Get the callback function for this Matcher. 1683*0e209d39SAndroid Build Coastguard Worker * 1684*0e209d39SAndroid Build Coastguard Worker * @param callback Out parameter, receives a pointer to the user-supplied 1685*0e209d39SAndroid Build Coastguard Worker * callback function. 1686*0e209d39SAndroid Build Coastguard Worker * @param context Out parameter, receives the user context pointer that 1687*0e209d39SAndroid Build Coastguard Worker * was set when setMatchCallback() was called. 1688*0e209d39SAndroid Build Coastguard Worker * @param status A reference to a UErrorCode to receive any errors. 1689*0e209d39SAndroid Build Coastguard Worker * @stable ICU 4.0 1690*0e209d39SAndroid Build Coastguard Worker */ 1691*0e209d39SAndroid Build Coastguard Worker virtual void getMatchCallback(URegexMatchCallback *&callback, 1692*0e209d39SAndroid Build Coastguard Worker const void *&context, 1693*0e209d39SAndroid Build Coastguard Worker UErrorCode &status); 1694*0e209d39SAndroid Build Coastguard Worker 1695*0e209d39SAndroid Build Coastguard Worker 1696*0e209d39SAndroid Build Coastguard Worker /** 1697*0e209d39SAndroid Build Coastguard Worker * Set a progress callback function for use with find operations on this Matcher.
1698*0e209d39SAndroid Build Coastguard Worker * During find operations, the callback will be invoked after each return from a 1699*0e209d39SAndroid Build Coastguard Worker * match attempt, giving the application the opportunity to terminate a long-running 1700*0e209d39SAndroid Build Coastguard Worker * find operation. 1701*0e209d39SAndroid Build Coastguard Worker * 1702*0e209d39SAndroid Build Coastguard Worker * @param callback A pointer to the user-supplied callback function. 1703*0e209d39SAndroid Build Coastguard Worker * @param context User context pointer. The value supplied at the 1704*0e209d39SAndroid Build Coastguard Worker * time the callback function is set will be saved 1705*0e209d39SAndroid Build Coastguard Worker * and passed to the callback each time that it is called. 1706*0e209d39SAndroid Build Coastguard Worker * @param status A reference to a UErrorCode to receive any errors. 1707*0e209d39SAndroid Build Coastguard Worker * @stable ICU 4.6 1708*0e209d39SAndroid Build Coastguard Worker */ 1709*0e209d39SAndroid Build Coastguard Worker virtual void setFindProgressCallback(URegexFindProgressCallback *callback, 1710*0e209d39SAndroid Build Coastguard Worker const void *context, 1711*0e209d39SAndroid Build Coastguard Worker UErrorCode &status); 1712*0e209d39SAndroid Build Coastguard Worker 1713*0e209d39SAndroid Build Coastguard Worker 1714*0e209d39SAndroid Build Coastguard Worker /** 1715*0e209d39SAndroid Build Coastguard Worker * Get the find progress callback function for this Matcher. 1716*0e209d39SAndroid Build Coastguard Worker * 1717*0e209d39SAndroid Build Coastguard Worker * @param callback Out parameter, receives a pointer to the user-supplied 1718*0e209d39SAndroid Build Coastguard Worker * callback function. 1719*0e209d39SAndroid Build Coastguard Worker * @param context Out parameter, receives the user context pointer that 1720*0e209d39SAndroid Build Coastguard Worker * was set when setFindProgressCallback() was called.
1721*0e209d39SAndroid Build Coastguard Worker * @param status A reference to a UErrorCode to receive any errors. 1722*0e209d39SAndroid Build Coastguard Worker * @stable ICU 4.6 1723*0e209d39SAndroid Build Coastguard Worker */ 1724*0e209d39SAndroid Build Coastguard Worker virtual void getFindProgressCallback(URegexFindProgressCallback *&callback, 1725*0e209d39SAndroid Build Coastguard Worker const void *&context, 1726*0e209d39SAndroid Build Coastguard Worker UErrorCode &status); 1727*0e209d39SAndroid Build Coastguard Worker 1728*0e209d39SAndroid Build Coastguard Worker #ifndef U_HIDE_INTERNAL_API 1729*0e209d39SAndroid Build Coastguard Worker /** 1730*0e209d39SAndroid Build Coastguard Worker * setTrace Debug function, enable/disable tracing of the matching engine. 1731*0e209d39SAndroid Build Coastguard Worker * For internal ICU development use only. DO NOT USE!!!! 1732*0e209d39SAndroid Build Coastguard Worker * @internal 1733*0e209d39SAndroid Build Coastguard Worker */ 1734*0e209d39SAndroid Build Coastguard Worker void setTrace(UBool state); 1735*0e209d39SAndroid Build Coastguard Worker #endif /* U_HIDE_INTERNAL_API */ 1736*0e209d39SAndroid Build Coastguard Worker 1737*0e209d39SAndroid Build Coastguard Worker /** 1738*0e209d39SAndroid Build Coastguard Worker * ICU "poor man's RTTI", returns a UClassID for this class. 1739*0e209d39SAndroid Build Coastguard Worker * 1740*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.2 1741*0e209d39SAndroid Build Coastguard Worker */ 1742*0e209d39SAndroid Build Coastguard Worker static UClassID U_EXPORT2 getStaticClassID(); 1743*0e209d39SAndroid Build Coastguard Worker 1744*0e209d39SAndroid Build Coastguard Worker /** 1745*0e209d39SAndroid Build Coastguard Worker * ICU "poor man's RTTI", returns a UClassID for the actual class.
1746*0e209d39SAndroid Build Coastguard Worker * 1747*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.2 1748*0e209d39SAndroid Build Coastguard Worker */ 1749*0e209d39SAndroid Build Coastguard Worker virtual UClassID getDynamicClassID() const override; 1750*0e209d39SAndroid Build Coastguard Worker 1751*0e209d39SAndroid Build Coastguard Worker private: 1752*0e209d39SAndroid Build Coastguard Worker // Constructors and other object boilerplate are private. 1753*0e209d39SAndroid Build Coastguard Worker // Instances of RegexMatcher can not be assigned, copied, cloned, etc. 1754*0e209d39SAndroid Build Coastguard Worker RegexMatcher() = delete; // default constructor not implemented 1755*0e209d39SAndroid Build Coastguard Worker RegexMatcher(const RegexPattern *pat); 1756*0e209d39SAndroid Build Coastguard Worker RegexMatcher(const RegexMatcher &other) = delete; 1757*0e209d39SAndroid Build Coastguard Worker RegexMatcher &operator =(const RegexMatcher &rhs) = delete; 1758*0e209d39SAndroid Build Coastguard Worker void init(UErrorCode &status); // Common initialization 1759*0e209d39SAndroid Build Coastguard Worker void init2(UText *t, UErrorCode &e); // Common initialization, part 2. 1760*0e209d39SAndroid Build Coastguard Worker 1761*0e209d39SAndroid Build Coastguard Worker friend class RegexPattern; 1762*0e209d39SAndroid Build Coastguard Worker friend class RegexCImpl; 1763*0e209d39SAndroid Build Coastguard Worker public: 1764*0e209d39SAndroid Build Coastguard Worker #ifndef U_HIDE_INTERNAL_API 1765*0e209d39SAndroid Build Coastguard Worker /** @internal */ 1766*0e209d39SAndroid Build Coastguard Worker void resetPreserveRegion(); // Reset matcher state, but preserve any region. 
1767*0e209d39SAndroid Build Coastguard Worker #endif /* U_HIDE_INTERNAL_API */ 1768*0e209d39SAndroid Build Coastguard Worker private: 1769*0e209d39SAndroid Build Coastguard Worker 1770*0e209d39SAndroid Build Coastguard Worker // 1771*0e209d39SAndroid Build Coastguard Worker // MatchAt This is the internal interface to the match engine itself. 1772*0e209d39SAndroid Build Coastguard Worker // Match status comes back in matcher member variables. 1773*0e209d39SAndroid Build Coastguard Worker // 1774*0e209d39SAndroid Build Coastguard Worker void MatchAt(int64_t startIdx, UBool toEnd, UErrorCode &status); 1775*0e209d39SAndroid Build Coastguard Worker inline void backTrack(int64_t &inputIdx, int32_t &patIdx); 1776*0e209d39SAndroid Build Coastguard Worker UBool isWordBoundary(int64_t pos); // perform Perl-like \b test 1777*0e209d39SAndroid Build Coastguard Worker UBool isUWordBoundary(int64_t pos, UErrorCode &status); // perform RBBI based \b test 1778*0e209d39SAndroid Build Coastguard Worker // Find a grapheme cluster boundary using a break iterator. For handling \X in regexes. 1779*0e209d39SAndroid Build Coastguard Worker int64_t followingGCBoundary(int64_t pos, UErrorCode &status); 1780*0e209d39SAndroid Build Coastguard Worker REStackFrame *resetStack(); 1781*0e209d39SAndroid Build Coastguard Worker inline REStackFrame *StateSave(REStackFrame *fp, int64_t savePatIdx, UErrorCode &status); 1782*0e209d39SAndroid Build Coastguard Worker void IncrementTime(UErrorCode &status); 1783*0e209d39SAndroid Build Coastguard Worker 1784*0e209d39SAndroid Build Coastguard Worker // Call user find callback function, if set. Return true if operation should be interrupted. 
1785*0e209d39SAndroid Build Coastguard Worker inline UBool findProgressInterrupt(int64_t matchIndex, UErrorCode &status); 1786*0e209d39SAndroid Build Coastguard Worker 1787*0e209d39SAndroid Build Coastguard Worker int64_t appendGroup(int32_t groupNum, UText *dest, UErrorCode &status) const; 1788*0e209d39SAndroid Build Coastguard Worker 1789*0e209d39SAndroid Build Coastguard Worker UBool findUsingChunk(UErrorCode &status); 1790*0e209d39SAndroid Build Coastguard Worker void MatchChunkAt(int32_t startIdx, UBool toEnd, UErrorCode &status); 1791*0e209d39SAndroid Build Coastguard Worker UBool isChunkWordBoundary(int32_t pos); 1792*0e209d39SAndroid Build Coastguard Worker 1793*0e209d39SAndroid Build Coastguard Worker const RegexPattern *fPattern; 1794*0e209d39SAndroid Build Coastguard Worker RegexPattern *fPatternOwned; // Non-nullptr if this matcher owns the pattern, and 1795*0e209d39SAndroid Build Coastguard Worker // should delete it when through. 1796*0e209d39SAndroid Build Coastguard Worker 1797*0e209d39SAndroid Build Coastguard Worker const UnicodeString *fInput; // The string being matched. Only used for input() 1798*0e209d39SAndroid Build Coastguard Worker UText *fInputText; // The text being matched. Is never nullptr. 1799*0e209d39SAndroid Build Coastguard Worker UText *fAltInputText; // A shallow copy of the text being matched. 1800*0e209d39SAndroid Build Coastguard Worker // Only created if the pattern contains backreferences. 1801*0e209d39SAndroid Build Coastguard Worker int64_t fInputLength; // Full length of the input text. 1802*0e209d39SAndroid Build Coastguard Worker int32_t fFrameSize; // The size of a frame in the backtrack stack. 1803*0e209d39SAndroid Build Coastguard Worker 1804*0e209d39SAndroid Build Coastguard Worker int64_t fRegionStart; // Start of the input region, default = 0. 1805*0e209d39SAndroid Build Coastguard Worker int64_t fRegionLimit; // End of input region, default to input.length. 
1806*0e209d39SAndroid Build Coastguard Worker 1807*0e209d39SAndroid Build Coastguard Worker int64_t fAnchorStart; // Region bounds for anchoring operations (^ or $). 1808*0e209d39SAndroid Build Coastguard Worker int64_t fAnchorLimit; // See useAnchoringBounds 1809*0e209d39SAndroid Build Coastguard Worker 1810*0e209d39SAndroid Build Coastguard Worker int64_t fLookStart; // Region bounds for look-ahead/behind and 1811*0e209d39SAndroid Build Coastguard Worker int64_t fLookLimit; // other boundary tests. See 1812*0e209d39SAndroid Build Coastguard Worker // useTransparentBounds 1813*0e209d39SAndroid Build Coastguard Worker 1814*0e209d39SAndroid Build Coastguard Worker int64_t fActiveStart; // Currently active bounds for matching. 1815*0e209d39SAndroid Build Coastguard Worker int64_t fActiveLimit; // Usually is the same as region, but 1816*0e209d39SAndroid Build Coastguard Worker // is changed to fLookStart/Limit when 1817*0e209d39SAndroid Build Coastguard Worker // entering look around regions. 1818*0e209d39SAndroid Build Coastguard Worker 1819*0e209d39SAndroid Build Coastguard Worker UBool fTransparentBounds; // True if using transparent bounds. 1820*0e209d39SAndroid Build Coastguard Worker UBool fAnchoringBounds; // True if using anchoring bounds. 1821*0e209d39SAndroid Build Coastguard Worker 1822*0e209d39SAndroid Build Coastguard Worker UBool fMatch; // True if the last attempted match was successful. 1823*0e209d39SAndroid Build Coastguard Worker int64_t fMatchStart; // Position of the start of the most recent match 1824*0e209d39SAndroid Build Coastguard Worker int64_t fMatchEnd; // First position after the end of the most recent match 1825*0e209d39SAndroid Build Coastguard Worker // Zero if no previous match, even when a region 1826*0e209d39SAndroid Build Coastguard Worker // is active.
1827*0e209d39SAndroid Build Coastguard Worker int64_t fLastMatchEnd; // First position after the end of the previous match, 1828*0e209d39SAndroid Build Coastguard Worker // or -1 if there was no previous match. 1829*0e209d39SAndroid Build Coastguard Worker int64_t fAppendPosition; // First position after the end of the previous 1830*0e209d39SAndroid Build Coastguard Worker // appendReplacement(). As described by the 1831*0e209d39SAndroid Build Coastguard Worker // JavaDoc for Java Matcher, where it is called 1832*0e209d39SAndroid Build Coastguard Worker // "append position" 1833*0e209d39SAndroid Build Coastguard Worker UBool fHitEnd; // True if the last match touched the end of input. 1834*0e209d39SAndroid Build Coastguard Worker UBool fRequireEnd; // True if the last match required end-of-input 1835*0e209d39SAndroid Build Coastguard Worker // (matched $ or Z) 1836*0e209d39SAndroid Build Coastguard Worker 1837*0e209d39SAndroid Build Coastguard Worker UVector64 *fStack; 1838*0e209d39SAndroid Build Coastguard Worker REStackFrame *fFrame; // After finding a match, the last active stack frame, 1839*0e209d39SAndroid Build Coastguard Worker // which will contain the capture group results. 1840*0e209d39SAndroid Build Coastguard Worker // NOT valid while match engine is running. 1841*0e209d39SAndroid Build Coastguard Worker 1842*0e209d39SAndroid Build Coastguard Worker int64_t *fData; // Data area for use by the compiled pattern. 1843*0e209d39SAndroid Build Coastguard Worker int64_t fSmallData[8]; // Use this for data if it's enough. 1844*0e209d39SAndroid Build Coastguard Worker 1845*0e209d39SAndroid Build Coastguard Worker int32_t fTimeLimit; // Max time (in arbitrary steps) to let the 1846*0e209d39SAndroid Build Coastguard Worker // match engine run. Zero for unlimited. 1847*0e209d39SAndroid Build Coastguard Worker 1848*0e209d39SAndroid Build Coastguard Worker int32_t fTime; // Match time, accumulates while matching. 
1849*0e209d39SAndroid Build Coastguard Worker int32_t fTickCounter; // Low bits counter for time. Counts down StateSaves. 1850*0e209d39SAndroid Build Coastguard Worker // Kept separately from fTime to keep as much 1851*0e209d39SAndroid Build Coastguard Worker // code as possible out of the inline 1852*0e209d39SAndroid Build Coastguard Worker // StateSave function. 1853*0e209d39SAndroid Build Coastguard Worker 1854*0e209d39SAndroid Build Coastguard Worker int32_t fStackLimit; // Maximum memory size to use for the backtrack 1855*0e209d39SAndroid Build Coastguard Worker // stack, in bytes. Zero for unlimited. 1856*0e209d39SAndroid Build Coastguard Worker 1857*0e209d39SAndroid Build Coastguard Worker URegexMatchCallback *fCallbackFn; // Pointer to match progress callback funct. 1858*0e209d39SAndroid Build Coastguard Worker // nullptr if there is no callback. 1859*0e209d39SAndroid Build Coastguard Worker const void *fCallbackContext; // User Context ptr for callback function. 1860*0e209d39SAndroid Build Coastguard Worker 1861*0e209d39SAndroid Build Coastguard Worker URegexFindProgressCallback *fFindProgressCallbackFn; // Pointer to find progress callback funct. 1862*0e209d39SAndroid Build Coastguard Worker // nullptr if there is no callback. 1863*0e209d39SAndroid Build Coastguard Worker const void *fFindProgressCallbackContext; // User Context ptr for find progress callback function. 1864*0e209d39SAndroid Build Coastguard Worker 1865*0e209d39SAndroid Build Coastguard Worker 1866*0e209d39SAndroid Build Coastguard Worker UBool fInputUniStrMaybeMutable; // Set when fInputText wraps a UnicodeString that may be mutable - compatibility. 1867*0e209d39SAndroid Build Coastguard Worker 1868*0e209d39SAndroid Build Coastguard Worker UBool fTraceDebug; // Set true for debug tracing of match engine.
1869*0e209d39SAndroid Build Coastguard Worker 1870*0e209d39SAndroid Build Coastguard Worker UErrorCode fDeferredStatus; // Save error state that cannot be immediately 1871*0e209d39SAndroid Build Coastguard Worker // reported, or that permanently disables this matcher. 1872*0e209d39SAndroid Build Coastguard Worker 1873*0e209d39SAndroid Build Coastguard Worker BreakIterator *fWordBreakItr; 1874*0e209d39SAndroid Build Coastguard Worker BreakIterator *fGCBreakItr; 1875*0e209d39SAndroid Build Coastguard Worker }; 1876*0e209d39SAndroid Build Coastguard Worker 1877*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_END 1878*0e209d39SAndroid Build Coastguard Worker #endif // UCONFIG_NO_REGULAR_EXPRESSIONS 1879*0e209d39SAndroid Build Coastguard Worker 1880*0e209d39SAndroid Build Coastguard Worker #endif /* U_SHOW_CPLUSPLUS_API */ 1881*0e209d39SAndroid Build Coastguard Worker 1882*0e209d39SAndroid Build Coastguard Worker #endif 1883