1*0e209d39SAndroid Build Coastguard Worker // © 2016 and later: Unicode, Inc. and others. 2*0e209d39SAndroid Build Coastguard Worker // License & terms of use: http://www.unicode.org/copyright.html 3*0e209d39SAndroid Build Coastguard Worker /* 4*0e209d39SAndroid Build Coastguard Worker ********************************************************************** 5*0e209d39SAndroid Build Coastguard Worker * Copyright (C) 2001-2011 IBM and others. All rights reserved. 6*0e209d39SAndroid Build Coastguard Worker ********************************************************************** 7*0e209d39SAndroid Build Coastguard Worker * Date Name Description 8*0e209d39SAndroid Build Coastguard Worker * 03/22/2000 helena Creation. 9*0e209d39SAndroid Build Coastguard Worker ********************************************************************** 10*0e209d39SAndroid Build Coastguard Worker */ 11*0e209d39SAndroid Build Coastguard Worker 12*0e209d39SAndroid Build Coastguard Worker #ifndef SEARCH_H 13*0e209d39SAndroid Build Coastguard Worker #define SEARCH_H 14*0e209d39SAndroid Build Coastguard Worker 15*0e209d39SAndroid Build Coastguard Worker #include "unicode/utypes.h" 16*0e209d39SAndroid Build Coastguard Worker 17*0e209d39SAndroid Build Coastguard Worker #if U_SHOW_CPLUSPLUS_API 18*0e209d39SAndroid Build Coastguard Worker 19*0e209d39SAndroid Build Coastguard Worker /** 20*0e209d39SAndroid Build Coastguard Worker * \file 21*0e209d39SAndroid Build Coastguard Worker * \brief C++ API: SearchIterator object. 
22*0e209d39SAndroid Build Coastguard Worker */ 23*0e209d39SAndroid Build Coastguard Worker 24*0e209d39SAndroid Build Coastguard Worker #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION 25*0e209d39SAndroid Build Coastguard Worker 26*0e209d39SAndroid Build Coastguard Worker #include "unicode/uobject.h" 27*0e209d39SAndroid Build Coastguard Worker #include "unicode/unistr.h" 28*0e209d39SAndroid Build Coastguard Worker #include "unicode/chariter.h" 29*0e209d39SAndroid Build Coastguard Worker #include "unicode/brkiter.h" 30*0e209d39SAndroid Build Coastguard Worker #include "unicode/usearch.h" 31*0e209d39SAndroid Build Coastguard Worker 32*0e209d39SAndroid Build Coastguard Worker /** 33*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.0 34*0e209d39SAndroid Build Coastguard Worker */ 35*0e209d39SAndroid Build Coastguard Worker struct USearch; 36*0e209d39SAndroid Build Coastguard Worker /** 37*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.0 38*0e209d39SAndroid Build Coastguard Worker */ 39*0e209d39SAndroid Build Coastguard Worker typedef struct USearch USearch; 40*0e209d39SAndroid Build Coastguard Worker 41*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_BEGIN 42*0e209d39SAndroid Build Coastguard Worker 43*0e209d39SAndroid Build Coastguard Worker /** 44*0e209d39SAndroid Build Coastguard Worker * 45*0e209d39SAndroid Build Coastguard Worker * <tt>SearchIterator</tt> is an abstract base class that provides 46*0e209d39SAndroid Build Coastguard Worker * methods to search for a pattern within a text string. Instances of 47*0e209d39SAndroid Build Coastguard Worker * <tt>SearchIterator</tt> maintain a current position and scan over the 48*0e209d39SAndroid Build Coastguard Worker * target text, returning the indices at which the pattern is matched and the length 49*0e209d39SAndroid Build Coastguard Worker * of each match.
50*0e209d39SAndroid Build Coastguard Worker * <p> 51*0e209d39SAndroid Build Coastguard Worker * <tt>SearchIterator</tt> defines a protocol for text searching. 52*0e209d39SAndroid Build Coastguard Worker * Subclasses provide concrete implementations of various search algorithms. 53*0e209d39SAndroid Build Coastguard Worker * For example, <tt>StringSearch</tt> implements language-sensitive pattern 54*0e209d39SAndroid Build Coastguard Worker * matching based on the comparison rules defined in a 55*0e209d39SAndroid Build Coastguard Worker * <tt>RuleBasedCollator</tt> object. 56*0e209d39SAndroid Build Coastguard Worker * <p> 57*0e209d39SAndroid Build Coastguard Worker * Other options for searching include using a BreakIterator to restrict 58*0e209d39SAndroid Build Coastguard Worker * the points at which matches are detected. 59*0e209d39SAndroid Build Coastguard Worker * <p> 60*0e209d39SAndroid Build Coastguard Worker * <tt>SearchIterator</tt> provides an API that is similar to that of 61*0e209d39SAndroid Build Coastguard Worker * other text iteration classes such as <tt>BreakIterator</tt>. Using 62*0e209d39SAndroid Build Coastguard Worker * this class, it is easy to scan through text looking for all occurrences of 63*0e209d39SAndroid Build Coastguard Worker * a given pattern. The following example uses a <tt>StringSearch</tt> 64*0e209d39SAndroid Build Coastguard Worker * object to find all instances of "fox" in the target string. Any other 65*0e209d39SAndroid Build Coastguard Worker * subclass of <tt>SearchIterator</tt> can be used in an identical 66*0e209d39SAndroid Build Coastguard Worker * manner.
67*0e209d39SAndroid Build Coastguard Worker * <pre><code> 68*0e209d39SAndroid Build Coastguard Worker * UnicodeString target("The quick brown fox jumped over the lazy fox"); 69*0e209d39SAndroid Build Coastguard Worker * UnicodeString pattern("fox"); 70*0e209d39SAndroid Build Coastguard Worker * 71*0e209d39SAndroid Build Coastguard Worker * SearchIterator *iter = new StringSearch(pattern, target); 72*0e209d39SAndroid Build Coastguard Worker * UErrorCode error = U_ZERO_ERROR; 73*0e209d39SAndroid Build Coastguard Worker * for (int pos = iter->first(error); pos != USEARCH_DONE; 74*0e209d39SAndroid Build Coastguard Worker * pos = iter->next(error)) { 75*0e209d39SAndroid Build Coastguard Worker * printf("Found match at %d pos, length is %d\n", pos, iter->getMatchedLength()); 76*0e209d39SAndroid Build Coastguard Worker * } 77*0e209d39SAndroid Build Coastguard Worker * </code></pre> 78*0e209d39SAndroid Build Coastguard Worker * 79*0e209d39SAndroid Build Coastguard Worker * @see StringSearch 80*0e209d39SAndroid Build Coastguard Worker * @see RuleBasedCollator 81*0e209d39SAndroid Build Coastguard Worker */ 82*0e209d39SAndroid Build Coastguard Worker class U_I18N_API SearchIterator : public UObject { 83*0e209d39SAndroid Build Coastguard Worker 84*0e209d39SAndroid Build Coastguard Worker public: 85*0e209d39SAndroid Build Coastguard Worker 86*0e209d39SAndroid Build Coastguard Worker // public constructors and destructors ------------------------------- 87*0e209d39SAndroid Build Coastguard Worker 88*0e209d39SAndroid Build Coastguard Worker /** 89*0e209d39SAndroid Build Coastguard Worker * Copy constructor that creates a SearchIterator instance with the same 90*0e209d39SAndroid Build Coastguard Worker * behavior, and iterating over the same text. 91*0e209d39SAndroid Build Coastguard Worker * @param other the SearchIterator instance to be copied.
92*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.0 93*0e209d39SAndroid Build Coastguard Worker */ 94*0e209d39SAndroid Build Coastguard Worker SearchIterator(const SearchIterator &other); 95*0e209d39SAndroid Build Coastguard Worker 96*0e209d39SAndroid Build Coastguard Worker /** 97*0e209d39SAndroid Build Coastguard Worker * Destructor. Cleans up the search iterator data struct. 98*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.0 99*0e209d39SAndroid Build Coastguard Worker */ 100*0e209d39SAndroid Build Coastguard Worker virtual ~SearchIterator(); 101*0e209d39SAndroid Build Coastguard Worker 102*0e209d39SAndroid Build Coastguard Worker // public get and set methods ---------------------------------------- 103*0e209d39SAndroid Build Coastguard Worker 104*0e209d39SAndroid Build Coastguard Worker /** 105*0e209d39SAndroid Build Coastguard Worker * Sets the index to point to the given position, and clears any state 106*0e209d39SAndroid Build Coastguard Worker * that's affected. 107*0e209d39SAndroid Build Coastguard Worker * <p> 108*0e209d39SAndroid Build Coastguard Worker * This method takes the argument index and sets the position in the text 109*0e209d39SAndroid Build Coastguard Worker * string accordingly without checking if the index is pointing to a 110*0e209d39SAndroid Build Coastguard Worker * valid starting point to begin searching. 111*0e209d39SAndroid Build Coastguard Worker * @param position within the text to be set. 
If position is less 112*0e209d39SAndroid Build Coastguard Worker * than or greater than the text range for searching, 113*0e209d39SAndroid Build Coastguard Worker * an U_INDEX_OUTOFBOUNDS_ERROR will be returned 114*0e209d39SAndroid Build Coastguard Worker * @param status for errors if it occurs 115*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.0 116*0e209d39SAndroid Build Coastguard Worker */ 117*0e209d39SAndroid Build Coastguard Worker virtual void setOffset(int32_t position, UErrorCode &status) = 0; 118*0e209d39SAndroid Build Coastguard Worker 119*0e209d39SAndroid Build Coastguard Worker /** 120*0e209d39SAndroid Build Coastguard Worker * Return the current index in the text being searched. 121*0e209d39SAndroid Build Coastguard Worker * If the iteration has gone past the end of the text 122*0e209d39SAndroid Build Coastguard Worker * (or past the beginning for a backwards search), USEARCH_DONE 123*0e209d39SAndroid Build Coastguard Worker * is returned. 124*0e209d39SAndroid Build Coastguard Worker * @return current index in the text being searched. 125*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.0 126*0e209d39SAndroid Build Coastguard Worker */ 127*0e209d39SAndroid Build Coastguard Worker virtual int32_t getOffset() const = 0; 128*0e209d39SAndroid Build Coastguard Worker 129*0e209d39SAndroid Build Coastguard Worker /** 130*0e209d39SAndroid Build Coastguard Worker * Sets the text searching attributes located in the enum 131*0e209d39SAndroid Build Coastguard Worker * USearchAttribute with values from the enum USearchAttributeValue. 132*0e209d39SAndroid Build Coastguard Worker * USEARCH_DEFAULT can be used for all attributes for resetting. 
133*0e209d39SAndroid Build Coastguard Worker * @param attribute text attribute (enum USearchAttribute) to be set 134*0e209d39SAndroid Build Coastguard Worker * @param value text attribute value 135*0e209d39SAndroid Build Coastguard Worker * @param status for errors if it occurs 136*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.0 137*0e209d39SAndroid Build Coastguard Worker */ 138*0e209d39SAndroid Build Coastguard Worker void setAttribute(USearchAttribute attribute, 139*0e209d39SAndroid Build Coastguard Worker USearchAttributeValue value, 140*0e209d39SAndroid Build Coastguard Worker UErrorCode &status); 141*0e209d39SAndroid Build Coastguard Worker 142*0e209d39SAndroid Build Coastguard Worker /** 143*0e209d39SAndroid Build Coastguard Worker * Gets the text searching attributes 144*0e209d39SAndroid Build Coastguard Worker * @param attribute text attribute (enum USearchAttribute) to be retrieved 145*0e209d39SAndroid Build Coastguard Worker * @return text attribute value 146*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.0 147*0e209d39SAndroid Build Coastguard Worker */ 148*0e209d39SAndroid Build Coastguard Worker USearchAttributeValue getAttribute(USearchAttribute attribute) const; 149*0e209d39SAndroid Build Coastguard Worker 150*0e209d39SAndroid Build Coastguard Worker /** 151*0e209d39SAndroid Build Coastguard Worker * Returns the index to the match in the text string that was searched. 152*0e209d39SAndroid Build Coastguard Worker * This call returns a valid result only after a successful call to 153*0e209d39SAndroid Build Coastguard Worker * <tt>first</tt>, <tt>next</tt>, <tt>previous</tt>, or <tt>last</tt>. 154*0e209d39SAndroid Build Coastguard Worker * Just after construction, or after a searching method returns 155*0e209d39SAndroid Build Coastguard Worker * <tt>USEARCH_DONE</tt>, this method will return <tt>USEARCH_DONE</tt>.
156*0e209d39SAndroid Build Coastguard Worker * <p> 157*0e209d39SAndroid Build Coastguard Worker * Use getMatchedLength to get the matched string length. 158*0e209d39SAndroid Build Coastguard Worker * @return index of a substring within the text string that is being 159*0e209d39SAndroid Build Coastguard Worker * searched. 160*0e209d39SAndroid Build Coastguard Worker * @see #first 161*0e209d39SAndroid Build Coastguard Worker * @see #next 162*0e209d39SAndroid Build Coastguard Worker * @see #previous 163*0e209d39SAndroid Build Coastguard Worker * @see #last 164*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.0 165*0e209d39SAndroid Build Coastguard Worker */ 166*0e209d39SAndroid Build Coastguard Worker int32_t getMatchedStart() const; 167*0e209d39SAndroid Build Coastguard Worker 168*0e209d39SAndroid Build Coastguard Worker /** 169*0e209d39SAndroid Build Coastguard Worker * Returns the length of text in the string which matches the search 170*0e209d39SAndroid Build Coastguard Worker * pattern. This call returns a valid result only after a successful call 171*0e209d39SAndroid Build Coastguard Worker * to <tt>first</tt>, <tt>next</tt>, <tt>previous</tt>, or <tt>last</tt>. 172*0e209d39SAndroid Build Coastguard Worker * Just after construction, or after a searching method returns 173*0e209d39SAndroid Build Coastguard Worker * <tt>USEARCH_DONE</tt>, this method will return 0. 174*0e209d39SAndroid Build Coastguard Worker * @return The length of the match in the target text, or 0 if there 175*0e209d39SAndroid Build Coastguard Worker * is no match currently. 
176*0e209d39SAndroid Build Coastguard Worker * @see #first 177*0e209d39SAndroid Build Coastguard Worker * @see #next 178*0e209d39SAndroid Build Coastguard Worker * @see #previous 179*0e209d39SAndroid Build Coastguard Worker * @see #last 180*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.0 181*0e209d39SAndroid Build Coastguard Worker */ 182*0e209d39SAndroid Build Coastguard Worker int32_t getMatchedLength() const; 183*0e209d39SAndroid Build Coastguard Worker 184*0e209d39SAndroid Build Coastguard Worker /** 185*0e209d39SAndroid Build Coastguard Worker * Returns the text that was matched by the most recent call to 186*0e209d39SAndroid Build Coastguard Worker * <tt>first</tt>, <tt>next</tt>, <tt>previous</tt>, or <tt>last</tt>. 187*0e209d39SAndroid Build Coastguard Worker * If the iterator is not pointing at a valid match (e.g. just after 188*0e209d39SAndroid Build Coastguard Worker * construction or after <tt>USEARCH_DONE</tt> has been returned), 189*0e209d39SAndroid Build Coastguard Worker * returns an empty string. 190*0e209d39SAndroid Build Coastguard Worker * @param result stores the matched string or an empty string if a match 191*0e209d39SAndroid Build Coastguard Worker * is not found. 192*0e209d39SAndroid Build Coastguard Worker * @see #first 193*0e209d39SAndroid Build Coastguard Worker * @see #next 194*0e209d39SAndroid Build Coastguard Worker * @see #previous 195*0e209d39SAndroid Build Coastguard Worker * @see #last 196*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.0 197*0e209d39SAndroid Build Coastguard Worker */ 198*0e209d39SAndroid Build Coastguard Worker void getMatchedText(UnicodeString &result) const; 199*0e209d39SAndroid Build Coastguard Worker 200*0e209d39SAndroid Build Coastguard Worker /** 201*0e209d39SAndroid Build Coastguard Worker * Set the BreakIterator that will be used to restrict the points 202*0e209d39SAndroid Build Coastguard Worker * at which matches are detected.
The user is responsible for deleting 203*0e209d39SAndroid Build Coastguard Worker * the breakiterator. 204*0e209d39SAndroid Build Coastguard Worker * @param breakiter A BreakIterator that will be used to restrict the 205*0e209d39SAndroid Build Coastguard Worker * points at which matches are detected. If a match is 206*0e209d39SAndroid Build Coastguard Worker * found, but the match's start or end index is not a 207*0e209d39SAndroid Build Coastguard Worker * boundary as determined by the <tt>BreakIterator</tt>, 208*0e209d39SAndroid Build Coastguard Worker * the match will be rejected and another will be searched 209*0e209d39SAndroid Build Coastguard Worker * for. If this parameter is <tt>nullptr</tt>, no break 210*0e209d39SAndroid Build Coastguard Worker * detection is attempted. 211*0e209d39SAndroid Build Coastguard Worker * @param status for errors if it occurs 212*0e209d39SAndroid Build Coastguard Worker * @see BreakIterator 213*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.0 214*0e209d39SAndroid Build Coastguard Worker */ 215*0e209d39SAndroid Build Coastguard Worker void setBreakIterator(BreakIterator *breakiter, UErrorCode &status); 216*0e209d39SAndroid Build Coastguard Worker 217*0e209d39SAndroid Build Coastguard Worker /** 218*0e209d39SAndroid Build Coastguard Worker * Returns the BreakIterator that is used to restrict the points at 219*0e209d39SAndroid Build Coastguard Worker * which matches are detected. This will be the same object that was 220*0e209d39SAndroid Build Coastguard Worker * passed to the constructor or to <tt>setBreakIterator</tt>. 221*0e209d39SAndroid Build Coastguard Worker * Note that <tt>nullptr</tt> is a legal value; it means that break 222*0e209d39SAndroid Build Coastguard Worker * detection should not be attempted. 223*0e209d39SAndroid Build Coastguard Worker * @return BreakIterator used to restrict matchings. 
224*0e209d39SAndroid Build Coastguard Worker * @see #setBreakIterator 225*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.0 226*0e209d39SAndroid Build Coastguard Worker */ 227*0e209d39SAndroid Build Coastguard Worker const BreakIterator* getBreakIterator() const; 228*0e209d39SAndroid Build Coastguard Worker 229*0e209d39SAndroid Build Coastguard Worker /** 230*0e209d39SAndroid Build Coastguard Worker * Set the string text to be searched. Text iteration will hence begin at 231*0e209d39SAndroid Build Coastguard Worker * the start of the text string. This method is useful if you want to 232*0e209d39SAndroid Build Coastguard Worker * re-use an iterator to search for the same pattern within a different 233*0e209d39SAndroid Build Coastguard Worker * body of text. The user is responsible for deleting the text. 234*0e209d39SAndroid Build Coastguard Worker * @param text string to be searched. 235*0e209d39SAndroid Build Coastguard Worker * @param status for errors. If the text length is 0, 236*0e209d39SAndroid Build Coastguard Worker * an U_ILLEGAL_ARGUMENT_ERROR is returned. 237*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.0 238*0e209d39SAndroid Build Coastguard Worker */ 239*0e209d39SAndroid Build Coastguard Worker virtual void setText(const UnicodeString &text, UErrorCode &status); 240*0e209d39SAndroid Build Coastguard Worker 241*0e209d39SAndroid Build Coastguard Worker /** 242*0e209d39SAndroid Build Coastguard Worker * Set the string text to be searched. Text iteration will hence begin at 243*0e209d39SAndroid Build Coastguard Worker * the start of the text string. This method is useful if you want to 244*0e209d39SAndroid Build Coastguard Worker * re-use an iterator to search for the same pattern within a different 245*0e209d39SAndroid Build Coastguard Worker * body of text. 
246*0e209d39SAndroid Build Coastguard Worker * <p> 247*0e209d39SAndroid Build Coastguard Worker * Note: No parsing of the text within the <tt>CharacterIterator</tt> 248*0e209d39SAndroid Build Coastguard Worker * will be done during searching for this version. The block of text 249*0e209d39SAndroid Build Coastguard Worker * in <tt>CharacterIterator</tt> will be used as it is. 250*0e209d39SAndroid Build Coastguard Worker * The user is responsible for deleting the text. 251*0e209d39SAndroid Build Coastguard Worker * @param text string iterator to be searched. 252*0e209d39SAndroid Build Coastguard Worker * @param status for errors if any. If the text length is 0 then an 253*0e209d39SAndroid Build Coastguard Worker * U_ILLEGAL_ARGUMENT_ERROR is returned. 254*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.0 255*0e209d39SAndroid Build Coastguard Worker */ 256*0e209d39SAndroid Build Coastguard Worker virtual void setText(CharacterIterator &text, UErrorCode &status); 257*0e209d39SAndroid Build Coastguard Worker 258*0e209d39SAndroid Build Coastguard Worker /** 259*0e209d39SAndroid Build Coastguard Worker * Return the string text to be searched. 260*0e209d39SAndroid Build Coastguard Worker * @return text string to be searched. 261*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.0 262*0e209d39SAndroid Build Coastguard Worker */ 263*0e209d39SAndroid Build Coastguard Worker const UnicodeString& getText() const; 264*0e209d39SAndroid Build Coastguard Worker 265*0e209d39SAndroid Build Coastguard Worker // operator overloading ---------------------------------------------- 266*0e209d39SAndroid Build Coastguard Worker 267*0e209d39SAndroid Build Coastguard Worker /** 268*0e209d39SAndroid Build Coastguard Worker * Equality operator. 269*0e209d39SAndroid Build Coastguard Worker * @param that SearchIterator instance to be compared. 
270*0e209d39SAndroid Build Coastguard Worker * @return true if both SearchIterators are of the same class, have the 271*0e209d39SAndroid Build Coastguard Worker * same behavior, iterate over the same text and have the same 272*0e209d39SAndroid Build Coastguard Worker * attributes. false otherwise. 273*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.0 274*0e209d39SAndroid Build Coastguard Worker */ 275*0e209d39SAndroid Build Coastguard Worker virtual bool operator==(const SearchIterator &that) const; 276*0e209d39SAndroid Build Coastguard Worker 277*0e209d39SAndroid Build Coastguard Worker /** 278*0e209d39SAndroid Build Coastguard Worker * Not-equal operator. 279*0e209d39SAndroid Build Coastguard Worker * @param that SearchIterator instance to be compared. 280*0e209d39SAndroid Build Coastguard Worker * @return false if operator== returns true, and vice versa. 281*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.0 282*0e209d39SAndroid Build Coastguard Worker */ 283*0e209d39SAndroid Build Coastguard Worker bool operator!=(const SearchIterator &that) const; 284*0e209d39SAndroid Build Coastguard Worker 285*0e209d39SAndroid Build Coastguard Worker // public methods ---------------------------------------------------- 286*0e209d39SAndroid Build Coastguard Worker 287*0e209d39SAndroid Build Coastguard Worker /** 288*0e209d39SAndroid Build Coastguard Worker * Returns a copy of SearchIterator with the same behavior, and 289*0e209d39SAndroid Build Coastguard Worker * iterating over the same text, as this one. Note that all data will be 290*0e209d39SAndroid Build Coastguard Worker * replicated, except for the text string to be searched.
291*0e209d39SAndroid Build Coastguard Worker * @return cloned object 292*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.0 293*0e209d39SAndroid Build Coastguard Worker */ 294*0e209d39SAndroid Build Coastguard Worker virtual SearchIterator* safeClone() const = 0; 295*0e209d39SAndroid Build Coastguard Worker 296*0e209d39SAndroid Build Coastguard Worker /** 297*0e209d39SAndroid Build Coastguard Worker * Returns the first index at which the string text matches the search 298*0e209d39SAndroid Build Coastguard Worker * pattern. The iterator is adjusted so that its current index (as 299*0e209d39SAndroid Build Coastguard Worker * returned by <tt>getOffset</tt>) is the match position if one 300*0e209d39SAndroid Build Coastguard Worker * was found. 301*0e209d39SAndroid Build Coastguard Worker * If a match is not found, <tt>USEARCH_DONE</tt> will be returned and 302*0e209d39SAndroid Build Coastguard Worker * the iterator will be adjusted to the index USEARCH_DONE 303*0e209d39SAndroid Build Coastguard Worker * @param status for errors if it occurs 304*0e209d39SAndroid Build Coastguard Worker * @return The character index of the first match, or 305*0e209d39SAndroid Build Coastguard Worker * <tt>USEARCH_DONE</tt> if there are no matches. 306*0e209d39SAndroid Build Coastguard Worker * @see #getOffset 307*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.0 308*0e209d39SAndroid Build Coastguard Worker */ 309*0e209d39SAndroid Build Coastguard Worker int32_t first(UErrorCode &status); 310*0e209d39SAndroid Build Coastguard Worker 311*0e209d39SAndroid Build Coastguard Worker /** 312*0e209d39SAndroid Build Coastguard Worker * Returns the first index equal or greater than <tt>position</tt> at which the 313*0e209d39SAndroid Build Coastguard Worker * string text matches the search pattern. 
The iterator is adjusted so 314*0e209d39SAndroid Build Coastguard Worker * that its current index (as returned by <tt>getOffset</tt>) is the 315*0e209d39SAndroid Build Coastguard Worker * match position if one was found. 316*0e209d39SAndroid Build Coastguard Worker * If a match is not found, <tt>USEARCH_DONE</tt> will be returned and the 317*0e209d39SAndroid Build Coastguard Worker * iterator will be adjusted to the index <tt>USEARCH_DONE</tt>. 318*0e209d39SAndroid Build Coastguard Worker * @param position where search is to start from. If position is less 319*0e209d39SAndroid Build Coastguard Worker * than or greater than the text range for searching, 320*0e209d39SAndroid Build Coastguard Worker * an U_INDEX_OUTOFBOUNDS_ERROR will be returned 321*0e209d39SAndroid Build Coastguard Worker * @param status for errors if it occurs 322*0e209d39SAndroid Build Coastguard Worker * @return The character index of the first match following 323*0e209d39SAndroid Build Coastguard Worker * <tt>position</tt>, or <tt>USEARCH_DONE</tt> if there are no 324*0e209d39SAndroid Build Coastguard Worker * matches. 325*0e209d39SAndroid Build Coastguard Worker * @see #getOffset 326*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.0 327*0e209d39SAndroid Build Coastguard Worker */ 328*0e209d39SAndroid Build Coastguard Worker int32_t following(int32_t position, UErrorCode &status); 329*0e209d39SAndroid Build Coastguard Worker 330*0e209d39SAndroid Build Coastguard Worker /** 331*0e209d39SAndroid Build Coastguard Worker * Returns the last index in the target text at which it matches the 332*0e209d39SAndroid Build Coastguard Worker * search pattern. The iterator is adjusted so that its current index 333*0e209d39SAndroid Build Coastguard Worker * (as returned by <tt>getOffset</tt>) is the match position if one was 334*0e209d39SAndroid Build Coastguard Worker * found.
335*0e209d39SAndroid Build Coastguard Worker * If a match is not found, <tt>USEARCH_DONE</tt> will be returned and 336*0e209d39SAndroid Build Coastguard Worker * the iterator will be adjusted to the index USEARCH_DONE. 337*0e209d39SAndroid Build Coastguard Worker * @param status for errors if it occurs 338*0e209d39SAndroid Build Coastguard Worker * @return The index of the last match, or <tt>USEARCH_DONE</tt> if 339*0e209d39SAndroid Build Coastguard Worker * there are no matches. 340*0e209d39SAndroid Build Coastguard Worker * @see #getOffset 341*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.0 342*0e209d39SAndroid Build Coastguard Worker */ 343*0e209d39SAndroid Build Coastguard Worker int32_t last(UErrorCode &status); 344*0e209d39SAndroid Build Coastguard Worker 345*0e209d39SAndroid Build Coastguard Worker /** 346*0e209d39SAndroid Build Coastguard Worker * Returns the first index less than <tt>position</tt> at which the string 347*0e209d39SAndroid Build Coastguard Worker * text matches the search pattern. The iterator is adjusted so that its 348*0e209d39SAndroid Build Coastguard Worker * current index (as returned by <tt>getOffset</tt>) is the match 349*0e209d39SAndroid Build Coastguard Worker * position if one was found. If a match is not found, 350*0e209d39SAndroid Build Coastguard Worker * <tt>USEARCH_DONE</tt> will be returned and the iterator will be 351*0e209d39SAndroid Build Coastguard Worker * adjusted to the index USEARCH_DONE 352*0e209d39SAndroid Build Coastguard Worker * <p> 353*0e209d39SAndroid Build Coastguard Worker * When <tt>USEARCH_OVERLAP</tt> option is off, the last index of the 354*0e209d39SAndroid Build Coastguard Worker * result match is always less than <tt>position</tt>. 355*0e209d39SAndroid Build Coastguard Worker * When <tt>USEARCH_OVERLAP</tt> is on, the result match may span across 356*0e209d39SAndroid Build Coastguard Worker * <tt>position</tt>.
357*0e209d39SAndroid Build Coastguard Worker * 358*0e209d39SAndroid Build Coastguard Worker * @param position where search is to start from. If position is less 359*0e209d39SAndroid Build Coastguard Worker * than or greater than the text range for searching, 360*0e209d39SAndroid Build Coastguard Worker * an U_INDEX_OUTOFBOUNDS_ERROR will be returned 361*0e209d39SAndroid Build Coastguard Worker * @param status for errors if it occurs 362*0e209d39SAndroid Build Coastguard Worker * @return The character index of the first match preceding 363*0e209d39SAndroid Build Coastguard Worker * <tt>position</tt>, or <tt>USEARCH_DONE</tt> if there are 364*0e209d39SAndroid Build Coastguard Worker * no matches. 365*0e209d39SAndroid Build Coastguard Worker * @see #getOffset 366*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.0 367*0e209d39SAndroid Build Coastguard Worker */ 368*0e209d39SAndroid Build Coastguard Worker int32_t preceding(int32_t position, UErrorCode &status); 369*0e209d39SAndroid Build Coastguard Worker 370*0e209d39SAndroid Build Coastguard Worker /** 371*0e209d39SAndroid Build Coastguard Worker * Returns the index of the next point at which the text matches the 372*0e209d39SAndroid Build Coastguard Worker * search pattern, starting from the current position 373*0e209d39SAndroid Build Coastguard Worker * The iterator is adjusted so that its current index (as returned by 374*0e209d39SAndroid Build Coastguard Worker * <tt>getOffset</tt>) is the match position if one was found. 375*0e209d39SAndroid Build Coastguard Worker * If a match is not found, <tt>USEARCH_DONE</tt> will be returned and 376*0e209d39SAndroid Build Coastguard Worker * the iterator will be adjusted to a position after the end of the text 377*0e209d39SAndroid Build Coastguard Worker * string. 
378*0e209d39SAndroid Build Coastguard Worker * @param status for errors if it occurs 379*0e209d39SAndroid Build Coastguard Worker * @return The index of the next match after the current position, 380*0e209d39SAndroid Build Coastguard Worker * or <tt>USEARCH_DONE</tt> if there are no more matches. 381*0e209d39SAndroid Build Coastguard Worker * @see #getOffset 382*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.0 383*0e209d39SAndroid Build Coastguard Worker */ 384*0e209d39SAndroid Build Coastguard Worker int32_t next(UErrorCode &status); 385*0e209d39SAndroid Build Coastguard Worker 386*0e209d39SAndroid Build Coastguard Worker /** 387*0e209d39SAndroid Build Coastguard Worker * Returns the index of the previous point at which the string text 388*0e209d39SAndroid Build Coastguard Worker * matches the search pattern, starting at the current position. 389*0e209d39SAndroid Build Coastguard Worker * The iterator is adjusted so that its current index (as returned by 390*0e209d39SAndroid Build Coastguard Worker * <tt>getOffset</tt>) is the match position if one was found. 391*0e209d39SAndroid Build Coastguard Worker * If a match is not found, <tt>USEARCH_DONE</tt> will be returned and 392*0e209d39SAndroid Build Coastguard Worker * the iterator will be adjusted to the index USEARCH_DONE 393*0e209d39SAndroid Build Coastguard Worker * @param status for errors if it occurs 394*0e209d39SAndroid Build Coastguard Worker * @return The index of the previous match before the current position, 395*0e209d39SAndroid Build Coastguard Worker * or <tt>USEARCH_DONE</tt> if there are no more matches. 
396*0e209d39SAndroid Build Coastguard Worker * @see #getOffset 397*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.0 398*0e209d39SAndroid Build Coastguard Worker */ 399*0e209d39SAndroid Build Coastguard Worker int32_t previous(UErrorCode &status); 400*0e209d39SAndroid Build Coastguard Worker 401*0e209d39SAndroid Build Coastguard Worker /** 402*0e209d39SAndroid Build Coastguard Worker * Resets the iteration. 403*0e209d39SAndroid Build Coastguard Worker * Search will begin at the start of the text string if a forward 404*0e209d39SAndroid Build Coastguard Worker * iteration is initiated before a backwards iteration. Otherwise if a 405*0e209d39SAndroid Build Coastguard Worker * backwards iteration is initiated before a forwards iteration, the 406*0e209d39SAndroid Build Coastguard Worker * search will begin at the end of the text string. 407*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.0 408*0e209d39SAndroid Build Coastguard Worker */ 409*0e209d39SAndroid Build Coastguard Worker virtual void reset(); 410*0e209d39SAndroid Build Coastguard Worker 411*0e209d39SAndroid Build Coastguard Worker protected: 412*0e209d39SAndroid Build Coastguard Worker // protected data members --------------------------------------------- 413*0e209d39SAndroid Build Coastguard Worker 414*0e209d39SAndroid Build Coastguard Worker /** 415*0e209d39SAndroid Build Coastguard Worker * C search data struct 416*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.0 417*0e209d39SAndroid Build Coastguard Worker */ 418*0e209d39SAndroid Build Coastguard Worker USearch *m_search_; 419*0e209d39SAndroid Build Coastguard Worker 420*0e209d39SAndroid Build Coastguard Worker /** 421*0e209d39SAndroid Build Coastguard Worker * Break iterator. 422*0e209d39SAndroid Build Coastguard Worker * Currently the C++ breakiterator does not have getRules etc to reproduce 423*0e209d39SAndroid Build Coastguard Worker * another in C. 
Hence we keep the original around and do the verification 424*0e209d39SAndroid Build Coastguard Worker * at the end of the match. The user is responsible for deleting this 425*0e209d39SAndroid Build Coastguard Worker * break iterator. 426*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.0 427*0e209d39SAndroid Build Coastguard Worker */ 428*0e209d39SAndroid Build Coastguard Worker BreakIterator *m_breakiterator_; 429*0e209d39SAndroid Build Coastguard Worker 430*0e209d39SAndroid Build Coastguard Worker /** 431*0e209d39SAndroid Build Coastguard Worker * Unicode string version of the search text 432*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.0 433*0e209d39SAndroid Build Coastguard Worker */ 434*0e209d39SAndroid Build Coastguard Worker UnicodeString m_text_; 435*0e209d39SAndroid Build Coastguard Worker 436*0e209d39SAndroid Build Coastguard Worker // protected constructors and destructors ----------------------------- 437*0e209d39SAndroid Build Coastguard Worker 438*0e209d39SAndroid Build Coastguard Worker /** 439*0e209d39SAndroid Build Coastguard Worker * Default constructor. 440*0e209d39SAndroid Build Coastguard Worker * Initializes data to the default values. 441*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.0 442*0e209d39SAndroid Build Coastguard Worker */ 443*0e209d39SAndroid Build Coastguard Worker SearchIterator(); 444*0e209d39SAndroid Build Coastguard Worker 445*0e209d39SAndroid Build Coastguard Worker /** 446*0e209d39SAndroid Build Coastguard Worker * Constructor for use by subclasses. 447*0e209d39SAndroid Build Coastguard Worker * @param text The target text to be searched. 448*0e209d39SAndroid Build Coastguard Worker * @param breakiter A {@link BreakIterator} that is used to restrict the 449*0e209d39SAndroid Build Coastguard Worker * points at which matches are detected. 
If 450*0e209d39SAndroid Build Coastguard Worker * <tt>handleNext</tt> or <tt>handlePrev</tt> finds a 451*0e209d39SAndroid Build Coastguard Worker * match, but the match's start or end index is not a 452*0e209d39SAndroid Build Coastguard Worker * boundary as determined by the <tt>BreakIterator</tt>, 453*0e209d39SAndroid Build Coastguard Worker * the match is rejected and <tt>handleNext</tt> or 454*0e209d39SAndroid Build Coastguard Worker * <tt>handlePrev</tt> is called again. If this parameter 455*0e209d39SAndroid Build Coastguard Worker * is <tt>nullptr</tt>, no break detection is attempted. 456*0e209d39SAndroid Build Coastguard Worker * @see #handleNext 457*0e209d39SAndroid Build Coastguard Worker * @see #handlePrev 458*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.0 459*0e209d39SAndroid Build Coastguard Worker */ 460*0e209d39SAndroid Build Coastguard Worker SearchIterator(const UnicodeString &text, 461*0e209d39SAndroid Build Coastguard Worker BreakIterator *breakiter = nullptr); 462*0e209d39SAndroid Build Coastguard Worker 463*0e209d39SAndroid Build Coastguard Worker /** 464*0e209d39SAndroid Build Coastguard Worker * Constructor for use by subclasses. 465*0e209d39SAndroid Build Coastguard Worker * <p> 466*0e209d39SAndroid Build Coastguard Worker * Note: No parsing of the text within the <tt>CharacterIterator</tt> 467*0e209d39SAndroid Build Coastguard Worker * will be done during searching for this version. The block of text 468*0e209d39SAndroid Build Coastguard Worker * in <tt>CharacterIterator</tt> will be used as it is. 469*0e209d39SAndroid Build Coastguard Worker * @param text The target text to be searched. 470*0e209d39SAndroid Build Coastguard Worker * @param breakiter A {@link BreakIterator} that is used to restrict the 471*0e209d39SAndroid Build Coastguard Worker * points at which matches are detected. 
If 472*0e209d39SAndroid Build Coastguard Worker * <tt>handleNext</tt> or <tt>handlePrev</tt> finds a 473*0e209d39SAndroid Build Coastguard Worker * match, but the match's start or end index is not a 474*0e209d39SAndroid Build Coastguard Worker * boundary as determined by the <tt>BreakIterator</tt>, 475*0e209d39SAndroid Build Coastguard Worker * the match is rejected and <tt>handleNext</tt> or 476*0e209d39SAndroid Build Coastguard Worker * <tt>handlePrev</tt> is called again. If this parameter 477*0e209d39SAndroid Build Coastguard Worker * is <tt>nullptr</tt>, no break detection is attempted. 478*0e209d39SAndroid Build Coastguard Worker * @see #handleNext 479*0e209d39SAndroid Build Coastguard Worker * @see #handlePrev 480*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.0 481*0e209d39SAndroid Build Coastguard Worker */ 482*0e209d39SAndroid Build Coastguard Worker SearchIterator(CharacterIterator &text, BreakIterator *breakiter = nullptr); 483*0e209d39SAndroid Build Coastguard Worker 484*0e209d39SAndroid Build Coastguard Worker // protected methods -------------------------------------------------- 485*0e209d39SAndroid Build Coastguard Worker 486*0e209d39SAndroid Build Coastguard Worker /** 487*0e209d39SAndroid Build Coastguard Worker * Assignment operator. Sets this iterator to have the same behavior, 488*0e209d39SAndroid Build Coastguard Worker * and iterate over the same text, as the one passed in. 489*0e209d39SAndroid Build Coastguard Worker * @param that instance to be copied. 
490*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.0 491*0e209d39SAndroid Build Coastguard Worker */ 492*0e209d39SAndroid Build Coastguard Worker SearchIterator & operator=(const SearchIterator &that); 493*0e209d39SAndroid Build Coastguard Worker 494*0e209d39SAndroid Build Coastguard Worker /** 495*0e209d39SAndroid Build Coastguard Worker * Abstract method which subclasses override to provide the mechanism 496*0e209d39SAndroid Build Coastguard Worker * for finding the next match in the target text. This allows different 497*0e209d39SAndroid Build Coastguard Worker * subclasses to provide different search algorithms. 498*0e209d39SAndroid Build Coastguard Worker * <p> 499*0e209d39SAndroid Build Coastguard Worker * If a match is found, the implementation should return the index at 500*0e209d39SAndroid Build Coastguard Worker * which the match starts and should call 501*0e209d39SAndroid Build Coastguard Worker * <tt>setMatchLength</tt> with the number of characters 502*0e209d39SAndroid Build Coastguard Worker * in the target text that make up the match. If no match is found, the 503*0e209d39SAndroid Build Coastguard Worker * method should return USEARCH_DONE. 504*0e209d39SAndroid Build Coastguard Worker * <p> 505*0e209d39SAndroid Build Coastguard Worker * @param position The index in the target text at which the search 506*0e209d39SAndroid Build Coastguard Worker * should start. 507*0e209d39SAndroid Build Coastguard Worker * @param status for error codes if any occur. 
508*0e209d39SAndroid Build Coastguard Worker * @return The index at which the match starts, or 509*0e209d39SAndroid Build Coastguard Worker * <tt>USEARCH_DONE</tt> if no match is found. 510*0e209d39SAndroid Build Coastguard Worker * @see #setMatchLength 511*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.0 512*0e209d39SAndroid Build Coastguard Worker */ 513*0e209d39SAndroid Build Coastguard Worker virtual int32_t handleNext(int32_t position, UErrorCode &status) 514*0e209d39SAndroid Build Coastguard Worker = 0; 515*0e209d39SAndroid Build Coastguard Worker 516*0e209d39SAndroid Build Coastguard Worker /** 517*0e209d39SAndroid Build Coastguard Worker * Abstract method which subclasses override to provide the mechanism for 518*0e209d39SAndroid Build Coastguard Worker * finding the previous match in the target text. This allows different 519*0e209d39SAndroid Build Coastguard Worker * subclasses to provide different search algorithms. 520*0e209d39SAndroid Build Coastguard Worker * <p> 521*0e209d39SAndroid Build Coastguard Worker * If a match is found, the implementation should return the index at 522*0e209d39SAndroid Build Coastguard Worker * which the match starts and should call 523*0e209d39SAndroid Build Coastguard Worker * <tt>setMatchLength</tt> with the number of characters 524*0e209d39SAndroid Build Coastguard Worker * in the target text that make up the match. If no match is found, the 525*0e209d39SAndroid Build Coastguard Worker * method should return USEARCH_DONE. 526*0e209d39SAndroid Build Coastguard Worker * <p> 527*0e209d39SAndroid Build Coastguard Worker * @param position The index in the target text at which the search 528*0e209d39SAndroid Build Coastguard Worker * should start. 529*0e209d39SAndroid Build Coastguard Worker * @param status for error codes if any occur. 
530*0e209d39SAndroid Build Coastguard Worker * @return The index at which the match starts, or 531*0e209d39SAndroid Build Coastguard Worker * <tt>USEARCH_DONE</tt> if no match is found. 532*0e209d39SAndroid Build Coastguard Worker * @see #setMatchLength 533*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.0 534*0e209d39SAndroid Build Coastguard Worker */ 535*0e209d39SAndroid Build Coastguard Worker virtual int32_t handlePrev(int32_t position, UErrorCode &status) 536*0e209d39SAndroid Build Coastguard Worker = 0; 537*0e209d39SAndroid Build Coastguard Worker 538*0e209d39SAndroid Build Coastguard Worker /** 539*0e209d39SAndroid Build Coastguard Worker * Sets the length of the currently matched string in the text string to 540*0e209d39SAndroid Build Coastguard Worker * be searched. 541*0e209d39SAndroid Build Coastguard Worker * Subclasses' <tt>handleNext</tt> and <tt>handlePrev</tt> 542*0e209d39SAndroid Build Coastguard Worker * methods should call this when they find a match in the target text. 543*0e209d39SAndroid Build Coastguard Worker * @param length length of the matched text. 544*0e209d39SAndroid Build Coastguard Worker * @see #handleNext 545*0e209d39SAndroid Build Coastguard Worker * @see #handlePrev 546*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.0 547*0e209d39SAndroid Build Coastguard Worker */ 548*0e209d39SAndroid Build Coastguard Worker virtual void setMatchLength(int32_t length); 549*0e209d39SAndroid Build Coastguard Worker 550*0e209d39SAndroid Build Coastguard Worker /** 551*0e209d39SAndroid Build Coastguard Worker * Sets the offset of the currently matched string in the text string to 552*0e209d39SAndroid Build Coastguard Worker * be searched. 553*0e209d39SAndroid Build Coastguard Worker * Subclasses' <tt>handleNext</tt> and <tt>handlePrev</tt> 554*0e209d39SAndroid Build Coastguard Worker * methods should call this when they find a match in the target text. 
555*0e209d39SAndroid Build Coastguard Worker * @param position start offset of the matched text. 556*0e209d39SAndroid Build Coastguard Worker * @see #handleNext 557*0e209d39SAndroid Build Coastguard Worker * @see #handlePrev 558*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.0 559*0e209d39SAndroid Build Coastguard Worker */ 560*0e209d39SAndroid Build Coastguard Worker virtual void setMatchStart(int32_t position); 561*0e209d39SAndroid Build Coastguard Worker 562*0e209d39SAndroid Build Coastguard Worker /** 563*0e209d39SAndroid Build Coastguard Worker * Sets the search state to indicate that no match was found. 564*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.0 565*0e209d39SAndroid Build Coastguard Worker */ 566*0e209d39SAndroid Build Coastguard Worker void setMatchNotFound(); 567*0e209d39SAndroid Build Coastguard Worker }; 568*0e209d39SAndroid Build Coastguard Worker 569*0e209d39SAndroid Build Coastguard Worker /** Inequality operator: yields the logical negation of operator== applied to the same argument. */ inline bool SearchIterator::operator!=(const SearchIterator &that) const 570*0e209d39SAndroid Build Coastguard Worker { 571*0e209d39SAndroid Build Coastguard Worker const bool isEqual = this->operator==(that); return !isEqual; 572*0e209d39SAndroid Build Coastguard Worker } 573*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_END 574*0e209d39SAndroid Build Coastguard Worker 575*0e209d39SAndroid Build Coastguard Worker #endif /* #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION */ 576*0e209d39SAndroid Build Coastguard Worker 577*0e209d39SAndroid Build Coastguard Worker #endif /* U_SHOW_CPLUSPLUS_API */ 578*0e209d39SAndroid Build Coastguard Worker 579*0e209d39SAndroid Build Coastguard Worker #endif /* SEARCH_H */ 580*0e209d39SAndroid Build Coastguard Worker 581