xref: /aosp_15_r20/external/cronet/third_party/icu/source/i18n/stsearch.cpp (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 **********************************************************************
5 *   Copyright (C) 2001-2014 IBM and others. All rights reserved.
6 **********************************************************************
7 *   Date        Name        Description
8 *  03/22/2000   helena      Creation.
9 **********************************************************************
10 */
11 
12 #include "unicode/utypes.h"
13 
14 #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
15 
16 #include "unicode/stsearch.h"
17 #include "usrchimp.h"
18 #include "cmemory.h"
19 
20 U_NAMESPACE_BEGIN
21 
// Emits the ICU run-time type identification boilerplate for StringSearch.
// NOTE(review): the macro is defined outside this file; presumably it
// provides the class-ID accessors declared in the class header -- confirm.
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringSearch)
23 
24 // public constructors and destructors -----------------------------------
25 
26 StringSearch::StringSearch(const UnicodeString &pattern,
27                            const UnicodeString &text,
28                            const Locale        &locale,
29                                  BreakIterator *breakiter,
30                                  UErrorCode    &status) :
31                            SearchIterator(text, breakiter),
32                            m_pattern_(pattern)
33 {
34     if (U_FAILURE(status)) {
35         m_strsrch_ = nullptr;
36         return;
37     }
38 
39     m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(),
40                               m_text_.getBuffer(), m_text_.length(),
41                               locale.getName(), (UBreakIterator *)breakiter,
42                               &status);
43     uprv_free(m_search_);
44     m_search_ = nullptr;
45 
46     if (U_SUCCESS(status)) {
47         // m_search_ has been created by the base SearchIterator class
48         m_search_        = m_strsrch_->search;
49     }
50 }
51 
StringSearch(const UnicodeString & pattern,const UnicodeString & text,RuleBasedCollator * coll,BreakIterator * breakiter,UErrorCode & status)52 StringSearch::StringSearch(const UnicodeString     &pattern,
53                            const UnicodeString     &text,
54                                  RuleBasedCollator *coll,
55                                  BreakIterator     *breakiter,
56                                  UErrorCode        &status) :
57                            SearchIterator(text, breakiter),
58                            m_pattern_(pattern)
59 {
60     if (U_FAILURE(status)) {
61         m_strsrch_ = nullptr;
62         return;
63     }
64     if (coll == nullptr) {
65         status     = U_ILLEGAL_ARGUMENT_ERROR;
66         m_strsrch_ = nullptr;
67         return;
68     }
69     m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
70                                           m_pattern_.length(),
71                                           m_text_.getBuffer(),
72                                           m_text_.length(), coll->toUCollator(),
73                                           (UBreakIterator *)breakiter,
74                                           &status);
75     uprv_free(m_search_);
76     m_search_ = nullptr;
77 
78     if (U_SUCCESS(status)) {
79         // m_search_ has been created by the base SearchIterator class
80         m_search_ = m_strsrch_->search;
81     }
82 }
83 
StringSearch(const UnicodeString & pattern,CharacterIterator & text,const Locale & locale,BreakIterator * breakiter,UErrorCode & status)84 StringSearch::StringSearch(const UnicodeString     &pattern,
85                                  CharacterIterator &text,
86                            const Locale            &locale,
87                                  BreakIterator     *breakiter,
88                                  UErrorCode        &status) :
89                            SearchIterator(text, breakiter),
90                            m_pattern_(pattern)
91 {
92     if (U_FAILURE(status)) {
93         m_strsrch_ = nullptr;
94         return;
95     }
96     m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(),
97                               m_text_.getBuffer(), m_text_.length(),
98                               locale.getName(), (UBreakIterator *)breakiter,
99                               &status);
100     uprv_free(m_search_);
101     m_search_ = nullptr;
102 
103     if (U_SUCCESS(status)) {
104         // m_search_ has been created by the base SearchIterator class
105         m_search_ = m_strsrch_->search;
106     }
107 }
108 
StringSearch(const UnicodeString & pattern,CharacterIterator & text,RuleBasedCollator * coll,BreakIterator * breakiter,UErrorCode & status)109 StringSearch::StringSearch(const UnicodeString     &pattern,
110                                  CharacterIterator &text,
111                                  RuleBasedCollator *coll,
112                                  BreakIterator     *breakiter,
113                                  UErrorCode        &status) :
114                            SearchIterator(text, breakiter),
115                            m_pattern_(pattern)
116 {
117     if (U_FAILURE(status)) {
118         m_strsrch_ = nullptr;
119         return;
120     }
121     if (coll == nullptr) {
122         status     = U_ILLEGAL_ARGUMENT_ERROR;
123         m_strsrch_ = nullptr;
124         return;
125     }
126     m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
127                                           m_pattern_.length(),
128                                           m_text_.getBuffer(),
129                                           m_text_.length(), coll->toUCollator(),
130                                           (UBreakIterator *)breakiter,
131                                           &status);
132     uprv_free(m_search_);
133     m_search_ = nullptr;
134 
135     if (U_SUCCESS(status)) {
136         // m_search_ has been created by the base SearchIterator class
137         m_search_ = m_strsrch_->search;
138     }
139 }
140 
// Copy constructor.
//
// Copies the text, break-iterator pointer and pattern of `that`, then opens
// a fresh usearch handle over this object's own buffers using the collator
// held by that's handle. If `that` has no handle, this object is left
// without one as well (m_strsrch_ and m_search_ both null). Errors from
// reopening are not reported to the caller; on failure m_search_ stays null.
StringSearch::StringSearch(const StringSearch &that) :
                       SearchIterator(that.m_text_, that.m_breakiterator_),
                       m_pattern_(that.m_pattern_)
{
    // Release the m_search_ set up by the superclass constructor.
    uprv_free(m_search_);
    m_search_  = nullptr;
    m_strsrch_ = nullptr;

    if (that.m_strsrch_ == nullptr) {
        // The source was never opened successfully; there is nothing to copy.
        return;
    }

    // Deep copy: a new handle bound to our own pattern and text buffers.
    UErrorCode status = U_ZERO_ERROR;
    UBreakIterator *ubrk =
        reinterpret_cast<UBreakIterator *>(that.m_breakiterator_);
    m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
                                          m_pattern_.length(),
                                          m_text_.getBuffer(),
                                          m_text_.length(),
                                          that.m_strsrch_->collator,
                                          ubrk, &status);
    if (U_SUCCESS(status)) {
        m_search_ = m_strsrch_->search;
    }
}
170 
~StringSearch()171 StringSearch::~StringSearch()
172 {
173     if (m_strsrch_ != nullptr) {
174         usearch_close(m_strsrch_);
175         m_search_ = nullptr;
176     }
177 }
178 
179 StringSearch *
clone() const180 StringSearch::clone() const {
181     return new StringSearch(*this);
182 }
183 
184 // operator overloading ---------------------------------------------
// Copy assignment.
//
// Copies text, break-iterator pointer and pattern from `that`, discards the
// current usearch handle and opens a new one over this object's own buffers
// using the collator held by that's handle.
//
// If `that` has no handle (its construction failed), this object ends up
// without one too, mirroring the copy constructor. Errors from reopening are
// not reported; on failure m_strsrch_ and m_search_ are both null.
StringSearch & StringSearch::operator=(const StringSearch &that)
{
    if (this == &that) {
        return *this;
    }

    m_text_          = that.m_text_;
    m_breakiterator_ = that.m_breakiterator_;
    m_pattern_       = that.m_pattern_;

    // Retire the old search state before anything can fail, so m_search_
    // never refers to a closed handle.
    if (m_strsrch_ != nullptr) {
        // m_search_ aliases m_strsrch_->search (see the constructors) and
        // must not be used once the handle is closed.
        usearch_close(m_strsrch_);
        m_strsrch_ = nullptr;
    }
    else {
        // No handle was ever opened: m_search_ is either null or still the
        // block set up by the SearchIterator base, which the constructors
        // release with uprv_free().
        uprv_free(m_search_);
    }
    m_search_ = nullptr;

    if (that.m_strsrch_ == nullptr) {
        // Source was never opened; there is no collator to reopen with.
        return *this;
    }

    // NOTE(review): the handle is opened without a break iterator although
    // m_breakiterator_ was copied above, whereas the copy constructor passes
    // it through. Kept as in the original -- confirm which is intended.
    UErrorCode status = U_ZERO_ERROR;
    m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
                                          m_pattern_.length(),
                                          m_text_.getBuffer(),
                                          m_text_.length(),
                                          that.m_strsrch_->collator,
                                          nullptr, &status);
    if (m_strsrch_ != nullptr) {
        m_search_ = m_strsrch_->search;
    }
    return *this;
}
207 
operator ==(const SearchIterator & that) const208 bool StringSearch::operator==(const SearchIterator &that) const
209 {
210     if (this == &that) {
211         return true;
212     }
213     if (SearchIterator::operator ==(that)) {
214         const StringSearch *thatsrch = dynamic_cast<const StringSearch *>(&that);
215         if (thatsrch == nullptr) return false;
216         return (this->m_pattern_ == thatsrch->m_pattern_ &&
217                 this->m_strsrch_->collator == thatsrch->m_strsrch_->collator);
218     }
219     return false;
220 }
221 
222 // public get and set methods ----------------------------------------
223 
setOffset(int32_t position,UErrorCode & status)224 void StringSearch::setOffset(int32_t position, UErrorCode &status)
225 {
226     // status checked in usearch_setOffset
227     usearch_setOffset(m_strsrch_, position, &status);
228 }
229 
getOffset() const230 int32_t StringSearch::getOffset() const
231 {
232     return usearch_getOffset(m_strsrch_);
233 }
234 
setText(const UnicodeString & text,UErrorCode & status)235 void StringSearch::setText(const UnicodeString &text, UErrorCode &status)
236 {
237     if (U_SUCCESS(status)) {
238         m_text_ = text;
239         usearch_setText(m_strsrch_, text.getBuffer(), text.length(), &status);
240     }
241 }
242 
setText(CharacterIterator & text,UErrorCode & status)243 void StringSearch::setText(CharacterIterator &text, UErrorCode &status)
244 {
245     if (U_SUCCESS(status)) {
246         text.getText(m_text_);
247         usearch_setText(m_strsrch_, m_text_.getBuffer(), m_text_.length(), &status);
248     }
249 }
250 
getCollator() const251 RuleBasedCollator * StringSearch::getCollator() const
252 {
253     // Note the const_cast. It would be cleaner if this const method returned a const collator.
254     return RuleBasedCollator::rbcFromUCollator(const_cast<UCollator *>(m_strsrch_->collator));
255 }
256 
setCollator(RuleBasedCollator * coll,UErrorCode & status)257 void StringSearch::setCollator(RuleBasedCollator *coll, UErrorCode &status)
258 {
259     if (U_SUCCESS(status)) {
260         usearch_setCollator(m_strsrch_, coll->toUCollator(), &status);
261     }
262 }
263 
setPattern(const UnicodeString & pattern,UErrorCode & status)264 void StringSearch::setPattern(const UnicodeString &pattern,
265                                     UErrorCode    &status)
266 {
267     if (U_SUCCESS(status)) {
268         m_pattern_ = pattern;
269         usearch_setPattern(m_strsrch_, m_pattern_.getBuffer(), m_pattern_.length(),
270                            &status);
271     }
272 }
273 
getPattern() const274 const UnicodeString & StringSearch::getPattern() const
275 {
276     return m_pattern_;
277 }
278 
279 // public methods ----------------------------------------------------
280 
reset()281 void StringSearch::reset()
282 {
283     usearch_reset(m_strsrch_);
284 }
285 
safeClone() const286 StringSearch * StringSearch::safeClone() const
287 {
288     UErrorCode status = U_ZERO_ERROR;
289     StringSearch *result = new StringSearch(m_pattern_, m_text_,
290                                             getCollator(),
291                                             m_breakiterator_,
292                                             status);
293     /* test for nullptr */
294     if (result == 0) {
295         status = U_MEMORY_ALLOCATION_ERROR;
296         return 0;
297     }
298     result->setOffset(getOffset(), status);
299     result->setMatchStart(m_strsrch_->search->matchedIndex);
300     result->setMatchLength(m_strsrch_->search->matchedLength);
301     if (U_FAILURE(status)) {
302         return nullptr;
303     }
304     return result;
305 }
306 
307 // protected method -------------------------------------------------
308 
handleNext(int32_t position,UErrorCode & status)309 int32_t StringSearch::handleNext(int32_t position, UErrorCode &status)
310 {
311     // values passed here are already in the pre-shift position
312     if (U_SUCCESS(status)) {
313         if (m_strsrch_->pattern.cesLength == 0) {
314             m_search_->matchedIndex =
315                                     m_search_->matchedIndex == USEARCH_DONE ?
316                                     getOffset() : m_search_->matchedIndex + 1;
317             m_search_->matchedLength = 0;
318             ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex,
319                            &status);
320             if (m_search_->matchedIndex == m_search_->textLength) {
321                 m_search_->matchedIndex = USEARCH_DONE;
322             }
323         }
324         else {
325             // looking at usearch.cpp, this part is shifted out to
326             // StringSearch instead of SearchIterator because m_strsrch_ is
327             // not accessible in SearchIterator
328 #if 0
329             if (position + m_strsrch_->pattern.defaultShiftSize
330                 > m_search_->textLength) {
331                 setMatchNotFound();
332                 return USEARCH_DONE;
333             }
334 #endif
335             if (m_search_->matchedLength <= 0) {
336                 // the flipping direction issue has already been handled
337                 // in next()
338                 // for boundary check purposes. this will ensure that the
339                 // next match will not precede the current offset
340                 // note search->matchedIndex will always be set to something
341                 // in the code
342                 m_search_->matchedIndex = position - 1;
343             }
344 
345             ucol_setOffset(m_strsrch_->textIter, position, &status);
346 
347 #if 0
348             for (;;) {
349                 if (m_search_->isCanonicalMatch) {
350                     // can't use exact here since extra accents are allowed.
351                     usearch_handleNextCanonical(m_strsrch_, &status);
352                 }
353                 else {
354                     usearch_handleNextExact(m_strsrch_, &status);
355                 }
356                 if (U_FAILURE(status)) {
357                     return USEARCH_DONE;
358                 }
359                 if (m_breakiterator_ == nullptr
360 #if !UCONFIG_NO_BREAK_ITERATION
361                     ||
362                     m_search_->matchedIndex == USEARCH_DONE ||
363                     (m_breakiterator_->isBoundary(m_search_->matchedIndex) &&
364                      m_breakiterator_->isBoundary(m_search_->matchedIndex +
365                                                   m_search_->matchedLength))
366 #endif
367                 ) {
368                     if (m_search_->matchedIndex == USEARCH_DONE) {
369                         ucol_setOffset(m_strsrch_->textIter,
370                                        m_search_->textLength, &status);
371                     }
372                     else {
373                         ucol_setOffset(m_strsrch_->textIter,
374                                        m_search_->matchedIndex, &status);
375                     }
376                     return m_search_->matchedIndex;
377                 }
378             }
379 #else
380             // if m_strsrch_->breakIter is always the same as m_breakiterator_
381             // then we don't need to check the match boundaries here because
382             // usearch_handleNextXXX will already have done it.
383             if (m_search_->isCanonicalMatch) {
384             	// *could* actually use exact here 'cause no extra accents allowed...
385             	usearch_handleNextCanonical(m_strsrch_, &status);
386             } else {
387             	usearch_handleNextExact(m_strsrch_, &status);
388             }
389 
390             if (U_FAILURE(status)) {
391             	return USEARCH_DONE;
392             }
393 
394             if (m_search_->matchedIndex == USEARCH_DONE) {
395             	ucol_setOffset(m_strsrch_->textIter, m_search_->textLength, &status);
396             } else {
397             	ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex, &status);
398             }
399 
400             return m_search_->matchedIndex;
401 #endif
402         }
403     }
404     return USEARCH_DONE;
405 }
406 
handlePrev(int32_t position,UErrorCode & status)407 int32_t StringSearch::handlePrev(int32_t position, UErrorCode &status)
408 {
409     // values passed here are already in the pre-shift position
410     if (U_SUCCESS(status)) {
411         if (m_strsrch_->pattern.cesLength == 0) {
412             m_search_->matchedIndex =
413                   (m_search_->matchedIndex == USEARCH_DONE ? getOffset() :
414                    m_search_->matchedIndex);
415             if (m_search_->matchedIndex == 0) {
416                 setMatchNotFound();
417             }
418             else {
419                 m_search_->matchedIndex --;
420                 ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex,
421                                &status);
422                 m_search_->matchedLength = 0;
423             }
424         }
425         else {
426             // looking at usearch.cpp, this part is shifted out to
427             // StringSearch instead of SearchIterator because m_strsrch_ is
428             // not accessible in SearchIterator
429 #if 0
430             if (!m_search_->isOverlap &&
431                 position - m_strsrch_->pattern.defaultShiftSize < 0) {
432                 setMatchNotFound();
433                 return USEARCH_DONE;
434             }
435 
436             for (;;) {
437                 if (m_search_->isCanonicalMatch) {
438                     // can't use exact here since extra accents are allowed.
439                     usearch_handlePreviousCanonical(m_strsrch_, &status);
440                 }
441                 else {
442                     usearch_handlePreviousExact(m_strsrch_, &status);
443                 }
444                 if (U_FAILURE(status)) {
445                     return USEARCH_DONE;
446                 }
447                 if (m_breakiterator_ == nullptr
448 #if !UCONFIG_NO_BREAK_ITERATION
449                     ||
450                     m_search_->matchedIndex == USEARCH_DONE ||
451                     (m_breakiterator_->isBoundary(m_search_->matchedIndex) &&
452                      m_breakiterator_->isBoundary(m_search_->matchedIndex +
453                                                   m_search_->matchedLength))
454 #endif
455                 ) {
456                     return m_search_->matchedIndex;
457                 }
458             }
459 #else
460             ucol_setOffset(m_strsrch_->textIter, position, &status);
461 
462             if (m_search_->isCanonicalMatch) {
463             	// *could* use exact match here since extra accents *not* allowed!
464             	usearch_handlePreviousCanonical(m_strsrch_, &status);
465             } else {
466             	usearch_handlePreviousExact(m_strsrch_, &status);
467             }
468 
469             if (U_FAILURE(status)) {
470             	return USEARCH_DONE;
471             }
472 
473             return m_search_->matchedIndex;
474 #endif
475         }
476 
477         return m_search_->matchedIndex;
478     }
479     return USEARCH_DONE;
480 }
481 
482 U_NAMESPACE_END
483 
#endif /* #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION */
485