1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 **********************************************************************
5 * Copyright (C) 2001-2014 IBM and others. All rights reserved.
6 **********************************************************************
7 * Date Name Description
8 * 03/22/2000 helena Creation.
9 **********************************************************************
10 */
11
12 #include "unicode/utypes.h"
13
14 #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
15
16 #include "unicode/stsearch.h"
17 #include "usrchimp.h"
18 #include "cmemory.h"
19
20 U_NAMESPACE_BEGIN
21
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringSearch)22 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringSearch)
23
24 // public constructors and destructors -----------------------------------
25
26 StringSearch::StringSearch(const UnicodeString &pattern,
27 const UnicodeString &text,
28 const Locale &locale,
29 BreakIterator *breakiter,
30 UErrorCode &status) :
31 SearchIterator(text, breakiter),
32 m_pattern_(pattern)
33 {
34 if (U_FAILURE(status)) {
35 m_strsrch_ = nullptr;
36 return;
37 }
38
39 m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(),
40 m_text_.getBuffer(), m_text_.length(),
41 locale.getName(), (UBreakIterator *)breakiter,
42 &status);
43 uprv_free(m_search_);
44 m_search_ = nullptr;
45
46 if (U_SUCCESS(status)) {
47 // m_search_ has been created by the base SearchIterator class
48 m_search_ = m_strsrch_->search;
49 }
50 }
51
StringSearch(const UnicodeString & pattern,const UnicodeString & text,RuleBasedCollator * coll,BreakIterator * breakiter,UErrorCode & status)52 StringSearch::StringSearch(const UnicodeString &pattern,
53 const UnicodeString &text,
54 RuleBasedCollator *coll,
55 BreakIterator *breakiter,
56 UErrorCode &status) :
57 SearchIterator(text, breakiter),
58 m_pattern_(pattern)
59 {
60 if (U_FAILURE(status)) {
61 m_strsrch_ = nullptr;
62 return;
63 }
64 if (coll == nullptr) {
65 status = U_ILLEGAL_ARGUMENT_ERROR;
66 m_strsrch_ = nullptr;
67 return;
68 }
69 m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
70 m_pattern_.length(),
71 m_text_.getBuffer(),
72 m_text_.length(), coll->toUCollator(),
73 (UBreakIterator *)breakiter,
74 &status);
75 uprv_free(m_search_);
76 m_search_ = nullptr;
77
78 if (U_SUCCESS(status)) {
79 // m_search_ has been created by the base SearchIterator class
80 m_search_ = m_strsrch_->search;
81 }
82 }
83
StringSearch(const UnicodeString & pattern,CharacterIterator & text,const Locale & locale,BreakIterator * breakiter,UErrorCode & status)84 StringSearch::StringSearch(const UnicodeString &pattern,
85 CharacterIterator &text,
86 const Locale &locale,
87 BreakIterator *breakiter,
88 UErrorCode &status) :
89 SearchIterator(text, breakiter),
90 m_pattern_(pattern)
91 {
92 if (U_FAILURE(status)) {
93 m_strsrch_ = nullptr;
94 return;
95 }
96 m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(),
97 m_text_.getBuffer(), m_text_.length(),
98 locale.getName(), (UBreakIterator *)breakiter,
99 &status);
100 uprv_free(m_search_);
101 m_search_ = nullptr;
102
103 if (U_SUCCESS(status)) {
104 // m_search_ has been created by the base SearchIterator class
105 m_search_ = m_strsrch_->search;
106 }
107 }
108
StringSearch(const UnicodeString & pattern,CharacterIterator & text,RuleBasedCollator * coll,BreakIterator * breakiter,UErrorCode & status)109 StringSearch::StringSearch(const UnicodeString &pattern,
110 CharacterIterator &text,
111 RuleBasedCollator *coll,
112 BreakIterator *breakiter,
113 UErrorCode &status) :
114 SearchIterator(text, breakiter),
115 m_pattern_(pattern)
116 {
117 if (U_FAILURE(status)) {
118 m_strsrch_ = nullptr;
119 return;
120 }
121 if (coll == nullptr) {
122 status = U_ILLEGAL_ARGUMENT_ERROR;
123 m_strsrch_ = nullptr;
124 return;
125 }
126 m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
127 m_pattern_.length(),
128 m_text_.getBuffer(),
129 m_text_.length(), coll->toUCollator(),
130 (UBreakIterator *)breakiter,
131 &status);
132 uprv_free(m_search_);
133 m_search_ = nullptr;
134
135 if (U_SUCCESS(status)) {
136 // m_search_ has been created by the base SearchIterator class
137 m_search_ = m_strsrch_->search;
138 }
139 }
140
/**
 * Copy constructor. Copies the text, break iterator pointer and pattern of
 * `that`, then opens a fresh UStringSearch over the copied data using the
 * collator held by `that`.
 *
 * If `that` has no UStringSearch (m_strsrch_ is null), the new object is left
 * with null m_strsrch_ and m_search_ as well. No error is reported to the
 * caller because the status is purely local.
 *
 * @param that object to copy
 */
StringSearch::StringSearch(const StringSearch &that)
    : SearchIterator(that.m_text_, that.m_breakiterator_),
      m_pattern_(that.m_pattern_)
{
    // Drop the m_search_ that the superclass allocated; it is replaced below
    // by the one belonging to the new UStringSearch, if any.
    uprv_free(m_search_);
    m_search_  = nullptr;
    m_strsrch_ = nullptr;

    if (that.m_strsrch_ == nullptr) {
        // The source was not usable, so neither is this copy.
        return;
    }

    // Deep copy: open a separate UStringSearch over our own buffers.
    UErrorCode status = U_ZERO_ERROR;
    m_strsrch_ = usearch_openFromCollator(
        m_pattern_.getBuffer(), m_pattern_.length(),
        m_text_.getBuffer(), m_text_.length(),
        that.m_strsrch_->collator,
        reinterpret_cast<UBreakIterator *>(that.m_breakiterator_),
        &status);

    if (U_SUCCESS(status)) {
        m_search_ = m_strsrch_->search;
    }
}
170
~StringSearch()171 StringSearch::~StringSearch()
172 {
173 if (m_strsrch_ != nullptr) {
174 usearch_close(m_strsrch_);
175 m_search_ = nullptr;
176 }
177 }
178
179 StringSearch *
clone() const180 StringSearch::clone() const {
181 return new StringSearch(*this);
182 }
183
184 // operator overloading ---------------------------------------------
/**
 * Copy assignment. Copies the text, break iterator pointer and pattern of
 * `that`, closes the current UStringSearch and opens a new one over the
 * copied data with the collator held by `that`.
 *
 * When `that` has no UStringSearch, or reopening fails, this object ends up
 * with null m_strsrch_ and m_search_ rather than stale pointers. Errors from
 * reopening are not reported because the status is purely local.
 *
 * NOTE(review): the new UStringSearch is opened without a break iterator
 * (nullptr), although m_breakiterator_ is copied and the copy constructor
 * does pass it along - confirm whether that difference is intended.
 *
 * @param that object to copy from
 * @return *this
 */
StringSearch & StringSearch::operator=(const StringSearch &that)
{
    if (this == &that) {
        return *this;
    }

    m_text_          = that.m_text_;
    m_breakiterator_ = that.m_breakiterator_;
    m_pattern_       = that.m_pattern_;

    // Release the current search state before rebuilding it.
    if (m_strsrch_ != nullptr) {
        // m_search_ was taken from m_strsrch_->search; as in the destructor,
        // it must not be used or freed separately once m_strsrch_ is closed.
        usearch_close(m_strsrch_);
        m_strsrch_ = nullptr;
    } else {
        // Without a UStringSearch, m_search_ is either null or still the
        // struct allocated by the base class; free it so it is not leaked
        // when it gets replaced below.
        uprv_free(m_search_);
    }
    m_search_ = nullptr;

    if (that.m_strsrch_ == nullptr) {
        // Nothing to copy the collator from; stay in the "not opened" state.
        return *this;
    }

    UErrorCode status = U_ZERO_ERROR;
    m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
                                          m_pattern_.length(),
                                          m_text_.getBuffer(),
                                          m_text_.length(),
                                          that.m_strsrch_->collator,
                                          nullptr, &status);
    // Only link m_search_ when a UStringSearch was actually produced.
    if (m_strsrch_ != nullptr) {
        m_search_ = m_strsrch_->search;
    }
    return *this;
}
207
operator ==(const SearchIterator & that) const208 bool StringSearch::operator==(const SearchIterator &that) const
209 {
210 if (this == &that) {
211 return true;
212 }
213 if (SearchIterator::operator ==(that)) {
214 const StringSearch *thatsrch = dynamic_cast<const StringSearch *>(&that);
215 if (thatsrch == nullptr) return false;
216 return (this->m_pattern_ == thatsrch->m_pattern_ &&
217 this->m_strsrch_->collator == thatsrch->m_strsrch_->collator);
218 }
219 return false;
220 }
221
222 // public get and set methods ----------------------------------------
223
setOffset(int32_t position,UErrorCode & status)224 void StringSearch::setOffset(int32_t position, UErrorCode &status)
225 {
226 // status checked in usearch_setOffset
227 usearch_setOffset(m_strsrch_, position, &status);
228 }
229
getOffset() const230 int32_t StringSearch::getOffset() const
231 {
232 return usearch_getOffset(m_strsrch_);
233 }
234
setText(const UnicodeString & text,UErrorCode & status)235 void StringSearch::setText(const UnicodeString &text, UErrorCode &status)
236 {
237 if (U_SUCCESS(status)) {
238 m_text_ = text;
239 usearch_setText(m_strsrch_, text.getBuffer(), text.length(), &status);
240 }
241 }
242
setText(CharacterIterator & text,UErrorCode & status)243 void StringSearch::setText(CharacterIterator &text, UErrorCode &status)
244 {
245 if (U_SUCCESS(status)) {
246 text.getText(m_text_);
247 usearch_setText(m_strsrch_, m_text_.getBuffer(), m_text_.length(), &status);
248 }
249 }
250
getCollator() const251 RuleBasedCollator * StringSearch::getCollator() const
252 {
253 // Note the const_cast. It would be cleaner if this const method returned a const collator.
254 return RuleBasedCollator::rbcFromUCollator(const_cast<UCollator *>(m_strsrch_->collator));
255 }
256
setCollator(RuleBasedCollator * coll,UErrorCode & status)257 void StringSearch::setCollator(RuleBasedCollator *coll, UErrorCode &status)
258 {
259 if (U_SUCCESS(status)) {
260 usearch_setCollator(m_strsrch_, coll->toUCollator(), &status);
261 }
262 }
263
setPattern(const UnicodeString & pattern,UErrorCode & status)264 void StringSearch::setPattern(const UnicodeString &pattern,
265 UErrorCode &status)
266 {
267 if (U_SUCCESS(status)) {
268 m_pattern_ = pattern;
269 usearch_setPattern(m_strsrch_, m_pattern_.getBuffer(), m_pattern_.length(),
270 &status);
271 }
272 }
273
getPattern() const274 const UnicodeString & StringSearch::getPattern() const
275 {
276 return m_pattern_;
277 }
278
279 // public methods ----------------------------------------------------
280
reset()281 void StringSearch::reset()
282 {
283 usearch_reset(m_strsrch_);
284 }
285
safeClone() const286 StringSearch * StringSearch::safeClone() const
287 {
288 UErrorCode status = U_ZERO_ERROR;
289 StringSearch *result = new StringSearch(m_pattern_, m_text_,
290 getCollator(),
291 m_breakiterator_,
292 status);
293 /* test for nullptr */
294 if (result == 0) {
295 status = U_MEMORY_ALLOCATION_ERROR;
296 return 0;
297 }
298 result->setOffset(getOffset(), status);
299 result->setMatchStart(m_strsrch_->search->matchedIndex);
300 result->setMatchLength(m_strsrch_->search->matchedLength);
301 if (U_FAILURE(status)) {
302 return nullptr;
303 }
304 return result;
305 }
306
307 // protected method -------------------------------------------------
308
handleNext(int32_t position,UErrorCode & status)309 int32_t StringSearch::handleNext(int32_t position, UErrorCode &status)
310 {
311 // values passed here are already in the pre-shift position
312 if (U_SUCCESS(status)) {
313 if (m_strsrch_->pattern.cesLength == 0) {
314 m_search_->matchedIndex =
315 m_search_->matchedIndex == USEARCH_DONE ?
316 getOffset() : m_search_->matchedIndex + 1;
317 m_search_->matchedLength = 0;
318 ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex,
319 &status);
320 if (m_search_->matchedIndex == m_search_->textLength) {
321 m_search_->matchedIndex = USEARCH_DONE;
322 }
323 }
324 else {
325 // looking at usearch.cpp, this part is shifted out to
326 // StringSearch instead of SearchIterator because m_strsrch_ is
327 // not accessible in SearchIterator
328 #if 0
329 if (position + m_strsrch_->pattern.defaultShiftSize
330 > m_search_->textLength) {
331 setMatchNotFound();
332 return USEARCH_DONE;
333 }
334 #endif
335 if (m_search_->matchedLength <= 0) {
336 // the flipping direction issue has already been handled
337 // in next()
338 // for boundary check purposes. this will ensure that the
339 // next match will not precede the current offset
340 // note search->matchedIndex will always be set to something
341 // in the code
342 m_search_->matchedIndex = position - 1;
343 }
344
345 ucol_setOffset(m_strsrch_->textIter, position, &status);
346
347 #if 0
348 for (;;) {
349 if (m_search_->isCanonicalMatch) {
350 // can't use exact here since extra accents are allowed.
351 usearch_handleNextCanonical(m_strsrch_, &status);
352 }
353 else {
354 usearch_handleNextExact(m_strsrch_, &status);
355 }
356 if (U_FAILURE(status)) {
357 return USEARCH_DONE;
358 }
359 if (m_breakiterator_ == nullptr
360 #if !UCONFIG_NO_BREAK_ITERATION
361 ||
362 m_search_->matchedIndex == USEARCH_DONE ||
363 (m_breakiterator_->isBoundary(m_search_->matchedIndex) &&
364 m_breakiterator_->isBoundary(m_search_->matchedIndex +
365 m_search_->matchedLength))
366 #endif
367 ) {
368 if (m_search_->matchedIndex == USEARCH_DONE) {
369 ucol_setOffset(m_strsrch_->textIter,
370 m_search_->textLength, &status);
371 }
372 else {
373 ucol_setOffset(m_strsrch_->textIter,
374 m_search_->matchedIndex, &status);
375 }
376 return m_search_->matchedIndex;
377 }
378 }
379 #else
380 // if m_strsrch_->breakIter is always the same as m_breakiterator_
381 // then we don't need to check the match boundaries here because
382 // usearch_handleNextXXX will already have done it.
383 if (m_search_->isCanonicalMatch) {
384 // *could* actually use exact here 'cause no extra accents allowed...
385 usearch_handleNextCanonical(m_strsrch_, &status);
386 } else {
387 usearch_handleNextExact(m_strsrch_, &status);
388 }
389
390 if (U_FAILURE(status)) {
391 return USEARCH_DONE;
392 }
393
394 if (m_search_->matchedIndex == USEARCH_DONE) {
395 ucol_setOffset(m_strsrch_->textIter, m_search_->textLength, &status);
396 } else {
397 ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex, &status);
398 }
399
400 return m_search_->matchedIndex;
401 #endif
402 }
403 }
404 return USEARCH_DONE;
405 }
406
handlePrev(int32_t position,UErrorCode & status)407 int32_t StringSearch::handlePrev(int32_t position, UErrorCode &status)
408 {
409 // values passed here are already in the pre-shift position
410 if (U_SUCCESS(status)) {
411 if (m_strsrch_->pattern.cesLength == 0) {
412 m_search_->matchedIndex =
413 (m_search_->matchedIndex == USEARCH_DONE ? getOffset() :
414 m_search_->matchedIndex);
415 if (m_search_->matchedIndex == 0) {
416 setMatchNotFound();
417 }
418 else {
419 m_search_->matchedIndex --;
420 ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex,
421 &status);
422 m_search_->matchedLength = 0;
423 }
424 }
425 else {
426 // looking at usearch.cpp, this part is shifted out to
427 // StringSearch instead of SearchIterator because m_strsrch_ is
428 // not accessible in SearchIterator
429 #if 0
430 if (!m_search_->isOverlap &&
431 position - m_strsrch_->pattern.defaultShiftSize < 0) {
432 setMatchNotFound();
433 return USEARCH_DONE;
434 }
435
436 for (;;) {
437 if (m_search_->isCanonicalMatch) {
438 // can't use exact here since extra accents are allowed.
439 usearch_handlePreviousCanonical(m_strsrch_, &status);
440 }
441 else {
442 usearch_handlePreviousExact(m_strsrch_, &status);
443 }
444 if (U_FAILURE(status)) {
445 return USEARCH_DONE;
446 }
447 if (m_breakiterator_ == nullptr
448 #if !UCONFIG_NO_BREAK_ITERATION
449 ||
450 m_search_->matchedIndex == USEARCH_DONE ||
451 (m_breakiterator_->isBoundary(m_search_->matchedIndex) &&
452 m_breakiterator_->isBoundary(m_search_->matchedIndex +
453 m_search_->matchedLength))
454 #endif
455 ) {
456 return m_search_->matchedIndex;
457 }
458 }
459 #else
460 ucol_setOffset(m_strsrch_->textIter, position, &status);
461
462 if (m_search_->isCanonicalMatch) {
463 // *could* use exact match here since extra accents *not* allowed!
464 usearch_handlePreviousCanonical(m_strsrch_, &status);
465 } else {
466 usearch_handlePreviousExact(m_strsrch_, &status);
467 }
468
469 if (U_FAILURE(status)) {
470 return USEARCH_DONE;
471 }
472
473 return m_search_->matchedIndex;
474 #endif
475 }
476
477 return m_search_->matchedIndex;
478 }
479 return USEARCH_DONE;
480 }
481
482 U_NAMESPACE_END
483
484 #endif /* #if !UCONFIG_NO_COLLATION */
485