xref: /aosp_15_r20/external/libchrome/base/i18n/string_search.cc (revision 635a864187cb8b6c713ff48b7e790a6b21769273)
// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4*635a8641SAndroid Build Coastguard Worker 
5*635a8641SAndroid Build Coastguard Worker #include <stdint.h>
6*635a8641SAndroid Build Coastguard Worker 
7*635a8641SAndroid Build Coastguard Worker #include "base/i18n/string_search.h"
8*635a8641SAndroid Build Coastguard Worker #include "base/logging.h"
9*635a8641SAndroid Build Coastguard Worker 
10*635a8641SAndroid Build Coastguard Worker #include "third_party/icu/source/i18n/unicode/usearch.h"
11*635a8641SAndroid Build Coastguard Worker 
12*635a8641SAndroid Build Coastguard Worker namespace base {
13*635a8641SAndroid Build Coastguard Worker namespace i18n {
14*635a8641SAndroid Build Coastguard Worker 
15*635a8641SAndroid Build Coastguard Worker FixedPatternStringSearchIgnoringCaseAndAccents::
FixedPatternStringSearchIgnoringCaseAndAccents(const string16 & find_this)16*635a8641SAndroid Build Coastguard Worker FixedPatternStringSearchIgnoringCaseAndAccents(const string16& find_this)
17*635a8641SAndroid Build Coastguard Worker     : find_this_(find_this) {
18*635a8641SAndroid Build Coastguard Worker   // usearch_open requires a valid string argument to be searched, even if we
19*635a8641SAndroid Build Coastguard Worker   // want to set it by usearch_setText afterwards. So, supplying a dummy text.
20*635a8641SAndroid Build Coastguard Worker   const string16& dummy = find_this_;
21*635a8641SAndroid Build Coastguard Worker 
22*635a8641SAndroid Build Coastguard Worker   UErrorCode status = U_ZERO_ERROR;
23*635a8641SAndroid Build Coastguard Worker   search_ = usearch_open(find_this_.data(), find_this_.size(), dummy.data(),
24*635a8641SAndroid Build Coastguard Worker                          dummy.size(), uloc_getDefault(),
25*635a8641SAndroid Build Coastguard Worker                          nullptr,  // breakiter
26*635a8641SAndroid Build Coastguard Worker                          &status);
27*635a8641SAndroid Build Coastguard Worker   if (U_SUCCESS(status)) {
28*635a8641SAndroid Build Coastguard Worker     UCollator* collator = usearch_getCollator(search_);
29*635a8641SAndroid Build Coastguard Worker     ucol_setStrength(collator, UCOL_PRIMARY);
30*635a8641SAndroid Build Coastguard Worker     usearch_reset(search_);
31*635a8641SAndroid Build Coastguard Worker   }
32*635a8641SAndroid Build Coastguard Worker }
33*635a8641SAndroid Build Coastguard Worker 
34*635a8641SAndroid Build Coastguard Worker FixedPatternStringSearchIgnoringCaseAndAccents::
~FixedPatternStringSearchIgnoringCaseAndAccents()35*635a8641SAndroid Build Coastguard Worker ~FixedPatternStringSearchIgnoringCaseAndAccents() {
36*635a8641SAndroid Build Coastguard Worker   if (search_)
37*635a8641SAndroid Build Coastguard Worker     usearch_close(search_);
38*635a8641SAndroid Build Coastguard Worker }
39*635a8641SAndroid Build Coastguard Worker 
Search(const string16 & in_this,size_t * match_index,size_t * match_length)40*635a8641SAndroid Build Coastguard Worker bool FixedPatternStringSearchIgnoringCaseAndAccents::Search(
41*635a8641SAndroid Build Coastguard Worker     const string16& in_this, size_t* match_index, size_t* match_length) {
42*635a8641SAndroid Build Coastguard Worker   UErrorCode status = U_ZERO_ERROR;
43*635a8641SAndroid Build Coastguard Worker   usearch_setText(search_, in_this.data(), in_this.size(), &status);
44*635a8641SAndroid Build Coastguard Worker 
45*635a8641SAndroid Build Coastguard Worker   // Default to basic substring search if usearch fails. According to
46*635a8641SAndroid Build Coastguard Worker   // http://icu-project.org/apiref/icu4c/usearch_8h.html, usearch_open will fail
47*635a8641SAndroid Build Coastguard Worker   // if either |find_this| or |in_this| are empty. In either case basic
48*635a8641SAndroid Build Coastguard Worker   // substring search will give the correct return value.
49*635a8641SAndroid Build Coastguard Worker   if (!U_SUCCESS(status)) {
50*635a8641SAndroid Build Coastguard Worker     size_t index = in_this.find(find_this_);
51*635a8641SAndroid Build Coastguard Worker     if (index == string16::npos) {
52*635a8641SAndroid Build Coastguard Worker       return false;
53*635a8641SAndroid Build Coastguard Worker     } else {
54*635a8641SAndroid Build Coastguard Worker       if (match_index)
55*635a8641SAndroid Build Coastguard Worker         *match_index = index;
56*635a8641SAndroid Build Coastguard Worker       if (match_length)
57*635a8641SAndroid Build Coastguard Worker         *match_length = find_this_.size();
58*635a8641SAndroid Build Coastguard Worker       return true;
59*635a8641SAndroid Build Coastguard Worker     }
60*635a8641SAndroid Build Coastguard Worker   }
61*635a8641SAndroid Build Coastguard Worker 
62*635a8641SAndroid Build Coastguard Worker   int32_t index = usearch_first(search_, &status);
63*635a8641SAndroid Build Coastguard Worker   if (!U_SUCCESS(status) || index == USEARCH_DONE)
64*635a8641SAndroid Build Coastguard Worker     return false;
65*635a8641SAndroid Build Coastguard Worker   if (match_index)
66*635a8641SAndroid Build Coastguard Worker     *match_index = static_cast<size_t>(index);
67*635a8641SAndroid Build Coastguard Worker   if (match_length)
68*635a8641SAndroid Build Coastguard Worker     *match_length = static_cast<size_t>(usearch_getMatchedLength(search_));
69*635a8641SAndroid Build Coastguard Worker   return true;
70*635a8641SAndroid Build Coastguard Worker }
71*635a8641SAndroid Build Coastguard Worker 
StringSearchIgnoringCaseAndAccents(const string16 & find_this,const string16 & in_this,size_t * match_index,size_t * match_length)72*635a8641SAndroid Build Coastguard Worker bool StringSearchIgnoringCaseAndAccents(const string16& find_this,
73*635a8641SAndroid Build Coastguard Worker                                         const string16& in_this,
74*635a8641SAndroid Build Coastguard Worker                                         size_t* match_index,
75*635a8641SAndroid Build Coastguard Worker                                         size_t* match_length) {
76*635a8641SAndroid Build Coastguard Worker   return FixedPatternStringSearchIgnoringCaseAndAccents(find_this).Search(
77*635a8641SAndroid Build Coastguard Worker       in_this, match_index, match_length);
78*635a8641SAndroid Build Coastguard Worker }
79*635a8641SAndroid Build Coastguard Worker 
80*635a8641SAndroid Build Coastguard Worker }  // namespace i18n
81*635a8641SAndroid Build Coastguard Worker }  // namespace base
82