1 // Copyright (C) 2024 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "icing/expand/expander-manager.h"
16
17 #include <memory>
18 #include <string>
19 #include <string_view>
20 #include <utility>
21 #include <vector>
22
23 #include "icing/text_classifier/lib3/utils/base/status.h"
24 #include "icing/text_classifier/lib3/utils/base/statusor.h"
25 #include "icing/absl_ports/mutex.h"
26 #include "icing/expand/expander.h"
27 #include "icing/expand/stemming/stemming-expander.h"
28 #include "icing/util/logging.h"
29 #include "icing/util/status-macros.h"
30 #include "unicode/uloc.h"
31
32 namespace icing {
33 namespace lib {
34
GetOrCreateStemmingExpander(const std::string & locale)35 const Expander& ExpanderManager::GetOrCreateStemmingExpander(
36 const std::string& locale) {
37 {
38 // Check if the expander already exists. This only requires a read lock.
39 absl_ports::shared_lock l(&mutex_);
40 auto itr = stemming_expanders_.find(locale);
41 if (itr != stemming_expanders_.end()) {
42 return *(itr->second);
43 }
44 }
45
46 const char* stemmer_language_code = uloc_getISO3Language(locale.c_str());
47 libtextclassifier3::StatusOr<std::unique_ptr<StemmingExpander>> expander_or =
48 StemmingExpander::Create(stemmer_language_code);
49
50 if (!expander_or.status().ok()) {
51 ICING_VLOG(1) << "Failed to create stemming expander for locale: " << locale
52 << ". Using default locale: " << default_locale_;
53 {
54 absl_ports::shared_lock l(&mutex_);
55 // stemming_expanders_[default_locale_] is guaranteed to exist as this is
56 // created during initialization.
57 return *stemming_expanders_[default_locale_];
58 }
59 }
60
61 std::unique_ptr<Expander> stemming_expander =
62 std::move(expander_or).ValueOrDie();
63 {
64 absl_ports::unique_lock l(&mutex_);
65 // Check again before emplacing into the map in case the expander was
66 // created by another thread.
67 auto itr = stemming_expanders_.find(locale);
68 if (itr == stemming_expanders_.end()) {
69 itr = stemming_expanders_.emplace(locale, std::move(stemming_expander))
70 .first;
71 }
72 return *(itr->second);
73 }
74 }
75
76 /* static */ libtextclassifier3::StatusOr<std::unique_ptr<ExpanderManager>>
Create(std::string default_locale,int max_terms_per_expander)77 ExpanderManager::Create(std::string default_locale,
78 int max_terms_per_expander) {
79 if (max_terms_per_expander <= 1) {
80 return libtextclassifier3::Status(
81 libtextclassifier3::StatusCode::INVALID_ARGUMENT,
82 "max_num_expanded_terms must be greater than 1.");
83 }
84
85 // Create a default stemming expander using defalt_locale. This is added into
86 // the stemming_expanders_ map during initialization.
87 const char* stemmer_language_code =
88 uloc_getISO3Language(default_locale.c_str());
89 libtextclassifier3::StatusOr<std::unique_ptr<StemmingExpander>> expander_or =
90 StemmingExpander::Create(stemmer_language_code);
91
92 std::unique_ptr<StemmingExpander> expander;
93 if (!expander_or.status().ok()) {
94 ICING_VLOG(1) << "Failed to create expander manager with locale: "
95 << default_locale
96 << ". Using default English locale instead.";
97 default_locale = kDefaultEnglishLocale;
98 stemmer_language_code = uloc_getISO3Language(default_locale.c_str());
99 ICING_ASSIGN_OR_RETURN(expander,
100 StemmingExpander::Create(stemmer_language_code));
101 } else {
102 expander = std::move(expander_or).ValueOrDie();
103 }
104
105 ExpandersMap stemming_expanders;
106 stemming_expanders.emplace(default_locale, std::move(expander));
107 return std::unique_ptr<ExpanderManager>(
108 new ExpanderManager(std::move(stemming_expanders),
109 std::move(default_locale), max_terms_per_expander));
110 }
111
ProcessTerm(std::string_view term,TermMatchType::Code term_match_type,const std::string & locale)112 std::vector<ExpandedTerm> ExpanderManager::ProcessTerm(
113 std::string_view term, TermMatchType::Code term_match_type,
114 const std::string& locale) {
115 switch (term_match_type) {
116 case TermMatchType_Code_UNKNOWN:
117 case TermMatchType::EXACT_ONLY:
118 case TermMatchType::PREFIX: {
119 // Return the original term.
120 std::vector<ExpandedTerm> expanded_terms;
121 expanded_terms.push_back(
122 ExpandedTerm(std::string(term), /*is_stemmed_term=*/false));
123 return expanded_terms;
124 }
125 case TermMatchType_Code_STEMMING: {
126 // The stemming expander returns at most 2 terms, and we don't allow
127 // having max_terms_per_expander < 2, so we don't need to check the size
128 // of the returned vector here.
129 return GetOrCreateStemmingExpander(locale).Expand(term);
130 }
131 }
132 }
133
134 } // namespace lib
135 } // namespace icing
136