xref: /aosp_15_r20/external/icing/icing/expand/expander-manager.cc (revision 8b6cd535a057e39b3b86660c4aa06c99747c2136)
1 // Copyright (C) 2024 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "icing/expand/expander-manager.h"
16 
17 #include <memory>
18 #include <string>
19 #include <string_view>
20 #include <utility>
21 #include <vector>
22 
23 #include "icing/text_classifier/lib3/utils/base/status.h"
24 #include "icing/text_classifier/lib3/utils/base/statusor.h"
25 #include "icing/absl_ports/mutex.h"
26 #include "icing/expand/expander.h"
27 #include "icing/expand/stemming/stemming-expander.h"
28 #include "icing/util/logging.h"
29 #include "icing/util/status-macros.h"
30 #include "unicode/uloc.h"
31 
32 namespace icing {
33 namespace lib {
34 
GetOrCreateStemmingExpander(const std::string & locale)35 const Expander& ExpanderManager::GetOrCreateStemmingExpander(
36     const std::string& locale) {
37   {
38     // Check if the expander already exists. This only requires a read lock.
39     absl_ports::shared_lock l(&mutex_);
40     auto itr = stemming_expanders_.find(locale);
41     if (itr != stemming_expanders_.end()) {
42       return *(itr->second);
43     }
44   }
45 
46   const char* stemmer_language_code = uloc_getISO3Language(locale.c_str());
47   libtextclassifier3::StatusOr<std::unique_ptr<StemmingExpander>> expander_or =
48       StemmingExpander::Create(stemmer_language_code);
49 
50   if (!expander_or.status().ok()) {
51     ICING_VLOG(1) << "Failed to create stemming expander for locale: " << locale
52                   << ". Using default locale: " << default_locale_;
53     {
54       absl_ports::shared_lock l(&mutex_);
55       // stemming_expanders_[default_locale_] is guaranteed to exist as this is
56       // created during initialization.
57       return *stemming_expanders_[default_locale_];
58     }
59   }
60 
61   std::unique_ptr<Expander> stemming_expander =
62       std::move(expander_or).ValueOrDie();
63   {
64     absl_ports::unique_lock l(&mutex_);
65     // Check again before emplacing into the map in case the expander was
66     // created by another thread.
67     auto itr = stemming_expanders_.find(locale);
68     if (itr == stemming_expanders_.end()) {
69       itr = stemming_expanders_.emplace(locale, std::move(stemming_expander))
70                 .first;
71     }
72     return *(itr->second);
73   }
74 }
75 
76 /* static */ libtextclassifier3::StatusOr<std::unique_ptr<ExpanderManager>>
Create(std::string default_locale,int max_terms_per_expander)77 ExpanderManager::Create(std::string default_locale,
78                         int max_terms_per_expander) {
79   if (max_terms_per_expander <= 1) {
80     return libtextclassifier3::Status(
81         libtextclassifier3::StatusCode::INVALID_ARGUMENT,
82         "max_num_expanded_terms must be greater than 1.");
83   }
84 
85   // Create a default stemming expander using defalt_locale. This is added into
86   // the stemming_expanders_ map during initialization.
87   const char* stemmer_language_code =
88       uloc_getISO3Language(default_locale.c_str());
89   libtextclassifier3::StatusOr<std::unique_ptr<StemmingExpander>> expander_or =
90       StemmingExpander::Create(stemmer_language_code);
91 
92   std::unique_ptr<StemmingExpander> expander;
93   if (!expander_or.status().ok()) {
94     ICING_VLOG(1) << "Failed to create expander manager with locale: "
95                   << default_locale
96                   << ". Using default English locale instead.";
97     default_locale = kDefaultEnglishLocale;
98     stemmer_language_code = uloc_getISO3Language(default_locale.c_str());
99     ICING_ASSIGN_OR_RETURN(expander,
100                            StemmingExpander::Create(stemmer_language_code));
101   } else {
102     expander = std::move(expander_or).ValueOrDie();
103   }
104 
105   ExpandersMap stemming_expanders;
106   stemming_expanders.emplace(default_locale, std::move(expander));
107   return std::unique_ptr<ExpanderManager>(
108       new ExpanderManager(std::move(stemming_expanders),
109                           std::move(default_locale), max_terms_per_expander));
110 }
111 
ProcessTerm(std::string_view term,TermMatchType::Code term_match_type,const std::string & locale)112 std::vector<ExpandedTerm> ExpanderManager::ProcessTerm(
113     std::string_view term, TermMatchType::Code term_match_type,
114     const std::string& locale) {
115   switch (term_match_type) {
116     case TermMatchType_Code_UNKNOWN:
117     case TermMatchType::EXACT_ONLY:
118     case TermMatchType::PREFIX: {
119       // Return the original term.
120       std::vector<ExpandedTerm> expanded_terms;
121       expanded_terms.push_back(
122           ExpandedTerm(std::string(term), /*is_stemmed_term=*/false));
123       return expanded_terms;
124     }
125     case TermMatchType_Code_STEMMING: {
126       // The stemming expander returns at most 2 terms, and we don't allow
127       // having max_terms_per_expander < 2, so we don't need to check the size
128       // of the returned vector here.
129       return GetOrCreateStemmingExpander(locale).Expand(term);
130     }
131   }
132 }
133 
134 }  // namespace lib
135 }  // namespace icing
136