1 // Copyright (C) 2024 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #ifndef ICING_EXPAND_EXPANDER_MANAGER_H_ 16 #define ICING_EXPAND_EXPANDER_MANAGER_H_ 17 18 #include <memory> 19 #include <string> 20 #include <string_view> 21 #include <unordered_map> 22 #include <utility> 23 #include <vector> 24 25 #include "icing/text_classifier/lib3/utils/base/statusor.h" 26 #include "icing/absl_ports/mutex.h" 27 #include "icing/absl_ports/thread_annotations.h" 28 #include "icing/expand/expander.h" 29 #include "icing/proto/term.pb.h" 30 #include "unicode/uloc.h" 31 32 namespace icing { 33 namespace lib { 34 35 // This class is a wrapper around the various expanders. It is responsible for 36 // calling the appropriate expander based on a term's match type and locale. 37 // 38 // This class is thread-safe. 39 class ExpanderManager { 40 public: 41 // This is used as the default locale if the provided locale is invalid. 42 static constexpr std::string_view kDefaultEnglishLocale = ULOC_US; 43 44 // Map of a locale to an expander. 45 // 46 // The Expander instances are managed exclusively by this class, and are never 47 // deleted from the map once created. Therefore we don't need std::shared_ptr 48 // here even though multiple threads may be accessing the same expander 49 // instance at the same time. 50 using ExpandersMap = 51 std::unordered_map<std::string, std::unique_ptr<Expander>>; 52 53 // Factory method to create an ExpanderManager. The expanders will be 54 // initialized in the default locale. 55 // 56 // Returns: 57 // - An ExpanderManager on success. 58 // - INVALID_ARGUMENT_ERROR if max_terms_per_expander <= 1. 59 // - INTERNAL_ERROR ion errors. 60 static libtextclassifier3::StatusOr<std::unique_ptr<ExpanderManager>> Create( 61 std::string default_locale, int max_terms_per_expander); 62 63 // Processes a term according to the term's match type and locale. The 64 // first ExpandedTerm in the returned list will always be the original input 65 // term. 66 // 67 // A new expander will be created when possible if the expander corresponding 68 // to the given term match type and locale does not already exist. 69 // If the locale is not supported, the term will be expanded using the default 70 // locale. 71 // 72 // Returns: a list of expanded terms. 73 std::vector<ExpandedTerm> ProcessTerm(std::string_view term, 74 TermMatchType::Code term_match_type, 75 const std::string& locale); 76 default_locale()77 const std::string& default_locale() const { return default_locale_; } 78 79 private: ExpanderManager(ExpandersMap stemming_expanders,std::string default_locale,int max_terms_per_expander)80 explicit ExpanderManager(ExpandersMap stemming_expanders, 81 std::string default_locale, 82 int max_terms_per_expander) 83 : stemming_expanders_(std::move(stemming_expanders)), 84 default_locale_(std::move(default_locale)), 85 max_terms_per_expander_(max_terms_per_expander) {} 86 87 // Returns a stemming expander for the given locale. 88 // - Returns the expander retrieved from the stemming_expanders_ map if an 89 // instance already exists for the locale. 90 // - Otherwise, creates a new expander instance and adds it to the 91 // stemming_expanders_ map before returning it. 92 const Expander& GetOrCreateStemmingExpander(const std::string& locale) 93 ICING_LOCKS_EXCLUDED(mutex_); 94 95 // Map of locale to stemming expanders. 96 ExpandersMap stemming_expanders_ ICING_GUARDED_BY(mutex_); 97 98 // Default locale to use for expanders. 99 const std::string default_locale_; 100 101 // Maximum number of terms to expand to for an input term per expander. This 102 // number includes the input term. 103 const int max_terms_per_expander_; 104 105 // Used to provide reader and writer locks 106 mutable absl_ports::shared_mutex mutex_; 107 }; 108 109 } // namespace lib 110 } // namespace icing 111 112 #endif // ICING_EXPAND_EXPANDER_MANAGER_H_ 113