1 // Copyright (C) 2024 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #ifndef ICING_EXPAND_STEMMING_STEMMING_EXPANDER_H_ 16 #define ICING_EXPAND_STEMMING_STEMMING_EXPANDER_H_ 17 18 #include <memory> 19 #include <string> 20 #include <string_view> 21 #include <utility> 22 #include <vector> 23 24 #include "icing/text_classifier/lib3/utils/base/statusor.h" 25 #include "icing/absl_ports/mutex.h" 26 #include "icing/absl_ports/thread_annotations.h" 27 #include "icing/expand/expander.h" 28 #include "icing/expand/stemming/stemmer.h" 29 30 namespace icing { 31 namespace lib { 32 33 // Used to expand a given term to its root form. 34 // 35 // This class is thread-safe. 36 class StemmingExpander : public Expander { 37 public: 38 static libtextclassifier3::StatusOr<std::unique_ptr<StemmingExpander>> Create( 39 std::string language_code); 40 ~StemmingExpander()41 ~StemmingExpander() override {} 42 43 // Expands the given term to its root form. 44 // 45 // The expanded vector will contain either: 46 // - A single element containing the original term, if the stemmer does not 47 // produce a different term, or 48 // - Two elements (the original term and its stem), with the first element 49 // being the original term. 50 std::vector<ExpandedTerm> Expand(std::string_view term) const override; 51 52 private: StemmingExpander(std::string language_code,std::unique_ptr<Stemmer> stemmer)53 explicit StemmingExpander(std::string language_code, 54 std::unique_ptr<Stemmer> stemmer) 55 : language_code_(std::move(language_code)), 56 cached_stemmer_(std::move(stemmer)) {} 57 58 // Produces a stemmer in the language of the StemmingExpander that the caller 59 // owns. 60 // - If cached_stemmer_ is not null, transfers ownership to the caller and 61 // sets cached_stemmer_ to null. 62 // - Otherwise, creates a new stemmer and transfers ownership to the caller. 63 // 64 // Note: Caller must call ReturnStemmer() after using the stemmer. 65 // 66 // Returns: 67 // - A stemmer for a given language on success.‰ 68 // - INVALID_ARGUMENT_ERROR if the language code is invalid or not supported. 69 // - INTERNAL_ERROR on errors. 70 // 71 // Requires: 72 // - language_code_ is a valid code for the stemmer. 73 libtextclassifier3::StatusOr<std::unique_ptr<Stemmer>> ProduceStemmer() const 74 ICING_LOCKS_EXCLUDED(mutex_); 75 76 // Caller transfers ownership of stemmer to the StemmingExpander. 77 // - If cached_stemmer_ is not null, stemmer will be deleted. 78 // - Otherwise, the stemmer becomes cached_stemmer_. 79 void ReturnStemmer(std::unique_ptr<Stemmer>) const 80 ICING_LOCKS_EXCLUDED(mutex_); 81 82 // The language code of the stemmer. 83 const std::string language_code_; 84 85 // A cached stemmer that is used to expand a term. 86 // 87 // The stemmer is not thread-safe. 88 mutable std::unique_ptr<Stemmer> cached_stemmer_ ICING_GUARDED_BY(mutex_); 89 90 // Used to provide reader and writer locks 91 mutable absl_ports::shared_mutex mutex_; 92 }; 93 94 } // namespace lib 95 } // namespace icing 96 97 #endif // ICING_EXPAND_STEMMING_STEMMING_EXPANDER_H_ 98