xref: /aosp_15_r20/external/icing/icing/expand/stemming/stemming-expander.h (revision 8b6cd535a057e39b3b86660c4aa06c99747c2136)
1 // Copyright (C) 2024 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef ICING_EXPAND_STEMMING_STEMMING_EXPANDER_H_
16 #define ICING_EXPAND_STEMMING_STEMMING_EXPANDER_H_
17 
18 #include <memory>
19 #include <string>
20 #include <string_view>
21 #include <utility>
22 #include <vector>
23 
24 #include "icing/text_classifier/lib3/utils/base/statusor.h"
25 #include "icing/absl_ports/mutex.h"
26 #include "icing/absl_ports/thread_annotations.h"
27 #include "icing/expand/expander.h"
28 #include "icing/expand/stemming/stemmer.h"
29 
30 namespace icing {
31 namespace lib {
32 
33 // Used to expand a given term to its root form.
34 //
35 // This class is thread-safe.
36 class StemmingExpander : public Expander {
37  public:
38   static libtextclassifier3::StatusOr<std::unique_ptr<StemmingExpander>> Create(
39       std::string language_code);
40 
~StemmingExpander()41   ~StemmingExpander() override {}
42 
43   // Expands the given term to its root form.
44   //
45   // The expanded vector will contain either:
46   //   - A single element containing the original term, if the stemmer does not
47   //     produce a different term, or
48   //   - Two elements (the original term and its stem), with the first element
49   //     being the original term.
50   std::vector<ExpandedTerm> Expand(std::string_view term) const override;
51 
52  private:
StemmingExpander(std::string language_code,std::unique_ptr<Stemmer> stemmer)53   explicit StemmingExpander(std::string language_code,
54                             std::unique_ptr<Stemmer> stemmer)
55       : language_code_(std::move(language_code)),
56         cached_stemmer_(std::move(stemmer)) {}
57 
58   // Produces a stemmer in the language of the StemmingExpander that the caller
59   // owns.
60   //  - If cached_stemmer_ is not null, transfers ownership to the caller and
61   //    sets cached_stemmer_ to null.
62   //  - Otherwise, creates a new stemmer and transfers ownership to the caller.
63   //
64   // Note: Caller must call ReturnStemmer() after using the stemmer.
65   //
66   // Returns:
67   //  - A stemmer for a given language on success.‰
68   //  - INVALID_ARGUMENT_ERROR if the language code is invalid or not supported.
69   //  - INTERNAL_ERROR on errors.
70   //
71   // Requires:
72   //  - language_code_ is a valid code for the stemmer.
73   libtextclassifier3::StatusOr<std::unique_ptr<Stemmer>> ProduceStemmer() const
74       ICING_LOCKS_EXCLUDED(mutex_);
75 
76   // Caller transfers ownership of stemmer to the StemmingExpander.
77   //  - If cached_stemmer_ is not null, stemmer will be deleted.
78   //  - Otherwise, the stemmer becomes cached_stemmer_.
79   void ReturnStemmer(std::unique_ptr<Stemmer>) const
80       ICING_LOCKS_EXCLUDED(mutex_);
81 
82   // The language code of the stemmer.
83   const std::string language_code_;
84 
85   // A cached stemmer that is used to expand a term.
86   //
87   // The stemmer is not thread-safe.
88   mutable std::unique_ptr<Stemmer> cached_stemmer_ ICING_GUARDED_BY(mutex_);
89 
90   // Used to provide reader and writer locks
91   mutable absl_ports::shared_mutex mutex_;
92 };
93 
94 }  // namespace lib
95 }  // namespace icing
96 
97 #endif  // ICING_EXPAND_STEMMING_STEMMING_EXPANDER_H_
98