xref: /aosp_15_r20/external/icing/icing/tokenization/tokenizer-factory.cc (revision 8b6cd535a057e39b3b86660c4aa06c99747c2136)
1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "icing/tokenization/tokenizer-factory.h"
16 
17 #include <memory>
18 
19 #include "icing/text_classifier/lib3/utils/base/statusor.h"
20 #include "icing/absl_ports/canonical_errors.h"
21 #include "icing/proto/schema.pb.h"
22 #include "icing/tokenization/language-segmenter.h"
23 #include "icing/tokenization/plain-tokenizer.h"
24 #include "icing/tokenization/rfc822-tokenizer.h"
25 #include "icing/tokenization/tokenizer.h"
26 
27 #ifdef ENABLE_URL_TOKENIZER
28 #include "icing/tokenization/url-tokenizer.h"
29 #endif  // ENABLE_URL_TOKENIZER
30 
31 #include "icing/tokenization/verbatim-tokenizer.h"
32 #include "icing/util/status-macros.h"
33 
34 namespace icing {
35 namespace lib {
36 
37 namespace tokenizer_factory {
38 
39 libtextclassifier3::StatusOr<std::unique_ptr<Tokenizer>>
CreateIndexingTokenizer(StringIndexingConfig::TokenizerType::Code type,const LanguageSegmenter * lang_segmenter)40 CreateIndexingTokenizer(StringIndexingConfig::TokenizerType::Code type,
41                         const LanguageSegmenter* lang_segmenter) {
42   ICING_RETURN_ERROR_IF_NULL(lang_segmenter);
43 
44   switch (type) {
45     case StringIndexingConfig::TokenizerType::PLAIN:
46       return std::make_unique<PlainTokenizer>(lang_segmenter);
47     case StringIndexingConfig::TokenizerType::VERBATIM:
48       return std::make_unique<VerbatimTokenizer>();
49     case StringIndexingConfig::TokenizerType::RFC822:
50       return std::make_unique<Rfc822Tokenizer>();
51 // TODO (b/246964044): remove ifdef guard when url-tokenizer is ready for export
52 // to Android.
53 #ifdef ENABLE_URL_TOKENIZER
54     case StringIndexingConfig::TokenizerType::URL:
55       return std::make_unique<UrlTokenizer>();
56 #endif  // ENABLE_URL_TOKENIZER
57     case StringIndexingConfig::TokenizerType::NONE:
58       [[fallthrough]];
59     default:
60       // This should never happen.
61       return absl_ports::InvalidArgumentError(
62           "Invalid tokenizer type for an indexed section");
63   }
64 }
65 
66 }  // namespace tokenizer_factory
67 
68 }  // namespace lib
69 }  // namespace icing
70