xref: /aosp_15_r20/external/icing/icing/transform/map/map-normalizer.h (revision 8b6cd535a057e39b3b86660c4aa06c99747c2136)
1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef ICING_TRANSFORM_MAP_MAP_NORMALIZER_H_
16 #define ICING_TRANSFORM_MAP_MAP_NORMALIZER_H_
17 
18 #include <string>
19 #include <string_view>
20 
21 #include "icing/transform/normalizer.h"
22 #include "icing/util/character-iterator.h"
23 
24 namespace icing {
25 namespace lib {
26 
27 class MapNormalizer : public Normalizer {
28  public:
MapNormalizer(int max_term_byte_size)29   explicit MapNormalizer(int max_term_byte_size)
30       : max_term_byte_size_(max_term_byte_size){};
31 
32   // Normalizes the input term based on character mappings. The mappings
33   // contain the following categories:
34   //   - Uppercase -> lowercase
35   //   - Hiragana -> Katakana
36   //   - Common full-width characters -> ASCII
37   //   - Common ideographic punctuation marks -> ASCII
38   //   - Common diacritic Latin characters -> ASCII
39   //
40   // Read more mapping details in normalization-map.cc
41   Normalizer::NormalizedTerm NormalizeTerm(
42       std::string_view term) const override;
43 
44   // Returns a CharacterIterator pointing to one past the end of the segment of
45   // term that (once normalized) matches with normalized_term.
46   //
47   // Ex. FindNormalizedMatchEndPosition("YELLOW", "yell") will return
48   // CharacterIterator(u8:4, u16:4, u32:4).
49   //
50   // Ex. FindNormalizedMatchEndPosition("YELLOW", "red") will return
51   // CharacterIterator(u8:0, u16:0, u32:0).
52   CharacterIterator FindNormalizedMatchEndPosition(
53       std::string_view term, std::string_view normalized_term) const override;
54 
55  private:
56   // The maximum term length allowed after normalization.
57   int max_term_byte_size_;
58 };
59 
60 }  // namespace lib
61 }  // namespace icing
62 
63 #endif  // ICING_TRANSFORM_MAP_MAP_NORMALIZER_H_
64