1 // Copyright (C) 2019 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #ifndef ICING_TRANSFORM_MAP_MAP_NORMALIZER_H_ 16 #define ICING_TRANSFORM_MAP_MAP_NORMALIZER_H_ 17 18 #include <string> 19 #include <string_view> 20 21 #include "icing/transform/normalizer.h" 22 #include "icing/util/character-iterator.h" 23 24 namespace icing { 25 namespace lib { 26 27 class MapNormalizer : public Normalizer { 28 public: MapNormalizer(int max_term_byte_size)29 explicit MapNormalizer(int max_term_byte_size) 30 : max_term_byte_size_(max_term_byte_size){}; 31 32 // Normalizes the input term based on character mappings. The mappings 33 // contain the following categories: 34 // - Uppercase -> lowercase 35 // - Hiragana -> Katakana 36 // - Common full-width characters -> ASCII 37 // - Common ideographic punctuation marks -> ASCII 38 // - Common diacritic Latin characters -> ASCII 39 // 40 // Read more mapping details in normalization-map.cc 41 Normalizer::NormalizedTerm NormalizeTerm( 42 std::string_view term) const override; 43 44 // Returns a CharacterIterator pointing to one past the end of the segment of 45 // term that (once normalized) matches with normalized_term. 46 // 47 // Ex. FindNormalizedMatchEndPosition("YELLOW", "yell") will return 48 // CharacterIterator(u8:4, u16:4, u32:4). 49 // 50 // Ex. FindNormalizedMatchEndPosition("YELLOW", "red") will return 51 // CharacterIterator(u8:0, u16:0, u32:0). 52 CharacterIterator FindNormalizedMatchEndPosition( 53 std::string_view term, std::string_view normalized_term) const override; 54 55 private: 56 // The maximum term length allowed after normalization. 57 int max_term_byte_size_; 58 }; 59 60 } // namespace lib 61 } // namespace icing 62 63 #endif // ICING_TRANSFORM_MAP_MAP_NORMALIZER_H_ 64