1 // Copyright (C) 2019 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #ifndef ICING_TRANSFORM_NORMALIZER_H_ 16 #define ICING_TRANSFORM_NORMALIZER_H_ 17 18 #include <memory> 19 #include <string> 20 #include <string_view> 21 22 #include "icing/text_classifier/lib3/utils/base/statusor.h" 23 #include "icing/util/character-iterator.h" 24 25 namespace icing { 26 namespace lib { 27 28 // Normalizes strings for text matching. 29 // 30 // Example use: 31 // ICING_ASSIGN_OR_RETURN(auto normalizer, 32 // normalizer_factory::Create(/*max_term_byte_size=*/5); 33 // 34 // std::string normalized_text = normalizer->NormalizeText("HELLO!"); 35 // ICING_LOG(INFO) << normalized_text; // prints "hello" 36 class Normalizer { 37 public: 38 virtual ~Normalizer() = default; 39 40 // Normalizes the input term based on rules. See implementation classes for 41 // specific transformation rules. 42 struct NormalizedTerm { 43 std::string text; 44 }; 45 virtual NormalizedTerm NormalizeTerm(std::string_view term) const = 0; 46 47 // Returns a CharacterIterator pointing to one past the end of the segment of 48 // term that (once normalized) matches with normalized_term. 49 // 50 // Ex. FindNormalizedMatchEndPosition("YELLOW", "yell") will return 51 // CharacterIterator(u8:4, u16:4, u32:4). 52 // 53 // Ex. FindNormalizedMatchEndPosition("YELLOW", "red") will return 54 // CharacterIterator(u8:0, u16:0, u32:0). 55 virtual CharacterIterator FindNormalizedMatchEndPosition( 56 std::string_view term, std::string_view normalized_term) const = 0; 57 }; 58 59 } // namespace lib 60 } // namespace icing 61 62 #endif // ICING_TRANSFORM_NORMALIZER_H_ 63