1 /* 2 * Copyright (C) 2018 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 // Common feature types for parser components. 18 19 #ifndef NLP_SAFT_COMPONENTS_COMMON_MOBILE_FEL_FEATURE_TYPES_H_ 20 #define NLP_SAFT_COMPONENTS_COMMON_MOBILE_FEL_FEATURE_TYPES_H_ 21 22 #include <algorithm> 23 #include <map> 24 #include <string> 25 #include <utility> 26 27 #include "lang_id/common/lite_base/integral-types.h" 28 #include "lang_id/common/lite_base/logging.h" 29 #include "lang_id/common/lite_strings/str-cat.h" 30 #include "absl/strings/match.h" 31 #include "absl/strings/string_view.h" 32 33 namespace libtextclassifier3 { 34 namespace mobile { 35 36 // TODO(djweiss) Clean this up as well. 37 // Use the same type for feature values as is used for predicated. 38 typedef int64 Predicate; 39 typedef Predicate FeatureValue; 40 41 // Each feature value in a feature vector has a feature type. The feature type 42 // is used for converting feature type and value pairs to predicate values. The 43 // feature type can also return names for feature values and calculate the size 44 // of the feature value domain. The FeatureType class is abstract and must be 45 // specialized for the concrete feature types. 46 class FeatureType { 47 public: 48 // Initializes a feature type. FeatureType(absl::string_view name)49 explicit FeatureType(absl::string_view name) 50 : name_(name), 51 base_(0), 52 is_continuous_(absl::StrContains(name, "continuous")) {} 53 ~FeatureType()54 virtual ~FeatureType() {} 55 56 // Converts a feature value to a name. 57 virtual std::string GetFeatureValueName(FeatureValue value) const = 0; 58 59 // Returns the size of the feature values domain. 60 virtual int64 GetDomainSize() const = 0; 61 62 // Returns the feature type name. name()63 const std::string &name() const { return name_; } 64 base()65 Predicate base() const { return base_; } set_base(Predicate base)66 void set_base(Predicate base) { base_ = base; } 67 68 // Returns true iff this feature is continuous; see FloatFeatureValue. is_continuous()69 bool is_continuous() const { return is_continuous_; } 70 71 private: 72 // Feature type name. 73 std::string name_; 74 75 // "Base" feature value: i.e. a "slot" in a global ordering of features. 76 Predicate base_; 77 78 // See doc for is_continuous(). 79 bool is_continuous_; 80 }; 81 82 // Feature type that is defined using an explicit map from FeatureValue to 83 // string values. This can reduce some of the boilerplate when defining 84 // features that generate enum values. Example usage: 85 // 86 // class BeverageSizeFeature : public FeatureFunction<Beverage> 87 // enum FeatureValue { SMALL, MEDIUM, LARGE }; // values for this feature 88 // void Init(TaskContext *context) override { 89 // set_feature_type(new EnumFeatureType("beverage_size", 90 // {{SMALL, "SMALL"}, {MEDIUM, "MEDIUM"}, {LARGE, "LARGE"}}); 91 // } 92 // [...] 93 // }; 94 class EnumFeatureType : public FeatureType { 95 public: EnumFeatureType(absl::string_view name,const std::map<FeatureValue,std::string> & value_names)96 EnumFeatureType(absl::string_view name, 97 const std::map<FeatureValue, std::string> &value_names) 98 : FeatureType(name), value_names_(value_names) { 99 for (const auto &pair : value_names) { 100 SAFTM_CHECK_GE(pair.first, 0) 101 << "Invalid feature value: " << pair.first << ", " << pair.second; 102 domain_size_ = std::max(domain_size_, pair.first + 1); 103 } 104 } 105 106 // Returns the feature name for a given feature value. GetFeatureValueName(FeatureValue value)107 std::string GetFeatureValueName(FeatureValue value) const override { 108 auto it = value_names_.find(value); 109 if (it == value_names_.end()) { 110 SAFTM_LOG(ERROR) << "Invalid feature value " << value << " for " 111 << name(); 112 return "<INVALID>"; 113 } 114 return it->second; 115 } 116 117 // Returns the number of possible values for this feature type. This is one 118 // greater than the largest value in the value_names map. GetDomainSize()119 FeatureValue GetDomainSize() const override { return domain_size_; } 120 121 protected: 122 // Maximum possible value this feature could take. 123 FeatureValue domain_size_ = 0; 124 125 // Names of feature values. 126 std::map<FeatureValue, std::string> value_names_; 127 }; 128 129 // Feature type for binary features. 130 class BinaryFeatureType : public FeatureType { 131 public: BinaryFeatureType(absl::string_view name,absl::string_view off,absl::string_view on)132 BinaryFeatureType(absl::string_view name, absl::string_view off, 133 absl::string_view on) 134 : FeatureType(name), off_(off), on_(on) {} 135 136 // Returns the feature name for a given feature value. GetFeatureValueName(FeatureValue value)137 std::string GetFeatureValueName(FeatureValue value) const override { 138 if (value == 0) return off_; 139 if (value == 1) return on_; 140 return ""; 141 } 142 143 // Binary features always have two feature values. GetDomainSize()144 FeatureValue GetDomainSize() const override { return 2; } 145 146 private: 147 // Feature value names for on and off. 148 std::string off_; 149 std::string on_; 150 }; 151 152 // Feature type for numeric features. 153 class NumericFeatureType : public FeatureType { 154 public: 155 // Initializes numeric feature. NumericFeatureType(absl::string_view name,FeatureValue size)156 NumericFeatureType(absl::string_view name, FeatureValue size) 157 : FeatureType(name), size_(size) {} 158 159 // Returns numeric feature value. GetFeatureValueName(FeatureValue value)160 std::string GetFeatureValueName(FeatureValue value) const override { 161 if (value < 0) return ""; 162 return LiteStrCat(value); 163 } 164 165 // Returns the number of feature values. GetDomainSize()166 FeatureValue GetDomainSize() const override { return size_; } 167 168 private: 169 // The underlying size of the numeric feature. 170 FeatureValue size_; 171 }; 172 173 // Feature type for byte features, including an "outside" value. 174 class ByteFeatureType : public NumericFeatureType { 175 public: ByteFeatureType(absl::string_view name)176 explicit ByteFeatureType(absl::string_view name) 177 : NumericFeatureType(name, 257) {} 178 GetFeatureValueName(FeatureValue value)179 std::string GetFeatureValueName(FeatureValue value) const override { 180 if (value == 256) { 181 return "<NULL>"; 182 } 183 std::string result; 184 result += static_cast<char>(value); 185 return result; 186 } 187 }; 188 189 } // namespace mobile 190 } // namespace nlp_saft 191 192 #endif // NLP_SAFT_COMPONENTS_COMMON_MOBILE_FEL_FEATURE_TYPES_H_ 193