xref: /aosp_15_r20/external/libtextclassifier/native/lang_id/common/fel/feature-types.h (revision 993b0882672172b81d12fad7a7ac0c3e5c824a12)
1 /*
2  * Copyright (C) 2018 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 // Common feature types for parser components.
18 
19 #ifndef NLP_SAFT_COMPONENTS_COMMON_MOBILE_FEL_FEATURE_TYPES_H_
20 #define NLP_SAFT_COMPONENTS_COMMON_MOBILE_FEL_FEATURE_TYPES_H_
21 
22 #include <algorithm>
23 #include <map>
24 #include <string>
25 #include <utility>
26 
27 #include "lang_id/common/lite_base/integral-types.h"
28 #include "lang_id/common/lite_base/logging.h"
29 #include "lang_id/common/lite_strings/str-cat.h"
30 #include "absl/strings/match.h"
31 #include "absl/strings/string_view.h"
32 
33 namespace libtextclassifier3 {
34 namespace mobile {
35 
36 // TODO(djweiss) Clean this up as well.
37 // Use the same type for feature values as is used for predicated.
38 typedef int64 Predicate;
39 typedef Predicate FeatureValue;
40 
41 // Each feature value in a feature vector has a feature type. The feature type
42 // is used for converting feature type and value pairs to predicate values. The
43 // feature type can also return names for feature values and calculate the size
44 // of the feature value domain. The FeatureType class is abstract and must be
45 // specialized for the concrete feature types.
46 class FeatureType {
47  public:
48   // Initializes a feature type.
FeatureType(absl::string_view name)49   explicit FeatureType(absl::string_view name)
50       : name_(name),
51         base_(0),
52         is_continuous_(absl::StrContains(name, "continuous")) {}
53 
~FeatureType()54   virtual ~FeatureType() {}
55 
56   // Converts a feature value to a name.
57   virtual std::string GetFeatureValueName(FeatureValue value) const = 0;
58 
59   // Returns the size of the feature values domain.
60   virtual int64 GetDomainSize() const = 0;
61 
62   // Returns the feature type name.
name()63   const std::string &name() const { return name_; }
64 
base()65   Predicate base() const { return base_; }
set_base(Predicate base)66   void set_base(Predicate base) { base_ = base; }
67 
68   // Returns true iff this feature is continuous; see FloatFeatureValue.
is_continuous()69   bool is_continuous() const { return is_continuous_; }
70 
71  private:
72   // Feature type name.
73   std::string name_;
74 
75   // "Base" feature value: i.e. a "slot" in a global ordering of features.
76   Predicate base_;
77 
78   // See doc for is_continuous().
79   bool is_continuous_;
80 };
81 
82 // Feature type that is defined using an explicit map from FeatureValue to
83 // string values.  This can reduce some of the boilerplate when defining
84 // features that generate enum values.  Example usage:
85 //
86 //   class BeverageSizeFeature : public FeatureFunction<Beverage>
87 //     enum FeatureValue { SMALL, MEDIUM, LARGE };  // values for this feature
88 //     void Init(TaskContext *context) override {
89 //       set_feature_type(new EnumFeatureType("beverage_size",
90 //           {{SMALL, "SMALL"}, {MEDIUM, "MEDIUM"}, {LARGE, "LARGE"}});
91 //     }
92 //     [...]
93 //   };
94 class EnumFeatureType : public FeatureType {
95  public:
EnumFeatureType(absl::string_view name,const std::map<FeatureValue,std::string> & value_names)96   EnumFeatureType(absl::string_view name,
97                   const std::map<FeatureValue, std::string> &value_names)
98       : FeatureType(name), value_names_(value_names) {
99     for (const auto &pair : value_names) {
100       SAFTM_CHECK_GE(pair.first, 0)
101           << "Invalid feature value: " << pair.first << ", " << pair.second;
102       domain_size_ = std::max(domain_size_, pair.first + 1);
103     }
104   }
105 
106   // Returns the feature name for a given feature value.
GetFeatureValueName(FeatureValue value)107   std::string GetFeatureValueName(FeatureValue value) const override {
108     auto it = value_names_.find(value);
109     if (it == value_names_.end()) {
110       SAFTM_LOG(ERROR) << "Invalid feature value " << value << " for "
111                        << name();
112       return "<INVALID>";
113     }
114     return it->second;
115   }
116 
117   // Returns the number of possible values for this feature type. This is one
118   // greater than the largest value in the value_names map.
GetDomainSize()119   FeatureValue GetDomainSize() const override { return domain_size_; }
120 
121  protected:
122   // Maximum possible value this feature could take.
123   FeatureValue domain_size_ = 0;
124 
125   // Names of feature values.
126   std::map<FeatureValue, std::string> value_names_;
127 };
128 
129 // Feature type for binary features.
130 class BinaryFeatureType : public FeatureType {
131  public:
BinaryFeatureType(absl::string_view name,absl::string_view off,absl::string_view on)132   BinaryFeatureType(absl::string_view name, absl::string_view off,
133                     absl::string_view on)
134       : FeatureType(name), off_(off), on_(on) {}
135 
136   // Returns the feature name for a given feature value.
GetFeatureValueName(FeatureValue value)137   std::string GetFeatureValueName(FeatureValue value) const override {
138     if (value == 0) return off_;
139     if (value == 1) return on_;
140     return "";
141   }
142 
143   // Binary features always have two feature values.
GetDomainSize()144   FeatureValue GetDomainSize() const override { return 2; }
145 
146  private:
147   // Feature value names for on and off.
148   std::string off_;
149   std::string on_;
150 };
151 
152 // Feature type for numeric features.
153 class NumericFeatureType : public FeatureType {
154  public:
155   // Initializes numeric feature.
NumericFeatureType(absl::string_view name,FeatureValue size)156   NumericFeatureType(absl::string_view name, FeatureValue size)
157       : FeatureType(name), size_(size) {}
158 
159   // Returns numeric feature value.
GetFeatureValueName(FeatureValue value)160   std::string GetFeatureValueName(FeatureValue value) const override {
161     if (value < 0) return "";
162     return LiteStrCat(value);
163   }
164 
165   // Returns the number of feature values.
GetDomainSize()166   FeatureValue GetDomainSize() const override { return size_; }
167 
168  private:
169   // The underlying size of the numeric feature.
170   FeatureValue size_;
171 };
172 
173 // Feature type for byte features, including an "outside" value.
174 class ByteFeatureType : public NumericFeatureType {
175  public:
ByteFeatureType(absl::string_view name)176   explicit ByteFeatureType(absl::string_view name)
177       : NumericFeatureType(name, 257) {}
178 
GetFeatureValueName(FeatureValue value)179   std::string GetFeatureValueName(FeatureValue value) const override {
180     if (value == 256) {
181       return "<NULL>";
182     }
183     std::string result;
184     result += static_cast<char>(value);
185     return result;
186   }
187 };
188 
189 }  // namespace mobile
}  // namespace libtextclassifier3
191 
192 #endif  // NLP_SAFT_COMPONENTS_COMMON_MOBILE_FEL_FEATURE_TYPES_H_
193