xref: /aosp_15_r20/external/tensorflow/tensorflow/core/example/feature_util.h (revision b6fb3261f9314811a0f4371741dbb8839866f948)
1 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 // A set of lightweight wrappers which simplify access to Feature protos.
17 //
18 // TensorFlow Example proto uses associative maps on top of oneof fields.
19 // SequenceExample proto uses associative map of FeatureList.
20 // So accessing feature values is not very convenient.
21 //
// For example, to read the first value of the integer feature "tag":
23 //   int id = example.features().feature().at("tag").int64_list().value(0);
24 //
25 // to add a value:
26 //   auto features = example->mutable_features();
27 //   (*features->mutable_feature())["tag"].mutable_int64_list()->add_value(id);
28 //
29 // For float features you have to use float_list, for string - bytes_list.
30 //
31 // To do the same with this library:
32 //   int id = GetFeatureValues<int64_t>("tag", example).Get(0);
33 //   GetFeatureValues<int64_t>("tag", &example)->Add(id);
34 //
35 // Modification of bytes features is slightly different:
36 //   auto tag = GetFeatureValues<std::string>("tag", &example);
37 //   *tag->Add() = "lorem ipsum";
38 //
39 // To copy multiple values into a feature:
40 //   AppendFeatureValues({1,2,3}, "tag", &example);
41 //
// GetFeatureValues gives you access to the underlying data - a RepeatedField
// object (RepeatedPtrField for bytes lists). Refer to the RepeatedField
// documentation for the full list of supported methods.
45 //
46 // NOTE: Due to the nature of oneof proto fields setting a feature of one type
47 // automatically clears all values stored as another type with the same feature
48 // key.
49 //
50 // This library also has tools to work with SequenceExample protos.
51 //
52 // To get a value from SequenceExample.context:
53 //   int id = GetFeatureValues<protobuf_int64>("tag", se.context()).Get(0);
54 // To add a value to the context:
55 //   GetFeatureValues<protobuf_int64>("tag", se.mutable_context())->Add(42);
56 //
57 // To add values to feature_lists:
58 //   AppendFeatureValues({4.0},
59 //                       GetFeatureList("images", &se)->Add());
60 //   AppendFeatureValues({5.0, 3.0},
61 //                       GetFeatureList("images", &se)->Add());
62 // This will create a feature list keyed as "images" with two features:
63 //   feature_lists {
64 //     feature_list {
65 //       key: "images"
66 //       value {
67 //         feature { float_list { value: [4.0] } }
68 //         feature { float_list { value: [5.0, 3.0] } }
69 //       }
70 //     }
71 //   }
72 // For string-valued features, note that the Append... and Set... functions
73 // support absl::string_view containers. This allows you to copy existing
74 // buffers into a Feature with only one copy:
75 //   std::vector<absl::string_view> image;
76 //   image.push_back(image_buffer);               // No copy.
77 //   SetFeatureValues(image, "image", &example);  // Copy.
78 //
79 // Functions exposed by this library:
80 //   HasFeature<[FeatureType]>(key, proto) -> bool
81 //     Returns true if a feature with the specified key, and optionally
82 //     FeatureType, belongs to the Features or Example proto.
83 //   HasFeatureList(key, sequence_example) -> bool
84 //     Returns true if SequenceExample has a feature_list with the key.
85 //
86 //   GetFeatureValues<FeatureType>(key, proto) -> RepeatedField<FeatureType>
87 //     Returns values for the specified key and the FeatureType.
88 //     Supported types for the proto: Example, Features.
89 //   GetFeatureList(key, sequence_example) -> RepeatedPtrField<Feature>
90 //     Returns Feature protos associated with a key.
91 //
92 //   AppendFeatureValues(begin, end, feature)
93 //   AppendFeatureValues(container or initializer_list, feature)
94 //     Copies values into a Feature.
95 //   AppendFeatureValues(begin, end, key, proto)
96 //   AppendFeatureValues(container or initializer_list, key, proto)
97 //     Copies values into Features and Example protos with the specified key.
98 //
99 //   ClearFeatureValues<FeatureType>(feature)
100 //     Clears the feature's repeated field of the given type.
101 //
102 //   SetFeatureValues(begin, end, feature)
103 //   SetFeatureValues(container or initializer_list, feature)
104 //     Clears a Feature, then copies values into it.
105 //   SetFeatureValues(begin, end, key, proto)
106 //   SetFeatureValues(container or initializer_list, key, proto)
107 //     Clears Features or Example protos with the specified key,
108 //     then copies values into them.
109 //
110 // Auxiliary functions, it is unlikely you'll need to use them directly:
111 //   GetFeatures(proto) -> Features
112 //     A convenience function to get Features proto.
113 //     Supported types for the proto: Example, Features.
114 //   GetFeature(key, proto) -> Feature
115 //     Returns a Feature proto for the specified key.
116 //     Supported types for the proto: Example, Features.
117 //   GetFeatureValues<FeatureType>(feature) -> RepeatedField<FeatureType>
118 //     Returns values of the feature for the FeatureType.
119 
120 #ifndef TENSORFLOW_CORE_EXAMPLE_FEATURE_UTIL_H_
121 #define TENSORFLOW_CORE_EXAMPLE_FEATURE_UTIL_H_
122 
123 #include <algorithm>
124 #include <iterator>
125 #include <string>
126 #include <type_traits>
127 #include <utility>
128 
129 #include "absl/strings/string_view.h"
130 #include "tensorflow/core/example/example.pb.h"
131 #include "tensorflow/core/example/feature.pb.h"
132 #include "tensorflow/core/platform/protobuf.h"
133 #include "tensorflow/core/platform/stringpiece.h"
134 
135 // Must come after the import for absl::string_view.
136 #ifdef ABSL_HAVE_STD_STRING_VIEW
137 #include <string_view>
138 #endif
139 
140 namespace tensorflow {
141 namespace internal {
142 
// TODO(gorban): Update all clients in a followup CL.
// Returns a reference to the feature stored under `name` in `example`.
// Note: a new Feature is created if `name` is missing from the example.
// Deprecated: new code should call GetFeature instead.
ABSL_DEPRECATED("Use GetFeature instead.")
Feature& ExampleFeature(absl::string_view name, Example* example);
148 
// Specializations of RepeatedFieldTrait define the type of repeated field
// corresponding to a selected feature type. The primary template is left
// undefined, so using an unsupported feature type is a compile-time error.
template <typename FeatureType>
struct RepeatedFieldTrait;

// Integer features are held in a RepeatedField of protobuf_int64.
template <>
struct RepeatedFieldTrait<protobuf_int64> {
  using Type = protobuf::RepeatedField<protobuf_int64>;
};

// Floating point features are held in a RepeatedField of float.
template <>
struct RepeatedFieldTrait<float> {
  using Type = protobuf::RepeatedField<float>;
};

// tstring maps to the same repeated field type as std::string.
template <>
struct RepeatedFieldTrait<tstring> {
  using Type = protobuf::RepeatedPtrField<std::string>;
};

// String features are held in a RepeatedPtrField of std::string.
template <>
struct RepeatedFieldTrait<std::string> {
  using Type = protobuf::RepeatedPtrField<std::string>;
};
173 
174 // Specializations of FeatureTrait define a type of feature corresponding to a
175 // selected value type.
176 template <typename ValueType, class Enable = void>
177 struct FeatureTrait;
178 
179 template <typename ValueType>
180 struct FeatureTrait<ValueType, typename std::enable_if<
181                                    std::is_integral<ValueType>::value>::type> {
182   using Type = protobuf_int64;
183 };
184 
185 template <typename ValueType>
186 struct FeatureTrait<
187     ValueType,
188     typename std::enable_if<std::is_floating_point<ValueType>::value>::type> {
189   using Type = float;
190 };
191 
// is_string<T>::value is true when T decays to `char*` or `const char*`
// (C strings and character arrays). Other string-like types opt in through
// explicit specializations.
template <typename T>
struct is_string
    : std::bool_constant<std::is_same_v<std::decay_t<T>, char*> ||
                         std::is_same_v<std::decay_t<T>, const char*>> {};
199 
// Marks std::string as a string type.
template <>
struct is_string<std::string> : std::true_type {};

// Marks ::tensorflow::StringPiece as a string type.
template <>
struct is_string<::tensorflow::StringPiece> : std::true_type {};

// Marks tstring as a string type.
template <>
struct is_string<tstring> : std::true_type {};
208 
209 template <typename ValueType>
210 struct FeatureTrait<
211     ValueType, typename std::enable_if<is_string<ValueType>::value>::type> {
212   using Type = std::string;
213 };
214 
// Port of the C++20 `requires` expressions.
// Returns true when a callable of type F can be invoked with arguments of
// types T...; the callable itself is never called, only its type is inspected.
template <typename... T, typename F>
constexpr bool Requires(F) {
  return std::is_invocable_v<F, T...>;
}
220 
// Empty placeholder type, used below as a probe argument for `find`.
struct NoneSuch {};

// True if the Feature map in a tf.Example supports heterogeneous lookup.
// See https://abseil.io/tips/144.
//
// The value is computed at compile time by asking whether `find(NoneSuch{})`
// is a well-formed call on the map type returned by `Features::feature()`.
inline constexpr bool kFeatureMapHasHeterogeneousLookup =
    Requires<const decltype(Features::default_instance().feature())>(
        [](auto&& c) -> decltype(c.find(NoneSuch{})) {});
228 
229 // Converts an `absl::string_view` into a string-type compatible for use in the
230 // protobuf library (e.g. as lookup keys in `proto2::Map` or as elements addable
231 // to a `proto2::RepeatedPtrField`) depending on the BUILD mode.
232 //
233 // NOTE: While the newest versions of `proto2::Map` support heterogenous lookup,
234 // it does so through `std::string_view`. If the type is just an alias (as noted
235 // by `ABSL_USES_STD_STRING_VIEW`) then nothing more needs to be done; however,
236 // when the type is not an alias an explicit conversion to is necessary.
237 //
238 // NOTE: This conversion is only necessary until the migration for protobuf to
239 // take a dependency on ABSL is complete.
240 inline auto ProtoMapKey(absl::string_view str) {
241   if constexpr (kFeatureMapHasHeterogeneousLookup) {
242 #ifdef ABSL_USES_STD_STRING_VIEW
243     return str;
244 #else
245 #ifdef ABSL_HAVE_STD_STRING_VIEW
246     return std::string_view(str.data(), str.size());
247 #else
248     return std::string(str);
249 #endif
250 #endif
251   } else {
252     return std::string(str);
253   }
254 }
255 
256 }  //  namespace internal
257 
// Returns true if `sequence_example` has a feature_list with the specified
// `key`. Declared here; the definition is not part of this header.
bool HasFeatureList(absl::string_view key,
                    const SequenceExample& sequence_example);
261 
// TypeHasFeatures<T>::value is true for the proto types accepted by
// GetFeatures (Example and Features) and false for every other type.
template <typename T>
struct TypeHasFeatures : std::false_type {};

template <>
struct TypeHasFeatures<Example> : std::true_type {};

template <>
struct TypeHasFeatures<Features> : std::true_type {};
270 
271 // A family of template functions to return mutable Features proto from a
272 // container proto. Supported ProtoTypes: Example, Features.
273 template <typename ProtoType>
274 typename std::enable_if<TypeHasFeatures<ProtoType>::value, Features*>::type
275 GetFeatures(ProtoType* proto);
276 
277 template <typename ProtoType>
278 typename std::enable_if<TypeHasFeatures<ProtoType>::value,
279                         const Features&>::type
280 GetFeatures(const ProtoType& proto);
281 
// Base declaration of a family of template functions to return a read only
// repeated field of feature values. FeatureType selects the repeated field
// type through internal::RepeatedFieldTrait.
template <typename FeatureType>
const typename internal::RepeatedFieldTrait<FeatureType>::Type&
GetFeatureValues(const Feature& feature);
287 
288 // Returns a read only repeated field corresponding to a feature with the
289 // specified name and FeatureType. Supported ProtoTypes: Example, Features.
290 template <typename FeatureType, typename ProtoType>
291 const typename internal::RepeatedFieldTrait<FeatureType>::Type&
292 GetFeatureValues(absl::string_view key, const ProtoType& proto) {
293   return GetFeatureValues<FeatureType>(
294       GetFeatures(proto).feature().at(internal::ProtoMapKey(key)));
295 }
296 
// Returns a mutable repeated field of the feature's values. FeatureType
// selects the repeated field type through internal::RepeatedFieldTrait.
template <typename FeatureType>
typename internal::RepeatedFieldTrait<FeatureType>::Type* GetFeatureValues(
    Feature* feature);
301 
302 // Returns a mutable repeated field corresponding to a feature with the
303 // specified name and FeatureType. Supported ProtoTypes: Example, Features.
304 template <typename FeatureType, typename ProtoType>
305 typename internal::RepeatedFieldTrait<FeatureType>::Type* GetFeatureValues(
306     absl::string_view key, ProtoType* proto) {
307   ::tensorflow::Feature& feature =
308       (*GetFeatures(proto)->mutable_feature())[internal::ProtoMapKey(key)];
309   return GetFeatureValues<FeatureType>(&feature);
310 }
311 
312 // Returns a read-only Feature proto for the specified key, throws
313 // std::out_of_range if the key is not found. Supported types for the proto:
314 // Example, Features.
315 template <typename ProtoType>
316 const Feature& GetFeature(absl::string_view key, const ProtoType& proto) {
317   return GetFeatures(proto).feature().at(internal::ProtoMapKey(key));
318 }
319 
320 // Returns a mutable Feature proto for the specified key, creates a new if
321 // necessary. Supported types for the proto: Example, Features.
322 template <typename ProtoType>
323 Feature* GetFeature(absl::string_view key, ProtoType* proto) {
324   return &(*GetFeatures(proto)->mutable_feature())[internal::ProtoMapKey(key)];
325 }
326 
// Returns a read-only repeated field with the features corresponding to a
// feature_list key.
const protobuf::RepeatedPtrField<Feature>& GetFeatureList(
    absl::string_view key, const SequenceExample& sequence_example);

// Returns a mutable repeated field with features corresponding to a
// feature_list key. It will create a new FeatureList if necessary.
protobuf::RepeatedPtrField<Feature>* GetFeatureList(
    absl::string_view feature_list_key, SequenceExample* sequence_example);
335 
336 template <typename IteratorType>
337 void AppendFeatureValues(IteratorType first, IteratorType last,
338                          Feature* feature) {
339   using FeatureType = typename internal::FeatureTrait<
340       typename std::iterator_traits<IteratorType>::value_type>::Type;
341   auto& values = *GetFeatureValues<FeatureType>(feature);
342   values.Reserve(std::distance(first, last));
343   for (auto it = first; it != last; ++it) {
344     *values.Add() = *it;
345   }
346 }
347 
348 template <typename ValueType>
349 void AppendFeatureValues(std::initializer_list<ValueType> container,
350                          Feature* feature) {
351   using FeatureType = typename internal::FeatureTrait<ValueType>::Type;
352   auto& values = *GetFeatureValues<FeatureType>(feature);
353   values.Reserve(container.size());
354   for (auto& elt : container) {
355     *values.Add() = std::move(elt);
356   }
357 }
358 
namespace internal {

// HasSize<T>::value is true if an object of type T has a size() member that
// can be called without arguments, and false otherwise.
template <typename T, typename = void>
struct HasSize : std::false_type {};

template <typename T>
struct HasSize<T, std::void_t<decltype(std::declval<T>().size())>>
    : std::true_type {};

// Grows the capacity of `values` so that container.size() more elements fit,
// if the container exposes a size() method; otherwise does nothing.
//
// Reserve() takes the desired total size, so the number of elements already
// held by `values` is added to the container's size. The size() probe is made
// on `const ContainerType&` because that is how the container is accessed
// here.
template <typename ContainerType, typename RepeatedFieldType>
void ReserveIfSizeAvailable(
    [[maybe_unused]] const ContainerType& container,
    [[maybe_unused]] RepeatedFieldType& values) {
  if constexpr (HasSize<const ContainerType&>::value) {
    values.Reserve(values.size() + static_cast<int>(container.size()));
  }
}

}  // namespace internal
383 
384 template <typename ContainerType>
385 void AppendFeatureValues(const ContainerType& container, Feature* feature) {
386   using IteratorType = typename ContainerType::const_iterator;
387   using FeatureType = typename internal::FeatureTrait<
388       typename std::iterator_traits<IteratorType>::value_type>::Type;
389   auto* values = GetFeatureValues<FeatureType>(feature);
390   internal::ReserveIfSizeAvailable(container, *values);
391   // This is equivalent to std::copy into `values` with a
392   // RepeatedFieldBackInserter, the difference is RFBI isn't compatible with
393   // types that we want to convert (e.g. absl::string_view -> std::string).
394   for (const auto& elt : container) {
395     if constexpr (internal::is_string<FeatureType>::value) {
396       *values->Add() = std::string(elt);
397     } else {
398       *values->Add() = elt;
399     }
400   }
401 }
402 
403 // Copies elements from the range, defined by [first, last) into the feature
404 // obtainable from the (proto, key) combination.
405 template <typename IteratorType, typename ProtoType>
406 void AppendFeatureValues(IteratorType first, IteratorType last,
407                          absl::string_view key, ProtoType* proto) {
408   AppendFeatureValues(first, last, GetFeature(key, GetFeatures(proto)));
409 }
410 
411 // Copies all elements from the container into a feature.
412 template <typename ContainerType, typename ProtoType>
413 void AppendFeatureValues(const ContainerType& container, absl::string_view key,
414                          ProtoType* proto) {
415   AppendFeatureValues<ContainerType>(container,
416                                      GetFeature(key, GetFeatures(proto)));
417 }
418 
419 // Copies all elements from the initializer list into a Feature contained by
420 // Features or Example proto.
421 template <typename ValueType, typename ProtoType>
422 void AppendFeatureValues(std::initializer_list<ValueType> container,
423                          absl::string_view key, ProtoType* proto) {
424   AppendFeatureValues<ValueType>(container,
425                                  GetFeature(key, GetFeatures(proto)));
426 }
427 
// Clears the feature's repeated field (int64, float, or string) selected by
// FeatureType. Declared here; the definitions are not part of this header.
template <typename... FeatureType>
void ClearFeatureValues(Feature* feature);
431 
432 // Clears the feature's repeated field (int64, float, or string). Copies
433 // elements from the range, defined by [first, last) into the feature's repeated
434 // field.
435 template <typename IteratorType>
436 void SetFeatureValues(IteratorType first, IteratorType last, Feature* feature) {
437   using FeatureType = typename internal::FeatureTrait<
438       typename std::iterator_traits<IteratorType>::value_type>::Type;
439   ClearFeatureValues<FeatureType>(feature);
440   AppendFeatureValues(first, last, feature);
441 }
442 
443 // Clears the feature's repeated field (int64, float, or string). Copies all
444 // elements from the initializer list into the feature's repeated field.
445 template <typename ValueType>
446 void SetFeatureValues(std::initializer_list<ValueType> container,
447                       Feature* feature) {
448   using FeatureType = typename internal::FeatureTrait<ValueType>::Type;
449   ClearFeatureValues<FeatureType>(feature);
450   AppendFeatureValues(container, feature);
451 }
452 
453 // Clears the feature's repeated field (int64, float, or string). Copies all
454 // elements from the container into the feature's repeated field.
455 template <typename ContainerType>
456 void SetFeatureValues(const ContainerType& container, Feature* feature) {
457   using IteratorType = typename ContainerType::const_iterator;
458   using FeatureType = typename internal::FeatureTrait<
459       typename std::iterator_traits<IteratorType>::value_type>::Type;
460   ClearFeatureValues<FeatureType>(feature);
461   AppendFeatureValues(container, feature);
462 }
463 
464 // Clears the feature's repeated field (int64, float, or string). Copies
465 // elements from the range, defined by [first, last) into the feature's repeated
466 // field.
467 template <typename IteratorType, typename ProtoType>
468 void SetFeatureValues(IteratorType first, IteratorType last,
469                       absl::string_view key, ProtoType* proto) {
470   SetFeatureValues(first, last, GetFeature(key, GetFeatures(proto)));
471 }
472 
473 // Clears the feature's repeated field (int64, float, or string). Copies all
474 // elements from the container into the feature's repeated field.
475 template <typename ContainerType, typename ProtoType>
476 void SetFeatureValues(const ContainerType& container, absl::string_view key,
477                       ProtoType* proto) {
478   SetFeatureValues<ContainerType>(container,
479                                   GetFeature(key, GetFeatures(proto)));
480 }
481 
482 // Clears the feature's repeated field (int64, float, or string). Copies all
483 // elements from the initializer list into the feature's repeated field.
484 template <typename ValueType, typename ProtoType>
485 void SetFeatureValues(std::initializer_list<ValueType> container,
486                       absl::string_view key, ProtoType* proto) {
487   SetFeatureValues<ValueType>(container, GetFeature(key, GetFeatures(proto)));
488 }
489 
// Returns true if a feature with the specified key belongs to the Features.
// The template parameter pack accepts zero or one template argument - which
// is the FeatureType. If the FeatureType is not specified (zero template
// arguments) the function will not check the feature type. Otherwise it will
// return false if the feature has a wrong type.
template <typename... FeatureType>
bool HasFeature(absl::string_view key, const Features& features);
497 
498 // Returns true if a feature with the specified key belongs to the Example.
499 // Doesn't check feature type if used without FeatureType, otherwise the
500 // specialized versions return false if the feature has a wrong type.
501 template <typename... FeatureType>
502 bool HasFeature(absl::string_view key, const Example& example) {
503   return HasFeature<FeatureType...>(key, GetFeatures(example));
504 }
505 
506 // TODO(gorban): update all clients in a followup CL.
507 template <typename... FeatureType>
508 ABSL_DEPRECATED("Use HasFeature instead.")
509 bool ExampleHasFeature(absl::string_view key, const Example& example) {
510   return HasFeature<FeatureType...>(key, example);
511 }
512 
513 }  // namespace tensorflow
514 #endif  // TENSORFLOW_CORE_EXAMPLE_FEATURE_UTIL_H_
515