/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

// A set of lightweight wrappers which simplify access to Feature protos.
//
// TensorFlow Example proto uses associative maps on top of oneof fields.
// SequenceExample proto uses associative map of FeatureList.
// So accessing feature values is not very convenient.
//
// For example, to read the first value of integer feature "tag":
//   int id = example.features().feature().at("tag").int64_list().value(0);
//
// to add a value:
//   auto features = example->mutable_features();
//   (*features->mutable_feature())["tag"].mutable_int64_list()->add_value(id);
//
// For float features you have to use float_list, for string - bytes_list.
//
// To do the same with this library:
//   int id = GetFeatureValues<int64_t>("tag", example).Get(0);
//   GetFeatureValues<int64_t>("tag", &example)->Add(id);
//
// Modification of bytes features is slightly different:
//   auto tag = GetFeatureValues<std::string>("tag", &example);
//   *tag->Add() = "lorem ipsum";
//
// To copy multiple values into a feature:
//   AppendFeatureValues({1,2,3}, "tag", &example);
//
// GetFeatureValues gives you access to underlying data - RepeatedField object
// (RepeatedPtrField for byte list). Refer to the RepeatedField documentation
// for the full list of supported methods.
//
// NOTE: Due to the nature of oneof proto fields setting a feature of one type
// automatically clears all values stored as another type with the same feature
// key.
//
// This library also has tools to work with SequenceExample protos.
//
// To get a value from SequenceExample.context:
//   int id = GetFeatureValues<protobuf_int64>("tag", se.context()).Get(0);
// To add a value to the context:
//   GetFeatureValues<protobuf_int64>("tag", se.mutable_context())->Add(42);
//
// To add values to feature_lists:
//   AppendFeatureValues({4.0},
//                       GetFeatureList("images", &se)->Add());
//   AppendFeatureValues({5.0, 3.0},
//                       GetFeatureList("images", &se)->Add());
// This will create a feature list keyed as "images" with two features:
//   feature_lists {
//     feature_list {
//       key: "images"
//       value {
//         feature { float_list { value: [4.0] } }
//         feature { float_list { value: [5.0, 3.0] } }
//       }
//     }
//   }
// For string-valued features, note that the Append... and Set... functions
// support absl::string_view containers. This allows you to copy existing
// buffers into a Feature with only one copy:
//   std::vector<absl::string_view> image;
//   image.push_back(image_buffer);               // No copy.
//   SetFeatureValues(image, "image", &example);  // Copy.
//
// Functions exposed by this library:
//   HasFeature<[FeatureType]>(key, proto) -> bool
//     Returns true if a feature with the specified key, and optionally
//     FeatureType, belongs to the Features or Example proto.
//   HasFeatureList(key, sequence_example) -> bool
//     Returns true if SequenceExample has a feature_list with the key.
//
//   GetFeatureValues<FeatureType>(key, proto) -> RepeatedField<FeatureType>
//     Returns values for the specified key and the FeatureType.
//     Supported types for the proto: Example, Features.
//   GetFeatureList(key, sequence_example) -> RepeatedPtrField<Feature>
//     Returns Feature protos associated with a key.
//
//   AppendFeatureValues(begin, end, feature)
//   AppendFeatureValues(container or initializer_list, feature)
//     Copies values into a Feature.
//   AppendFeatureValues(begin, end, key, proto)
//   AppendFeatureValues(container or initializer_list, key, proto)
//     Copies values into Features and Example protos with the specified key.
//
//   ClearFeatureValues<FeatureType>(feature)
//     Clears the feature's repeated field of the given type.
//
//   SetFeatureValues(begin, end, feature)
//   SetFeatureValues(container or initializer_list, feature)
//     Clears a Feature, then copies values into it.
//   SetFeatureValues(begin, end, key, proto)
//   SetFeatureValues(container or initializer_list, key, proto)
//     Clears Features or Example protos with the specified key,
//     then copies values into them.
//
// Auxiliary functions, it is unlikely you'll need to use them directly:
//   GetFeatures(proto) -> Features
//     A convenience function to get Features proto.
//     Supported types for the proto: Example, Features.
//   GetFeature(key, proto) -> Feature
//     Returns a Feature proto for the specified key.
//     Supported types for the proto: Example, Features.
//   GetFeatureValues<FeatureType>(feature) -> RepeatedField<FeatureType>
//     Returns values of the feature for the FeatureType.
119 120 #ifndef TENSORFLOW_CORE_EXAMPLE_FEATURE_UTIL_H_ 121 #define TENSORFLOW_CORE_EXAMPLE_FEATURE_UTIL_H_ 122 123 #include <algorithm> 124 #include <iterator> 125 #include <string> 126 #include <type_traits> 127 #include <utility> 128 129 #include "absl/strings/string_view.h" 130 #include "tensorflow/core/example/example.pb.h" 131 #include "tensorflow/core/example/feature.pb.h" 132 #include "tensorflow/core/platform/protobuf.h" 133 #include "tensorflow/core/platform/stringpiece.h" 134 135 // Must come after the import for absl::string_view. 136 #ifdef ABSL_HAVE_STD_STRING_VIEW 137 #include <string_view> 138 #endif 139 140 namespace tensorflow { 141 namespace internal { 142 143 // TODO(gorban): Update all clients in a followup CL. 144 // Returns a reference to a feature corresponding to the name. 145 // Note: it will create a new Feature if it is missing in the example. 146 ABSL_DEPRECATED("Use GetFeature instead.") 147 Feature& ExampleFeature(absl::string_view name, Example* example); 148 149 // Specializations of RepeatedFieldTrait define a type of RepeatedField 150 // corresponding to a selected feature type. 151 template <typename FeatureType> 152 struct RepeatedFieldTrait; 153 154 template <> 155 struct RepeatedFieldTrait<protobuf_int64> { 156 using Type = protobuf::RepeatedField<protobuf_int64>; 157 }; 158 159 template <> 160 struct RepeatedFieldTrait<float> { 161 using Type = protobuf::RepeatedField<float>; 162 }; 163 164 template <> 165 struct RepeatedFieldTrait<tstring> { 166 using Type = protobuf::RepeatedPtrField<std::string>; 167 }; 168 169 template <> 170 struct RepeatedFieldTrait<std::string> { 171 using Type = protobuf::RepeatedPtrField<std::string>; 172 }; 173 174 // Specializations of FeatureTrait define a type of feature corresponding to a 175 // selected value type. 
176 template <typename ValueType, class Enable = void> 177 struct FeatureTrait; 178 179 template <typename ValueType> 180 struct FeatureTrait<ValueType, typename std::enable_if< 181 std::is_integral<ValueType>::value>::type> { 182 using Type = protobuf_int64; 183 }; 184 185 template <typename ValueType> 186 struct FeatureTrait< 187 ValueType, 188 typename std::enable_if<std::is_floating_point<ValueType>::value>::type> { 189 using Type = float; 190 }; 191 192 template <typename T> 193 struct is_string 194 : public std::integral_constant< 195 bool, 196 std::is_same<char*, typename std::decay<T>::type>::value || 197 std::is_same<const char*, typename std::decay<T>::type>::value> { 198 }; 199 200 template <> 201 struct is_string<std::string> : std::true_type {}; 202 203 template <> 204 struct is_string<::tensorflow::StringPiece> : std::true_type {}; 205 206 template <> 207 struct is_string<tstring> : std::true_type {}; 208 209 template <typename ValueType> 210 struct FeatureTrait< 211 ValueType, typename std::enable_if<is_string<ValueType>::value>::type> { 212 using Type = std::string; 213 }; 214 215 // Port of the C++20 `requires` expressions. 216 template <typename... T, typename F> 217 constexpr bool Requires(F) { 218 return std::is_invocable<F, T...>::value; 219 } 220 221 struct NoneSuch {}; 222 223 // True if the Feature map in a tf.Example supports heterogenous lookup. 224 // See https://abseil.io/tips/144. 225 inline constexpr bool kFeatureMapHasHeterogeneousLookup = 226 Requires<const decltype(Features::default_instance().feature())>( 227 [](auto&& c) -> decltype(c.find(NoneSuch{})) {}); 228 229 // Converts an `absl::string_view` into a string-type compatible for use in the 230 // protobuf library (e.g. as lookup keys in `proto2::Map` or as elements addable 231 // to a `proto2::RepeatedPtrField`) depending on the BUILD mode. 232 // 233 // NOTE: While the newest versions of `proto2::Map` support heterogenous lookup, 234 // it does so through `std::string_view`. 
If the type is just an alias (as noted 235 // by `ABSL_USES_STD_STRING_VIEW`) then nothing more needs to be done; however, 236 // when the type is not an alias an explicit conversion to is necessary. 237 // 238 // NOTE: This conversion is only necessary until the migration for protobuf to 239 // take a dependency on ABSL is complete. 240 inline auto ProtoMapKey(absl::string_view str) { 241 if constexpr (kFeatureMapHasHeterogeneousLookup) { 242 #ifdef ABSL_USES_STD_STRING_VIEW 243 return str; 244 #else 245 #ifdef ABSL_HAVE_STD_STRING_VIEW 246 return std::string_view(str.data(), str.size()); 247 #else 248 return std::string(str); 249 #endif 250 #endif 251 } else { 252 return std::string(str); 253 } 254 } 255 256 } // namespace internal 257 258 // Returns true if sequence_example has a feature_list with the specified key. 259 bool HasFeatureList(absl::string_view key, 260 const SequenceExample& sequence_example); 261 262 template <typename T> 263 struct TypeHasFeatures : std::false_type {}; 264 265 template <> 266 struct TypeHasFeatures<Example> : std::true_type {}; 267 268 template <> 269 struct TypeHasFeatures<Features> : std::true_type {}; 270 271 // A family of template functions to return mutable Features proto from a 272 // container proto. Supported ProtoTypes: Example, Features. 273 template <typename ProtoType> 274 typename std::enable_if<TypeHasFeatures<ProtoType>::value, Features*>::type 275 GetFeatures(ProtoType* proto); 276 277 template <typename ProtoType> 278 typename std::enable_if<TypeHasFeatures<ProtoType>::value, 279 const Features&>::type 280 GetFeatures(const ProtoType& proto); 281 282 // Base declaration of a family of template functions to return a read only 283 // repeated field of feature values. 
284 template <typename FeatureType> 285 const typename internal::RepeatedFieldTrait<FeatureType>::Type& 286 GetFeatureValues(const Feature& feature); 287 288 // Returns a read only repeated field corresponding to a feature with the 289 // specified name and FeatureType. Supported ProtoTypes: Example, Features. 290 template <typename FeatureType, typename ProtoType> 291 const typename internal::RepeatedFieldTrait<FeatureType>::Type& 292 GetFeatureValues(absl::string_view key, const ProtoType& proto) { 293 return GetFeatureValues<FeatureType>( 294 GetFeatures(proto).feature().at(internal::ProtoMapKey(key))); 295 } 296 297 // Returns a mutable repeated field of a feature values. 298 template <typename FeatureType> 299 typename internal::RepeatedFieldTrait<FeatureType>::Type* GetFeatureValues( 300 Feature* feature); 301 302 // Returns a mutable repeated field corresponding to a feature with the 303 // specified name and FeatureType. Supported ProtoTypes: Example, Features. 304 template <typename FeatureType, typename ProtoType> 305 typename internal::RepeatedFieldTrait<FeatureType>::Type* GetFeatureValues( 306 absl::string_view key, ProtoType* proto) { 307 ::tensorflow::Feature& feature = 308 (*GetFeatures(proto)->mutable_feature())[internal::ProtoMapKey(key)]; 309 return GetFeatureValues<FeatureType>(&feature); 310 } 311 312 // Returns a read-only Feature proto for the specified key, throws 313 // std::out_of_range if the key is not found. Supported types for the proto: 314 // Example, Features. 315 template <typename ProtoType> 316 const Feature& GetFeature(absl::string_view key, const ProtoType& proto) { 317 return GetFeatures(proto).feature().at(internal::ProtoMapKey(key)); 318 } 319 320 // Returns a mutable Feature proto for the specified key, creates a new if 321 // necessary. Supported types for the proto: Example, Features. 
322 template <typename ProtoType> 323 Feature* GetFeature(absl::string_view key, ProtoType* proto) { 324 return &(*GetFeatures(proto)->mutable_feature())[internal::ProtoMapKey(key)]; 325 } 326 327 // Returns a repeated field with features corresponding to a feature_list key. 328 const protobuf::RepeatedPtrField<Feature>& GetFeatureList( 329 absl::string_view key, const SequenceExample& sequence_example); 330 331 // Returns a mutable repeated field with features corresponding to a 332 // feature_list key. It will create a new FeatureList if necessary. 333 protobuf::RepeatedPtrField<Feature>* GetFeatureList( 334 absl::string_view feature_list_key, SequenceExample* sequence_example); 335 336 template <typename IteratorType> 337 void AppendFeatureValues(IteratorType first, IteratorType last, 338 Feature* feature) { 339 using FeatureType = typename internal::FeatureTrait< 340 typename std::iterator_traits<IteratorType>::value_type>::Type; 341 auto& values = *GetFeatureValues<FeatureType>(feature); 342 values.Reserve(std::distance(first, last)); 343 for (auto it = first; it != last; ++it) { 344 *values.Add() = *it; 345 } 346 } 347 348 template <typename ValueType> 349 void AppendFeatureValues(std::initializer_list<ValueType> container, 350 Feature* feature) { 351 using FeatureType = typename internal::FeatureTrait<ValueType>::Type; 352 auto& values = *GetFeatureValues<FeatureType>(feature); 353 values.Reserve(container.size()); 354 for (auto& elt : container) { 355 *values.Add() = std::move(elt); 356 } 357 } 358 359 namespace internal { 360 361 // HasSize<T>::value is true_type if T has a size() member. 362 template <typename T, typename = void> 363 struct HasSize : std::false_type {}; 364 365 template <typename T> 366 struct HasSize<T, absl::void_t<decltype(std::declval<T>().size())>> 367 : std::true_type {}; 368 369 // Reserves the container's size, if a container.size() method exists. 
370 template <typename ContainerType, typename RepeatedFieldType> 371 auto ReserveIfSizeAvailable(const ContainerType& container, 372 RepeatedFieldType& values) -> 373 typename std::enable_if_t<HasSize<ContainerType>::value, void> { 374 values.Reserve(container.size()); 375 } 376 377 template <typename ContainerType, typename RepeatedFieldType> 378 auto ReserveIfSizeAvailable(const ContainerType& container, 379 RepeatedFieldType& values) -> 380 typename std::enable_if_t<!HasSize<ContainerType>::value, void> {} 381 382 } // namespace internal 383 384 template <typename ContainerType> 385 void AppendFeatureValues(const ContainerType& container, Feature* feature) { 386 using IteratorType = typename ContainerType::const_iterator; 387 using FeatureType = typename internal::FeatureTrait< 388 typename std::iterator_traits<IteratorType>::value_type>::Type; 389 auto* values = GetFeatureValues<FeatureType>(feature); 390 internal::ReserveIfSizeAvailable(container, *values); 391 // This is equivalent to std::copy into `values` with a 392 // RepeatedFieldBackInserter, the difference is RFBI isn't compatible with 393 // types that we want to convert (e.g. absl::string_view -> std::string). 394 for (const auto& elt : container) { 395 if constexpr (internal::is_string<FeatureType>::value) { 396 *values->Add() = std::string(elt); 397 } else { 398 *values->Add() = elt; 399 } 400 } 401 } 402 403 // Copies elements from the range, defined by [first, last) into the feature 404 // obtainable from the (proto, key) combination. 405 template <typename IteratorType, typename ProtoType> 406 void AppendFeatureValues(IteratorType first, IteratorType last, 407 absl::string_view key, ProtoType* proto) { 408 AppendFeatureValues(first, last, GetFeature(key, GetFeatures(proto))); 409 } 410 411 // Copies all elements from the container into a feature. 
412 template <typename ContainerType, typename ProtoType> 413 void AppendFeatureValues(const ContainerType& container, absl::string_view key, 414 ProtoType* proto) { 415 AppendFeatureValues<ContainerType>(container, 416 GetFeature(key, GetFeatures(proto))); 417 } 418 419 // Copies all elements from the initializer list into a Feature contained by 420 // Features or Example proto. 421 template <typename ValueType, typename ProtoType> 422 void AppendFeatureValues(std::initializer_list<ValueType> container, 423 absl::string_view key, ProtoType* proto) { 424 AppendFeatureValues<ValueType>(container, 425 GetFeature(key, GetFeatures(proto))); 426 } 427 428 // Clears the feature's repeated field (int64, float, or string). 429 template <typename... FeatureType> 430 void ClearFeatureValues(Feature* feature); 431 432 // Clears the feature's repeated field (int64, float, or string). Copies 433 // elements from the range, defined by [first, last) into the feature's repeated 434 // field. 435 template <typename IteratorType> 436 void SetFeatureValues(IteratorType first, IteratorType last, Feature* feature) { 437 using FeatureType = typename internal::FeatureTrait< 438 typename std::iterator_traits<IteratorType>::value_type>::Type; 439 ClearFeatureValues<FeatureType>(feature); 440 AppendFeatureValues(first, last, feature); 441 } 442 443 // Clears the feature's repeated field (int64, float, or string). Copies all 444 // elements from the initializer list into the feature's repeated field. 445 template <typename ValueType> 446 void SetFeatureValues(std::initializer_list<ValueType> container, 447 Feature* feature) { 448 using FeatureType = typename internal::FeatureTrait<ValueType>::Type; 449 ClearFeatureValues<FeatureType>(feature); 450 AppendFeatureValues(container, feature); 451 } 452 453 // Clears the feature's repeated field (int64, float, or string). Copies all 454 // elements from the container into the feature's repeated field. 
455 template <typename ContainerType> 456 void SetFeatureValues(const ContainerType& container, Feature* feature) { 457 using IteratorType = typename ContainerType::const_iterator; 458 using FeatureType = typename internal::FeatureTrait< 459 typename std::iterator_traits<IteratorType>::value_type>::Type; 460 ClearFeatureValues<FeatureType>(feature); 461 AppendFeatureValues(container, feature); 462 } 463 464 // Clears the feature's repeated field (int64, float, or string). Copies 465 // elements from the range, defined by [first, last) into the feature's repeated 466 // field. 467 template <typename IteratorType, typename ProtoType> 468 void SetFeatureValues(IteratorType first, IteratorType last, 469 absl::string_view key, ProtoType* proto) { 470 SetFeatureValues(first, last, GetFeature(key, GetFeatures(proto))); 471 } 472 473 // Clears the feature's repeated field (int64, float, or string). Copies all 474 // elements from the container into the feature's repeated field. 475 template <typename ContainerType, typename ProtoType> 476 void SetFeatureValues(const ContainerType& container, absl::string_view key, 477 ProtoType* proto) { 478 SetFeatureValues<ContainerType>(container, 479 GetFeature(key, GetFeatures(proto))); 480 } 481 482 // Clears the feature's repeated field (int64, float, or string). Copies all 483 // elements from the initializer list into the feature's repeated field. 484 template <typename ValueType, typename ProtoType> 485 void SetFeatureValues(std::initializer_list<ValueType> container, 486 absl::string_view key, ProtoType* proto) { 487 SetFeatureValues<ValueType>(container, GetFeature(key, GetFeatures(proto))); 488 } 489 490 // Returns true if a feature with the specified key belongs to the Features. 491 // The template parameter pack accepts zero or one template argument - which 492 // is FeatureType. If the FeatureType not specified (zero template arguments) 493 // the function will not check the feature type. 
Otherwise it will return false 494 // if the feature has a wrong type. 495 template <typename... FeatureType> 496 bool HasFeature(absl::string_view key, const Features& features); 497 498 // Returns true if a feature with the specified key belongs to the Example. 499 // Doesn't check feature type if used without FeatureType, otherwise the 500 // specialized versions return false if the feature has a wrong type. 501 template <typename... FeatureType> 502 bool HasFeature(absl::string_view key, const Example& example) { 503 return HasFeature<FeatureType...>(key, GetFeatures(example)); 504 } 505 506 // TODO(gorban): update all clients in a followup CL. 507 template <typename... FeatureType> 508 ABSL_DEPRECATED("Use HasFeature instead.") 509 bool ExampleHasFeature(absl::string_view key, const Example& example) { 510 return HasFeature<FeatureType...>(key, example); 511 } 512 513 } // namespace tensorflow 514 #endif // TENSORFLOW_CORE_EXAMPLE_FEATURE_UTIL_H_ 515