1 /* Copyright 2022 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #ifndef TENSORFLOW_CORE_PROFILER_CONVERT_OP_PROFILE_BUILDER_H_ 17 #define TENSORFLOW_CORE_PROFILER_CONVERT_OP_PROFILE_BUILDER_H_ 18 19 #include <cstdint> 20 #include <string> 21 22 #include "absl/container/flat_hash_map.h" 23 #include "absl/container/node_hash_map.h" 24 #include "tensorflow/core/platform/protobuf.h" 25 #include "tensorflow/core/profiler/protobuf/op_metrics.pb.h" 26 #include "tensorflow/core/profiler/protobuf/op_profile.pb.h" 27 28 namespace tensorflow { 29 namespace profiler { 30 31 struct OpProfileOptions { 32 bool group_by_program = true; 33 bool group_by_deduplicated_name = true; 34 int children_per_node = 100; 35 }; 36 37 class OpProfileBuilder { 38 public: 39 OpProfileBuilder(const OpProfileOptions& options, op_profile::Node* root, 40 const tensorflow::protobuf::Map<uint64_t, std::string>* 41 program_name_map = nullptr); 42 43 void AddOp(const OpMetrics& op_metrics); 44 45 void Finalize(double peak_gigaflops_per_second_per_core, 46 double peak_gibibytes_per_second_per_core, 47 uint64_t total_time_ps); 48 49 private: 50 struct Category { 51 op_profile::Node* node; 52 absl::flat_hash_map<std::string, op_profile::Node*> deduplicated_nodes; 53 }; 54 55 struct Program { 56 op_profile::Node* node; 57 absl::flat_hash_map<std::string, Category> categories; 58 }; 59 60 std::string GenerateProgramName(uint64_t program_id) const; 61 62 // Adds and returns a node for op_metrics. 63 // If op_metrics corresponds to a fusion, adds children to the node for the 64 // fused instructions. 65 // If deduplicated_node is not null, adds the node under it. 66 // Otherwise, if category is not null, adds the node under category. 67 // Otherwise, adds the node under root. 68 op_profile::Node* AddOpNode(const OpMetrics& op_metrics, 69 Category* category = nullptr, 70 op_profile::Node* deduplicated_node = nullptr); 71 72 // Returns a node for op_metrics.deduplicated_name(). 73 // Adds a node to the tree if necessary. 74 op_profile::Node* LookupOrAddDeduplicatedNode(const OpMetrics& op_metrics, 75 Category* category); 76 77 // Returns a node for op_metrics.category(). 78 // Adds a node to the tree if necessary. 79 // If program is not null, the category node is added under program. 80 // Otherwise, the category node is added under root. 81 Category* LookupOrAddCategoryNode(const OpMetrics& op_metrics, 82 Program* program); 83 84 // Returns a node for op_metrics.hlo_module_id(). 85 // Adds a node to the Node tree if necessary. 86 Program* LookupOrAddProgramNode(const OpMetrics& op_metrics); 87 88 OpProfileOptions options_; 89 op_profile::Node* root_; 90 91 // Map to look up and aggregate OpMetrics. 92 absl::node_hash_map<op_profile::Node*, OpMetrics> metrics_; 93 94 // Maps to look up if a category / program / deduplicated node has 95 // already been added to the tree. 96 absl::flat_hash_map<uint64_t, Program> programs_map_; 97 absl::flat_hash_map<std::string, Category> category_map_; 98 99 // Map to look up program names by id. 100 const tensorflow::protobuf::Map<uint64_t, std::string>* program_name_map_ = 101 nullptr; 102 }; 103 } // namespace profiler 104 } // namespace tensorflow 105 106 #endif // TENSORFLOW_CORE_PROFILER_CONVERT_OP_PROFILE_BUILDER_H_ 107