1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_HUMAN_READABLE_PROFILE_BUILDER_H_ 17 #define TENSORFLOW_COMPILER_XLA_SERVICE_HUMAN_READABLE_PROFILE_BUILDER_H_ 18 19 #include <string> 20 #include <vector> 21 22 #include "absl/strings/string_view.h" 23 #include "tensorflow/compiler/xla/types.h" 24 #include "tensorflow/core/platform/logging.h" 25 26 namespace xla { 27 28 // HumanReadableProfileBuilder helps you create a textual profile of a 29 // computation, suitable for consumption by humans. 30 class HumanReadableProfileBuilder { 31 public: HumanReadableProfileBuilder(absl::string_view computation_name,bool is_entry_computation,int64_t total_cycles,double clock_rate_ghz)32 explicit HumanReadableProfileBuilder(absl::string_view computation_name, 33 bool is_entry_computation, 34 int64_t total_cycles, 35 double clock_rate_ghz) 36 : computation_name_(computation_name), 37 is_entry_computation_(is_entry_computation), 38 total_cycles_(total_cycles), 39 clock_rate_ghz_(clock_rate_ghz) { 40 CHECK_GE(clock_rate_ghz, 1e-9); 41 } 42 total_cycles()43 int64_t total_cycles() const { return total_cycles_; } 44 45 // Adds an operation to the profile. If you don't know the number of 46 // floating-point ops or bytes touched by the op, or if you don't know how 47 // fast it would run optimally, pass -1 for that param. AddOp(absl::string_view op_name,absl::string_view short_name,absl::string_view category,int64_t cycles,int64_t flop_count,int64_t transcendental_count,int64_t bytes_accessed,float optimal_seconds)48 void AddOp(absl::string_view op_name, absl::string_view short_name, 49 absl::string_view category, int64_t cycles, int64_t flop_count, 50 int64_t transcendental_count, int64_t bytes_accessed, 51 float optimal_seconds) { 52 op_infos_.push_back({std::string(op_name), std::string(short_name), 53 std::string(category), cycles, flop_count, 54 transcendental_count, bytes_accessed, 55 optimal_seconds}); 56 } 57 58 // Gets the human-readable profile. 59 std::string ToString() const; 60 61 private: 62 struct OpInfo { 63 std::string name; 64 std::string short_name; 65 std::string category; 66 int64_t cycles; 67 int64_t flop_count; // -1 if unknown 68 int64_t transcendental_count; 69 int64_t bytes_accessed; // -1 if unknown 70 float optimal_seconds; // -1 if unknown 71 }; 72 CyclesToSeconds(int64_t cycles)73 double CyclesToSeconds(int64_t cycles) const { 74 return cycles / clock_rate_ghz_ / 1e9; 75 } CyclesToMicroseconds(int64_t cycles)76 double CyclesToMicroseconds(int64_t cycles) const { 77 return cycles / clock_rate_ghz_ / 1000.0; 78 } 79 80 std::string computation_name_; 81 bool is_entry_computation_; 82 int64_t total_cycles_; 83 double clock_rate_ghz_; 84 std::vector<OpInfo> op_infos_; 85 }; 86 87 } // namespace xla 88 89 #endif // TENSORFLOW_COMPILER_XLA_SERVICE_HUMAN_READABLE_PROFILE_BUILDER_H_ 90