syntax = "proto3";

package tensorflow.profiler;

import "google/protobuf/any.proto";
import "tensorflow/core/profiler/protobuf/diagnostics.proto";
import "tensorflow/core/profiler/protobuf/input_pipeline.proto";

// Overview result for a TensorFlow Op.
message OverviewTfOp {
  // Name of the Op.
  string name = 1;
  // Category of the Op.
  string category = 2;
  // The amount of time that this Op takes by itself
  // as a fraction of the total execution time on the device or host.
  double self_time_fraction = 3;
  // The cumulative time up to this Op as a fraction of the total execution
  // time.
  double cumulative_time_fraction = 4;
  // How many GFlops/sec this Op achieves.
  double flop_rate = 5;
  // Whether the Op is eligible to use TensorCores.
  bool is_op_tensorcore_eligible = 6;
  // Whether at least one of the kernels launched in this op is using
  // TensorCore.
  bool is_op_using_tensorcore = 7;
}

// Overview result for general analysis.
message OverviewPageAnalysis {
  // MXU utilization in percentage.
  double mxu_utilization_percent = 1;
  // Percentage of the device time that is idle.
  double device_idle_time_percent = 2;
  // Percentage of the host time that is idle.
  double host_idle_time_percent = 3;
  // Top TF Ops executed on the device.
  repeated OverviewTfOp top_device_ops = 4;
  // Remark text in the performance summary section.
  string remark_text = 5;
  // Color of the remark text.
  string remark_color = 6;
  // FLOP rate utilization relative to the roofline in percentage.
  double flop_rate_utilization_relative_to_roofline_percent = 7;
  // Memory bandwidth utilization relative to the hw limit in percentage.
  double memory_bw_utilization_relative_to_hw_limit_percent = 8;
  // Percentage of device computation that is 16-bit.
  double device_compute_16bit_percent = 9;
  // Percentage of device computation that is 32-bit.
  double device_compute_32bit_percent = 10;
  // Percentage of TF ops executed on the host.
  double host_tf_op_percent = 11;
  // Percentage of TF ops executed on the device.
  double device_tf_op_percent = 12;
  // Host trace level.
  uint32 host_trace_level = 13;
  // Percentage of TF-op execution time on the host (excluding the idle time)
  // that is in eager mode.
  double host_op_time_eager_percent = 14;
  // Percentage of TF-op execution time on the device (excluding the idle time)
  // that is in eager mode.
  double device_op_time_eager_percent = 15;
  // Percentage of TF-op execution time on the device (excluding the idle time)
  // that is for outside compilation.
  double device_op_time_outside_compilation_percent = 16;
  // Percentage of the device time that is in use.
  double device_duty_cycle_percent = 17;
}

// Overview result for a performance tip to users.
message OverviewPageTip {
  // Link to the tip.
  string link = 1;
}

// Recommendation details covering kernel-launch, all-other, precision and
// device-collectives findings. OverviewPageRecommendation.recommendation (an
// Any field) can be unpacked into this message.
message GenericRecommendation {
  // Indicates if kernel launch is a performance bottleneck. Possible values:
  // "no", "moderate", "high".
  string kernel_launch_bottleneck = 1;
  // A statement that recommends if we need to further investigate kernel-launch
  // performance.
  string kernel_launch_statement = 2;
  // Indicates if all other is a performance bottleneck. Possible values: "no",
  // "moderate", "high".
  string all_other_bottleneck = 3;
  // A statement that recommends if we need to further investigate all-other
  // performance.
  string all_other_statement = 4;
  // A statement that recommends if the user should try using lower precision.
  // Shows this statement to users only if it is not empty.
  string precision_statement = 5;
  // Indicates if device collectives are a performance bottleneck. Possible
  // values: "no", "moderate", "high".
  string device_collectives_bottleneck = 6;
  // A statement that recommends if we need to further investigate
  // device-collectives performance.
  string device_collectives_statement = 7;
}

// Overview result for the recommendation section.
message OverviewPageRecommendation {
  // Possible performance bottleneck: "host", "device", "both".
  string bottleneck = 1;
  // A statement for input that recommends the next steps for investigating the
  // bottleneck.
  string statement = 2;
  // A list of tips for tackling input bottleneck.
  repeated OverviewPageTip input_tips = 11;
  // A statement for output that recommends the next steps for investigating the
  // bottleneck.
  string output_statement = 9;
  // A statement that recommends the next steps for investigating eager-mode
  // related bottleneck (it is an html so that it can link to other tools/docs.)
  string eager_statement_html = 12;
  // A statement that recommends the next steps for investigating
  // outside-compilation related bottleneck (it is an html so that it can link
  // to other tools/docs.)
  string outside_compilation_statement_html = 13;
  // A statement that recommends the next steps for investigating tf-function
  // related bottleneck (it is an html so that it can link to other tools/docs.)
  string tf_function_statement_html = 10;
  // A list of tips for improving host performance.
  repeated OverviewPageTip host_tips = 3;
  // A list of tips for improving device performance.
  repeated OverviewPageTip device_tips = 4;
  // A list of links to related useful documents.
  repeated OverviewPageTip documentation_tips = 5;
  // The recommendation made to the user. Can be unpacked into a
  // GenericRecommendation.
  google.protobuf.Any recommendation = 6;
  // A list of tips for FAQ.
  repeated OverviewPageTip faq_tips = 7;
  // A list of tips for inference run.
  repeated OverviewPageTip inference_tips = 8;
}

// Result proto for host-independent job information.
message OverviewPageHostIndependentJobInfo {
  // The change-list number of this build.
  int64 change_list = 1;
  // The time of this build (nanoseconds since the Unix epoch).
  int64 build_time = 2;
  // The target of this build.
  string build_target = 3;
  // Profiling duration (in ms).
  uint32 profile_duration_ms = 4;
}

// Result proto for host-dependent job information.
message OverviewPageHostDependentJobInfo {
  // The ID of the host where the job was run.
  string host_id = 1;
  // The command line used to run the job.
  string command_line = 2;
  // The start time of this run (nanoseconds since the Unix epoch).
  int64 start_time = 3;
  // BNS address specified by client at time of profiling request.
  string bns_address = 4;
  // Profiling start walltime (in ns).
  uint64 profile_time_ns = 5;
}

// The run environment of a profiling session.
message OverviewPageRunEnvironment {
  // Number of hosts used.
  int32 host_count = 1;
  // Number of tasks used.
  int32 task_count = 2;
  // Distinct hostnames seen.
  map<string, bool> hostnames = 3;
  // The type of device used.
  string device_type = 4;
  // The number of device cores used.
  // In TPU case, this corresponds to the number of TPU cores
  // In GPU case, this corresponds to the number of GPUs (not the number of
  // SMs).
  int32 device_core_count = 5;
  // Host-independent information about this job.
  OverviewPageHostIndependentJobInfo host_independent_job_info = 7;
  // Host-dependent information about this job.
  repeated OverviewPageHostDependentJobInfo host_dependent_job_info = 8;
  // The number of replicas, corresponds to input parallelism.
  // If there is no model parallelism, replica_count = device_core_count
  int32 replica_count = 9;
  // The number of cores used for a single replica, e.g. model parallelism.
  // If there is no model parallelism, then num_cores_per_replica = 1
  int32 num_cores_per_replica = 10;
  // Reserved field number; it must not be assigned to a new field.
  reserved 6;
}

// Top-level overview result. Combines the run environment, the step-time
// (input pipeline) analysis, the general analysis, the recommendation, and
// profiling diagnostics.
message OverviewPage {
  // The run environment of the profiled session.
  OverviewPageRunEnvironment run_environment = 6;
  // The step-time result.
  InputPipelineAnalysisResult input_analysis = 2;
  // The other analysis result.
  OverviewPageAnalysis analysis = 3;
  // The recommendation made to the user.
  OverviewPageRecommendation recommendation = 4;
  // Error and warning messages for diagnosing profiling issues.
  Diagnostics diagnostics = 8;
  // Reserved field numbers; they must not be assigned to new fields.
  reserved 1, 5, 7;
}