syntax = "proto3";

package tensorflow.profiler;

import "google/protobuf/any.proto";
import "tensorflow/core/profiler/protobuf/diagnostics.proto";

// Generic hardware bottleneck.
message BottleneckAnalysis {
  // Percentage of step time that is spent on input.
  double input_percent = 7;
  // Percentage of step time that is spent on output.
  double output_percent = 8;
  // Percentage of step time that is idle for non-I/O-related reasons.
  double idle_percent = 9;
  // Percentage of step time that is spent on compute.
  double compute_percent = 10;
  // Indicates if input is a bottleneck. Possible values: "host", "device",
  // "both", or "unknown"
  string input_classification = 1;
  // A human-readable description of the input bottleneck.
  string input_statement = 2;
  // Indicates if kernel launching is a bottleneck. Possible values: "no",
  // "moderate", "high".
  string kernel_launch_classification = 3;
  // A human-readable description of the kernel launching overhead.
  string kernel_launch_statement = 4;
  // Indicates if the "all other" overhead is a bottleneck. Possible values:
  // "no", "moderate", "high".
  string all_other_classification = 5;
  // A human-readable description of the "all other" overhead.
  string all_other_statement = 6;
  // Indicates if device collective communication is a bottleneck. Possible
  // values: "no", "moderate", "high".
  string device_collectives_classification = 11;
  // A human-readable description of the device collective communication
  // overhead.
  string device_collectives_statement = 12;
}

// Summary statistics of a set of values. Used for both step duration and Op
// duration; the unit is that of the field holding the summary (for example,
// the *_ms_summary fields of GenericStepTimeBreakdown are in ms).
message StepSummary {
  // Average of the summarized values.
  double average = 1;
  // Standard deviation of the summarized values.
  double standard_deviation = 2;
  // Smallest of the summarized values.
  double minimum = 3;
  // Largest of the summarized values.
  double maximum = 4;
}

// Per-step details on generic hardware.
message PerGenericStepDetails {
  // The step number of a step.
  int32 step_number = 1;
  // The step name.
  string step_name = 14;
  // The step time (in ms).
  double step_time_ms = 2;

  // Breakdown of the step time in different event categories.

  // The unknown time (in ms).
  double unknown_time_ms = 3;
  // The time (in ms) in which the host is waiting for input data to be ready.
  double host_wait_input_ms = 11;
  // The time (in ms) in which the host is sending input data to the device.
  // Total input time = host_wait_input_ms + host_to_device_ms.
  double host_to_device_ms = 12;
  // The output time (in ms).
  double output_ms = 5;
  // The device-compute time (in ms).
  double device_compute_ms = 6;
  // The device-to-device communication time (in ms).
  double device_to_device_ms = 7;
  // The device time spent on collective communications (in ms).
  double device_collectives_ms = 13;
  // The host-compute time (in ms).
  double host_compute_ms = 8;
  // The host-prepare time (in ms).
  double host_prepare_ms = 9;
  // The time spent on compiling (in ms).
  double host_compile_ms = 10;
  // Field number 4 is reserved and must not be assigned to a new field.
  reserved 4;
}

// Breakdown of the input processing time into categories. All values are in
// microseconds.
message InputTimeBreakdown {
  // Time spent on demanded file read in microseconds.
  double demanded_file_read_us = 1;
  // Time spent on advanced file read in microseconds.
  double advanced_file_read_us = 2;
  // Time spent on data preprocessing in microseconds.
  double preprocessing_us = 3;
  // The infeed enqueue time in microseconds.
  double enqueue_us = 4;
  // This entry is for the situation where we can't further
  // break down the non-enqueue input time (because the input pipeline
  // is not instrumented).
  double unclassified_non_enqueue_us = 5;
}

// Details of one input Op, accumulated over all of its occurrences.
message InputOpDetails {
  // The Op's name.
  string op_name = 1;
  // The number of occurrences.
  uint64 count = 2;
  // Time (accumulated over all occurrences) in milliseconds.
  double time_in_ms = 3;
  // Time (accumulated over all occurrences) in
  // percentage of the total input processing time.
  double time_in_percent = 4;
  // Self time (accumulated over all occurrences) in milliseconds.
  double self_time_in_ms = 5;
  // Self time (accumulated over all occurrences) in
  // percentage of the total input processing time.
  double self_time_in_percent = 6;
  // Possible categories: "Enqueue", "Advanced file read",
  // "Demanded file read", "Preprocessing", "Unknown".
  string category = 7;
}

// Recommendation for next steps to users, produced by the input-pipeline
// analysis.
message InputPipelineAnalysisRecommendation {
  // A list of detailed recommendations.
  repeated string details = 1;
  // An analysis of different types of bottlenecks. Can be unpacked into a
  // BottleneckAnalysis.
  google.protobuf.Any bottleneck_analysis = 2;
  // A suggested step to take next.
  string summary_next_step = 3;
}

// Breakdown of the step time on generic hardware. Each field summarizes one
// category of time (in ms) as a part of the step.
message GenericStepTimeBreakdown {
  // Summary of all unknown time as a part of step in ms.
  StepSummary unknown_time_ms_summary = 1;
  // Summary of all host-wait-input time as a part of step in ms.
  StepSummary host_wait_input_ms_summary = 9;
  // Summary of all host-to-device time as a part of step in ms.
  StepSummary host_to_device_ms_summary = 10;
  // Summary of all input time as a part of step in ms.
  StepSummary input_ms_summary = 11;
  // Summary of all output time as a part of step in ms.
  StepSummary output_ms_summary = 3;
  // Summary of all device-compute time as a part of step in ms.
  StepSummary device_compute_ms_summary = 4;
  // Summary of all device-to-device time as a part of step in ms.
  StepSummary device_to_device_ms_summary = 5;
  // Summary of all device-collectives time as a part of step in ms.
  StepSummary device_collectives_ms_summary = 12;
  // Summary of all host-compute time as a part of step in ms.
  StepSummary host_compute_ms_summary = 6;
  // Summary of all host-prepare time as a part of step in ms.
  StepSummary host_prepare_ms_summary = 7;
  // Summary of all compilation time as a part of step in ms.
  StepSummary host_compile_ms_summary = 8;
  // Field number 2 is reserved and must not be assigned to a new field.
  reserved 2;
}

// Result of the input-pipeline analysis.
message InputPipelineAnalysisResult {
  // Hardware type.
  string hardware_type = 9;
  // Summary of all step duration across all cores.
  StepSummary step_time_summary = 2;
  // Summary of all input-related stall as percentage of step duration.
  StepSummary input_percent_summary = 3;
  // Percentage of step time that is waiting for input.
  double input_percent = 11;
  // Percentage of step time that is doing output.
  double output_percent = 13;
  // Percentage of step time that is idle for non-I/O-related reasons.
  double idle_percent = 14;
  // Percentage of step time that is doing compute.
  double compute_percent = 15;
  // Details of each step. Can be unpacked into a PerGenericStepDetails.
  repeated google.protobuf.Any step_details = 4;
  // The breakdown of the input processing time.
  InputTimeBreakdown input_time_breakdown = 5;
  // Details of each input Op executed.
  repeated InputOpDetails input_op_details = 6;
  // Recommendation for next steps to users.
  InputPipelineAnalysisRecommendation recommendation = 7;
  // Breakdown of the step time. Can be unpacked into a
  // GenericStepTimeBreakdown.
  google.protobuf.Any step_time_breakdown = 8;
  // Error and warning messages for diagnosing profiling issues.
  Diagnostics diagnostics = 12;
  // Field numbers 1 and 10 are reserved and must not be assigned to new
  // fields.
  reserved 1, 10;
}