// Origin (xref):
// /aosp_15_r20/external/tensorflow/tensorflow/core/profiler/protobuf/overview_page.proto
// (revision b6fb3261f9314811a0f4371741dbb8839866f948)
syntax = "proto3";

package tensorflow.profiler;

import "google/protobuf/any.proto";
import "tensorflow/core/profiler/protobuf/diagnostics.proto";
import "tensorflow/core/profiler/protobuf/input_pipeline.proto";

// Overview result for a single TensorFlow Op.
message OverviewTfOp {
  // Name of the Op.
  string name = 1;
  // Category of the Op.
  string category = 2;
  // Time this Op takes by itself, as a fraction of the total execution time
  // on the device or host.
  double self_time_fraction = 3;
  // Cumulative time up to this Op, as a fraction of the total execution time.
  double cumulative_time_fraction = 4;
  // Rate achieved by this Op, in GFLOPs/sec.
  double flop_rate = 5;
  // Whether the Op is eligible to use TensorCores.
  bool is_op_tensorcore_eligible = 6;
  // Whether at least one of the kernels launched in this Op is using
  // TensorCore.
  bool is_op_using_tensorcore = 7;
}

// Overview result for general analysis.
message OverviewPageAnalysis {
  // MXU utilization, as a percentage.
  double mxu_utilization_percent = 1;
  // Percentage of the device time that is idle.
  double device_idle_time_percent = 2;
  // Percentage of the host time that is idle.
  double host_idle_time_percent = 3;
  // Top TF Ops executed on the device.
  repeated OverviewTfOp top_device_ops = 4;
  // Remark text shown in the performance summary section.
  string remark_text = 5;
  // Color of the remark text.
  string remark_color = 6;
  // FLOP rate utilization relative to the roofline, as a percentage.
  double flop_rate_utilization_relative_to_roofline_percent = 7;
  // Memory bandwidth utilization relative to the hardware limit, as a
  // percentage.
  double memory_bw_utilization_relative_to_hw_limit_percent = 8;
  // Percentage of device computation that is 16-bit.
  double device_compute_16bit_percent = 9;
  // Percentage of device computation that is 32-bit.
  double device_compute_32bit_percent = 10;
  // Percentage of TF ops executed on the host.
  double host_tf_op_percent = 11;
  // Percentage of TF ops executed on the device.
  double device_tf_op_percent = 12;
  // Host trace level.
  uint32 host_trace_level = 13;
  // Percentage of TF-op execution time on the host (excluding the idle time)
  // that is spent in eager mode.
  double host_op_time_eager_percent = 14;
  // Percentage of TF-op execution time on the device (excluding the idle time)
  // that is spent in eager mode.
  double device_op_time_eager_percent = 15;
  // Percentage of TF-op execution time on the device (excluding the idle time)
  // that is spent on outside compilation.
  double device_op_time_outside_compilation_percent = 16;
  // Percentage of the device time that is in use.
  double device_duty_cycle_percent = 17;
}

// Overview result for a single performance tip shown to users.
message OverviewPageTip {
  // Link to the tip.
  string link = 1;
}

// Recommendation details covering kernel-launch, "all-other", precision and
// device-collectives findings. OverviewPageRecommendation.recommendation (a
// google.protobuf.Any) can be unpacked into this message.
message GenericRecommendation {
  // Indicates whether kernel launch is a performance bottleneck. Possible
  // values: "no", "moderate", "high".
  string kernel_launch_bottleneck = 1;
  // A statement that recommends whether kernel-launch performance needs
  // further investigation.
  string kernel_launch_statement = 2;
  // Indicates whether the "all-other" category is a performance bottleneck.
  // Possible values: "no", "moderate", "high".
  string all_other_bottleneck = 3;
  // A statement that recommends whether all-other performance needs further
  // investigation.
  string all_other_statement = 4;
  // A statement that recommends whether the user should try using lower
  // precision. Shown to users only if it is not empty.
  string precision_statement = 5;
  // Indicates whether device collectives are a performance bottleneck.
  // Possible values: "no", "moderate", "high".
  string device_collectives_bottleneck = 6;
  // A statement that recommends whether device-collectives performance needs
  // further investigation.
  string device_collectives_statement = 7;
}

// Overview result for the recommendation section.
//
// Field declarations are not in field-number order; the numbers are the wire
// contract and must not be changed to match the declaration order.
message OverviewPageRecommendation {
  // Possible performance bottleneck: "host", "device", "both".
  string bottleneck = 1;
  // A statement for input that recommends the next steps for investigating the
  // bottleneck.
  string statement = 2;
  // A list of tips for tackling an input bottleneck.
  repeated OverviewPageTip input_tips = 11;
  // A statement for output that recommends the next steps for investigating the
  // bottleneck.
  string output_statement = 9;
  // A statement that recommends the next steps for investigating an eager-mode
  // related bottleneck (HTML, so that it can link to other tools/docs).
  string eager_statement_html = 12;
  // A statement that recommends the next steps for investigating an
  // outside-compilation related bottleneck (HTML, so that it can link to other
  // tools/docs).
  string outside_compilation_statement_html = 13;
  // A statement that recommends the next steps for investigating a tf-function
  // related bottleneck (HTML, so that it can link to other tools/docs).
  string tf_function_statement_html = 10;
  // A list of tips for improving host performance.
  repeated OverviewPageTip host_tips = 3;
  // A list of tips for improving device performance.
  repeated OverviewPageTip device_tips = 4;
  // A list of links to related useful documents.
  repeated OverviewPageTip documentation_tips = 5;
  // The recommendation made to the user. Can be unpacked into a
  // GenericRecommendation.
  google.protobuf.Any recommendation = 6;
  // A list of tips for FAQ.
  repeated OverviewPageTip faq_tips = 7;
  // A list of tips for an inference run.
  repeated OverviewPageTip inference_tips = 8;
}

// Result proto for host-independent job information.
message OverviewPageHostIndependentJobInfo {
  // The change-list number of this build.
  int64 change_list = 1;
  // The time of this build (nanoseconds since the Unix epoch).
  int64 build_time = 2;
  // The target of this build.
  string build_target = 3;
  // Profiling duration, in milliseconds.
  uint32 profile_duration_ms = 4;
}

// Result proto for host-dependent job information.
message OverviewPageHostDependentJobInfo {
  // The ID of the host on which the job was run.
  string host_id = 1;
  // The command line used to run the job.
  string command_line = 2;
  // The start time of this run (nanoseconds since the Unix epoch).
  int64 start_time = 3;
  // BNS address specified by the client at the time of the profiling request.
  string bns_address = 4;
  // Profiling start walltime, in nanoseconds.
  uint64 profile_time_ns = 5;
}

// The run environment of a profiling session.
message OverviewPageRunEnvironment {
  // Number of hosts used.
  int32 host_count = 1;
  // Number of tasks used.
  int32 task_count = 2;
  // Distinct hostnames seen (the map keys).
  // NOTE(review): the meaning of the bool value is not documented here;
  // presumably the map is used as a set -- confirm against the producers.
  map<string, bool> hostnames = 3;
  // The type of device used.
  string device_type = 4;
  // The number of device cores used.
  //   In the TPU case, this corresponds to the number of TPU cores.
  //   In the GPU case, this corresponds to the number of GPUs (not the number
  //   of SMs).
  int32 device_core_count = 5;
  // Host-independent information about this job.
  OverviewPageHostIndependentJobInfo host_independent_job_info = 7;
  // Host-dependent information about this job.
  repeated OverviewPageHostDependentJobInfo host_dependent_job_info = 8;
  // The number of replicas; corresponds to input parallelism.
  // If there is no model parallelism, replica_count = device_core_count.
  int32 replica_count = 9;
  // The number of cores used for a single replica, e.g. model parallelism.
  // If there is no model parallelism, then num_cores_per_replica = 1.
  int32 num_cores_per_replica = 10;
  // Field number 6 is reserved and must not be assigned to a new field.
  reserved 6;
}

// Result for the overview page: bundles the run environment, the
// input-pipeline (step-time) analysis, the general analysis, the
// recommendation and the profiling diagnostics.
message OverviewPage {
  // The run environment of the profiled session.
  OverviewPageRunEnvironment run_environment = 6;
  // The step-time result.
  InputPipelineAnalysisResult input_analysis = 2;
  // The other analysis result.
  OverviewPageAnalysis analysis = 3;
  // The recommendation made to the user.
  OverviewPageRecommendation recommendation = 4;
  // Error and warning messages for diagnosing profiling issues.
  Diagnostics diagnostics = 8;
  // Field numbers 1, 5 and 7 are reserved and must not be assigned to new
  // fields.
  reserved 1, 5, 7;
}