xref: /aosp_15_r20/external/mesa3d/src/gallium/drivers/freedreno/freedreno_perfetto.cc (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2021 Google, Inc.
3  * SPDX-License-Identifier: MIT
4  */
5 
6 #include <perfetto.h>
7 
8 #include "util/perf/u_perfetto.h"
9 #include "util/perf/u_perfetto_renderpass.h"
10 
11 #include "freedreno_tracepoints.h"
12 
/* Custom perfetto clock id used for GPU timestamps; assigned in OnStart(). */
static uint32_t gpu_clock_id = 0;

/* CPU (boot-time) timestamp, in ns, at which the next clock sync is due. */
static uint64_t next_clock_sync_ns = 0;

/**
 * GPU timestamp (in ns) recorded at the most recent clock sync; zero until
 * the first sync has been emitted.
 *
 * The first clock sync is captured from the CPU *after* the first GPU
 * tracepoints have already happened, so this value is *later* than the
 * timestamps of the earliest GPU traces.  To avoid confusing perfetto,
 * GPU traces with timestamps before this need to be dropped.
 */
static uint64_t sync_gpu_ts = 0;
24 
/**
 * Incremental state attached to the data source.  A freshly constructed
 * instance reports was_cleared == true, which stage_end() uses as the cue
 * to (re)send the queue/stage descriptors before the next event.
 */
struct FdRenderpassIncrementalState {
   bool was_cleared{true};
};
28 
29 struct FdRenderpassTraits : public perfetto::DefaultDataSourceTraits {
30    using IncrementalStateType = FdRenderpassIncrementalState;
31 };
32 
33 class FdRenderpassDataSource : public MesaRenderpassDataSource<FdRenderpassDataSource, FdRenderpassTraits> {
34 public:
35 
OnStart(const StartArgs & args)36    void OnStart(const StartArgs &args) override
37    {
38       MesaRenderpassDataSource<FdRenderpassDataSource, FdRenderpassTraits>::OnStart(args);
39 
40       /* Note: clock_id's below 128 are reserved.. for custom clock sources,
41        * using the hash of a namespaced string is the recommended approach.
42        * See: https://perfetto.dev/docs/concepts/clock-sync
43        */
44       gpu_clock_id =
45          _mesa_hash_string("org.freedesktop.mesa.freedreno") | 0x80000000;
46    }
47 };
48 
49 PERFETTO_DECLARE_DATA_SOURCE_STATIC_MEMBERS(FdRenderpassDataSource);
50 PERFETTO_DEFINE_DATA_SOURCE_STATIC_MEMBERS(FdRenderpassDataSource);
51 
52 static void
send_descriptors(FdRenderpassDataSource::TraceContext & ctx,uint64_t ts_ns)53 send_descriptors(FdRenderpassDataSource::TraceContext &ctx, uint64_t ts_ns)
54 {
55    PERFETTO_LOG("Sending renderstage descriptors");
56 
57    auto packet = ctx.NewTracePacket();
58 
59    packet->set_timestamp(0);
60 //   packet->set_timestamp(ts_ns);
61 //   packet->set_timestamp_clock_id(gpu_clock_id);
62 
63    auto event = packet->set_gpu_render_stage_event();
64    event->set_gpu_id(0);
65 
66    auto spec = event->set_specifications();
67 
68    for (unsigned i = 0; i < ARRAY_SIZE(queues); i++) {
69       auto desc = spec->add_hw_queue();
70 
71       desc->set_name(queues[i].name);
72       desc->set_description(queues[i].desc);
73    }
74 
75    for (unsigned i = 0; i < ARRAY_SIZE(stages); i++) {
76       auto desc = spec->add_stage();
77 
78       desc->set_name(stages[i].name);
79       if (stages[i].desc)
80          desc->set_description(stages[i].desc);
81    }
82 }
83 
84 static void
stage_start(struct pipe_context * pctx,uint64_t ts_ns,enum fd_stage_id stage)85 stage_start(struct pipe_context *pctx, uint64_t ts_ns, enum fd_stage_id stage)
86 {
87    struct fd_context *ctx = fd_context(pctx);
88    struct fd_perfetto_state *p = &ctx->perfetto;
89 
90    p->start_ts[stage] = ts_ns;
91 }
92 
93 static void
stage_end(struct pipe_context * pctx,uint64_t ts_ns,enum fd_stage_id stage)94 stage_end(struct pipe_context *pctx, uint64_t ts_ns, enum fd_stage_id stage)
95 {
96    struct fd_context *ctx = fd_context(pctx);
97    struct fd_perfetto_state *p = &ctx->perfetto;
98 
99    /* If we haven't managed to calibrate the alignment between GPU and CPU
100     * timestamps yet, then skip this trace, otherwise perfetto won't know
101     * what to do with it.
102     */
103    if (!sync_gpu_ts)
104       return;
105 
106    FdRenderpassDataSource::Trace([=](FdRenderpassDataSource::TraceContext tctx) {
107       if (auto state = tctx.GetIncrementalState(); state->was_cleared) {
108          send_descriptors(tctx, p->start_ts[stage]);
109          state->was_cleared = false;
110       }
111 
112       auto packet = tctx.NewTracePacket();
113 
114       packet->set_timestamp(p->start_ts[stage]);
115       packet->set_timestamp_clock_id(gpu_clock_id);
116 
117       auto event = packet->set_gpu_render_stage_event();
118       event->set_event_id(0); // ???
119       event->set_hw_queue_id(DEFAULT_HW_QUEUE_ID);
120       event->set_duration(ts_ns - p->start_ts[stage]);
121       event->set_stage_id(stage);
122       event->set_context((uintptr_t)pctx);
123 
124       /* The "surface" meta-stage has extra info about render target: */
125       if (stage == SURFACE_STAGE_ID) {
126 
127          event->set_submission_id(p->submit_id);
128 
129          if (p->cbuf0_format) {
130             auto data = event->add_extra_data();
131 
132             data->set_name("color0 format");
133             data->set_value(util_format_short_name(p->cbuf0_format));
134          }
135 
136          if (p->zs_format) {
137             auto data = event->add_extra_data();
138 
139             data->set_name("zs format");
140             data->set_value(util_format_short_name(p->zs_format));
141          }
142 
143          {
144             auto data = event->add_extra_data();
145 
146             data->set_name("width");
147             data->set_value(std::to_string(p->width));
148          }
149 
150          {
151             auto data = event->add_extra_data();
152 
153             data->set_name("height");
154             data->set_value(std::to_string(p->height));
155          }
156 
157          {
158             auto data = event->add_extra_data();
159 
160             data->set_name("MSAA");
161             data->set_value(std::to_string(p->samples));
162          }
163 
164          {
165             auto data = event->add_extra_data();
166 
167             data->set_name("MRTs");
168             data->set_value(std::to_string(p->mrts));
169          }
170 
171          // "renderMode"
172          // "surfaceID"
173 
174          if (p->nbins) {
175             auto data = event->add_extra_data();
176 
177             data->set_name("numberOfBins");
178             data->set_value(std::to_string(p->nbins));
179          }
180 
181          if (p->binw) {
182             auto data = event->add_extra_data();
183 
184             data->set_name("binWidth");
185             data->set_value(std::to_string(p->binw));
186          }
187 
188          if (p->binh) {
189             auto data = event->add_extra_data();
190 
191             data->set_name("binHeight");
192             data->set_value(std::to_string(p->binh));
193          }
194       } else if (stage == COMPUTE_STAGE_ID) {
195          {
196             auto data = event->add_extra_data();
197 
198             data->set_name("indirect");
199             data->set_value(std::to_string(p->indirect));
200          }
201 
202          {
203             auto data = event->add_extra_data();
204 
205             data->set_name("work_dim");
206             data->set_value(std::to_string(p->work_dim));
207          }
208 
209          {
210             auto data = event->add_extra_data();
211 
212             data->set_name("local_size_x");
213             data->set_value(std::to_string(p->local_size_x));
214          }
215 
216          {
217             auto data = event->add_extra_data();
218 
219             data->set_name("local_size_y");
220             data->set_value(std::to_string(p->local_size_y));
221          }
222 
223          {
224             auto data = event->add_extra_data();
225 
226             data->set_name("local_size_z");
227             data->set_value(std::to_string(p->local_size_z));
228          }
229 
230          {
231             auto data = event->add_extra_data();
232 
233             data->set_name("num_groups_x");
234             data->set_value(std::to_string(p->num_groups_x));
235          }
236 
237          {
238             auto data = event->add_extra_data();
239 
240             data->set_name("num_groups_y");
241             data->set_value(std::to_string(p->num_groups_y));
242          }
243 
244          {
245             auto data = event->add_extra_data();
246 
247             data->set_name("num_groups_z");
248             data->set_value(std::to_string(p->num_groups_z));
249          }
250 
251          {
252             auto data = event->add_extra_data();
253 
254             data->set_name("shader_id");
255             data->set_value(std::to_string(p->shader_id));
256          }
257       }
258    });
259 }
260 
261 #ifdef __cplusplus
262 extern "C" {
263 #endif
264 
265 void
fd_perfetto_init(void)266 fd_perfetto_init(void)
267 {
268    util_perfetto_init();
269 
270    perfetto::DataSourceDescriptor dsd;
271    dsd.set_name("gpu.renderstages.msm");
272    FdRenderpassDataSource::Register(dsd);
273 }
274 
275 static void
sync_timestamp(struct fd_context * ctx)276 sync_timestamp(struct fd_context *ctx)
277 {
278    uint64_t cpu_ts = perfetto::base::GetBootTimeNs().count();
279    uint64_t gpu_ts;
280 
281    if (!ctx->ts_to_ns)
282       return;
283 
284    if (cpu_ts < next_clock_sync_ns)
285       return;
286 
287    if (fd_pipe_get_param(ctx->pipe, FD_TIMESTAMP, &gpu_ts)) {
288       PERFETTO_ELOG("Could not sync CPU and GPU clocks");
289       return;
290    }
291 
292    /* get cpu timestamp again because FD_TIMESTAMP can take >100us */
293    cpu_ts = perfetto::base::GetBootTimeNs().count();
294 
295    /* convert GPU ts into ns: */
296    gpu_ts = ctx->ts_to_ns(gpu_ts);
297 
298    FdRenderpassDataSource::Trace([=](auto tctx) {
299       MesaRenderpassDataSource<FdRenderpassDataSource,
300                                FdRenderpassTraits>::EmitClockSync(tctx, cpu_ts,
301                                                                   gpu_ts, gpu_clock_id);
302    });
303 
304    sync_gpu_ts = gpu_ts;
305    next_clock_sync_ns = cpu_ts + 30000000;
306 }
307 
308 static void
emit_submit_id(struct fd_context * ctx)309 emit_submit_id(struct fd_context *ctx)
310 {
311    FdRenderpassDataSource::Trace([=](FdRenderpassDataSource::TraceContext tctx) {
312       auto packet = tctx.NewTracePacket();
313 
314       packet->set_timestamp(perfetto::base::GetBootTimeNs().count());
315 
316       auto event = packet->set_vulkan_api_event();
317       auto submit = event->set_vk_queue_submit();
318 
319       submit->set_submission_id(ctx->submit_count);
320    });
321 }
322 
323 void
fd_perfetto_submit(struct fd_context * ctx)324 fd_perfetto_submit(struct fd_context *ctx)
325 {
326    /* sync_timestamp isn't free */
327    if (!u_trace_perfetto_active(&ctx->trace_context))
328       return;
329 
330    sync_timestamp(ctx);
331    emit_submit_id(ctx);
332 }
333 
334 /*
335  * Trace callbacks, called from u_trace once the timestamps from GPU have been
336  * collected.
337  */
338 
339 void
fd_start_render_pass(struct pipe_context * pctx,uint64_t ts_ns,uint16_t tp_idx,const void * flush_data,const struct trace_start_render_pass * payload,const void * indirect_data)340 fd_start_render_pass(struct pipe_context *pctx, uint64_t ts_ns,
341                      uint16_t tp_idx, const void *flush_data,
342                      const struct trace_start_render_pass *payload,
343                      const void *indirect_data)
344 {
345    stage_start(pctx, ts_ns, SURFACE_STAGE_ID);
346 
347    struct fd_perfetto_state *p = &fd_context(pctx)->perfetto;
348 
349    p->submit_id = payload->submit_id;
350    p->cbuf0_format = payload->cbuf0_format;
351    p->zs_format = payload->zs_format;
352    p->width = payload->width;
353    p->height = payload->height;
354    p->mrts = payload->mrts;
355    p->samples = payload->samples;
356    p->nbins = payload->nbins;
357    p->binw = payload->binw;
358    p->binh = payload->binh;
359 }
360 
361 void
fd_end_render_pass(struct pipe_context * pctx,uint64_t ts_ns,uint16_t tp_idx,const void * flush_data,const struct trace_end_render_pass * payload,const void * indirect_data)362 fd_end_render_pass(struct pipe_context *pctx, uint64_t ts_ns,
363                    uint16_t tp_idx, const void *flush_data,
364                    const struct trace_end_render_pass *payload,
365                    const void *indirect_data)
366 {
367    stage_end(pctx, ts_ns, SURFACE_STAGE_ID);
368 }
369 
370 void
fd_start_binning_ib(struct pipe_context * pctx,uint64_t ts_ns,uint16_t tp_idx,const void * flush_data,const struct trace_start_binning_ib * payload,const void * indirect_data)371 fd_start_binning_ib(struct pipe_context *pctx, uint64_t ts_ns,
372                     uint16_t tp_idx, const void *flush_data,
373                     const struct trace_start_binning_ib *payload,
374                     const void *indirect_data)
375 {
376    stage_start(pctx, ts_ns, BINNING_STAGE_ID);
377 }
378 
379 void
fd_end_binning_ib(struct pipe_context * pctx,uint64_t ts_ns,uint16_t tp_idx,const void * flush_data,const struct trace_end_binning_ib * payload,const void * indirect_data)380 fd_end_binning_ib(struct pipe_context *pctx, uint64_t ts_ns,
381                   uint16_t tp_idx, const void *flush_data,
382                   const struct trace_end_binning_ib *payload,
383                   const void *indirect_data)
384 {
385    stage_end(pctx, ts_ns, BINNING_STAGE_ID);
386 }
387 
388 void
fd_start_draw_ib(struct pipe_context * pctx,uint64_t ts_ns,uint16_t tp_idx,const void * flush_data,const struct trace_start_draw_ib * payload,const void * indirect_data)389 fd_start_draw_ib(struct pipe_context *pctx, uint64_t ts_ns,
390                  uint16_t tp_idx, const void *flush_data,
391                  const struct trace_start_draw_ib *payload,
392                  const void *indirect_data)
393 {
394    stage_start(
395       pctx, ts_ns,
396       fd_context(pctx)->perfetto.nbins ? GMEM_STAGE_ID : BYPASS_STAGE_ID);
397 }
398 
399 void
fd_end_draw_ib(struct pipe_context * pctx,uint64_t ts_ns,uint16_t tp_idx,const void * flush_data,const struct trace_end_draw_ib * payload,const void * indirect_data)400 fd_end_draw_ib(struct pipe_context *pctx, uint64_t ts_ns,
401                uint16_t tp_idx, const void *flush_data,
402                const struct trace_end_draw_ib *payload,
403                const void *indirect_data)
404 {
405    stage_end(
406       pctx, ts_ns,
407       fd_context(pctx)->perfetto.nbins ? GMEM_STAGE_ID : BYPASS_STAGE_ID);
408 }
409 
410 void
fd_start_blit(struct pipe_context * pctx,uint64_t ts_ns,uint16_t tp_idx,const void * flush_data,const struct trace_start_blit * payload,const void * indirect_data)411 fd_start_blit(struct pipe_context *pctx, uint64_t ts_ns,
412               uint16_t tp_idx, const void *flush_data,
413               const struct trace_start_blit *payload,
414               const void *indirect_data)
415 {
416    stage_start(pctx, ts_ns, BLIT_STAGE_ID);
417 }
418 
419 void
fd_end_blit(struct pipe_context * pctx,uint64_t ts_ns,uint16_t tp_idx,const void * flush_data,const struct trace_end_blit * payload,const void * indirect_data)420 fd_end_blit(struct pipe_context *pctx, uint64_t ts_ns,
421             uint16_t tp_idx, const void *flush_data,
422             const struct trace_end_blit *payload,
423             const void *indirect_data)
424 {
425    stage_end(pctx, ts_ns, BLIT_STAGE_ID);
426 }
427 
428 void
fd_start_compute(struct pipe_context * pctx,uint64_t ts_ns,uint16_t tp_idx,const void * flush_data,const struct trace_start_compute * payload,const void * indirect_data)429 fd_start_compute(struct pipe_context *pctx, uint64_t ts_ns,
430                  uint16_t tp_idx, const void *flush_data,
431                  const struct trace_start_compute *payload,
432                  const void *indirect_data)
433 {
434    stage_start(pctx, ts_ns, COMPUTE_STAGE_ID);
435 
436    struct fd_perfetto_state *p = &fd_context(pctx)->perfetto;
437 
438    p->indirect = payload->indirect;
439    p->work_dim = payload->work_dim;
440    p->local_size_x = payload->local_size_x;
441    p->local_size_y = payload->local_size_y;
442    p->local_size_z = payload->local_size_z;
443    p->num_groups_x = payload->num_groups_x;
444    p->num_groups_y = payload->num_groups_y;
445    p->num_groups_z = payload->num_groups_z;
446    p->shader_id    = payload->shader_id;
447 }
448 
449 void
fd_end_compute(struct pipe_context * pctx,uint64_t ts_ns,uint16_t tp_idx,const void * flush_data,const struct trace_end_compute * payload,const void * indirect_data)450 fd_end_compute(struct pipe_context *pctx, uint64_t ts_ns,
451                uint16_t tp_idx, const void *flush_data,
452                const struct trace_end_compute *payload,
453                const void *indirect_data)
454 {
455    stage_end(pctx, ts_ns, COMPUTE_STAGE_ID);
456 }
457 
458 void
fd_start_clears(struct pipe_context * pctx,uint64_t ts_ns,uint16_t tp_idx,const void * flush_data,const struct trace_start_clears * payload,const void * indirect_data)459 fd_start_clears(struct pipe_context *pctx, uint64_t ts_ns,
460                 uint16_t tp_idx, const void *flush_data,
461                 const struct trace_start_clears *payload,
462                 const void *indirect_data)
463 {
464    stage_start(pctx, ts_ns, CLEAR_STAGE_ID);
465 }
466 
467 void
fd_end_clears(struct pipe_context * pctx,uint64_t ts_ns,uint16_t tp_idx,const void * flush_data,const struct trace_end_clears * payload,const void * indirect_data)468 fd_end_clears(struct pipe_context *pctx, uint64_t ts_ns,
469               uint16_t tp_idx, const void *flush_data,
470               const struct trace_end_clears *payload,
471               const void *indirect_data)
472 {
473    stage_end(pctx, ts_ns, CLEAR_STAGE_ID);
474 }
475 
476 void
fd_start_tile_loads(struct pipe_context * pctx,uint64_t ts_ns,uint16_t tp_idx,const void * flush_data,const struct trace_start_tile_loads * payload,const void * indirect_data)477 fd_start_tile_loads(struct pipe_context *pctx, uint64_t ts_ns,
478                     uint16_t tp_idx, const void *flush_data,
479                     const struct trace_start_tile_loads *payload,
480                     const void *indirect_data)
481 {
482    stage_start(pctx, ts_ns, TILE_LOAD_STAGE_ID);
483 }
484 
485 void
fd_end_tile_loads(struct pipe_context * pctx,uint64_t ts_ns,uint16_t tp_idx,const void * flush_data,const struct trace_end_tile_loads * payload,const void * indirect_data)486 fd_end_tile_loads(struct pipe_context *pctx, uint64_t ts_ns,
487                   uint16_t tp_idx, const void *flush_data,
488                   const struct trace_end_tile_loads *payload,
489                   const void *indirect_data)
490 {
491    stage_end(pctx, ts_ns, TILE_LOAD_STAGE_ID);
492 }
493 
494 void
fd_start_tile_stores(struct pipe_context * pctx,uint64_t ts_ns,uint16_t tp_idx,const void * flush_data,const struct trace_start_tile_stores * payload,const void * indirect_data)495 fd_start_tile_stores(struct pipe_context *pctx, uint64_t ts_ns,
496                      uint16_t tp_idx, const void *flush_data,
497                      const struct trace_start_tile_stores *payload,
498                      const void *indirect_data)
499 {
500    stage_start(pctx, ts_ns, TILE_STORE_STAGE_ID);
501 }
502 
503 void
fd_end_tile_stores(struct pipe_context * pctx,uint64_t ts_ns,uint16_t tp_idx,const void * flush_data,const struct trace_end_tile_stores * payload,const void * indirect_data)504 fd_end_tile_stores(struct pipe_context *pctx, uint64_t ts_ns,
505                    uint16_t tp_idx, const void *flush_data,
506                    const struct trace_end_tile_stores *payload,
507                    const void *indirect_data)
508 {
509    stage_end(pctx, ts_ns, TILE_STORE_STAGE_ID);
510 }
511 
512 void
fd_start_state_restore(struct pipe_context * pctx,uint64_t ts_ns,uint16_t tp_idx,const void * flush_data,const struct trace_start_state_restore * payload,const void * indirect_data)513 fd_start_state_restore(struct pipe_context *pctx, uint64_t ts_ns,
514                        uint16_t tp_idx, const void *flush_data,
515                        const struct trace_start_state_restore *payload,
516                        const void *indirect_data)
517 {
518    stage_start(pctx, ts_ns, STATE_RESTORE_STAGE_ID);
519 }
520 
521 void
fd_end_state_restore(struct pipe_context * pctx,uint64_t ts_ns,uint16_t tp_idx,const void * flush_data,const struct trace_end_state_restore * payload,const void * indirect_data)522 fd_end_state_restore(struct pipe_context *pctx, uint64_t ts_ns,
523                      uint16_t tp_idx, const void *flush_data,
524                      const struct trace_end_state_restore *payload,
525                      const void *indirect_data)
526 {
527    stage_end(pctx, ts_ns, STATE_RESTORE_STAGE_ID);
528 }
529 
530 void
fd_start_vsc_overflow_test(struct pipe_context * pctx,uint64_t ts_ns,uint16_t tp_idx,const void * flush_data,const struct trace_start_vsc_overflow_test * payload,const void * indirect_data)531 fd_start_vsc_overflow_test(struct pipe_context *pctx, uint64_t ts_ns,
532                            uint16_t tp_idx, const void *flush_data,
533                            const struct trace_start_vsc_overflow_test *payload,
534                            const void *indirect_data)
535 {
536    stage_start(pctx, ts_ns, VSC_OVERFLOW_STAGE_ID);
537 }
538 
539 void
fd_end_vsc_overflow_test(struct pipe_context * pctx,uint64_t ts_ns,uint16_t tp_idx,const void * flush_data,const struct trace_end_vsc_overflow_test * payload,const void * indirect_data)540 fd_end_vsc_overflow_test(struct pipe_context *pctx, uint64_t ts_ns,
541                          uint16_t tp_idx, const void *flush_data,
542                          const struct trace_end_vsc_overflow_test *payload,
543                          const void *indirect_data)
544 {
545    stage_end(pctx, ts_ns, VSC_OVERFLOW_STAGE_ID);
546 }
547 
548 void
fd_start_prologue(struct pipe_context * pctx,uint64_t ts_ns,uint16_t tp_idx,const void * flush_data,const struct trace_start_prologue * payload,const void * indirect_data)549 fd_start_prologue(struct pipe_context *pctx, uint64_t ts_ns,
550                   uint16_t tp_idx, const void *flush_data,
551                   const struct trace_start_prologue *payload,
552                   const void *indirect_data)
553 {
554    stage_start(pctx, ts_ns, PROLOGUE_STAGE_ID);
555 }
556 
557 void
fd_end_prologue(struct pipe_context * pctx,uint64_t ts_ns,uint16_t tp_idx,const void * flush_data,const struct trace_end_prologue * payload,const void * indirect_data)558 fd_end_prologue(struct pipe_context *pctx, uint64_t ts_ns,
559                 uint16_t tp_idx, const void *flush_data,
560                 const struct trace_end_prologue *payload,
561                 const void *indirect_data)
562 {
563    stage_end(pctx, ts_ns, PROLOGUE_STAGE_ID);
564 }
565 
566 #ifdef __cplusplus
567 }
568 #endif
569