1 /*
2 * Copyright © 2021 Google, Inc.
3 * SPDX-License-Identifier: MIT
4 */
5
6 #include <perfetto.h>
7
8 #include "util/perf/u_perfetto.h"
9 #include "util/perf/u_perfetto_renderpass.h"
10
11 #include "freedreno_tracepoints.h"
12
/* Perfetto clock-id used to timestamp GPU trace packets; derived from a
 * namespaced string hash in FdRenderpassDataSource::OnStart().
 */
static uint32_t gpu_clock_id;
static uint64_t next_clock_sync_ns; /* cpu time of next clk sync */

/**
 * The timestamp at the point where we first emitted the clock_sync;
 * this will be a *later* timestamp than the first GPU traces (since
 * we capture the first clock_sync from the CPU *after* the first GPU
 * tracepoints happen).  To avoid confusing perfetto we need to drop
 * the GPU traces with timestamps before this.
 */
static uint64_t sync_gpu_ts;
24
/* Incremental state for our perfetto data source.  Perfetto periodically
 * clears it; `was_cleared` tells us we must (re)send the renderstage
 * descriptors before the next event packet.
 */
struct FdRenderpassIncrementalState {
   bool was_cleared{true};
};
28
/* Hook our incremental state type into the perfetto data-source machinery: */
struct FdRenderpassTraits : public perfetto::DefaultDataSourceTraits {
   using IncrementalStateType = FdRenderpassIncrementalState;
};
32
33 class FdRenderpassDataSource : public MesaRenderpassDataSource<FdRenderpassDataSource, FdRenderpassTraits> {
34 public:
35
OnStart(const StartArgs & args)36 void OnStart(const StartArgs &args) override
37 {
38 MesaRenderpassDataSource<FdRenderpassDataSource, FdRenderpassTraits>::OnStart(args);
39
40 /* Note: clock_id's below 128 are reserved.. for custom clock sources,
41 * using the hash of a namespaced string is the recommended approach.
42 * See: https://perfetto.dev/docs/concepts/clock-sync
43 */
44 gpu_clock_id =
45 _mesa_hash_string("org.freedesktop.mesa.freedreno") | 0x80000000;
46 }
47 };
48
/* Instantiate the static members perfetto requires for our data source: */
PERFETTO_DECLARE_DATA_SOURCE_STATIC_MEMBERS(FdRenderpassDataSource);
PERFETTO_DEFINE_DATA_SOURCE_STATIC_MEMBERS(FdRenderpassDataSource);
51
/**
 * Emit the GpuRenderStageEvent "specifications" packet describing the hw
 * queues and render stages referenced by subsequent renderstage events.
 * Called from stage_end() whenever perfetto has cleared our incremental
 * state (i.e. at least once per tracing session).
 */
static void
send_descriptors(FdRenderpassDataSource::TraceContext &ctx, uint64_t ts_ns)
{
   PERFETTO_LOG("Sending renderstage descriptors");

   auto packet = ctx.NewTracePacket();

   /* NOTE(review): timestamp is left at zero and ts_ns is unused; the
    * commented-out lines suggest stamping with the GPU clock was tried
    * and backed out — confirm intent before re-enabling.
    */
   packet->set_timestamp(0);
   // packet->set_timestamp(ts_ns);
   // packet->set_timestamp_clock_id(gpu_clock_id);

   auto event = packet->set_gpu_render_stage_event();
   event->set_gpu_id(0);

   auto spec = event->set_specifications();

   /* Describe each hw queue: */
   for (unsigned i = 0; i < ARRAY_SIZE(queues); i++) {
      auto desc = spec->add_hw_queue();

      desc->set_name(queues[i].name);
      desc->set_description(queues[i].desc);
   }

   /* Describe each render stage: */
   for (unsigned i = 0; i < ARRAY_SIZE(stages); i++) {
      auto desc = spec->add_stage();

      desc->set_name(stages[i].name);
      if (stages[i].desc)
         desc->set_description(stages[i].desc);
   }
}
83
84 static void
stage_start(struct pipe_context * pctx,uint64_t ts_ns,enum fd_stage_id stage)85 stage_start(struct pipe_context *pctx, uint64_t ts_ns, enum fd_stage_id stage)
86 {
87 struct fd_context *ctx = fd_context(pctx);
88 struct fd_perfetto_state *p = &ctx->perfetto;
89
90 p->start_ts[stage] = ts_ns;
91 }
92
93 static void
stage_end(struct pipe_context * pctx,uint64_t ts_ns,enum fd_stage_id stage)94 stage_end(struct pipe_context *pctx, uint64_t ts_ns, enum fd_stage_id stage)
95 {
96 struct fd_context *ctx = fd_context(pctx);
97 struct fd_perfetto_state *p = &ctx->perfetto;
98
99 /* If we haven't managed to calibrate the alignment between GPU and CPU
100 * timestamps yet, then skip this trace, otherwise perfetto won't know
101 * what to do with it.
102 */
103 if (!sync_gpu_ts)
104 return;
105
106 FdRenderpassDataSource::Trace([=](FdRenderpassDataSource::TraceContext tctx) {
107 if (auto state = tctx.GetIncrementalState(); state->was_cleared) {
108 send_descriptors(tctx, p->start_ts[stage]);
109 state->was_cleared = false;
110 }
111
112 auto packet = tctx.NewTracePacket();
113
114 packet->set_timestamp(p->start_ts[stage]);
115 packet->set_timestamp_clock_id(gpu_clock_id);
116
117 auto event = packet->set_gpu_render_stage_event();
118 event->set_event_id(0); // ???
119 event->set_hw_queue_id(DEFAULT_HW_QUEUE_ID);
120 event->set_duration(ts_ns - p->start_ts[stage]);
121 event->set_stage_id(stage);
122 event->set_context((uintptr_t)pctx);
123
124 /* The "surface" meta-stage has extra info about render target: */
125 if (stage == SURFACE_STAGE_ID) {
126
127 event->set_submission_id(p->submit_id);
128
129 if (p->cbuf0_format) {
130 auto data = event->add_extra_data();
131
132 data->set_name("color0 format");
133 data->set_value(util_format_short_name(p->cbuf0_format));
134 }
135
136 if (p->zs_format) {
137 auto data = event->add_extra_data();
138
139 data->set_name("zs format");
140 data->set_value(util_format_short_name(p->zs_format));
141 }
142
143 {
144 auto data = event->add_extra_data();
145
146 data->set_name("width");
147 data->set_value(std::to_string(p->width));
148 }
149
150 {
151 auto data = event->add_extra_data();
152
153 data->set_name("height");
154 data->set_value(std::to_string(p->height));
155 }
156
157 {
158 auto data = event->add_extra_data();
159
160 data->set_name("MSAA");
161 data->set_value(std::to_string(p->samples));
162 }
163
164 {
165 auto data = event->add_extra_data();
166
167 data->set_name("MRTs");
168 data->set_value(std::to_string(p->mrts));
169 }
170
171 // "renderMode"
172 // "surfaceID"
173
174 if (p->nbins) {
175 auto data = event->add_extra_data();
176
177 data->set_name("numberOfBins");
178 data->set_value(std::to_string(p->nbins));
179 }
180
181 if (p->binw) {
182 auto data = event->add_extra_data();
183
184 data->set_name("binWidth");
185 data->set_value(std::to_string(p->binw));
186 }
187
188 if (p->binh) {
189 auto data = event->add_extra_data();
190
191 data->set_name("binHeight");
192 data->set_value(std::to_string(p->binh));
193 }
194 } else if (stage == COMPUTE_STAGE_ID) {
195 {
196 auto data = event->add_extra_data();
197
198 data->set_name("indirect");
199 data->set_value(std::to_string(p->indirect));
200 }
201
202 {
203 auto data = event->add_extra_data();
204
205 data->set_name("work_dim");
206 data->set_value(std::to_string(p->work_dim));
207 }
208
209 {
210 auto data = event->add_extra_data();
211
212 data->set_name("local_size_x");
213 data->set_value(std::to_string(p->local_size_x));
214 }
215
216 {
217 auto data = event->add_extra_data();
218
219 data->set_name("local_size_y");
220 data->set_value(std::to_string(p->local_size_y));
221 }
222
223 {
224 auto data = event->add_extra_data();
225
226 data->set_name("local_size_z");
227 data->set_value(std::to_string(p->local_size_z));
228 }
229
230 {
231 auto data = event->add_extra_data();
232
233 data->set_name("num_groups_x");
234 data->set_value(std::to_string(p->num_groups_x));
235 }
236
237 {
238 auto data = event->add_extra_data();
239
240 data->set_name("num_groups_y");
241 data->set_value(std::to_string(p->num_groups_y));
242 }
243
244 {
245 auto data = event->add_extra_data();
246
247 data->set_name("num_groups_z");
248 data->set_value(std::to_string(p->num_groups_z));
249 }
250
251 {
252 auto data = event->add_extra_data();
253
254 data->set_name("shader_id");
255 data->set_value(std::to_string(p->shader_id));
256 }
257 }
258 });
259 }
260
261 #ifdef __cplusplus
262 extern "C" {
263 #endif
264
265 void
fd_perfetto_init(void)266 fd_perfetto_init(void)
267 {
268 util_perfetto_init();
269
270 perfetto::DataSourceDescriptor dsd;
271 dsd.set_name("gpu.renderstages.msm");
272 FdRenderpassDataSource::Register(dsd);
273 }
274
/**
 * Emit a clock-sync snapshot pairing a CPU (boottime) timestamp with the
 * corresponding GPU timestamp, so perfetto can align events across the two
 * clocks.  Rate-limited via next_clock_sync_ns to once per 30ms.
 */
static void
sync_timestamp(struct fd_context *ctx)
{
   uint64_t cpu_ts = perfetto::base::GetBootTimeNs().count();
   uint64_t gpu_ts;

   /* Can't convert raw GPU counter values without a ts_to_ns hook: */
   if (!ctx->ts_to_ns)
      return;

   /* Rate-limit: not yet due for a resync: */
   if (cpu_ts < next_clock_sync_ns)
      return;

   if (fd_pipe_get_param(ctx->pipe, FD_TIMESTAMP, &gpu_ts)) {
      PERFETTO_ELOG("Could not sync CPU and GPU clocks");
      return;
   }

   /* get cpu timestamp again because FD_TIMESTAMP can take >100us */
   cpu_ts = perfetto::base::GetBootTimeNs().count();

   /* convert GPU ts into ns: */
   gpu_ts = ctx->ts_to_ns(gpu_ts);

   FdRenderpassDataSource::Trace([=](auto tctx) {
      MesaRenderpassDataSource<FdRenderpassDataSource,
                               FdRenderpassTraits>::EmitClockSync(tctx, cpu_ts,
                                                                  gpu_ts, gpu_clock_id);
   });

   /* Record that calibration succeeded (unblocks stage_end()) and schedule
    * the next resync 30ms (30000000ns) from now:
    */
   sync_gpu_ts = gpu_ts;
   next_clock_sync_ns = cpu_ts + 30000000;
}
307
308 static void
emit_submit_id(struct fd_context * ctx)309 emit_submit_id(struct fd_context *ctx)
310 {
311 FdRenderpassDataSource::Trace([=](FdRenderpassDataSource::TraceContext tctx) {
312 auto packet = tctx.NewTracePacket();
313
314 packet->set_timestamp(perfetto::base::GetBootTimeNs().count());
315
316 auto event = packet->set_vulkan_api_event();
317 auto submit = event->set_vk_queue_submit();
318
319 submit->set_submission_id(ctx->submit_count);
320 });
321 }
322
323 void
fd_perfetto_submit(struct fd_context * ctx)324 fd_perfetto_submit(struct fd_context *ctx)
325 {
326 /* sync_timestamp isn't free */
327 if (!u_trace_perfetto_active(&ctx->trace_context))
328 return;
329
330 sync_timestamp(ctx);
331 emit_submit_id(ctx);
332 }
333
334 /*
335 * Trace callbacks, called from u_trace once the timestamps from GPU have been
336 * collected.
337 */
338
339 void
fd_start_render_pass(struct pipe_context * pctx,uint64_t ts_ns,uint16_t tp_idx,const void * flush_data,const struct trace_start_render_pass * payload,const void * indirect_data)340 fd_start_render_pass(struct pipe_context *pctx, uint64_t ts_ns,
341 uint16_t tp_idx, const void *flush_data,
342 const struct trace_start_render_pass *payload,
343 const void *indirect_data)
344 {
345 stage_start(pctx, ts_ns, SURFACE_STAGE_ID);
346
347 struct fd_perfetto_state *p = &fd_context(pctx)->perfetto;
348
349 p->submit_id = payload->submit_id;
350 p->cbuf0_format = payload->cbuf0_format;
351 p->zs_format = payload->zs_format;
352 p->width = payload->width;
353 p->height = payload->height;
354 p->mrts = payload->mrts;
355 p->samples = payload->samples;
356 p->nbins = payload->nbins;
357 p->binw = payload->binw;
358 p->binh = payload->binh;
359 }
360
361 void
fd_end_render_pass(struct pipe_context * pctx,uint64_t ts_ns,uint16_t tp_idx,const void * flush_data,const struct trace_end_render_pass * payload,const void * indirect_data)362 fd_end_render_pass(struct pipe_context *pctx, uint64_t ts_ns,
363 uint16_t tp_idx, const void *flush_data,
364 const struct trace_end_render_pass *payload,
365 const void *indirect_data)
366 {
367 stage_end(pctx, ts_ns, SURFACE_STAGE_ID);
368 }
369
370 void
fd_start_binning_ib(struct pipe_context * pctx,uint64_t ts_ns,uint16_t tp_idx,const void * flush_data,const struct trace_start_binning_ib * payload,const void * indirect_data)371 fd_start_binning_ib(struct pipe_context *pctx, uint64_t ts_ns,
372 uint16_t tp_idx, const void *flush_data,
373 const struct trace_start_binning_ib *payload,
374 const void *indirect_data)
375 {
376 stage_start(pctx, ts_ns, BINNING_STAGE_ID);
377 }
378
379 void
fd_end_binning_ib(struct pipe_context * pctx,uint64_t ts_ns,uint16_t tp_idx,const void * flush_data,const struct trace_end_binning_ib * payload,const void * indirect_data)380 fd_end_binning_ib(struct pipe_context *pctx, uint64_t ts_ns,
381 uint16_t tp_idx, const void *flush_data,
382 const struct trace_end_binning_ib *payload,
383 const void *indirect_data)
384 {
385 stage_end(pctx, ts_ns, BINNING_STAGE_ID);
386 }
387
388 void
fd_start_draw_ib(struct pipe_context * pctx,uint64_t ts_ns,uint16_t tp_idx,const void * flush_data,const struct trace_start_draw_ib * payload,const void * indirect_data)389 fd_start_draw_ib(struct pipe_context *pctx, uint64_t ts_ns,
390 uint16_t tp_idx, const void *flush_data,
391 const struct trace_start_draw_ib *payload,
392 const void *indirect_data)
393 {
394 stage_start(
395 pctx, ts_ns,
396 fd_context(pctx)->perfetto.nbins ? GMEM_STAGE_ID : BYPASS_STAGE_ID);
397 }
398
399 void
fd_end_draw_ib(struct pipe_context * pctx,uint64_t ts_ns,uint16_t tp_idx,const void * flush_data,const struct trace_end_draw_ib * payload,const void * indirect_data)400 fd_end_draw_ib(struct pipe_context *pctx, uint64_t ts_ns,
401 uint16_t tp_idx, const void *flush_data,
402 const struct trace_end_draw_ib *payload,
403 const void *indirect_data)
404 {
405 stage_end(
406 pctx, ts_ns,
407 fd_context(pctx)->perfetto.nbins ? GMEM_STAGE_ID : BYPASS_STAGE_ID);
408 }
409
410 void
fd_start_blit(struct pipe_context * pctx,uint64_t ts_ns,uint16_t tp_idx,const void * flush_data,const struct trace_start_blit * payload,const void * indirect_data)411 fd_start_blit(struct pipe_context *pctx, uint64_t ts_ns,
412 uint16_t tp_idx, const void *flush_data,
413 const struct trace_start_blit *payload,
414 const void *indirect_data)
415 {
416 stage_start(pctx, ts_ns, BLIT_STAGE_ID);
417 }
418
419 void
fd_end_blit(struct pipe_context * pctx,uint64_t ts_ns,uint16_t tp_idx,const void * flush_data,const struct trace_end_blit * payload,const void * indirect_data)420 fd_end_blit(struct pipe_context *pctx, uint64_t ts_ns,
421 uint16_t tp_idx, const void *flush_data,
422 const struct trace_end_blit *payload,
423 const void *indirect_data)
424 {
425 stage_end(pctx, ts_ns, BLIT_STAGE_ID);
426 }
427
428 void
fd_start_compute(struct pipe_context * pctx,uint64_t ts_ns,uint16_t tp_idx,const void * flush_data,const struct trace_start_compute * payload,const void * indirect_data)429 fd_start_compute(struct pipe_context *pctx, uint64_t ts_ns,
430 uint16_t tp_idx, const void *flush_data,
431 const struct trace_start_compute *payload,
432 const void *indirect_data)
433 {
434 stage_start(pctx, ts_ns, COMPUTE_STAGE_ID);
435
436 struct fd_perfetto_state *p = &fd_context(pctx)->perfetto;
437
438 p->indirect = payload->indirect;
439 p->work_dim = payload->work_dim;
440 p->local_size_x = payload->local_size_x;
441 p->local_size_y = payload->local_size_y;
442 p->local_size_z = payload->local_size_z;
443 p->num_groups_x = payload->num_groups_x;
444 p->num_groups_y = payload->num_groups_y;
445 p->num_groups_z = payload->num_groups_z;
446 p->shader_id = payload->shader_id;
447 }
448
449 void
fd_end_compute(struct pipe_context * pctx,uint64_t ts_ns,uint16_t tp_idx,const void * flush_data,const struct trace_end_compute * payload,const void * indirect_data)450 fd_end_compute(struct pipe_context *pctx, uint64_t ts_ns,
451 uint16_t tp_idx, const void *flush_data,
452 const struct trace_end_compute *payload,
453 const void *indirect_data)
454 {
455 stage_end(pctx, ts_ns, COMPUTE_STAGE_ID);
456 }
457
458 void
fd_start_clears(struct pipe_context * pctx,uint64_t ts_ns,uint16_t tp_idx,const void * flush_data,const struct trace_start_clears * payload,const void * indirect_data)459 fd_start_clears(struct pipe_context *pctx, uint64_t ts_ns,
460 uint16_t tp_idx, const void *flush_data,
461 const struct trace_start_clears *payload,
462 const void *indirect_data)
463 {
464 stage_start(pctx, ts_ns, CLEAR_STAGE_ID);
465 }
466
467 void
fd_end_clears(struct pipe_context * pctx,uint64_t ts_ns,uint16_t tp_idx,const void * flush_data,const struct trace_end_clears * payload,const void * indirect_data)468 fd_end_clears(struct pipe_context *pctx, uint64_t ts_ns,
469 uint16_t tp_idx, const void *flush_data,
470 const struct trace_end_clears *payload,
471 const void *indirect_data)
472 {
473 stage_end(pctx, ts_ns, CLEAR_STAGE_ID);
474 }
475
476 void
fd_start_tile_loads(struct pipe_context * pctx,uint64_t ts_ns,uint16_t tp_idx,const void * flush_data,const struct trace_start_tile_loads * payload,const void * indirect_data)477 fd_start_tile_loads(struct pipe_context *pctx, uint64_t ts_ns,
478 uint16_t tp_idx, const void *flush_data,
479 const struct trace_start_tile_loads *payload,
480 const void *indirect_data)
481 {
482 stage_start(pctx, ts_ns, TILE_LOAD_STAGE_ID);
483 }
484
485 void
fd_end_tile_loads(struct pipe_context * pctx,uint64_t ts_ns,uint16_t tp_idx,const void * flush_data,const struct trace_end_tile_loads * payload,const void * indirect_data)486 fd_end_tile_loads(struct pipe_context *pctx, uint64_t ts_ns,
487 uint16_t tp_idx, const void *flush_data,
488 const struct trace_end_tile_loads *payload,
489 const void *indirect_data)
490 {
491 stage_end(pctx, ts_ns, TILE_LOAD_STAGE_ID);
492 }
493
494 void
fd_start_tile_stores(struct pipe_context * pctx,uint64_t ts_ns,uint16_t tp_idx,const void * flush_data,const struct trace_start_tile_stores * payload,const void * indirect_data)495 fd_start_tile_stores(struct pipe_context *pctx, uint64_t ts_ns,
496 uint16_t tp_idx, const void *flush_data,
497 const struct trace_start_tile_stores *payload,
498 const void *indirect_data)
499 {
500 stage_start(pctx, ts_ns, TILE_STORE_STAGE_ID);
501 }
502
503 void
fd_end_tile_stores(struct pipe_context * pctx,uint64_t ts_ns,uint16_t tp_idx,const void * flush_data,const struct trace_end_tile_stores * payload,const void * indirect_data)504 fd_end_tile_stores(struct pipe_context *pctx, uint64_t ts_ns,
505 uint16_t tp_idx, const void *flush_data,
506 const struct trace_end_tile_stores *payload,
507 const void *indirect_data)
508 {
509 stage_end(pctx, ts_ns, TILE_STORE_STAGE_ID);
510 }
511
512 void
fd_start_state_restore(struct pipe_context * pctx,uint64_t ts_ns,uint16_t tp_idx,const void * flush_data,const struct trace_start_state_restore * payload,const void * indirect_data)513 fd_start_state_restore(struct pipe_context *pctx, uint64_t ts_ns,
514 uint16_t tp_idx, const void *flush_data,
515 const struct trace_start_state_restore *payload,
516 const void *indirect_data)
517 {
518 stage_start(pctx, ts_ns, STATE_RESTORE_STAGE_ID);
519 }
520
521 void
fd_end_state_restore(struct pipe_context * pctx,uint64_t ts_ns,uint16_t tp_idx,const void * flush_data,const struct trace_end_state_restore * payload,const void * indirect_data)522 fd_end_state_restore(struct pipe_context *pctx, uint64_t ts_ns,
523 uint16_t tp_idx, const void *flush_data,
524 const struct trace_end_state_restore *payload,
525 const void *indirect_data)
526 {
527 stage_end(pctx, ts_ns, STATE_RESTORE_STAGE_ID);
528 }
529
530 void
fd_start_vsc_overflow_test(struct pipe_context * pctx,uint64_t ts_ns,uint16_t tp_idx,const void * flush_data,const struct trace_start_vsc_overflow_test * payload,const void * indirect_data)531 fd_start_vsc_overflow_test(struct pipe_context *pctx, uint64_t ts_ns,
532 uint16_t tp_idx, const void *flush_data,
533 const struct trace_start_vsc_overflow_test *payload,
534 const void *indirect_data)
535 {
536 stage_start(pctx, ts_ns, VSC_OVERFLOW_STAGE_ID);
537 }
538
539 void
fd_end_vsc_overflow_test(struct pipe_context * pctx,uint64_t ts_ns,uint16_t tp_idx,const void * flush_data,const struct trace_end_vsc_overflow_test * payload,const void * indirect_data)540 fd_end_vsc_overflow_test(struct pipe_context *pctx, uint64_t ts_ns,
541 uint16_t tp_idx, const void *flush_data,
542 const struct trace_end_vsc_overflow_test *payload,
543 const void *indirect_data)
544 {
545 stage_end(pctx, ts_ns, VSC_OVERFLOW_STAGE_ID);
546 }
547
548 void
fd_start_prologue(struct pipe_context * pctx,uint64_t ts_ns,uint16_t tp_idx,const void * flush_data,const struct trace_start_prologue * payload,const void * indirect_data)549 fd_start_prologue(struct pipe_context *pctx, uint64_t ts_ns,
550 uint16_t tp_idx, const void *flush_data,
551 const struct trace_start_prologue *payload,
552 const void *indirect_data)
553 {
554 stage_start(pctx, ts_ns, PROLOGUE_STAGE_ID);
555 }
556
557 void
fd_end_prologue(struct pipe_context * pctx,uint64_t ts_ns,uint16_t tp_idx,const void * flush_data,const struct trace_end_prologue * payload,const void * indirect_data)558 fd_end_prologue(struct pipe_context *pctx, uint64_t ts_ns,
559 uint16_t tp_idx, const void *flush_data,
560 const struct trace_end_prologue *payload,
561 const void *indirect_data)
562 {
563 stage_end(pctx, ts_ns, PROLOGUE_STAGE_ID);
564 }
565
566 #ifdef __cplusplus
567 }
568 #endif
569