/*
 * Copyright © 2020 Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 */

#include "radv_cmd_buffer.h"
#include "radv_cs.h"
#include "radv_entrypoints.h"
#include "radv_pipeline_rt.h"
#include "radv_queue.h"
#include "radv_shader.h"
#include "radv_sqtt.h"
#include "vk_common_entrypoints.h"
#include "vk_semaphore.h"

#include "ac_rgp.h"
#include "ac_sqtt.h"

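/* Re-emit the SPI_SHADER_PGM_LO registers so that they point at the relocated
 * copies of the shader binaries. RGP requires all shaders of a pipeline to be
 * contiguous in memory, which is not how they are normally uploaded.
 */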
void
radv_sqtt_emit_relocated_shaders(struct radv_cmd_buffer *cmd_buffer, struct radv_graphics_pipeline *pipeline)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   const struct radv_physical_device *pdev = radv_device_physical(device);
   const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
   struct radv_sqtt_shaders_reloc *reloc = pipeline->sqtt_shaders_reloc;
   struct radeon_cmdbuf *cs = cmd_buffer->cs;
   uint64_t va;

   radv_cs_add_buffer(device->ws, cs, reloc->bo);

   /* VS */
   if (pipeline->base.shaders[MESA_SHADER_VERTEX]) {
      struct radv_shader *vs = pipeline->base.shaders[MESA_SHADER_VERTEX];

      va = reloc->va[MESA_SHADER_VERTEX];
      if (vs->info.vs.as_ls) {
         radeon_set_sh_reg(cs, vs->info.regs.pgm_lo, va >> 8);
      } else if (vs->info.vs.as_es) {
         radeon_set_sh_reg_seq(cs, vs->info.regs.pgm_lo, 2);
         radeon_emit(cs, va >> 8);
         radeon_emit(cs, S_00B324_MEM_BASE(va >> 40));
      } else if (vs->info.is_ngg) {
         radeon_set_sh_reg(cs, vs->info.regs.pgm_lo, va >> 8);
      } else {
         radeon_set_sh_reg_seq(cs, vs->info.regs.pgm_lo, 2);
         radeon_emit(cs, va >> 8);
         radeon_emit(cs, S_00B124_MEM_BASE(va >> 40));
      }
   }

   /* TCS */
   if (pipeline->base.shaders[MESA_SHADER_TESS_CTRL]) {
      const struct radv_shader *tcs = pipeline->base.shaders[MESA_SHADER_TESS_CTRL];

      va = reloc->va[MESA_SHADER_TESS_CTRL];

      if (gfx_level >= GFX9) {
         radeon_set_sh_reg(cs, tcs->info.regs.pgm_lo, va >> 8);
      } else {
         radeon_set_sh_reg_seq(cs, tcs->info.regs.pgm_lo, 2);
         radeon_emit(cs, va >> 8);
         radeon_emit(cs, S_00B424_MEM_BASE(va >> 40));
      }
   }

   /* TES */
   if (pipeline->base.shaders[MESA_SHADER_TESS_EVAL]) {
      struct radv_shader *tes = pipeline->base.shaders[MESA_SHADER_TESS_EVAL];

      va = reloc->va[MESA_SHADER_TESS_EVAL];
      if (tes->info.is_ngg) {
         radeon_set_sh_reg(cs, tes->info.regs.pgm_lo, va >> 8);
      } else if (tes->info.tes.as_es) {
         radeon_set_sh_reg_seq(cs, tes->info.regs.pgm_lo, 2);
         radeon_emit(cs, va >> 8);
         radeon_emit(cs, S_00B324_MEM_BASE(va >> 40));
      } else {
         radeon_set_sh_reg_seq(cs, tes->info.regs.pgm_lo, 2);
         radeon_emit(cs, va >> 8);
         radeon_emit(cs, S_00B124_MEM_BASE(va >> 40));
      }
   }

   /* GS */
   if (pipeline->base.shaders[MESA_SHADER_GEOMETRY]) {
      struct radv_shader *gs = pipeline->base.shaders[MESA_SHADER_GEOMETRY];

      va = reloc->va[MESA_SHADER_GEOMETRY];
      if (gs->info.is_ngg) {
         radeon_set_sh_reg(cs, gs->info.regs.pgm_lo, va >> 8);
      } else {
         if (gfx_level >= GFX9) {
            radeon_set_sh_reg(cs, gs->info.regs.pgm_lo, va >> 8);
         } else {
            radeon_set_sh_reg_seq(cs, gs->info.regs.pgm_lo, 2);
            radeon_emit(cs, va >> 8);
            radeon_emit(cs, S_00B224_MEM_BASE(va >> 40));
         }
      }
   }

   /* FS */
   if (pipeline->base.shaders[MESA_SHADER_FRAGMENT]) {
      const struct radv_shader *ps = pipeline->base.shaders[MESA_SHADER_FRAGMENT];

      va = reloc->va[MESA_SHADER_FRAGMENT];

      radeon_set_sh_reg_seq(cs, ps->info.regs.pgm_lo, 2);
      radeon_emit(cs, va >> 8);
      radeon_emit(cs, S_00B024_MEM_BASE(va >> 40));
   }

   /* MS */
   if (pipeline->base.shaders[MESA_SHADER_MESH]) {
      const struct radv_shader *ms = pipeline->base.shaders[MESA_SHADER_MESH];

      va = reloc->va[MESA_SHADER_MESH];

      radeon_set_sh_reg(cs, ms->info.regs.pgm_lo, va >> 8);
   }
}

static uint64_t
radv_sqtt_shader_get_va_reloc(struct radv_pipeline *pipeline, gl_shader_stage stage)
{
   if (pipeline->type == RADV_PIPELINE_GRAPHICS) {
      struct radv_graphics_pipeline *graphics_pipeline = radv_pipeline_to_graphics(pipeline);
      struct radv_sqtt_shaders_reloc *reloc = graphics_pipeline->sqtt_shaders_reloc;
      return reloc->va[stage];
   }

   return radv_shader_get_va(pipeline->shaders[stage]);
}

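/* Copy all shader binaries of a graphics pipeline into a single contiguous
 * allocation and record the relocated VAs, as required for RGP captures.
 */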
static VkResult
radv_sqtt_reloc_graphics_shaders(struct radv_device *device, struct radv_graphics_pipeline *pipeline)
{
   struct radv_shader_dma_submission *submission = NULL;
   struct radv_sqtt_shaders_reloc *reloc;
   uint32_t code_size = 0;

   reloc = calloc(1, sizeof(*reloc));
   if (!reloc)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   /* Compute the total code size. */
   for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; i++) {
      const struct radv_shader *shader = pipeline->base.shaders[i];
      if (!shader)
         continue;

      code_size += align(shader->code_size, RADV_SHADER_ALLOC_ALIGNMENT);
   }

   /* Allocate memory for all shader binaries. */
   reloc->alloc = radv_alloc_shader_memory(device, code_size, false, pipeline);
   if (!reloc->alloc) {
      free(reloc);
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;
   }

   reloc->bo = reloc->alloc->arena->bo;

   /* Relocate shader binaries to be contiguous in memory as requested by RGP. */
   uint64_t slab_va = radv_buffer_get_va(reloc->bo) + reloc->alloc->offset;
   char *slab_ptr = reloc->alloc->arena->ptr + reloc->alloc->offset;
   uint64_t offset = 0;

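   /* Shaders in CPU-invisible VRAM cannot be written directly; stage the
    * copies through a shader DMA submission instead.
    */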
   if (device->shader_use_invisible_vram) {
      submission = radv_shader_dma_get_submission(device, reloc->bo, slab_va, code_size);
      if (!submission)
         return VK_ERROR_UNKNOWN;
   }

   for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i) {
      const struct radv_shader *shader = pipeline->base.shaders[i];
      void *dest_ptr;
      if (!shader)
         continue;

      reloc->va[i] = slab_va + offset;

      if (device->shader_use_invisible_vram)
         dest_ptr = submission->ptr + offset;
      else
         dest_ptr = slab_ptr + offset;

      memcpy(dest_ptr, shader->code, shader->code_size);

      offset += align(shader->code_size, RADV_SHADER_ALLOC_ALIGNMENT);
   }

   if (device->shader_use_invisible_vram) {
      uint64_t upload_seq = 0;

      if (!radv_shader_dma_submit(device, submission, &upload_seq))
         return VK_ERROR_UNKNOWN;

      for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i) {
         struct radv_shader *shader = pipeline->base.shaders[i];

         if (!shader)
            continue;

         shader->upload_seq = upload_seq;
      }

      if (pipeline->base.gs_copy_shader)
         pipeline->base.gs_copy_shader->upload_seq = upload_seq;
   }

   pipeline->sqtt_shaders_reloc = reloc;

   return VK_SUCCESS;
}

static void
radv_write_begin_general_api_marker(struct radv_cmd_buffer *cmd_buffer, enum rgp_sqtt_marker_general_api_type api_type)
{
   struct rgp_sqtt_marker_general_api marker = {0};

   marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_GENERAL_API;
   marker.api_type = api_type;

   radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
}

static void
radv_write_end_general_api_marker(struct radv_cmd_buffer *cmd_buffer, enum rgp_sqtt_marker_general_api_type api_type)
{
   struct rgp_sqtt_marker_general_api marker = {0};

   marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_GENERAL_API;
   marker.api_type = api_type;
   marker.is_end = 1;

   radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
}

static void
radv_write_event_marker(struct radv_cmd_buffer *cmd_buffer, enum rgp_sqtt_marker_event_type api_type,
                        uint32_t vertex_offset_user_data, uint32_t instance_offset_user_data,
                        uint32_t draw_index_user_data)
{
   struct rgp_sqtt_marker_event marker = {0};

   marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_EVENT;
   marker.api_type = api_type;
   marker.cmd_id = cmd_buffer->state.num_events++;
   marker.cb_id = cmd_buffer->sqtt_cb_id;

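   /* UINT_MAX means the user data register index is unknown; clear both the
    * vertex and instance offsets together so the pair stays consistent.
    */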
   if (vertex_offset_user_data == UINT_MAX || instance_offset_user_data == UINT_MAX) {
      vertex_offset_user_data = 0;
      instance_offset_user_data = 0;
   }

   if (draw_index_user_data == UINT_MAX)
      draw_index_user_data = vertex_offset_user_data;

   marker.vertex_offset_reg_idx = vertex_offset_user_data;
   marker.instance_offset_reg_idx = instance_offset_user_data;
   marker.draw_index_reg_idx = draw_index_user_data;

   radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
}

static void
radv_write_event_with_dims_marker(struct radv_cmd_buffer *cmd_buffer, enum rgp_sqtt_marker_event_type api_type,
                                  uint32_t x, uint32_t y, uint32_t z)
{
   struct rgp_sqtt_marker_event_with_dims marker = {0};

   marker.event.identifier = RGP_SQTT_MARKER_IDENTIFIER_EVENT;
   marker.event.api_type = api_type;
   marker.event.cmd_id = cmd_buffer->state.num_events++;
   marker.event.cb_id = cmd_buffer->sqtt_cb_id;
   marker.event.has_thread_dims = 1;

   marker.thread_x = x;
   marker.thread_y = y;
   marker.thread_z = z;

   radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
}

void
radv_write_user_event_marker(struct radv_cmd_buffer *cmd_buffer, enum rgp_sqtt_marker_user_event_type type,
                             const char *str)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);

   if (likely(!device->sqtt.bo))
      return;

   if (type == UserEventPop) {
      assert(str == NULL);
      struct rgp_sqtt_marker_user_event marker = {0};
      marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_USER_EVENT;
      marker.data_type = type;

      radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
   } else {
      assert(str != NULL);
      unsigned len = strlen(str);
      struct rgp_sqtt_marker_user_event_with_length marker = {0};
      marker.user_event.identifier = RGP_SQTT_MARKER_IDENTIFIER_USER_EVENT;
      marker.user_event.data_type = type;
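      /* SQTT userdata is emitted in dwords, so pad the string to a multiple
       * of 4 bytes.
       */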
      marker.length = align(len, 4);

      uint8_t *buffer = alloca(sizeof(marker) + marker.length);
      memset(buffer, 0, sizeof(marker) + marker.length);
      memcpy(buffer, &marker, sizeof(marker));
      memcpy(buffer + sizeof(marker), str, len);

      radv_emit_sqtt_userdata(cmd_buffer, buffer, sizeof(marker) / 4 + marker.length / 4);
   }
}

void
radv_describe_begin_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   uint64_t device_id = (uintptr_t)device;
   struct rgp_sqtt_marker_cb_start marker = {0};

   if (likely(!device->sqtt.bo))
      return;

   /* Reserve a command buffer ID for SQTT. */
   const struct radv_physical_device *pdev = radv_device_physical(device);
   enum amd_ip_type ip_type = radv_queue_family_to_ring(pdev, cmd_buffer->qf);
   union rgp_sqtt_marker_cb_id cb_id = ac_sqtt_get_next_cmdbuf_id(&device->sqtt, ip_type);
   cmd_buffer->sqtt_cb_id = cb_id.all;

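   /* The device pointer doubles as a unique identifier that ties command
    * buffers to their device in the RGP capture.
    */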
   marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_CB_START;
   marker.cb_id = cmd_buffer->sqtt_cb_id;
   marker.device_id_low = device_id;
   marker.device_id_high = device_id >> 32;
   marker.queue = cmd_buffer->qf;
   marker.queue_flags = VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT;

   if (cmd_buffer->qf == RADV_QUEUE_GENERAL)
      marker.queue_flags |= VK_QUEUE_GRAPHICS_BIT;

   if (!radv_sparse_queue_enabled(pdev))
      marker.queue_flags |= VK_QUEUE_SPARSE_BINDING_BIT;

   radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
}

void
radv_describe_end_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   uint64_t device_id = (uintptr_t)device;
   struct rgp_sqtt_marker_cb_end marker = {0};

   if (likely(!device->sqtt.bo))
      return;

   marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_CB_END;
   marker.cb_id = cmd_buffer->sqtt_cb_id;
   marker.device_id_low = device_id;
   marker.device_id_high = device_id >> 32;

   radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
}

void
radv_describe_draw(struct radv_cmd_buffer *cmd_buffer)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);

   if (likely(!device->sqtt.bo))
      return;

   radv_write_event_marker(cmd_buffer, cmd_buffer->state.current_event_type, UINT_MAX, UINT_MAX, UINT_MAX);
}

void
radv_describe_dispatch(struct radv_cmd_buffer *cmd_buffer, const struct radv_dispatch_info *info)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);

   if (likely(!device->sqtt.bo))
      return;

   if (info->indirect) {
      radv_write_event_marker(cmd_buffer, cmd_buffer->state.current_event_type, UINT_MAX, UINT_MAX, UINT_MAX);
   } else {
      radv_write_event_with_dims_marker(cmd_buffer, cmd_buffer->state.current_event_type, info->blocks[0],
                                        info->blocks[1], info->blocks[2]);
   }
}

void
radv_describe_begin_render_pass_clear(struct radv_cmd_buffer *cmd_buffer, VkImageAspectFlagBits aspects)
{
   cmd_buffer->state.current_event_type =
      (aspects & VK_IMAGE_ASPECT_COLOR_BIT) ? EventRenderPassColorClear : EventRenderPassDepthStencilClear;
}

void
radv_describe_end_render_pass_clear(struct radv_cmd_buffer *cmd_buffer)
{
   cmd_buffer->state.current_event_type = EventInternalUnknown;
}

void
radv_describe_begin_render_pass_resolve(struct radv_cmd_buffer *cmd_buffer)
{
   cmd_buffer->state.current_event_type = EventRenderPassResolve;
}

void
radv_describe_end_render_pass_resolve(struct radv_cmd_buffer *cmd_buffer)
{
   cmd_buffer->state.current_event_type = EventInternalUnknown;
}

void
radv_describe_barrier_end_delayed(struct radv_cmd_buffer *cmd_buffer)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   struct rgp_sqtt_marker_barrier_end marker = {0};

   if (likely(!device->sqtt.bo) || !cmd_buffer->state.pending_sqtt_barrier_end)
      return;

   cmd_buffer->state.pending_sqtt_barrier_end = false;

   marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_BARRIER_END;
   marker.cb_id = cmd_buffer->sqtt_cb_id;

   marker.num_layout_transitions = cmd_buffer->state.num_layout_transitions;

   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_WAIT_ON_EOP_TS)
      marker.wait_on_eop_ts = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_VS_PARTIAL_FLUSH)
      marker.vs_partial_flush = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_PS_PARTIAL_FLUSH)
      marker.ps_partial_flush = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_CS_PARTIAL_FLUSH)
      marker.cs_partial_flush = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_PFP_SYNC_ME)
      marker.pfp_sync_me = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_SYNC_CP_DMA)
      marker.sync_cp_dma = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_VMEM_L0)
      marker.inval_tcp = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_ICACHE)
      marker.inval_sqI = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_SMEM_L0)
      marker.inval_sqK = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_FLUSH_L2)
      marker.flush_tcc = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_L2)
      marker.inval_tcc = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_FLUSH_CB)
      marker.flush_cb = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_CB)
      marker.inval_cb = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_FLUSH_DB)
      marker.flush_db = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_DB)
      marker.inval_db = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_L1)
      marker.inval_gl1 = true;

   radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);

   cmd_buffer->state.num_layout_transitions = 0;
}

void
radv_describe_barrier_start(struct radv_cmd_buffer *cmd_buffer, enum rgp_barrier_reason reason)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   struct rgp_sqtt_marker_barrier_start marker = {0};

   if (likely(!device->sqtt.bo))
      return;

   if (cmd_buffer->state.in_barrier) {
      assert(!"attempted to start a barrier while already in a barrier");
      return;
   }

   radv_describe_barrier_end_delayed(cmd_buffer);
   cmd_buffer->state.sqtt_flush_bits = 0;
   cmd_buffer->state.in_barrier = true;

   marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_BARRIER_START;
   marker.cb_id = cmd_buffer->sqtt_cb_id;
   marker.dword02 = reason;

   radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
}

void
radv_describe_barrier_end(struct radv_cmd_buffer *cmd_buffer)
{
   cmd_buffer->state.in_barrier = false;
   cmd_buffer->state.pending_sqtt_barrier_end = true;
}

void
radv_describe_layout_transition(struct radv_cmd_buffer *cmd_buffer, const struct radv_barrier_data *barrier)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   struct rgp_sqtt_marker_layout_transition marker = {0};

   if (likely(!device->sqtt.bo))
      return;

   if (!cmd_buffer->state.in_barrier) {
      assert(!"layout transition marker should be only emitted inside a barrier marker");
      return;
   }

   marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_LAYOUT_TRANSITION;
   marker.depth_stencil_expand = barrier->layout_transitions.depth_stencil_expand;
   marker.htile_hiz_range_expand = barrier->layout_transitions.htile_hiz_range_expand;
   marker.depth_stencil_resummarize = barrier->layout_transitions.depth_stencil_resummarize;
   marker.dcc_decompress = barrier->layout_transitions.dcc_decompress;
   marker.fmask_decompress = barrier->layout_transitions.fmask_decompress;
   marker.fast_clear_eliminate = barrier->layout_transitions.fast_clear_eliminate;
   marker.fmask_color_expand = barrier->layout_transitions.fmask_color_expand;
   marker.init_mask_ram = barrier->layout_transitions.init_mask_ram;

   radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);

   cmd_buffer->state.num_layout_transitions++;
}

void
radv_describe_begin_accel_struct_build(struct radv_cmd_buffer *cmd_buffer, uint32_t count)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);

   if (likely(!device->sqtt.bo))
      return;

   char marker[64];
   snprintf(marker, sizeof(marker), "vkCmdBuildAccelerationStructuresKHR(%u)", count);
   radv_write_user_event_marker(cmd_buffer, UserEventPush, marker);
}

void
radv_describe_end_accel_struct_build(struct radv_cmd_buffer *cmd_buffer)
{
   radv_write_user_event_marker(cmd_buffer, UserEventPop, NULL);
}

static void
radv_describe_pipeline_bind(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint pipelineBindPoint,
                            struct radv_pipeline *pipeline)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   struct rgp_sqtt_marker_pipeline_bind marker = {0};

   if (likely(!device->sqtt.bo))
      return;

   marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_BIND_PIPELINE;
   marker.cb_id = cmd_buffer->sqtt_cb_id;
   marker.bind_point = pipelineBindPoint;
   marker.api_pso_hash[0] = pipeline->pipeline_hash;
   marker.api_pso_hash[1] = pipeline->pipeline_hash >> 32;

   radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
}

/* Queue events */
static void
radv_describe_queue_event(struct radv_queue *queue, struct rgp_queue_event_record *record)
{
   struct radv_device *device = radv_queue_device(queue);
   struct ac_sqtt *sqtt = &device->sqtt;
   struct rgp_queue_event *queue_event = &sqtt->rgp_queue_event;

   simple_mtx_lock(&queue_event->lock);
   list_addtail(&record->list, &queue_event->record);
   queue_event->record_count++;
   simple_mtx_unlock(&queue_event->lock);
}

static VkResult
radv_describe_queue_present(struct radv_queue *queue, uint64_t cpu_timestamp, void *gpu_timestamp_ptr)
{
   struct rgp_queue_event_record *record;

   record = calloc(1, sizeof(struct rgp_queue_event_record));
   if (!record)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   record->event_type = SQTT_QUEUE_TIMING_EVENT_PRESENT;
   record->cpu_timestamp = cpu_timestamp;
   record->gpu_timestamps[0] = gpu_timestamp_ptr;
   record->queue_info_index = queue->vk.queue_family_index;

   radv_describe_queue_event(queue, record);

   return VK_SUCCESS;
}

static VkResult
radv_describe_queue_submit(struct radv_queue *queue, struct radv_cmd_buffer *cmd_buffer, uint32_t cmdbuf_idx,
                           uint64_t cpu_timestamp, void *pre_gpu_timestamp_ptr, void *post_gpu_timestamp_ptr)
{
   struct radv_device *device = radv_queue_device(queue);
   struct rgp_queue_event_record *record;

   record = calloc(1, sizeof(struct rgp_queue_event_record));
   if (!record)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   record->event_type = SQTT_QUEUE_TIMING_EVENT_CMDBUF_SUBMIT;
   record->api_id = (uintptr_t)cmd_buffer;
   record->cpu_timestamp = cpu_timestamp;
   record->frame_index = device->vk.current_frame;
   record->gpu_timestamps[0] = pre_gpu_timestamp_ptr;
   record->gpu_timestamps[1] = post_gpu_timestamp_ptr;
   record->queue_info_index = queue->vk.queue_family_index;
   record->submit_sub_index = cmdbuf_idx;

   radv_describe_queue_event(queue, record);

   return VK_SUCCESS;
}

static VkResult
radv_describe_queue_semaphore(struct radv_queue *queue, struct vk_semaphore *sync,
                              enum sqtt_queue_event_type event_type)
{
   struct rgp_queue_event_record *record;

   record = calloc(1, sizeof(struct rgp_queue_event_record));
   if (!record)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   record->event_type = event_type;
   record->api_id = (uintptr_t)sync;
   record->cpu_timestamp = os_time_get_nano();
   record->queue_info_index = queue->vk.queue_family_index;

   radv_describe_queue_event(queue, record);

   return VK_SUCCESS;
}

static void
radv_handle_sqtt(VkQueue _queue)
{
   VK_FROM_HANDLE(radv_queue, queue, _queue);
   struct radv_device *device = radv_queue_device(queue);
   const struct radv_physical_device *pdev = radv_device_physical(device);
   bool trigger = device->sqtt_triggered;
   device->sqtt_triggered = false;

   if (device->sqtt_enabled) {
      struct ac_sqtt_trace sqtt_trace = {0};

      radv_end_sqtt(queue);
      device->sqtt_enabled = false;

      /* TODO: Do something better than this whole sync. */
      device->vk.dispatch_table.QueueWaitIdle(_queue);

      if (radv_get_sqtt_trace(queue, &sqtt_trace)) {
         struct ac_spm_trace spm_trace;

         if (device->spm.bo)
            ac_spm_get_trace(&device->spm, &spm_trace);

         ac_dump_rgp_capture(&pdev->info, &sqtt_trace, device->spm.bo ? &spm_trace : NULL);
      } else {
         /* Trigger a new capture if the driver failed to get
          * the trace because the buffer was too small.
          */
         trigger = true;
      }

      /* Clear resources used for this capture. */
      radv_reset_sqtt_trace(device);
   }

   if (trigger) {
      if (ac_check_profile_state(&pdev->info)) {
         fprintf(stderr, "radv: Canceling RGP trace request as a hang condition has been "
                         "detected. Force the GPU into a profiling mode with e.g. "
                         "\"echo profile_peak > "
                         "/sys/class/drm/card0/device/power_dpm_force_performance_level\"\n");
         return;
      }

      /* Sample CPU/GPU clocks before starting the trace. */
      if (!radv_sqtt_sample_clocks(device)) {
         fprintf(stderr, "radv: Failed to sample clocks\n");
      }

      radv_begin_sqtt(queue);
      assert(!device->sqtt_enabled);
      device->sqtt_enabled = true;
   }
}

VKAPI_ATTR VkResult VKAPI_CALL
sqtt_QueuePresentKHR(VkQueue _queue, const VkPresentInfoKHR *pPresentInfo)
{
   VK_FROM_HANDLE(radv_queue, queue, _queue);
   struct radv_device *device = radv_queue_device(queue);
   VkResult result;

   queue->sqtt_present = true;

   result = device->layer_dispatch.rgp.QueuePresentKHR(_queue, pPresentInfo);
   if (result != VK_SUCCESS && result != VK_SUBOPTIMAL_KHR)
      return result;

   queue->sqtt_present = false;

   radv_handle_sqtt(_queue);

   return VK_SUCCESS;
}

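/* Present path: prepend a single timed command buffer that writes a GPU
 * timestamp, so the present can be correlated with CPU time in the RGP
 * queue timing data.
 */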
static VkResult
radv_sqtt_wsi_submit(VkQueue _queue, uint32_t submitCount, const VkSubmitInfo2 *pSubmits, VkFence _fence)
{
   VK_FROM_HANDLE(radv_queue, queue, _queue);
   struct radv_device *device = radv_queue_device(queue);
   VkCommandBufferSubmitInfo *new_cmdbufs = NULL;
   struct radeon_winsys_bo *gpu_timestamp_bo;
   uint32_t gpu_timestamp_offset;
   VkCommandBuffer timed_cmdbuf;
   void *gpu_timestamp_ptr;
   uint64_t cpu_timestamp;
   VkResult result = VK_SUCCESS;

   assert(submitCount <= 1 && pSubmits != NULL);

   for (uint32_t i = 0; i < submitCount; i++) {
      const VkSubmitInfo2 *pSubmit = &pSubmits[i];
      VkSubmitInfo2 sqtt_submit = *pSubmit;

      assert(sqtt_submit.commandBufferInfoCount <= 1);

      /* Command buffers */
      uint32_t new_cmdbuf_count = sqtt_submit.commandBufferInfoCount + 1;

      new_cmdbufs = malloc(new_cmdbuf_count * sizeof(*new_cmdbufs));
      if (!new_cmdbufs)
         return VK_ERROR_OUT_OF_HOST_MEMORY;

      /* Sample the current CPU time before building the GPU timestamp cmdbuf. */
      cpu_timestamp = os_time_get_nano();

      result = radv_sqtt_acquire_gpu_timestamp(device, &gpu_timestamp_bo, &gpu_timestamp_offset, &gpu_timestamp_ptr);
      if (result != VK_SUCCESS)
         goto fail;

      result = radv_sqtt_get_timed_cmdbuf(queue, gpu_timestamp_bo, gpu_timestamp_offset,
                                          VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT, &timed_cmdbuf);
      if (result != VK_SUCCESS)
         goto fail;

      new_cmdbufs[0] = (VkCommandBufferSubmitInfo){
         .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO,
         .commandBuffer = timed_cmdbuf,
      };

      if (sqtt_submit.commandBufferInfoCount == 1)
         new_cmdbufs[1] = sqtt_submit.pCommandBufferInfos[0];

      sqtt_submit.commandBufferInfoCount = new_cmdbuf_count;
      sqtt_submit.pCommandBufferInfos = new_cmdbufs;

      radv_describe_queue_present(queue, cpu_timestamp, gpu_timestamp_ptr);

      result = device->layer_dispatch.rgp.QueueSubmit2(_queue, 1, &sqtt_submit, _fence);
      if (result != VK_SUCCESS)
         goto fail;

      FREE(new_cmdbufs);
   }

   return result;

fail:
   FREE(new_cmdbufs);
   return result;
}

VKAPI_ATTR VkResult VKAPI_CALL
sqtt_QueueSubmit2(VkQueue _queue, uint32_t submitCount, const VkSubmitInfo2 *pSubmits, VkFence _fence)
{
   VK_FROM_HANDLE(radv_queue, queue, _queue);
   struct radv_device *device = radv_queue_device(queue);
   const bool is_gfx_or_ace = queue->state.qf == RADV_QUEUE_GENERAL || queue->state.qf == RADV_QUEUE_COMPUTE;
   VkCommandBufferSubmitInfo *new_cmdbufs = NULL;
   VkResult result = VK_SUCCESS;

   /* Only consider queue events on graphics/compute when enabled. */
   if (!device->sqtt_enabled || !radv_sqtt_queue_events_enabled() || !is_gfx_or_ace)
      return device->layer_dispatch.rgp.QueueSubmit2(_queue, submitCount, pSubmits, _fence);

   for (uint32_t i = 0; i < submitCount; i++) {
      const VkSubmitInfo2 *pSubmit = &pSubmits[i];

      /* Wait semaphores */
      for (uint32_t j = 0; j < pSubmit->waitSemaphoreInfoCount; j++) {
         const VkSemaphoreSubmitInfo *pWaitSemaphoreInfo = &pSubmit->pWaitSemaphoreInfos[j];
         VK_FROM_HANDLE(vk_semaphore, sem, pWaitSemaphoreInfo->semaphore);
         radv_describe_queue_semaphore(queue, sem, SQTT_QUEUE_TIMING_EVENT_WAIT_SEMAPHORE);
      }
   }

   if (queue->sqtt_present)
      return radv_sqtt_wsi_submit(_queue, submitCount, pSubmits, _fence);

   for (uint32_t i = 0; i < submitCount; i++) {
      const VkSubmitInfo2 *pSubmit = &pSubmits[i];
      VkSubmitInfo2 sqtt_submit = *pSubmit;

      /* Command buffers */
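      /* Each user command buffer is bracketed by two timed command buffers
       * (pre/post GPU timestamps), tripling the count.
       */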
      uint32_t new_cmdbuf_count = sqtt_submit.commandBufferInfoCount * 3;
      uint32_t cmdbuf_idx = 0;

      new_cmdbufs = malloc(new_cmdbuf_count * sizeof(*new_cmdbufs));
      if (!new_cmdbufs)
         return VK_ERROR_OUT_OF_HOST_MEMORY;

      for (uint32_t j = 0; j < sqtt_submit.commandBufferInfoCount; j++) {
         const VkCommandBufferSubmitInfo *pCommandBufferInfo = &sqtt_submit.pCommandBufferInfos[j];
         struct radeon_winsys_bo *gpu_timestamps_bo[2];
         uint32_t gpu_timestamps_offset[2];
         VkCommandBuffer pre_timed_cmdbuf, post_timed_cmdbuf;
         void *gpu_timestamps_ptr[2];
         uint64_t cpu_timestamp;

         /* Sample the current CPU time before building the timed cmdbufs. */
         cpu_timestamp = os_time_get_nano();

         result = radv_sqtt_acquire_gpu_timestamp(device, &gpu_timestamps_bo[0], &gpu_timestamps_offset[0],
                                                  &gpu_timestamps_ptr[0]);
         if (result != VK_SUCCESS)
            goto fail;

         result = radv_sqtt_get_timed_cmdbuf(queue, gpu_timestamps_bo[0], gpu_timestamps_offset[0],
                                             VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT, &pre_timed_cmdbuf);
         if (result != VK_SUCCESS)
            goto fail;

         new_cmdbufs[cmdbuf_idx++] = (VkCommandBufferSubmitInfo){
            .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO,
            .commandBuffer = pre_timed_cmdbuf,
         };

         new_cmdbufs[cmdbuf_idx++] = *pCommandBufferInfo;

         result = radv_sqtt_acquire_gpu_timestamp(device, &gpu_timestamps_bo[1], &gpu_timestamps_offset[1],
                                                  &gpu_timestamps_ptr[1]);
         if (result != VK_SUCCESS)
            goto fail;

         result = radv_sqtt_get_timed_cmdbuf(queue, gpu_timestamps_bo[1], gpu_timestamps_offset[1],
                                             VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT, &post_timed_cmdbuf);
         if (result != VK_SUCCESS)
            goto fail;

         new_cmdbufs[cmdbuf_idx++] = (VkCommandBufferSubmitInfo){
            .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO,
            .commandBuffer = post_timed_cmdbuf,
         };

         VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, pCommandBufferInfo->commandBuffer);
         radv_describe_queue_submit(queue, cmd_buffer, j, cpu_timestamp, gpu_timestamps_ptr[0], gpu_timestamps_ptr[1]);
      }

      sqtt_submit.commandBufferInfoCount = new_cmdbuf_count;
      sqtt_submit.pCommandBufferInfos = new_cmdbufs;

      result = device->layer_dispatch.rgp.QueueSubmit2(_queue, 1, &sqtt_submit, _fence);
      if (result != VK_SUCCESS)
         goto fail;

      /* Signal semaphores */
      for (uint32_t j = 0; j < sqtt_submit.signalSemaphoreInfoCount; j++) {
         const VkSemaphoreSubmitInfo *pSignalSemaphoreInfo = &sqtt_submit.pSignalSemaphoreInfos[j];
         VK_FROM_HANDLE(vk_semaphore, sem, pSignalSemaphoreInfo->semaphore);
         radv_describe_queue_semaphore(queue, sem, SQTT_QUEUE_TIMING_EVENT_SIGNAL_SEMAPHORE);
      }

      FREE(new_cmdbufs);
   }

   return result;

fail:
   FREE(new_cmdbufs);
   return result;
}

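/* Wrap a command with begin/end general API markers and tag the draws or
 * dispatches it records with the given RGP event type.
 */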
#define EVENT_MARKER_BASE(cmd_name, api_name, event_name, ...)                                                        \
   VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);                                                        \
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);                                                   \
   radv_write_begin_general_api_marker(cmd_buffer, ApiCmd##api_name);                                                 \
   cmd_buffer->state.current_event_type = EventCmd##event_name;                                                       \
   device->layer_dispatch.rgp.Cmd##cmd_name(__VA_ARGS__);                                                             \
   cmd_buffer->state.current_event_type = EventInternalUnknown;                                                       \
   radv_write_end_general_api_marker(cmd_buffer, ApiCmd##api_name);

#define EVENT_MARKER_ALIAS(cmd_name, api_name, ...) EVENT_MARKER_BASE(cmd_name, api_name, api_name, __VA_ARGS__);

#define EVENT_MARKER(cmd_name, ...) EVENT_MARKER_ALIAS(cmd_name, cmd_name, __VA_ARGS__);

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDraw(VkCommandBuffer commandBuffer, uint32_t vertexCount, uint32_t instanceCount, uint32_t firstVertex,
             uint32_t firstInstance)
{
   EVENT_MARKER(Draw, commandBuffer, vertexCount, instanceCount, firstVertex, firstInstance);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDrawIndexed(VkCommandBuffer commandBuffer, uint32_t indexCount, uint32_t instanceCount, uint32_t firstIndex,
                    int32_t vertexOffset, uint32_t firstInstance)
{
   EVENT_MARKER(DrawIndexed, commandBuffer, indexCount, instanceCount, firstIndex, vertexOffset, firstInstance);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDrawIndirect(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, uint32_t drawCount,
                     uint32_t stride)
{
   EVENT_MARKER(DrawIndirect, commandBuffer, buffer, offset, drawCount, stride);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, uint32_t drawCount,
                            uint32_t stride)
{
   EVENT_MARKER(DrawIndexedIndirect, commandBuffer, buffer, offset, drawCount, stride);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDrawIndirectCount(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, VkBuffer countBuffer,
                          VkDeviceSize countBufferOffset, uint32_t maxDrawCount, uint32_t stride)
{
   EVENT_MARKER(DrawIndirectCount, commandBuffer, buffer, offset, countBuffer, countBufferOffset, maxDrawCount, stride);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDrawIndexedIndirectCount(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset,
                                 VkBuffer countBuffer, VkDeviceSize countBufferOffset, uint32_t maxDrawCount,
                                 uint32_t stride)
{
   EVENT_MARKER(DrawIndexedIndirectCount, commandBuffer, buffer, offset, countBuffer, countBufferOffset, maxDrawCount,
                stride);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDispatch(VkCommandBuffer commandBuffer, uint32_t x, uint32_t y, uint32_t z)
{
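   /* vkCmdDispatch is forwarded as vkCmdDispatchBase with a zero base. */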
   EVENT_MARKER_ALIAS(DispatchBase, Dispatch, commandBuffer, 0, 0, 0, x, y, z);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDispatchIndirect(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset)
{
   EVENT_MARKER(DispatchIndirect, commandBuffer, buffer, offset);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdCopyBuffer2(VkCommandBuffer commandBuffer, const VkCopyBufferInfo2 *pCopyBufferInfo)
{
   EVENT_MARKER_ALIAS(CopyBuffer2, CopyBuffer, commandBuffer, pCopyBufferInfo);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdFillBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDeviceSize dstOffset, VkDeviceSize fillSize,
                   uint32_t data)
{
   EVENT_MARKER(FillBuffer, commandBuffer, dstBuffer, dstOffset, fillSize, data);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdUpdateBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDeviceSize dstOffset, VkDeviceSize dataSize,
                     const void *pData)
{
   EVENT_MARKER(UpdateBuffer, commandBuffer, dstBuffer, dstOffset, dataSize, pData);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdCopyImage2(VkCommandBuffer commandBuffer, const VkCopyImageInfo2 *pCopyImageInfo)
{
   EVENT_MARKER_ALIAS(CopyImage2, CopyImage, commandBuffer, pCopyImageInfo);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdCopyBufferToImage2(VkCommandBuffer commandBuffer, const VkCopyBufferToImageInfo2 *pCopyBufferToImageInfo)
{
   EVENT_MARKER_ALIAS(CopyBufferToImage2, CopyBufferToImage, commandBuffer, pCopyBufferToImageInfo);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdCopyImageToBuffer2(VkCommandBuffer commandBuffer, const VkCopyImageToBufferInfo2 *pCopyImageToBufferInfo)
{
   EVENT_MARKER_ALIAS(CopyImageToBuffer2, CopyImageToBuffer, commandBuffer, pCopyImageToBufferInfo);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdBlitImage2(VkCommandBuffer commandBuffer, const VkBlitImageInfo2 *pBlitImageInfo)
{
   EVENT_MARKER_ALIAS(BlitImage2, BlitImage, commandBuffer, pBlitImageInfo);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdClearColorImage(VkCommandBuffer commandBuffer, VkImage image_h, VkImageLayout imageLayout,
                        const VkClearColorValue *pColor, uint32_t rangeCount, const VkImageSubresourceRange *pRanges)
{
   EVENT_MARKER(ClearColorImage, commandBuffer, image_h, imageLayout, pColor, rangeCount, pRanges);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer, VkImage image_h, VkImageLayout imageLayout,
                               const VkClearDepthStencilValue *pDepthStencil, uint32_t rangeCount,
                               const VkImageSubresourceRange *pRanges)
{
   EVENT_MARKER(ClearDepthStencilImage, commandBuffer, image_h, imageLayout, pDepthStencil, rangeCount, pRanges);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdClearAttachments(VkCommandBuffer commandBuffer, uint32_t attachmentCount, const VkClearAttachment *pAttachments,
                         uint32_t rectCount, const VkClearRect *pRects)
{
   EVENT_MARKER(ClearAttachments, commandBuffer, attachmentCount, pAttachments, rectCount, pRects);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdResolveImage2(VkCommandBuffer commandBuffer, const VkResolveImageInfo2 *pResolveImageInfo)
{
   EVENT_MARKER_ALIAS(ResolveImage2, ResolveImage, commandBuffer, pResolveImageInfo);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdWaitEvents2(VkCommandBuffer commandBuffer, uint32_t eventCount, const VkEvent *pEvents,
                    const VkDependencyInfo *pDependencyInfos)
{
   EVENT_MARKER_ALIAS(WaitEvents2, WaitEvents, commandBuffer, eventCount, pEvents, pDependencyInfos);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdPipelineBarrier2(VkCommandBuffer commandBuffer, const VkDependencyInfo *pDependencyInfo)
{
   EVENT_MARKER_ALIAS(PipelineBarrier2, PipelineBarrier, commandBuffer, pDependencyInfo);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdResetQueryPool(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t firstQuery, uint32_t queryCount)
{
   EVENT_MARKER(ResetQueryPool, commandBuffer, queryPool, firstQuery, queryCount);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t firstQuery,
                             uint32_t queryCount, VkBuffer dstBuffer, VkDeviceSize dstOffset, VkDeviceSize stride,
                             VkQueryResultFlags flags)
{
   EVENT_MARKER(CopyQueryPoolResults, commandBuffer, queryPool, firstQuery, queryCount, dstBuffer, dstOffset, stride,
                flags);
}

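/* Ray tracing commands are reported with the Dispatch API type because RT
 * pipelines are compiled to compute shaders.
 */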
#define EVENT_RT_MARKER(cmd_name, flags, ...) EVENT_MARKER_BASE(cmd_name, Dispatch, cmd_name | flags, __VA_ARGS__);

#define EVENT_RT_MARKER_ALIAS(cmd_name, event_name, flags, ...)                                                       \
   EVENT_MARKER_BASE(cmd_name, Dispatch, event_name | flags, __VA_ARGS__);

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdTraceRaysKHR(VkCommandBuffer commandBuffer, const VkStridedDeviceAddressRegionKHR *pRaygenShaderBindingTable,
                     const VkStridedDeviceAddressRegionKHR *pMissShaderBindingTable,
                     const VkStridedDeviceAddressRegionKHR *pHitShaderBindingTable,
                     const VkStridedDeviceAddressRegionKHR *pCallableShaderBindingTable, uint32_t width,
                     uint32_t height, uint32_t depth)
{
   EVENT_RT_MARKER(TraceRaysKHR, ApiRayTracingSeparateCompiled, commandBuffer, pRaygenShaderBindingTable,
                   pMissShaderBindingTable, pHitShaderBindingTable, pCallableShaderBindingTable, width, height, depth);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdTraceRaysIndirectKHR(VkCommandBuffer commandBuffer,
                             const VkStridedDeviceAddressRegionKHR *pRaygenShaderBindingTable,
                             const VkStridedDeviceAddressRegionKHR *pMissShaderBindingTable,
                             const VkStridedDeviceAddressRegionKHR *pHitShaderBindingTable,
                             const VkStridedDeviceAddressRegionKHR *pCallableShaderBindingTable,
                             VkDeviceAddress indirectDeviceAddress)
{
   EVENT_RT_MARKER(TraceRaysIndirectKHR, ApiRayTracingSeparateCompiled, commandBuffer, pRaygenShaderBindingTable,
                   pMissShaderBindingTable, pHitShaderBindingTable, pCallableShaderBindingTable, indirectDeviceAddress);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdTraceRaysIndirect2KHR(VkCommandBuffer commandBuffer, VkDeviceAddress indirectDeviceAddress)
{
   EVENT_RT_MARKER_ALIAS(TraceRaysIndirect2KHR, TraceRaysIndirectKHR, ApiRayTracingSeparateCompiled, commandBuffer,
                         indirectDeviceAddress);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdCopyAccelerationStructureKHR(VkCommandBuffer commandBuffer, const VkCopyAccelerationStructureInfoKHR *pInfo)
{
   EVENT_RT_MARKER(CopyAccelerationStructureKHR, 0, commandBuffer, pInfo);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdCopyAccelerationStructureToMemoryKHR(VkCommandBuffer commandBuffer,
                                             const VkCopyAccelerationStructureToMemoryInfoKHR *pInfo)
{
   EVENT_RT_MARKER(CopyAccelerationStructureToMemoryKHR, 0, commandBuffer, pInfo);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdCopyMemoryToAccelerationStructureKHR(VkCommandBuffer commandBuffer,
                                             const VkCopyMemoryToAccelerationStructureInfoKHR *pInfo)
{
   EVENT_RT_MARKER(CopyMemoryToAccelerationStructureKHR, 0, commandBuffer, pInfo);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDrawMeshTasksEXT(VkCommandBuffer commandBuffer, uint32_t x, uint32_t y, uint32_t z)
{
   EVENT_MARKER(DrawMeshTasksEXT, commandBuffer, x, y, z);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDrawMeshTasksIndirectEXT(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset,
                                 uint32_t drawCount, uint32_t stride)
{
   EVENT_MARKER(DrawMeshTasksIndirectEXT, commandBuffer, buffer, offset, drawCount, stride);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDrawMeshTasksIndirectCountEXT(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset,
                                      VkBuffer countBuffer, VkDeviceSize countBufferOffset, uint32_t maxDrawCount,
                                      uint32_t stride)
{
   EVENT_MARKER(DrawMeshTasksIndirectCountEXT, commandBuffer, buffer, offset, countBuffer, countBufferOffset,
                maxDrawCount, stride);
}

#undef EVENT_RT_MARKER_ALIAS
#undef EVENT_RT_MARKER

#undef EVENT_MARKER
#undef EVENT_MARKER_ALIAS
#undef EVENT_MARKER_BASE

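/* Like EVENT_MARKER, but for commands that do not generate their own RGP
 * events; only the begin/end general API markers are emitted.
 */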
1159 #define API_MARKER_ALIAS(cmd_name, api_name, ...) \
1160 VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); \
1161 struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); \
1162 radv_write_begin_general_api_marker(cmd_buffer, ApiCmd##api_name); \
1163 device->layer_dispatch.rgp.Cmd##cmd_name(__VA_ARGS__); \
1164 radv_write_end_general_api_marker(cmd_buffer, ApiCmd##api_name);
1165
1166 #define API_MARKER(cmd_name, ...) API_MARKER_ALIAS(cmd_name, cmd_name, __VA_ARGS__);
1167
1168 VKAPI_ATTR void VKAPI_CALL
sqtt_CmdBindPipeline(VkCommandBuffer commandBuffer,VkPipelineBindPoint pipelineBindPoint,VkPipeline _pipeline)1169 sqtt_CmdBindPipeline(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint, VkPipeline _pipeline)
1170 {
1171 VK_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);
1172
1173 API_MARKER(BindPipeline, commandBuffer, pipelineBindPoint, _pipeline);
1174
1175 if (pipelineBindPoint == VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR) {
1176 /* RGP seems to expect a compute bind point to detect and report RT pipelines, which makes
1177 * sense somehow given that RT shaders are compiled to an unified compute shader.
1178 */
1179 radv_describe_pipeline_bind(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
1180 } else {
1181 radv_describe_pipeline_bind(cmd_buffer, pipelineBindPoint, pipeline);
1182 }
1183 }
1184
1185 VKAPI_ATTR void VKAPI_CALL
sqtt_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,VkPipelineBindPoint pipelineBindPoint,VkPipelineLayout layout,uint32_t firstSet,uint32_t descriptorSetCount,const VkDescriptorSet * pDescriptorSets,uint32_t dynamicOffsetCount,const uint32_t * pDynamicOffsets)1186 sqtt_CmdBindDescriptorSets(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint,
1187 VkPipelineLayout layout, uint32_t firstSet, uint32_t descriptorSetCount,
1188 const VkDescriptorSet *pDescriptorSets, uint32_t dynamicOffsetCount,
1189 const uint32_t *pDynamicOffsets)
1190 {
1191 API_MARKER(BindDescriptorSets, commandBuffer, pipelineBindPoint, layout, firstSet, descriptorSetCount,
1192 pDescriptorSets, dynamicOffsetCount, pDynamicOffsets);
1193 }
1194
1195 VKAPI_ATTR void VKAPI_CALL
sqtt_CmdBindIndexBuffer(VkCommandBuffer commandBuffer,VkBuffer buffer,VkDeviceSize offset,VkIndexType indexType)1196 sqtt_CmdBindIndexBuffer(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, VkIndexType indexType)
1197 {
1198 API_MARKER(BindIndexBuffer, commandBuffer, buffer, offset, indexType);
1199 }
1200
1201 VKAPI_ATTR void VKAPI_CALL
sqtt_CmdBindVertexBuffers2(VkCommandBuffer commandBuffer,uint32_t firstBinding,uint32_t bindingCount,const VkBuffer * pBuffers,const VkDeviceSize * pOffsets,const VkDeviceSize * pSizes,const VkDeviceSize * pStrides)1202 sqtt_CmdBindVertexBuffers2(VkCommandBuffer commandBuffer, uint32_t firstBinding, uint32_t bindingCount,
1203 const VkBuffer *pBuffers, const VkDeviceSize *pOffsets, const VkDeviceSize *pSizes,
1204 const VkDeviceSize *pStrides)
1205 {
1206 API_MARKER_ALIAS(BindVertexBuffers2, BindVertexBuffers, commandBuffer, firstBinding, bindingCount, pBuffers,
1207 pOffsets, pSizes, pStrides);
1208 }
1209
1210 VKAPI_ATTR void VKAPI_CALL
sqtt_CmdBeginQuery(VkCommandBuffer commandBuffer,VkQueryPool queryPool,uint32_t query,VkQueryControlFlags flags)1211 sqtt_CmdBeginQuery(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t query, VkQueryControlFlags flags)
1212 {
1213 API_MARKER(BeginQuery, commandBuffer, queryPool, query, flags);
1214 }
1215
1216 VKAPI_ATTR void VKAPI_CALL
sqtt_CmdEndQuery(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t query)
{
   API_MARKER(EndQuery, commandBuffer, queryPool, query);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdWriteTimestamp2(VkCommandBuffer commandBuffer, VkPipelineStageFlags2 stage, VkQueryPool queryPool,
                        uint32_t query)
{
   API_MARKER_ALIAS(WriteTimestamp2, WriteTimestamp, commandBuffer, stage, queryPool, query);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdPushConstants(VkCommandBuffer commandBuffer, VkPipelineLayout layout, VkShaderStageFlags stageFlags,
                      uint32_t offset, uint32_t size, const void *pValues)
{
   API_MARKER(PushConstants, commandBuffer, layout, stageFlags, offset, size, pValues);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdBeginRendering(VkCommandBuffer commandBuffer, const VkRenderingInfo *pRenderingInfo)
{
   API_MARKER_ALIAS(BeginRendering, BeginRenderPass, commandBuffer, pRenderingInfo);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdEndRendering(VkCommandBuffer commandBuffer)
{
   API_MARKER_ALIAS(EndRendering, EndRenderPass, commandBuffer);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdExecuteCommands(VkCommandBuffer commandBuffer, uint32_t commandBufferCount, const VkCommandBuffer *pCmdBuffers)
{
   API_MARKER(ExecuteCommands, commandBuffer, commandBufferCount, pCmdBuffers);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdExecuteGeneratedCommandsNV(VkCommandBuffer commandBuffer, VkBool32 isPreprocessed,
                                   const VkGeneratedCommandsInfoNV *pGeneratedCommandsInfo)
{
   /* There is no ExecuteIndirect Vulkan event in RGP yet. */
   API_MARKER_ALIAS(ExecuteGeneratedCommandsNV, ExecuteCommands, commandBuffer, isPreprocessed, pGeneratedCommandsInfo);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdSetViewport(VkCommandBuffer commandBuffer, uint32_t firstViewport, uint32_t viewportCount,
                    const VkViewport *pViewports)
{
   API_MARKER(SetViewport, commandBuffer, firstViewport, viewportCount, pViewports);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdSetScissor(VkCommandBuffer commandBuffer, uint32_t firstScissor, uint32_t scissorCount,
                   const VkRect2D *pScissors)
{
   API_MARKER(SetScissor, commandBuffer, firstScissor, scissorCount, pScissors);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdSetLineWidth(VkCommandBuffer commandBuffer, float lineWidth)
{
   API_MARKER(SetLineWidth, commandBuffer, lineWidth);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdSetDepthBias(VkCommandBuffer commandBuffer, float depthBiasConstantFactor, float depthBiasClamp,
                     float depthBiasSlopeFactor)
{
   API_MARKER(SetDepthBias, commandBuffer, depthBiasConstantFactor, depthBiasClamp, depthBiasSlopeFactor);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdSetBlendConstants(VkCommandBuffer commandBuffer, const float blendConstants[4])
{
   API_MARKER(SetBlendConstants, commandBuffer, blendConstants);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdSetDepthBounds(VkCommandBuffer commandBuffer, float minDepthBounds, float maxDepthBounds)
{
   API_MARKER(SetDepthBounds, commandBuffer, minDepthBounds, maxDepthBounds);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdSetStencilCompareMask(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, uint32_t compareMask)
{
   API_MARKER(SetStencilCompareMask, commandBuffer, faceMask, compareMask);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdSetStencilWriteMask(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, uint32_t writeMask)
{
   API_MARKER(SetStencilWriteMask, commandBuffer, faceMask, writeMask);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdSetStencilReference(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, uint32_t reference)
{
   API_MARKER(SetStencilReference, commandBuffer, faceMask, reference);
}

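/*
 * User event markers: debug labels from VK_EXT_debug_marker and
 * VK_EXT_debug_utils are written into the trace as SQTT user events
 * (Push/Pop/Trigger) so RGP can show them as named regions. Only the
 * debug_utils entrypoints also forward to the next layer; the legacy
 * debug_marker ones terminate here.
 */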
/* VK_EXT_debug_marker */
VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDebugMarkerBeginEXT(VkCommandBuffer commandBuffer, const VkDebugMarkerMarkerInfoEXT *pMarkerInfo)
{
   VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   radv_write_user_event_marker(cmd_buffer, UserEventPush, pMarkerInfo->pMarkerName);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDebugMarkerEndEXT(VkCommandBuffer commandBuffer)
{
   VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   radv_write_user_event_marker(cmd_buffer, UserEventPop, NULL);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDebugMarkerInsertEXT(VkCommandBuffer commandBuffer, const VkDebugMarkerMarkerInfoEXT *pMarkerInfo)
{
   VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   radv_write_user_event_marker(cmd_buffer, UserEventTrigger, pMarkerInfo->pMarkerName);
}

VKAPI_ATTR VkResult VKAPI_CALL
sqtt_DebugMarkerSetObjectTagEXT(VkDevice device, const VkDebugMarkerObjectTagInfoEXT *pTagInfo)
{
   /* no-op */
   return VK_SUCCESS;
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdBeginDebugUtilsLabelEXT(VkCommandBuffer commandBuffer, const VkDebugUtilsLabelEXT *pLabelInfo)
{
   VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);

   radv_write_user_event_marker(cmd_buffer, UserEventPush, pLabelInfo->pLabelName);

   device->layer_dispatch.rgp.CmdBeginDebugUtilsLabelEXT(commandBuffer, pLabelInfo);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdEndDebugUtilsLabelEXT(VkCommandBuffer commandBuffer)
{
   VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);

   radv_write_user_event_marker(cmd_buffer, UserEventPop, NULL);

   device->layer_dispatch.rgp.CmdEndDebugUtilsLabelEXT(commandBuffer);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdInsertDebugUtilsLabelEXT(VkCommandBuffer commandBuffer, const VkDebugUtilsLabelEXT *pLabelInfo)
{
   VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);

   radv_write_user_event_marker(cmd_buffer, UserEventTrigger, pLabelInfo->pLabelName);

   device->layer_dispatch.rgp.CmdInsertDebugUtilsLabelEXT(commandBuffer, pLabelInfo);
}

/* Pipelines */
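/*
 * Map an API shader to the RGP hardware stage it executes on. The same API
 * stage can land on different HW stages depending on how it was compiled:
 * e.g. a vertex shader runs as HW LS when feeding tessellation, as HW ES
 * when feeding a legacy geometry shader, on the HW GS stage when compiled
 * as NGG, and as HW VS otherwise. Ray tracing and task shaders always
 * execute as compute.
 */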
static enum rgp_hardware_stages
radv_get_rgp_shader_stage(struct radv_shader *shader)
{
   switch (shader->info.stage) {
   case MESA_SHADER_VERTEX:
      if (shader->info.vs.as_ls)
         return RGP_HW_STAGE_LS;
      else if (shader->info.vs.as_es)
         return RGP_HW_STAGE_ES;
      else if (shader->info.is_ngg)
         return RGP_HW_STAGE_GS;
      else
         return RGP_HW_STAGE_VS;
   case MESA_SHADER_TESS_CTRL:
      return RGP_HW_STAGE_HS;
   case MESA_SHADER_TESS_EVAL:
      if (shader->info.tes.as_es)
         return RGP_HW_STAGE_ES;
      else if (shader->info.is_ngg)
         return RGP_HW_STAGE_GS;
      else
         return RGP_HW_STAGE_VS;
   case MESA_SHADER_MESH:
   case MESA_SHADER_GEOMETRY:
      return RGP_HW_STAGE_GS;
   case MESA_SHADER_FRAGMENT:
      return RGP_HW_STAGE_PS;
   case MESA_SHADER_TASK:
   case MESA_SHADER_COMPUTE:
   case MESA_SHADER_RAYGEN:
   case MESA_SHADER_CLOSEST_HIT:
   case MESA_SHADER_ANY_HIT:
   case MESA_SHADER_INTERSECTION:
   case MESA_SHADER_MISS:
   case MESA_SHADER_CALLABLE:
      return RGP_HW_STAGE_CS;
   default:
      unreachable("invalid mesa shader stage");
   }
}

static void
radv_fill_code_object_record(struct radv_device *device, struct rgp_shader_data *shader_data,
                             struct radv_shader *shader, uint64_t va)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);
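   /* shader->config.lds_size is in HW allocation-granularity units: on
    * GFX11+ the PS granularity is 1024 bytes, every other case uses the
    * device's LDS encode granularity. Convert it back to bytes below. */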
   unsigned lds_increment = pdev->info.gfx_level >= GFX11 && shader->info.stage == MESA_SHADER_FRAGMENT
                               ? 1024
                               : pdev->info.lds_encode_granularity;

   memset(shader_data->rt_shader_name, 0, sizeof(shader_data->rt_shader_name));
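   /* Use the shader's CPU pointer as a cheap unique 64-bit identifier. */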
   shader_data->hash[0] = (uint64_t)(uintptr_t)shader;
   shader_data->hash[1] = (uint64_t)(uintptr_t)shader >> 32;
   shader_data->code_size = shader->code_size;
   shader_data->code = shader->code;
   shader_data->vgpr_count = shader->config.num_vgprs;
   shader_data->sgpr_count = shader->config.num_sgprs;
   shader_data->scratch_memory_size = shader->config.scratch_bytes_per_wave;
   shader_data->lds_size = shader->config.lds_size * lds_increment;
   shader_data->wavefront_size = shader->info.wave_size;
   shader_data->base_address = va & 0xffffffffffff; /* GPU VAs are 48 bits wide. */
   shader_data->elf_symbol_offset = 0;
   shader_data->hw_stage = radv_get_rgp_shader_stage(shader);
   shader_data->is_combined = false;
}

static VkResult
radv_add_code_object(struct radv_device *device, struct radv_pipeline *pipeline)
{
   struct ac_sqtt *sqtt = &device->sqtt;
   struct rgp_code_object *code_object = &sqtt->rgp_code_object;
   struct rgp_code_object_record *record;

   record = malloc(sizeof(struct rgp_code_object_record));
   if (!record)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   record->shader_stages_mask = 0;
   record->num_shaders_combined = 0;
   /* The record stores a 128-bit hash; replicate the 64-bit pipeline hash
    * into both halves. */
   record->pipeline_hash[0] = pipeline->pipeline_hash;
   record->pipeline_hash[1] = pipeline->pipeline_hash;
   record->is_rt = false;

   for (unsigned i = 0; i < MESA_VULKAN_SHADER_STAGES; i++) {
      struct radv_shader *shader = pipeline->shaders[i];

      if (!shader)
         continue;

      radv_fill_code_object_record(device, &record->shader_data[i], shader, radv_sqtt_shader_get_va_reloc(pipeline, i));

      record->shader_stages_mask |= (1 << i);
      record->num_shaders_combined++;
   }

   simple_mtx_lock(&code_object->lock);
   list_addtail(&record->list, &code_object->record);
   code_object->record_count++;
   simple_mtx_unlock(&code_object->lock);

   return VK_SUCCESS;
}

static VkResult
radv_add_rt_record(struct radv_device *device, struct rgp_code_object *code_object,
                   struct radv_ray_tracing_pipeline *pipeline, struct radv_shader *shader, uint32_t stack_size,
                   uint32_t index, uint64_t hash)
{
   struct rgp_code_object_record *record = malloc(sizeof(struct rgp_code_object_record));
   if (!record)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   struct rgp_shader_data *shader_data = &record->shader_data[shader->info.stage];

   record->shader_stages_mask = 0;
   record->num_shaders_combined = 0;
   record->pipeline_hash[0] = hash;
   record->pipeline_hash[1] = hash;

   radv_fill_code_object_record(device, shader_data, shader, shader->va);
   shader_data->rt_stack_size = stack_size;

   record->shader_stages_mask |= (1 << shader->info.stage);
   record->is_rt = true;
   switch (shader->info.stage) {
   case MESA_SHADER_RAYGEN:
      snprintf(shader_data->rt_shader_name, sizeof(shader_data->rt_shader_name), "rgen_%d", index);
      break;
   case MESA_SHADER_CLOSEST_HIT:
      snprintf(shader_data->rt_shader_name, sizeof(shader_data->rt_shader_name), "chit_%d", index);
      break;
   case MESA_SHADER_MISS:
      snprintf(shader_data->rt_shader_name, sizeof(shader_data->rt_shader_name), "miss_%d", index);
      break;
   case MESA_SHADER_INTERSECTION:
      snprintf(shader_data->rt_shader_name, sizeof(shader_data->rt_shader_name), "traversal");
      break;
   case MESA_SHADER_CALLABLE:
      snprintf(shader_data->rt_shader_name, sizeof(shader_data->rt_shader_name), "call_%d", index);
      break;
   case MESA_SHADER_COMPUTE:
      snprintf(shader_data->rt_shader_name, sizeof(shader_data->rt_shader_name), "_amdgpu_cs_main");
      break;
   default:
      unreachable("invalid rt stage");
   }
   record->num_shaders_combined = 1;

   simple_mtx_lock(&code_object->lock);
   list_addtail(&record->list, &code_object->record);
   code_object->record_count++;
   simple_mtx_unlock(&code_object->lock);

   return VK_SUCCESS;
}

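/*
 * Each ray tracing stage is reported to RGP as its own code object, so each
 * one needs a unique hash. Derive it from the pipeline hash and the stage
 * index; only the first 64 bits of the resulting SHA-1 are consumed.
 */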
static void
compute_unique_rt_sha(uint64_t pipeline_hash, unsigned index, unsigned char sha1[SHA1_DIGEST_LENGTH])
{
   struct mesa_sha1 ctx;
   _mesa_sha1_init(&ctx);
   _mesa_sha1_update(&ctx, &pipeline_hash, sizeof(pipeline_hash));
   _mesa_sha1_update(&ctx, &index, sizeof(index));
   _mesa_sha1_final(&ctx, sha1);
}

static VkResult
radv_register_rt_stage(struct radv_device *device, struct radv_ray_tracing_pipeline *pipeline, uint32_t index,
                       uint32_t stack_size, struct radv_shader *shader)
{
   unsigned char sha1[SHA1_DIGEST_LENGTH];

   compute_unique_rt_sha(pipeline->base.base.pipeline_hash, index, sha1);

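   /* The ac_sqtt bookkeeping keys everything by plain 64-bit hashes, so only
    * the first 8 bytes of the per-stage SHA-1 are used below. */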
   if (!ac_sqtt_add_pso_correlation(&device->sqtt, *(uint64_t *)sha1, pipeline->base.base.pipeline_hash))
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   if (!ac_sqtt_add_code_object_loader_event(&device->sqtt, *(uint64_t *)sha1, shader->va))
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   return radv_add_rt_record(device, &device->sqtt.rgp_code_object, pipeline, shader, stack_size, index,
                             *(uint64_t *)sha1);
}

static VkResult
radv_register_rt_pipeline(struct radv_device *device, struct radv_ray_tracing_pipeline *pipeline)
{
   VkResult result = VK_SUCCESS;

   uint32_t max_any_hit_stack_size = 0;
   uint32_t max_intersection_stack_size = 0;

   for (unsigned i = 0; i < pipeline->stage_count; i++) {
      struct radv_ray_tracing_stage *stage = &pipeline->stages[i];
      if (stage->stage == MESA_SHADER_ANY_HIT)
         max_any_hit_stack_size = MAX2(max_any_hit_stack_size, stage->stack_size);
      else if (stage->stage == MESA_SHADER_INTERSECTION)
         max_intersection_stack_size = MAX2(max_intersection_stack_size, stage->stack_size);

      if (!stage->shader)
         continue;

      result = radv_register_rt_stage(device, pipeline, i, stage->stack_size, stage->shader);
      if (result != VK_SUCCESS)
         return result;
   }

   uint32_t idx = pipeline->stage_count;

   /* Combined traversal shader: it can call into both any-hit and
    * intersection shaders, so report a stack size covering the worst case
    * of each. */
   if (pipeline->base.base.shaders[MESA_SHADER_INTERSECTION]) {
      result = radv_register_rt_stage(device, pipeline, idx++, max_any_hit_stack_size + max_intersection_stack_size,
                                      pipeline->base.base.shaders[MESA_SHADER_INTERSECTION]);
      if (result != VK_SUCCESS)
         return result;
   }

   /* Prolog (the compute entry point that launches raygen). */
   result = radv_register_rt_stage(device, pipeline, idx++, 0, pipeline->prolog);

   return result;
}

static VkResult
radv_register_pipeline(struct radv_device *device, struct radv_pipeline *pipeline)
{
   uint64_t base_va = ~0;

   if (!ac_sqtt_add_pso_correlation(&device->sqtt, pipeline->pipeline_hash, pipeline->pipeline_hash))
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   /* Find the lowest shader BO VA, which is reported as the load address of
    * the whole code object. */
   for (unsigned i = 0; i < MESA_VULKAN_SHADER_STAGES; i++) {
      struct radv_shader *shader = pipeline->shaders[i];
      uint64_t va;

      if (!shader)
         continue;

      va = radv_sqtt_shader_get_va_reloc(pipeline, i);
      base_va = MIN2(base_va, va);
   }

   if (!ac_sqtt_add_code_object_loader_event(&device->sqtt, pipeline->pipeline_hash, base_va))
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   return radv_add_code_object(device, pipeline);
}

static void
radv_unregister_records(struct radv_device *device, uint64_t hash)
{
   struct ac_sqtt *sqtt = &device->sqtt;
   struct rgp_pso_correlation *pso_correlation = &sqtt->rgp_pso_correlation;
   struct rgp_loader_events *loader_events = &sqtt->rgp_loader_events;
   struct rgp_code_object *code_object = &sqtt->rgp_code_object;

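   /* Each list is expected to contain at most one record per hash, hence the
    * breaks below. A hash that was never registered is simply not found,
    * which is harmless. */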
   /* Destroy the PSO correlation record. */
   simple_mtx_lock(&pso_correlation->lock);
   list_for_each_entry_safe (struct rgp_pso_correlation_record, record, &pso_correlation->record, list) {
      if (record->pipeline_hash[0] == hash) {
         pso_correlation->record_count--;
         list_del(&record->list);
         free(record);
         break;
      }
   }
   simple_mtx_unlock(&pso_correlation->lock);

   /* Destroy the code object loader record. */
   simple_mtx_lock(&loader_events->lock);
   list_for_each_entry_safe (struct rgp_loader_events_record, record, &loader_events->record, list) {
      if (record->code_object_hash[0] == hash) {
         loader_events->record_count--;
         list_del(&record->list);
         free(record);
         break;
      }
   }
   simple_mtx_unlock(&loader_events->lock);

   /* Destroy the code object record. */
   simple_mtx_lock(&code_object->lock);
   list_for_each_entry_safe (struct rgp_code_object_record, record, &code_object->record, list) {
      if (record->pipeline_hash[0] == hash) {
         code_object->record_count--;
         list_del(&record->list);
         free(record);
         break;
      }
   }
   simple_mtx_unlock(&code_object->lock);
}

VKAPI_ATTR VkResult VKAPI_CALL
sqtt_CreateGraphicsPipelines(VkDevice _device, VkPipelineCache pipelineCache, uint32_t count,
                             const VkGraphicsPipelineCreateInfo *pCreateInfos, const VkAllocationCallbacks *pAllocator,
                             VkPipeline *pPipelines)
{
   VK_FROM_HANDLE(radv_device, device, _device);
   VkResult result;

   result = device->layer_dispatch.rgp.CreateGraphicsPipelines(_device, pipelineCache, count, pCreateInfos, pAllocator,
                                                               pPipelines);
   if (result != VK_SUCCESS)
      return result;

   for (unsigned i = 0; i < count; i++) {
      VK_FROM_HANDLE(radv_pipeline, pipeline, pPipelines[i]);

      if (!pipeline)
         continue;

      const VkPipelineCreateFlagBits2KHR create_flags = vk_graphics_pipeline_create_flags(&pCreateInfos[i]);

      /* Pipeline libraries are not executable, so there is nothing to trace. */
      if (create_flags & VK_PIPELINE_CREATE_2_LIBRARY_BIT_KHR)
         continue;

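      /* Copy the graphics shaders into a single buffer so the whole pipeline
       * can be reported to RGP as one contiguous VA range. */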
      result = radv_sqtt_reloc_graphics_shaders(device, radv_pipeline_to_graphics(pipeline));
      if (result != VK_SUCCESS)
         goto fail;

      result = radv_register_pipeline(device, pipeline);
      if (result != VK_SUCCESS)
         goto fail;
   }

   return VK_SUCCESS;

fail:
   for (unsigned i = 0; i < count; i++) {
      sqtt_DestroyPipeline(_device, pPipelines[i], pAllocator);
      pPipelines[i] = VK_NULL_HANDLE;
   }
   return result;
}

VKAPI_ATTR VkResult VKAPI_CALL
sqtt_CreateComputePipelines(VkDevice _device, VkPipelineCache pipelineCache, uint32_t count,
                            const VkComputePipelineCreateInfo *pCreateInfos, const VkAllocationCallbacks *pAllocator,
                            VkPipeline *pPipelines)
{
   VK_FROM_HANDLE(radv_device, device, _device);
   VkResult result;

   result = device->layer_dispatch.rgp.CreateComputePipelines(_device, pipelineCache, count, pCreateInfos, pAllocator,
                                                              pPipelines);
   if (result != VK_SUCCESS)
      return result;

   for (unsigned i = 0; i < count; i++) {
      VK_FROM_HANDLE(radv_pipeline, pipeline, pPipelines[i]);

      if (!pipeline)
         continue;

      result = radv_register_pipeline(device, pipeline);
      if (result != VK_SUCCESS)
         goto fail;
   }

   return VK_SUCCESS;

fail:
   for (unsigned i = 0; i < count; i++) {
      sqtt_DestroyPipeline(_device, pPipelines[i], pAllocator);
      pPipelines[i] = VK_NULL_HANDLE;
   }
   return result;
}

VKAPI_ATTR VkResult VKAPI_CALL
sqtt_CreateRayTracingPipelinesKHR(VkDevice _device, VkDeferredOperationKHR deferredOperation,
                                  VkPipelineCache pipelineCache, uint32_t count,
                                  const VkRayTracingPipelineCreateInfoKHR *pCreateInfos,
                                  const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines)
{
   VK_FROM_HANDLE(radv_device, device, _device);
   VkResult result;

   result = device->layer_dispatch.rgp.CreateRayTracingPipelinesKHR(_device, deferredOperation, pipelineCache, count,
                                                                    pCreateInfos, pAllocator, pPipelines);
   if (result != VK_SUCCESS)
      return result;

   for (unsigned i = 0; i < count; i++) {
      VK_FROM_HANDLE(radv_pipeline, pipeline, pPipelines[i]);

      if (!pipeline)
         continue;

      const VkPipelineCreateFlagBits2KHR create_flags = vk_rt_pipeline_create_flags(&pCreateInfos[i]);

      /* Pipeline libraries are not executable, so there is nothing to trace. */
      if (create_flags & VK_PIPELINE_CREATE_2_LIBRARY_BIT_KHR)
         continue;

      result = radv_register_rt_pipeline(device, radv_pipeline_to_ray_tracing(pipeline));
      if (result != VK_SUCCESS)
         goto fail;
   }

   return VK_SUCCESS;

fail:
   for (unsigned i = 0; i < count; i++) {
      sqtt_DestroyPipeline(_device, pPipelines[i], pAllocator);
      pPipelines[i] = VK_NULL_HANDLE;
   }
   return result;
}

VKAPI_ATTR void VKAPI_CALL
sqtt_DestroyPipeline(VkDevice _device, VkPipeline _pipeline, const VkAllocationCallbacks *pAllocator)
{
   VK_FROM_HANDLE(radv_device, device, _device);
   VK_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);

   if (!_pipeline)
      return;

   /* Ray tracing pipelines have multiple records, each with its own hash. */
   if (pipeline->type == RADV_PIPELINE_RAY_TRACING) {
      /* One record per stage, plus one for the traversal shader and one for
       * the prolog. Hashes that were never registered are simply not found. */
      uint32_t record_count = radv_pipeline_to_ray_tracing(pipeline)->stage_count + 2;
      unsigned char sha1[SHA1_DIGEST_LENGTH];
      for (uint32_t i = 0; i < record_count; ++i) {
         compute_unique_rt_sha(pipeline->pipeline_hash, i, sha1);
         radv_unregister_records(device, *(uint64_t *)sha1);
      }
   } else {
      radv_unregister_records(device, pipeline->pipeline_hash);
   }

   if (pipeline->type == RADV_PIPELINE_GRAPHICS) {
      struct radv_graphics_pipeline *graphics_pipeline = radv_pipeline_to_graphics(pipeline);
      struct radv_sqtt_shaders_reloc *reloc = graphics_pipeline->sqtt_shaders_reloc;

      /* The reloc may not exist if pipeline creation failed before the
       * shaders were relocated. */
      if (reloc) {
         radv_free_shader_memory(device, reloc->alloc);
         free(reloc);
      }
   }

   device->layer_dispatch.rgp.DestroyPipeline(_device, _pipeline, pAllocator);
}

#undef API_MARKER