/*
 * Copyright © 2020 Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 */

#include "radv_cmd_buffer.h"
#include "radv_cs.h"
#include "radv_entrypoints.h"
#include "radv_pipeline_rt.h"
#include "radv_queue.h"
#include "radv_shader.h"
#include "radv_sqtt.h"
#include "vk_common_entrypoints.h"
#include "vk_semaphore.h"

#include "ac_rgp.h"
#include "ac_sqtt.h"

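/* Make the command stream point at the relocated shader copies: rewrite each
 * active stage's PGM_LO register (and, for the pre-GFX9 two-dword variants,
 * the MEM_BASE field of the corresponding PGM_HI register) with the VA
 * recorded in pipeline->sqtt_shaders_reloc.
 */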
void
radv_sqtt_emit_relocated_shaders(struct radv_cmd_buffer *cmd_buffer, struct radv_graphics_pipeline *pipeline)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   const struct radv_physical_device *pdev = radv_device_physical(device);
   const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
   struct radv_sqtt_shaders_reloc *reloc = pipeline->sqtt_shaders_reloc;
   struct radeon_cmdbuf *cs = cmd_buffer->cs;
   uint64_t va;

   radv_cs_add_buffer(device->ws, cs, reloc->bo);

   /* VS */
   if (pipeline->base.shaders[MESA_SHADER_VERTEX]) {
      struct radv_shader *vs = pipeline->base.shaders[MESA_SHADER_VERTEX];

      va = reloc->va[MESA_SHADER_VERTEX];
      if (vs->info.vs.as_ls) {
         radeon_set_sh_reg(cs, vs->info.regs.pgm_lo, va >> 8);
      } else if (vs->info.vs.as_es) {
         radeon_set_sh_reg_seq(cs, vs->info.regs.pgm_lo, 2);
         radeon_emit(cs, va >> 8);
         radeon_emit(cs, S_00B324_MEM_BASE(va >> 40));
      } else if (vs->info.is_ngg) {
         radeon_set_sh_reg(cs, vs->info.regs.pgm_lo, va >> 8);
      } else {
         radeon_set_sh_reg_seq(cs, vs->info.regs.pgm_lo, 2);
         radeon_emit(cs, va >> 8);
         radeon_emit(cs, S_00B124_MEM_BASE(va >> 40));
      }
   }

   /* TCS */
   if (pipeline->base.shaders[MESA_SHADER_TESS_CTRL]) {
      const struct radv_shader *tcs = pipeline->base.shaders[MESA_SHADER_TESS_CTRL];

      va = reloc->va[MESA_SHADER_TESS_CTRL];

      if (gfx_level >= GFX9) {
         radeon_set_sh_reg(cs, tcs->info.regs.pgm_lo, va >> 8);
      } else {
         radeon_set_sh_reg_seq(cs, tcs->info.regs.pgm_lo, 2);
         radeon_emit(cs, va >> 8);
         radeon_emit(cs, S_00B424_MEM_BASE(va >> 40));
      }
   }

   /* TES */
   if (pipeline->base.shaders[MESA_SHADER_TESS_EVAL]) {
      struct radv_shader *tes = pipeline->base.shaders[MESA_SHADER_TESS_EVAL];

      va = reloc->va[MESA_SHADER_TESS_EVAL];
      if (tes->info.is_ngg) {
         radeon_set_sh_reg(cs, tes->info.regs.pgm_lo, va >> 8);
      } else if (tes->info.tes.as_es) {
         radeon_set_sh_reg_seq(cs, tes->info.regs.pgm_lo, 2);
         radeon_emit(cs, va >> 8);
         radeon_emit(cs, S_00B324_MEM_BASE(va >> 40));
      } else {
         radeon_set_sh_reg_seq(cs, tes->info.regs.pgm_lo, 2);
         radeon_emit(cs, va >> 8);
         radeon_emit(cs, S_00B124_MEM_BASE(va >> 40));
      }
   }

   /* GS */
   if (pipeline->base.shaders[MESA_SHADER_GEOMETRY]) {
      struct radv_shader *gs = pipeline->base.shaders[MESA_SHADER_GEOMETRY];

      va = reloc->va[MESA_SHADER_GEOMETRY];
      if (gs->info.is_ngg) {
         radeon_set_sh_reg(cs, gs->info.regs.pgm_lo, va >> 8);
      } else {
         if (gfx_level >= GFX9) {
            radeon_set_sh_reg(cs, gs->info.regs.pgm_lo, va >> 8);
         } else {
            radeon_set_sh_reg_seq(cs, gs->info.regs.pgm_lo, 2);
            radeon_emit(cs, va >> 8);
            radeon_emit(cs, S_00B224_MEM_BASE(va >> 40));
         }
      }
   }

   /* FS */
   if (pipeline->base.shaders[MESA_SHADER_FRAGMENT]) {
      const struct radv_shader *ps = pipeline->base.shaders[MESA_SHADER_FRAGMENT];

      va = reloc->va[MESA_SHADER_FRAGMENT];

      radeon_set_sh_reg_seq(cs, ps->info.regs.pgm_lo, 2);
      radeon_emit(cs, va >> 8);
      radeon_emit(cs, S_00B024_MEM_BASE(va >> 40));
   }

   /* MS */
   if (pipeline->base.shaders[MESA_SHADER_MESH]) {
      const struct radv_shader *ms = pipeline->base.shaders[MESA_SHADER_MESH];

      va = reloc->va[MESA_SHADER_MESH];

      radeon_set_sh_reg(cs, ms->info.regs.pgm_lo, va >> 8);
   }
}

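/* Return the relocated VA of a shader for graphics pipelines; other pipeline
 * types use the original shader VA.
 */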
static uint64_t
radv_sqtt_shader_get_va_reloc(struct radv_pipeline *pipeline, gl_shader_stage stage)
{
   if (pipeline->type == RADV_PIPELINE_GRAPHICS) {
      struct radv_graphics_pipeline *graphics_pipeline = radv_pipeline_to_graphics(pipeline);
      struct radv_sqtt_shaders_reloc *reloc = graphics_pipeline->sqtt_shaders_reloc;
      return reloc->va[stage];
   }

   return radv_shader_get_va(pipeline->shaders[stage]);
}

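/* Copy all shader binaries of a graphics pipeline into a single contiguous
 * allocation (RGP requires the shaders of a pipeline to be contiguous) and
 * record the per-stage VAs. When shaders live in invisible VRAM, the copy
 * goes through the shader DMA queue instead of a CPU memcpy.
 */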
static VkResult
radv_sqtt_reloc_graphics_shaders(struct radv_device *device, struct radv_graphics_pipeline *pipeline)
{
   struct radv_shader_dma_submission *submission = NULL;
   struct radv_sqtt_shaders_reloc *reloc;
   uint32_t code_size = 0;

   reloc = calloc(1, sizeof(*reloc));
   if (!reloc)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   /* Compute the total code size. */
   for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; i++) {
      const struct radv_shader *shader = pipeline->base.shaders[i];
      if (!shader)
         continue;

      code_size += align(shader->code_size, RADV_SHADER_ALLOC_ALIGNMENT);
   }

   /* Allocate memory for all shader binaries. */
   reloc->alloc = radv_alloc_shader_memory(device, code_size, false, pipeline);
   if (!reloc->alloc) {
      free(reloc);
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;
   }

   reloc->bo = reloc->alloc->arena->bo;

   /* Relocate shader binaries to be contiguous in memory as requested by RGP. */
   uint64_t slab_va = radv_buffer_get_va(reloc->bo) + reloc->alloc->offset;
   char *slab_ptr = reloc->alloc->arena->ptr + reloc->alloc->offset;
   uint64_t offset = 0;

   if (device->shader_use_invisible_vram) {
      submission = radv_shader_dma_get_submission(device, reloc->bo, slab_va, code_size);
      if (!submission)
         return VK_ERROR_UNKNOWN;
   }

   for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i) {
      const struct radv_shader *shader = pipeline->base.shaders[i];
      void *dest_ptr;
      if (!shader)
         continue;

      reloc->va[i] = slab_va + offset;

      if (device->shader_use_invisible_vram)
         dest_ptr = submission->ptr + offset;
      else
         dest_ptr = slab_ptr + offset;

      memcpy(dest_ptr, shader->code, shader->code_size);

      offset += align(shader->code_size, RADV_SHADER_ALLOC_ALIGNMENT);
   }

   if (device->shader_use_invisible_vram) {
      uint64_t upload_seq = 0;

      if (!radv_shader_dma_submit(device, submission, &upload_seq))
         return VK_ERROR_UNKNOWN;

      for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i) {
         struct radv_shader *shader = pipeline->base.shaders[i];

         if (!shader)
            continue;

         shader->upload_seq = upload_seq;
      }

      if (pipeline->base.gs_copy_shader)
         pipeline->base.gs_copy_shader->upload_seq = upload_seq;
   }

   pipeline->sqtt_shaders_reloc = reloc;

   return VK_SUCCESS;
}

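/* General API markers bracket each intercepted command so the trace records
 * which Vulkan entrypoint the enclosed commands originate from.
 */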
static void
radv_write_begin_general_api_marker(struct radv_cmd_buffer *cmd_buffer, enum rgp_sqtt_marker_general_api_type api_type)
{
   struct rgp_sqtt_marker_general_api marker = {0};

   marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_GENERAL_API;
   marker.api_type = api_type;

   radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
}

static void
radv_write_end_general_api_marker(struct radv_cmd_buffer *cmd_buffer, enum rgp_sqtt_marker_general_api_type api_type)
{
   struct rgp_sqtt_marker_general_api marker = {0};

   marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_GENERAL_API;
   marker.api_type = api_type;
   marker.is_end = 1;

   radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
}

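/* Write a per-event (draw/dispatch) marker. UINT_MAX is the sentinel for "no
 * user-data register"; such indices are reported as zero.
 */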
static void
radv_write_event_marker(struct radv_cmd_buffer *cmd_buffer, enum rgp_sqtt_marker_event_type api_type,
                        uint32_t vertex_offset_user_data, uint32_t instance_offset_user_data,
                        uint32_t draw_index_user_data)
{
   struct rgp_sqtt_marker_event marker = {0};

   marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_EVENT;
   marker.api_type = api_type;
   marker.cmd_id = cmd_buffer->state.num_events++;
   marker.cb_id = cmd_buffer->sqtt_cb_id;

   if (vertex_offset_user_data == UINT_MAX || instance_offset_user_data == UINT_MAX) {
      vertex_offset_user_data = 0;
      instance_offset_user_data = 0;
   }

   if (draw_index_user_data == UINT_MAX)
      draw_index_user_data = vertex_offset_user_data;

   marker.vertex_offset_reg_idx = vertex_offset_user_data;
   marker.instance_offset_reg_idx = instance_offset_user_data;
   marker.draw_index_reg_idx = draw_index_user_data;

   radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
}

static void
radv_write_event_with_dims_marker(struct radv_cmd_buffer *cmd_buffer, enum rgp_sqtt_marker_event_type api_type,
                                  uint32_t x, uint32_t y, uint32_t z)
{
   struct rgp_sqtt_marker_event_with_dims marker = {0};

   marker.event.identifier = RGP_SQTT_MARKER_IDENTIFIER_EVENT;
   marker.event.api_type = api_type;
   marker.event.cmd_id = cmd_buffer->state.num_events++;
   marker.event.cb_id = cmd_buffer->sqtt_cb_id;
   marker.event.has_thread_dims = 1;

   marker.thread_x = x;
   marker.thread_y = y;
   marker.thread_z = z;

   radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
}

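/* Write a user event marker (push/pop of a user label). Pop markers carry no
 * string; the other types append the label, padded to a dword boundary.
 */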
void
radv_write_user_event_marker(struct radv_cmd_buffer *cmd_buffer, enum rgp_sqtt_marker_user_event_type type,
                             const char *str)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);

   if (likely(!device->sqtt.bo))
      return;

   if (type == UserEventPop) {
      assert(str == NULL);
      struct rgp_sqtt_marker_user_event marker = {0};
      marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_USER_EVENT;
      marker.data_type = type;

      radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
   } else {
      assert(str != NULL);
      unsigned len = strlen(str);
      struct rgp_sqtt_marker_user_event_with_length marker = {0};
      marker.user_event.identifier = RGP_SQTT_MARKER_IDENTIFIER_USER_EVENT;
      marker.user_event.data_type = type;
      marker.length = align(len, 4);

      uint8_t *buffer = alloca(sizeof(marker) + marker.length);
      memset(buffer, 0, sizeof(marker) + marker.length);
      memcpy(buffer, &marker, sizeof(marker));
      memcpy(buffer + sizeof(marker), str, len);

      radv_emit_sqtt_userdata(cmd_buffer, buffer, sizeof(marker) / 4 + marker.length / 4);
   }
}

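/* Emit the command-buffer start marker and reserve a unique SQTT cb_id for
 * the ring targeted by this command buffer.
 */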
void
radv_describe_begin_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   uint64_t device_id = (uintptr_t)device;
   struct rgp_sqtt_marker_cb_start marker = {0};

   if (likely(!device->sqtt.bo))
      return;

   /* Reserve a command buffer ID for SQTT. */
   const struct radv_physical_device *pdev = radv_device_physical(device);
   enum amd_ip_type ip_type = radv_queue_family_to_ring(pdev, cmd_buffer->qf);
   union rgp_sqtt_marker_cb_id cb_id = ac_sqtt_get_next_cmdbuf_id(&device->sqtt, ip_type);
   cmd_buffer->sqtt_cb_id = cb_id.all;

   marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_CB_START;
   marker.cb_id = cmd_buffer->sqtt_cb_id;
   marker.device_id_low = device_id;
   marker.device_id_high = device_id >> 32;
   marker.queue = cmd_buffer->qf;
   marker.queue_flags = VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT;

   if (cmd_buffer->qf == RADV_QUEUE_GENERAL)
      marker.queue_flags |= VK_QUEUE_GRAPHICS_BIT;

   if (!radv_sparse_queue_enabled(pdev))
      marker.queue_flags |= VK_QUEUE_SPARSE_BINDING_BIT;

   radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
}

void
radv_describe_end_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   uint64_t device_id = (uintptr_t)device;
   struct rgp_sqtt_marker_cb_end marker = {0};

   if (likely(!device->sqtt.bo))
      return;

   marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_CB_END;
   marker.cb_id = cmd_buffer->sqtt_cb_id;
   marker.device_id_low = device_id;
   marker.device_id_high = device_id >> 32;

   radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
}

void
radv_describe_draw(struct radv_cmd_buffer *cmd_buffer)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);

   if (likely(!device->sqtt.bo))
      return;

   radv_write_event_marker(cmd_buffer, cmd_buffer->state.current_event_type, UINT_MAX, UINT_MAX, UINT_MAX);
}

void
radv_describe_dispatch(struct radv_cmd_buffer *cmd_buffer, const struct radv_dispatch_info *info)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);

   if (likely(!device->sqtt.bo))
      return;

   if (info->indirect) {
      radv_write_event_marker(cmd_buffer, cmd_buffer->state.current_event_type, UINT_MAX, UINT_MAX, UINT_MAX);
   } else {
      radv_write_event_with_dims_marker(cmd_buffer, cmd_buffer->state.current_event_type, info->blocks[0],
                                        info->blocks[1], info->blocks[2]);
   }
}

void
radv_describe_begin_render_pass_clear(struct radv_cmd_buffer *cmd_buffer, VkImageAspectFlagBits aspects)
{
   cmd_buffer->state.current_event_type =
      (aspects & VK_IMAGE_ASPECT_COLOR_BIT) ? EventRenderPassColorClear : EventRenderPassDepthStencilClear;
}

void
radv_describe_end_render_pass_clear(struct radv_cmd_buffer *cmd_buffer)
{
   cmd_buffer->state.current_event_type = EventInternalUnknown;
}

void
radv_describe_begin_render_pass_resolve(struct radv_cmd_buffer *cmd_buffer)
{
   cmd_buffer->state.current_event_type = EventRenderPassResolve;
}

void
radv_describe_end_render_pass_resolve(struct radv_cmd_buffer *cmd_buffer)
{
   cmd_buffer->state.current_event_type = EventInternalUnknown;
}

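/* Barrier-end markers are emitted lazily: radv_describe_barrier_end() only
 * sets pending_sqtt_barrier_end, and this function emits the marker with the
 * caches flushed/invalidated since then (accumulated in sqtt_flush_bits) plus
 * the number of layout transitions that happened inside the barrier.
 */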
void
radv_describe_barrier_end_delayed(struct radv_cmd_buffer *cmd_buffer)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   struct rgp_sqtt_marker_barrier_end marker = {0};

   if (likely(!device->sqtt.bo) || !cmd_buffer->state.pending_sqtt_barrier_end)
      return;

   cmd_buffer->state.pending_sqtt_barrier_end = false;

   marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_BARRIER_END;
   marker.cb_id = cmd_buffer->sqtt_cb_id;

   marker.num_layout_transitions = cmd_buffer->state.num_layout_transitions;

   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_WAIT_ON_EOP_TS)
      marker.wait_on_eop_ts = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_VS_PARTIAL_FLUSH)
      marker.vs_partial_flush = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_PS_PARTIAL_FLUSH)
      marker.ps_partial_flush = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_CS_PARTIAL_FLUSH)
      marker.cs_partial_flush = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_PFP_SYNC_ME)
      marker.pfp_sync_me = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_SYNC_CP_DMA)
      marker.sync_cp_dma = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_VMEM_L0)
      marker.inval_tcp = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_ICACHE)
      marker.inval_sqI = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_SMEM_L0)
      marker.inval_sqK = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_FLUSH_L2)
      marker.flush_tcc = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_L2)
      marker.inval_tcc = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_FLUSH_CB)
      marker.flush_cb = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_CB)
      marker.inval_cb = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_FLUSH_DB)
      marker.flush_db = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_DB)
      marker.inval_db = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_L1)
      marker.inval_gl1 = true;

   radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);

   cmd_buffer->state.num_layout_transitions = 0;
}

void
radv_describe_barrier_start(struct radv_cmd_buffer *cmd_buffer, enum rgp_barrier_reason reason)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   struct rgp_sqtt_marker_barrier_start marker = {0};

   if (likely(!device->sqtt.bo))
      return;

   if (cmd_buffer->state.in_barrier) {
      assert(!"attempted to start a barrier while already in a barrier");
      return;
   }

   radv_describe_barrier_end_delayed(cmd_buffer);
   cmd_buffer->state.sqtt_flush_bits = 0;
   cmd_buffer->state.in_barrier = true;

   marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_BARRIER_START;
   marker.cb_id = cmd_buffer->sqtt_cb_id;
   marker.dword02 = reason;

   radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
}

void
radv_describe_barrier_end(struct radv_cmd_buffer *cmd_buffer)
{
   cmd_buffer->state.in_barrier = false;
   cmd_buffer->state.pending_sqtt_barrier_end = true;
}

void
radv_describe_layout_transition(struct radv_cmd_buffer *cmd_buffer, const struct radv_barrier_data *barrier)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   struct rgp_sqtt_marker_layout_transition marker = {0};

   if (likely(!device->sqtt.bo))
      return;

   if (!cmd_buffer->state.in_barrier) {
      assert(!"layout transition marker should be only emitted inside a barrier marker");
      return;
   }

   marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_LAYOUT_TRANSITION;
   marker.depth_stencil_expand = barrier->layout_transitions.depth_stencil_expand;
   marker.htile_hiz_range_expand = barrier->layout_transitions.htile_hiz_range_expand;
   marker.depth_stencil_resummarize = barrier->layout_transitions.depth_stencil_resummarize;
   marker.dcc_decompress = barrier->layout_transitions.dcc_decompress;
   marker.fmask_decompress = barrier->layout_transitions.fmask_decompress;
   marker.fast_clear_eliminate = barrier->layout_transitions.fast_clear_eliminate;
   marker.fmask_color_expand = barrier->layout_transitions.fmask_color_expand;
   marker.init_mask_ram = barrier->layout_transitions.init_mask_ram;

   radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);

   cmd_buffer->state.num_layout_transitions++;
}

void
radv_describe_begin_accel_struct_build(struct radv_cmd_buffer *cmd_buffer, uint32_t count)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);

   if (likely(!device->sqtt.bo))
      return;

   char marker[64];
   snprintf(marker, sizeof(marker), "vkCmdBuildAccelerationStructuresKHR(%u)", count);
   radv_write_user_event_marker(cmd_buffer, UserEventPush, marker);
}

void
radv_describe_end_accel_struct_build(struct radv_cmd_buffer *cmd_buffer)
{
   radv_write_user_event_marker(cmd_buffer, UserEventPop, NULL);
}

static void
radv_describe_pipeline_bind(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint pipelineBindPoint,
                            struct radv_pipeline *pipeline)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   struct rgp_sqtt_marker_pipeline_bind marker = {0};

   if (likely(!device->sqtt.bo))
      return;

   marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_BIND_PIPELINE;
   marker.cb_id = cmd_buffer->sqtt_cb_id;
   marker.bind_point = pipelineBindPoint;
   marker.api_pso_hash[0] = pipeline->pipeline_hash;
   marker.api_pso_hash[1] = pipeline->pipeline_hash >> 32;

   radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
}

/* Queue events */
static void
radv_describe_queue_event(struct radv_queue *queue, struct rgp_queue_event_record *record)
{
   struct radv_device *device = radv_queue_device(queue);
   struct ac_sqtt *sqtt = &device->sqtt;
   struct rgp_queue_event *queue_event = &sqtt->rgp_queue_event;

   simple_mtx_lock(&queue_event->lock);
   list_addtail(&record->list, &queue_event->record);
   queue_event->record_count++;
   simple_mtx_unlock(&queue_event->lock);
}

static VkResult
radv_describe_queue_present(struct radv_queue *queue, uint64_t cpu_timestamp, void *gpu_timestamp_ptr)
{
   struct rgp_queue_event_record *record;

   record = calloc(1, sizeof(struct rgp_queue_event_record));
   if (!record)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   record->event_type = SQTT_QUEUE_TIMING_EVENT_PRESENT;
   record->cpu_timestamp = cpu_timestamp;
   record->gpu_timestamps[0] = gpu_timestamp_ptr;
   record->queue_info_index = queue->vk.queue_family_index;

   radv_describe_queue_event(queue, record);

   return VK_SUCCESS;
}

static VkResult
radv_describe_queue_submit(struct radv_queue *queue, struct radv_cmd_buffer *cmd_buffer, uint32_t cmdbuf_idx,
                           uint64_t cpu_timestamp, void *pre_gpu_timestamp_ptr, void *post_gpu_timestamp_ptr)
{
   struct radv_device *device = radv_queue_device(queue);
   struct rgp_queue_event_record *record;

   record = calloc(1, sizeof(struct rgp_queue_event_record));
   if (!record)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   record->event_type = SQTT_QUEUE_TIMING_EVENT_CMDBUF_SUBMIT;
   record->api_id = (uintptr_t)cmd_buffer;
   record->cpu_timestamp = cpu_timestamp;
   record->frame_index = device->vk.current_frame;
   record->gpu_timestamps[0] = pre_gpu_timestamp_ptr;
   record->gpu_timestamps[1] = post_gpu_timestamp_ptr;
   record->queue_info_index = queue->vk.queue_family_index;
   record->submit_sub_index = cmdbuf_idx;

   radv_describe_queue_event(queue, record);

   return VK_SUCCESS;
}

static VkResult
radv_describe_queue_semaphore(struct radv_queue *queue, struct vk_semaphore *sync,
                              enum sqtt_queue_event_type event_type)
{
   struct rgp_queue_event_record *record;

   record = calloc(1, sizeof(struct rgp_queue_event_record));
   if (!record)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   record->event_type = event_type;
   record->api_id = (uintptr_t)sync;
   record->cpu_timestamp = os_time_get_nano();
   record->queue_info_index = queue->vk.queue_family_index;

   radv_describe_queue_event(queue, record);

   return VK_SUCCESS;
}

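/* Called once per present: stop and dump the capture that is in flight (if
 * any), then start a new capture when a trigger is pending.
 */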
static void
radv_handle_sqtt(VkQueue _queue)
{
   VK_FROM_HANDLE(radv_queue, queue, _queue);
   struct radv_device *device = radv_queue_device(queue);
   const struct radv_physical_device *pdev = radv_device_physical(device);
   bool trigger = device->sqtt_triggered;
   device->sqtt_triggered = false;

   if (device->sqtt_enabled) {
      struct ac_sqtt_trace sqtt_trace = {0};

      radv_end_sqtt(queue);
      device->sqtt_enabled = false;

      /* TODO: Do something better than this whole sync. */
      device->vk.dispatch_table.QueueWaitIdle(_queue);

      if (radv_get_sqtt_trace(queue, &sqtt_trace)) {
         struct ac_spm_trace spm_trace;

         if (device->spm.bo)
            ac_spm_get_trace(&device->spm, &spm_trace);

         ac_dump_rgp_capture(&pdev->info, &sqtt_trace, device->spm.bo ? &spm_trace : NULL);
      } else {
         /* Trigger a new capture if the driver failed to get
          * the trace because the buffer was too small.
          */
         trigger = true;
      }

      /* Clear resources used for this capture. */
      radv_reset_sqtt_trace(device);
   }

   if (trigger) {
      if (ac_check_profile_state(&pdev->info)) {
         fprintf(stderr, "radv: Canceling RGP trace request as a hang condition has been "
                         "detected. Force the GPU into a profiling mode with e.g. "
                         "\"echo profile_peak  > "
                         "/sys/class/drm/card0/device/power_dpm_force_performance_level\"\n");
         return;
      }

      /* Sample CPU/GPU clocks before starting the trace. */
      if (!radv_sqtt_sample_clocks(device)) {
         fprintf(stderr, "radv: Failed to sample clocks\n");
      }

      radv_begin_sqtt(queue);
      assert(!device->sqtt_enabled);
      device->sqtt_enabled = true;
   }
}

VKAPI_ATTR VkResult VKAPI_CALL
sqtt_QueuePresentKHR(VkQueue _queue, const VkPresentInfoKHR *pPresentInfo)
{
   VK_FROM_HANDLE(radv_queue, queue, _queue);
   struct radv_device *device = radv_queue_device(queue);
   VkResult result;

   queue->sqtt_present = true;

   result = device->layer_dispatch.rgp.QueuePresentKHR(_queue, pPresentInfo);
   if (result != VK_SUCCESS && result != VK_SUBOPTIMAL_KHR)
      return result;

   queue->sqtt_present = false;

   radv_handle_sqtt(_queue);

   return VK_SUCCESS;
}

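/* Present path of sqtt_QueueSubmit2: prepend a timed command buffer that
 * writes a TOP_OF_PIPE timestamp, which becomes the GPU timestamp of the
 * present queue event.
 */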
static VkResult
radv_sqtt_wsi_submit(VkQueue _queue, uint32_t submitCount, const VkSubmitInfo2 *pSubmits, VkFence _fence)
{
   VK_FROM_HANDLE(radv_queue, queue, _queue);
   struct radv_device *device = radv_queue_device(queue);
   VkCommandBufferSubmitInfo *new_cmdbufs = NULL;
   struct radeon_winsys_bo *gpu_timestamp_bo;
   uint32_t gpu_timestamp_offset;
   VkCommandBuffer timed_cmdbuf;
   void *gpu_timestamp_ptr;
   uint64_t cpu_timestamp;
   VkResult result = VK_SUCCESS;

   assert(submitCount <= 1 && pSubmits != NULL);

   for (uint32_t i = 0; i < submitCount; i++) {
      const VkSubmitInfo2 *pSubmit = &pSubmits[i];
      VkSubmitInfo2 sqtt_submit = *pSubmit;

      assert(sqtt_submit.commandBufferInfoCount <= 1);

      /* Command buffers */
      uint32_t new_cmdbuf_count = sqtt_submit.commandBufferInfoCount + 1;

      new_cmdbufs = malloc(new_cmdbuf_count * sizeof(*new_cmdbufs));
      if (!new_cmdbufs)
         return VK_ERROR_OUT_OF_HOST_MEMORY;

      /* Sample the current CPU time before building the GPU timestamp cmdbuf. */
      cpu_timestamp = os_time_get_nano();

      result = radv_sqtt_acquire_gpu_timestamp(device, &gpu_timestamp_bo, &gpu_timestamp_offset, &gpu_timestamp_ptr);
      if (result != VK_SUCCESS)
         goto fail;

      result = radv_sqtt_get_timed_cmdbuf(queue, gpu_timestamp_bo, gpu_timestamp_offset,
                                          VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT, &timed_cmdbuf);
      if (result != VK_SUCCESS)
         goto fail;

      new_cmdbufs[0] = (VkCommandBufferSubmitInfo){
         .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO,
         .commandBuffer = timed_cmdbuf,
      };

      if (sqtt_submit.commandBufferInfoCount == 1)
         new_cmdbufs[1] = sqtt_submit.pCommandBufferInfos[0];

      sqtt_submit.commandBufferInfoCount = new_cmdbuf_count;
      sqtt_submit.pCommandBufferInfos = new_cmdbufs;

      radv_describe_queue_present(queue, cpu_timestamp, gpu_timestamp_ptr);

      result = device->layer_dispatch.rgp.QueueSubmit2(_queue, 1, &sqtt_submit, _fence);
      if (result != VK_SUCCESS)
         goto fail;

      FREE(new_cmdbufs);
   }

   return result;

fail:
   FREE(new_cmdbufs);
   return result;
}

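/* Intercept submits for queue timing: record wait/signal semaphore events and
 * sandwich every application command buffer between two timed command buffers
 * (TOP_OF_PIPE before, BOTTOM_OF_PIPE after) whose timestamps bound its GPU
 * execution.
 */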
VKAPI_ATTR VkResult VKAPI_CALL
sqtt_QueueSubmit2(VkQueue _queue, uint32_t submitCount, const VkSubmitInfo2 *pSubmits, VkFence _fence)
{
   VK_FROM_HANDLE(radv_queue, queue, _queue);
   struct radv_device *device = radv_queue_device(queue);
   const bool is_gfx_or_ace = queue->state.qf == RADV_QUEUE_GENERAL || queue->state.qf == RADV_QUEUE_COMPUTE;
   VkCommandBufferSubmitInfo *new_cmdbufs = NULL;
   VkResult result = VK_SUCCESS;

   /* Only consider queue events on graphics/compute when enabled. */
   if (!device->sqtt_enabled || !radv_sqtt_queue_events_enabled() || !is_gfx_or_ace)
      return device->layer_dispatch.rgp.QueueSubmit2(_queue, submitCount, pSubmits, _fence);

   for (uint32_t i = 0; i < submitCount; i++) {
      const VkSubmitInfo2 *pSubmit = &pSubmits[i];

      /* Wait semaphores */
      for (uint32_t j = 0; j < pSubmit->waitSemaphoreInfoCount; j++) {
         const VkSemaphoreSubmitInfo *pWaitSemaphoreInfo = &pSubmit->pWaitSemaphoreInfos[j];
         VK_FROM_HANDLE(vk_semaphore, sem, pWaitSemaphoreInfo->semaphore);
         radv_describe_queue_semaphore(queue, sem, SQTT_QUEUE_TIMING_EVENT_WAIT_SEMAPHORE);
      }
   }

   if (queue->sqtt_present)
      return radv_sqtt_wsi_submit(_queue, submitCount, pSubmits, _fence);

   for (uint32_t i = 0; i < submitCount; i++) {
      const VkSubmitInfo2 *pSubmit = &pSubmits[i];
      VkSubmitInfo2 sqtt_submit = *pSubmit;

      /* Command buffers */
      uint32_t new_cmdbuf_count = sqtt_submit.commandBufferInfoCount * 3;
      uint32_t cmdbuf_idx = 0;

      new_cmdbufs = malloc(new_cmdbuf_count * sizeof(*new_cmdbufs));
      if (!new_cmdbufs)
         return VK_ERROR_OUT_OF_HOST_MEMORY;

      for (uint32_t j = 0; j < sqtt_submit.commandBufferInfoCount; j++) {
         const VkCommandBufferSubmitInfo *pCommandBufferInfo = &sqtt_submit.pCommandBufferInfos[j];
         struct radeon_winsys_bo *gpu_timestamps_bo[2];
         uint32_t gpu_timestamps_offset[2];
         VkCommandBuffer pre_timed_cmdbuf, post_timed_cmdbuf;
         void *gpu_timestamps_ptr[2];
         uint64_t cpu_timestamp;

         /* Sample the current CPU time before building the timed cmdbufs. */
         cpu_timestamp = os_time_get_nano();

         result = radv_sqtt_acquire_gpu_timestamp(device, &gpu_timestamps_bo[0], &gpu_timestamps_offset[0],
                                                  &gpu_timestamps_ptr[0]);
         if (result != VK_SUCCESS)
            goto fail;

         result = radv_sqtt_get_timed_cmdbuf(queue, gpu_timestamps_bo[0], gpu_timestamps_offset[0],
                                             VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT, &pre_timed_cmdbuf);
         if (result != VK_SUCCESS)
            goto fail;

         new_cmdbufs[cmdbuf_idx++] = (VkCommandBufferSubmitInfo){
            .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO,
            .commandBuffer = pre_timed_cmdbuf,
         };

         new_cmdbufs[cmdbuf_idx++] = *pCommandBufferInfo;

         result = radv_sqtt_acquire_gpu_timestamp(device, &gpu_timestamps_bo[1], &gpu_timestamps_offset[1],
                                                  &gpu_timestamps_ptr[1]);
         if (result != VK_SUCCESS)
            goto fail;

         result = radv_sqtt_get_timed_cmdbuf(queue, gpu_timestamps_bo[1], gpu_timestamps_offset[1],
                                             VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT, &post_timed_cmdbuf);
         if (result != VK_SUCCESS)
            goto fail;

         new_cmdbufs[cmdbuf_idx++] = (VkCommandBufferSubmitInfo){
            .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO,
            .commandBuffer = post_timed_cmdbuf,
         };

         VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, pCommandBufferInfo->commandBuffer);
         radv_describe_queue_submit(queue, cmd_buffer, j, cpu_timestamp, gpu_timestamps_ptr[0], gpu_timestamps_ptr[1]);
      }

      sqtt_submit.commandBufferInfoCount = new_cmdbuf_count;
      sqtt_submit.pCommandBufferInfos = new_cmdbufs;

      result = device->layer_dispatch.rgp.QueueSubmit2(_queue, 1, &sqtt_submit, _fence);
      if (result != VK_SUCCESS)
         goto fail;

      /* Signal semaphores */
      for (uint32_t j = 0; j < sqtt_submit.signalSemaphoreInfoCount; j++) {
         const VkSemaphoreSubmitInfo *pSignalSemaphoreInfo = &sqtt_submit.pSignalSemaphoreInfos[j];
         VK_FROM_HANDLE(vk_semaphore, sem, pSignalSemaphoreInfo->semaphore);
         radv_describe_queue_semaphore(queue, sem, SQTT_QUEUE_TIMING_EVENT_SIGNAL_SEMAPHORE);
      }

      FREE(new_cmdbufs);
   }

   return result;

fail:
   FREE(new_cmdbufs);
   return result;
}

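/* The EVENT_MARKER family wraps a command in begin/end general API markers
 * and tags the events it generates with the matching RGP event type before
 * forwarding the call to the next layer.
 */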
#define EVENT_MARKER_BASE(cmd_name, api_name, event_name, ...)                                                         \
   VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);                                                         \
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);                                                    \
   radv_write_begin_general_api_marker(cmd_buffer, ApiCmd##api_name);                                                  \
   cmd_buffer->state.current_event_type = EventCmd##event_name;                                                        \
   device->layer_dispatch.rgp.Cmd##cmd_name(__VA_ARGS__);                                                              \
   cmd_buffer->state.current_event_type = EventInternalUnknown;                                                        \
   radv_write_end_general_api_marker(cmd_buffer, ApiCmd##api_name);

#define EVENT_MARKER_ALIAS(cmd_name, api_name, ...) EVENT_MARKER_BASE(cmd_name, api_name, api_name, __VA_ARGS__);

#define EVENT_MARKER(cmd_name, ...) EVENT_MARKER_ALIAS(cmd_name, cmd_name, __VA_ARGS__);

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDraw(VkCommandBuffer commandBuffer, uint32_t vertexCount, uint32_t instanceCount, uint32_t firstVertex,
             uint32_t firstInstance)
{
   EVENT_MARKER(Draw, commandBuffer, vertexCount, instanceCount, firstVertex, firstInstance);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDrawIndexed(VkCommandBuffer commandBuffer, uint32_t indexCount, uint32_t instanceCount, uint32_t firstIndex,
                    int32_t vertexOffset, uint32_t firstInstance)
{
   EVENT_MARKER(DrawIndexed, commandBuffer, indexCount, instanceCount, firstIndex, vertexOffset, firstInstance);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDrawIndirect(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, uint32_t drawCount,
                     uint32_t stride)
{
   EVENT_MARKER(DrawIndirect, commandBuffer, buffer, offset, drawCount, stride);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, uint32_t drawCount,
                            uint32_t stride)
{
   EVENT_MARKER(DrawIndexedIndirect, commandBuffer, buffer, offset, drawCount, stride);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDrawIndirectCount(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, VkBuffer countBuffer,
                          VkDeviceSize countBufferOffset, uint32_t maxDrawCount, uint32_t stride)
{
   EVENT_MARKER(DrawIndirectCount, commandBuffer, buffer, offset, countBuffer, countBufferOffset, maxDrawCount, stride);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDrawIndexedIndirectCount(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset,
                                 VkBuffer countBuffer, VkDeviceSize countBufferOffset, uint32_t maxDrawCount,
                                 uint32_t stride)
{
   EVENT_MARKER(DrawIndexedIndirectCount, commandBuffer, buffer, offset, countBuffer, countBufferOffset, maxDrawCount,
                stride);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDispatch(VkCommandBuffer commandBuffer, uint32_t x, uint32_t y, uint32_t z)
{
   EVENT_MARKER_ALIAS(DispatchBase, Dispatch, commandBuffer, 0, 0, 0, x, y, z);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDispatchIndirect(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset)
{
   EVENT_MARKER(DispatchIndirect, commandBuffer, buffer, offset);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdCopyBuffer2(VkCommandBuffer commandBuffer, const VkCopyBufferInfo2 *pCopyBufferInfo)
{
   EVENT_MARKER_ALIAS(CopyBuffer2, CopyBuffer, commandBuffer, pCopyBufferInfo);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdFillBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDeviceSize dstOffset, VkDeviceSize fillSize,
                   uint32_t data)
{
   EVENT_MARKER(FillBuffer, commandBuffer, dstBuffer, dstOffset, fillSize, data);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdUpdateBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDeviceSize dstOffset, VkDeviceSize dataSize,
                     const void *pData)
{
   EVENT_MARKER(UpdateBuffer, commandBuffer, dstBuffer, dstOffset, dataSize, pData);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdCopyImage2(VkCommandBuffer commandBuffer, const VkCopyImageInfo2 *pCopyImageInfo)
{
   EVENT_MARKER_ALIAS(CopyImage2, CopyImage, commandBuffer, pCopyImageInfo);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdCopyBufferToImage2(VkCommandBuffer commandBuffer, const VkCopyBufferToImageInfo2 *pCopyBufferToImageInfo)
{
   EVENT_MARKER_ALIAS(CopyBufferToImage2, CopyBufferToImage, commandBuffer, pCopyBufferToImageInfo);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdCopyImageToBuffer2(VkCommandBuffer commandBuffer, const VkCopyImageToBufferInfo2 *pCopyImageToBufferInfo)
{
   EVENT_MARKER_ALIAS(CopyImageToBuffer2, CopyImageToBuffer, commandBuffer, pCopyImageToBufferInfo);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdBlitImage2(VkCommandBuffer commandBuffer, const VkBlitImageInfo2 *pBlitImageInfo)
{
   EVENT_MARKER_ALIAS(BlitImage2, BlitImage, commandBuffer, pBlitImageInfo);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdClearColorImage(VkCommandBuffer commandBuffer, VkImage image_h, VkImageLayout imageLayout,
                        const VkClearColorValue *pColor, uint32_t rangeCount, const VkImageSubresourceRange *pRanges)
{
   EVENT_MARKER(ClearColorImage, commandBuffer, image_h, imageLayout, pColor, rangeCount, pRanges);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer, VkImage image_h, VkImageLayout imageLayout,
                               const VkClearDepthStencilValue *pDepthStencil, uint32_t rangeCount,
                               const VkImageSubresourceRange *pRanges)
{
   EVENT_MARKER(ClearDepthStencilImage, commandBuffer, image_h, imageLayout, pDepthStencil, rangeCount, pRanges);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdClearAttachments(VkCommandBuffer commandBuffer, uint32_t attachmentCount, const VkClearAttachment *pAttachments,
                         uint32_t rectCount, const VkClearRect *pRects)
{
   EVENT_MARKER(ClearAttachments, commandBuffer, attachmentCount, pAttachments, rectCount, pRects);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdResolveImage2(VkCommandBuffer commandBuffer, const VkResolveImageInfo2 *pResolveImageInfo)
{
   EVENT_MARKER_ALIAS(ResolveImage2, ResolveImage, commandBuffer, pResolveImageInfo);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdWaitEvents2(VkCommandBuffer commandBuffer, uint32_t eventCount, const VkEvent *pEvents,
                    const VkDependencyInfo *pDependencyInfos)
{
   EVENT_MARKER_ALIAS(WaitEvents2, WaitEvents, commandBuffer, eventCount, pEvents, pDependencyInfos);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdPipelineBarrier2(VkCommandBuffer commandBuffer, const VkDependencyInfo *pDependencyInfo)
{
   EVENT_MARKER_ALIAS(PipelineBarrier2, PipelineBarrier, commandBuffer, pDependencyInfo);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdResetQueryPool(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t firstQuery, uint32_t queryCount)
{
   EVENT_MARKER(ResetQueryPool, commandBuffer, queryPool, firstQuery, queryCount);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t firstQuery,
                             uint32_t queryCount, VkBuffer dstBuffer, VkDeviceSize dstOffset, VkDeviceSize stride,
                             VkQueryResultFlags flags)
{
   EVENT_MARKER(CopyQueryPoolResults, commandBuffer, queryPool, firstQuery, queryCount, dstBuffer, dstOffset, stride,
                flags);
}

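/* Ray tracing and acceleration-structure commands are bracketed by Dispatch
 * API markers; the event type identifies the actual command, or'ed with
 * flags such as ApiRayTracingSeparateCompiled.
 */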
#define EVENT_RT_MARKER(cmd_name, flags, ...) EVENT_MARKER_BASE(cmd_name, Dispatch, cmd_name | flags, __VA_ARGS__);

#define EVENT_RT_MARKER_ALIAS(cmd_name, event_name, flags, ...)                                                        \
   EVENT_MARKER_BASE(cmd_name, Dispatch, event_name | flags, __VA_ARGS__);

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdTraceRaysKHR(VkCommandBuffer commandBuffer, const VkStridedDeviceAddressRegionKHR *pRaygenShaderBindingTable,
                     const VkStridedDeviceAddressRegionKHR *pMissShaderBindingTable,
                     const VkStridedDeviceAddressRegionKHR *pHitShaderBindingTable,
                     const VkStridedDeviceAddressRegionKHR *pCallableShaderBindingTable, uint32_t width,
                     uint32_t height, uint32_t depth)
{
   EVENT_RT_MARKER(TraceRaysKHR, ApiRayTracingSeparateCompiled, commandBuffer, pRaygenShaderBindingTable,
                   pMissShaderBindingTable, pHitShaderBindingTable, pCallableShaderBindingTable, width, height, depth);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdTraceRaysIndirectKHR(VkCommandBuffer commandBuffer,
                             const VkStridedDeviceAddressRegionKHR *pRaygenShaderBindingTable,
                             const VkStridedDeviceAddressRegionKHR *pMissShaderBindingTable,
                             const VkStridedDeviceAddressRegionKHR *pHitShaderBindingTable,
                             const VkStridedDeviceAddressRegionKHR *pCallableShaderBindingTable,
                             VkDeviceAddress indirectDeviceAddress)
{
   EVENT_RT_MARKER(TraceRaysIndirectKHR, ApiRayTracingSeparateCompiled, commandBuffer, pRaygenShaderBindingTable,
                   pMissShaderBindingTable, pHitShaderBindingTable, pCallableShaderBindingTable, indirectDeviceAddress);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdTraceRaysIndirect2KHR(VkCommandBuffer commandBuffer, VkDeviceAddress indirectDeviceAddress)
{
   EVENT_RT_MARKER_ALIAS(TraceRaysIndirect2KHR, TraceRaysIndirectKHR, ApiRayTracingSeparateCompiled, commandBuffer,
                         indirectDeviceAddress);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdCopyAccelerationStructureKHR(VkCommandBuffer commandBuffer, const VkCopyAccelerationStructureInfoKHR *pInfo)
{
   EVENT_RT_MARKER(CopyAccelerationStructureKHR, 0, commandBuffer, pInfo);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdCopyAccelerationStructureToMemoryKHR(VkCommandBuffer commandBuffer,
                                             const VkCopyAccelerationStructureToMemoryInfoKHR *pInfo)
{
   EVENT_RT_MARKER(CopyAccelerationStructureToMemoryKHR, 0, commandBuffer, pInfo);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdCopyMemoryToAccelerationStructureKHR(VkCommandBuffer commandBuffer,
                                             const VkCopyMemoryToAccelerationStructureInfoKHR *pInfo)
{
   EVENT_RT_MARKER(CopyMemoryToAccelerationStructureKHR, 0, commandBuffer, pInfo);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDrawMeshTasksEXT(VkCommandBuffer commandBuffer, uint32_t x, uint32_t y, uint32_t z)
{
   EVENT_MARKER(DrawMeshTasksEXT, commandBuffer, x, y, z);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDrawMeshTasksIndirectEXT(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset,
                                 uint32_t drawCount, uint32_t stride)
{
   EVENT_MARKER(DrawMeshTasksIndirectEXT, commandBuffer, buffer, offset, drawCount, stride);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDrawMeshTasksIndirectCountEXT(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset,
                                      VkBuffer countBuffer, VkDeviceSize countBufferOffset, uint32_t maxDrawCount,
                                      uint32_t stride)
{
   EVENT_MARKER(DrawMeshTasksIndirectCountEXT, commandBuffer, buffer, offset, countBuffer, countBufferOffset,
                maxDrawCount, stride);
}

#undef EVENT_RT_MARKER_ALIAS
#undef EVENT_RT_MARKER

#undef EVENT_MARKER
#undef EVENT_MARKER_ALIAS
#undef EVENT_MARKER_BASE

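/* API markers only bracket the call; unlike EVENT_MARKER they do not set an
 * event type because these commands do not generate draw/dispatch events.
 */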
#define API_MARKER_ALIAS(cmd_name, api_name, ...)                                                                      \
   VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);                                                         \
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);                                                    \
   radv_write_begin_general_api_marker(cmd_buffer, ApiCmd##api_name);                                                  \
   device->layer_dispatch.rgp.Cmd##cmd_name(__VA_ARGS__);                                                              \
   radv_write_end_general_api_marker(cmd_buffer, ApiCmd##api_name);

#define API_MARKER(cmd_name, ...) API_MARKER_ALIAS(cmd_name, cmd_name, __VA_ARGS__);

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdBindPipeline(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint, VkPipeline _pipeline)
{
   VK_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);

   API_MARKER(BindPipeline, commandBuffer, pipelineBindPoint, _pipeline);

   if (pipelineBindPoint == VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR) {
      /* RGP seems to expect a compute bind point to detect and report RT pipelines, which makes
       * sense somehow given that RT shaders are compiled to a unified compute shader.
       */
      radv_describe_pipeline_bind(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
   } else {
      radv_describe_pipeline_bind(cmd_buffer, pipelineBindPoint, pipeline);
   }
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdBindDescriptorSets(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint,
                           VkPipelineLayout layout, uint32_t firstSet, uint32_t descriptorSetCount,
                           const VkDescriptorSet *pDescriptorSets, uint32_t dynamicOffsetCount,
                           const uint32_t *pDynamicOffsets)
{
   API_MARKER(BindDescriptorSets, commandBuffer, pipelineBindPoint, layout, firstSet, descriptorSetCount,
              pDescriptorSets, dynamicOffsetCount, pDynamicOffsets);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdBindIndexBuffer(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, VkIndexType indexType)
{
   API_MARKER(BindIndexBuffer, commandBuffer, buffer, offset, indexType);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdBindVertexBuffers2(VkCommandBuffer commandBuffer, uint32_t firstBinding, uint32_t bindingCount,
                           const VkBuffer *pBuffers, const VkDeviceSize *pOffsets, const VkDeviceSize *pSizes,
                           const VkDeviceSize *pStrides)
{
   API_MARKER_ALIAS(BindVertexBuffers2, BindVertexBuffers, commandBuffer, firstBinding, bindingCount, pBuffers,
                    pOffsets, pSizes, pStrides);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdBeginQuery(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t query, VkQueryControlFlags flags)
{
   API_MARKER(BeginQuery, commandBuffer, queryPool, query, flags);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdEndQuery(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t query)
{
   API_MARKER(EndQuery, commandBuffer, queryPool, query);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdWriteTimestamp2(VkCommandBuffer commandBuffer, VkPipelineStageFlags2 stage, VkQueryPool queryPool,
                        uint32_t query)
{
   API_MARKER_ALIAS(WriteTimestamp2, WriteTimestamp, commandBuffer, stage, queryPool, query);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdPushConstants(VkCommandBuffer commandBuffer, VkPipelineLayout layout, VkShaderStageFlags stageFlags,
                      uint32_t offset, uint32_t size, const void *pValues)
{
   API_MARKER(PushConstants, commandBuffer, layout, stageFlags, offset, size, pValues);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdBeginRendering(VkCommandBuffer commandBuffer, const VkRenderingInfo *pRenderingInfo)
{
   API_MARKER_ALIAS(BeginRendering, BeginRenderPass, commandBuffer, pRenderingInfo);
}

1242 VKAPI_ATTR void VKAPI_CALL
sqtt_CmdEndRendering(VkCommandBuffer commandBuffer)1243 sqtt_CmdEndRendering(VkCommandBuffer commandBuffer)
1244 {
1245    API_MARKER_ALIAS(EndRendering, EndRenderPass, commandBuffer);
1246 }
1247 
1248 VKAPI_ATTR void VKAPI_CALL
sqtt_CmdExecuteCommands(VkCommandBuffer commandBuffer,uint32_t commandBufferCount,const VkCommandBuffer * pCmdBuffers)1249 sqtt_CmdExecuteCommands(VkCommandBuffer commandBuffer, uint32_t commandBufferCount, const VkCommandBuffer *pCmdBuffers)
1250 {
1251    API_MARKER(ExecuteCommands, commandBuffer, commandBufferCount, pCmdBuffers);
1252 }
1253 
1254 VKAPI_ATTR void VKAPI_CALL
sqtt_CmdExecuteGeneratedCommandsNV(VkCommandBuffer commandBuffer,VkBool32 isPreprocessed,const VkGeneratedCommandsInfoNV * pGeneratedCommandsInfo)1255 sqtt_CmdExecuteGeneratedCommandsNV(VkCommandBuffer commandBuffer, VkBool32 isPreprocessed,
1256                                    const VkGeneratedCommandsInfoNV *pGeneratedCommandsInfo)
1257 {
1258    /* There is no ExecuteIndirect Vulkan event in RGP yet. */
1259    API_MARKER_ALIAS(ExecuteGeneratedCommandsNV, ExecuteCommands, commandBuffer, isPreprocessed, pGeneratedCommandsInfo);
1260 }
1261 
1262 VKAPI_ATTR void VKAPI_CALL
sqtt_CmdSetViewport(VkCommandBuffer commandBuffer,uint32_t firstViewport,uint32_t viewportCount,const VkViewport * pViewports)1263 sqtt_CmdSetViewport(VkCommandBuffer commandBuffer, uint32_t firstViewport, uint32_t viewportCount,
1264                     const VkViewport *pViewports)
1265 {
1266    API_MARKER(SetViewport, commandBuffer, firstViewport, viewportCount, pViewports);
1267 }
1268 
1269 VKAPI_ATTR void VKAPI_CALL
sqtt_CmdSetScissor(VkCommandBuffer commandBuffer,uint32_t firstScissor,uint32_t scissorCount,const VkRect2D * pScissors)1270 sqtt_CmdSetScissor(VkCommandBuffer commandBuffer, uint32_t firstScissor, uint32_t scissorCount,
1271                    const VkRect2D *pScissors)
1272 {
1273    API_MARKER(SetScissor, commandBuffer, firstScissor, scissorCount, pScissors);
1274 }
1275 
1276 VKAPI_ATTR void VKAPI_CALL
sqtt_CmdSetLineWidth(VkCommandBuffer commandBuffer,float lineWidth)1277 sqtt_CmdSetLineWidth(VkCommandBuffer commandBuffer, float lineWidth)
1278 {
1279    API_MARKER(SetLineWidth, commandBuffer, lineWidth);
1280 }
1281 
1282 VKAPI_ATTR void VKAPI_CALL
sqtt_CmdSetDepthBias(VkCommandBuffer commandBuffer,float depthBiasConstantFactor,float depthBiasClamp,float depthBiasSlopeFactor)1283 sqtt_CmdSetDepthBias(VkCommandBuffer commandBuffer, float depthBiasConstantFactor, float depthBiasClamp,
1284                      float depthBiasSlopeFactor)
1285 {
1286    API_MARKER(SetDepthBias, commandBuffer, depthBiasConstantFactor, depthBiasClamp, depthBiasSlopeFactor);
1287 }
1288 
1289 VKAPI_ATTR void VKAPI_CALL
sqtt_CmdSetBlendConstants(VkCommandBuffer commandBuffer,const float blendConstants[4])1290 sqtt_CmdSetBlendConstants(VkCommandBuffer commandBuffer, const float blendConstants[4])
1291 {
1292    API_MARKER(SetBlendConstants, commandBuffer, blendConstants);
1293 }
1294 
1295 VKAPI_ATTR void VKAPI_CALL
sqtt_CmdSetDepthBounds(VkCommandBuffer commandBuffer,float minDepthBounds,float maxDepthBounds)1296 sqtt_CmdSetDepthBounds(VkCommandBuffer commandBuffer, float minDepthBounds, float maxDepthBounds)
1297 {
1298    API_MARKER(SetDepthBounds, commandBuffer, minDepthBounds, maxDepthBounds);
1299 }
1300 
1301 VKAPI_ATTR void VKAPI_CALL
sqtt_CmdSetStencilCompareMask(VkCommandBuffer commandBuffer,VkStencilFaceFlags faceMask,uint32_t compareMask)1302 sqtt_CmdSetStencilCompareMask(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, uint32_t compareMask)
1303 {
1304    API_MARKER(SetStencilCompareMask, commandBuffer, faceMask, compareMask);
1305 }
1306 
1307 VKAPI_ATTR void VKAPI_CALL
sqtt_CmdSetStencilWriteMask(VkCommandBuffer commandBuffer,VkStencilFaceFlags faceMask,uint32_t writeMask)1308 sqtt_CmdSetStencilWriteMask(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, uint32_t writeMask)
1309 {
1310    API_MARKER(SetStencilWriteMask, commandBuffer, faceMask, writeMask);
1311 }
1312 
1313 VKAPI_ATTR void VKAPI_CALL
sqtt_CmdSetStencilReference(VkCommandBuffer commandBuffer,VkStencilFaceFlags faceMask,uint32_t reference)1314 sqtt_CmdSetStencilReference(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, uint32_t reference)
1315 {
1316    API_MARKER(SetStencilReference, commandBuffer, faceMask, reference);
1317 }
1318 
/* VK_EXT_debug_marker */
VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDebugMarkerBeginEXT(VkCommandBuffer commandBuffer, const VkDebugMarkerMarkerInfoEXT *pMarkerInfo)
{
   VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   radv_write_user_event_marker(cmd_buffer, UserEventPush, pMarkerInfo->pMarkerName);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDebugMarkerEndEXT(VkCommandBuffer commandBuffer)
{
   VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   radv_write_user_event_marker(cmd_buffer, UserEventPop, NULL);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDebugMarkerInsertEXT(VkCommandBuffer commandBuffer, const VkDebugMarkerMarkerInfoEXT *pMarkerInfo)
{
   VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   radv_write_user_event_marker(cmd_buffer, UserEventTrigger, pMarkerInfo->pMarkerName);
}

VKAPI_ATTR VkResult VKAPI_CALL
sqtt_DebugMarkerSetObjectTagEXT(VkDevice device, const VkDebugMarkerObjectTagInfoEXT *pTagInfo)
{
   /* no-op */
   return VK_SUCCESS;
}

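/* VK_EXT_debug_utils labels are recorded as SQTT user event markers and,
 * unlike the VK_EXT_debug_marker entrypoints above, also forwarded to the
 * next layer in the dispatch chain.
 */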
VKAPI_ATTR void VKAPI_CALL
sqtt_CmdBeginDebugUtilsLabelEXT(VkCommandBuffer commandBuffer, const VkDebugUtilsLabelEXT *pLabelInfo)
{
   VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);

   radv_write_user_event_marker(cmd_buffer, UserEventPush, pLabelInfo->pLabelName);

   device->layer_dispatch.rgp.CmdBeginDebugUtilsLabelEXT(commandBuffer, pLabelInfo);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdEndDebugUtilsLabelEXT(VkCommandBuffer commandBuffer)
{
   VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);

   radv_write_user_event_marker(cmd_buffer, UserEventPop, NULL);

   device->layer_dispatch.rgp.CmdEndDebugUtilsLabelEXT(commandBuffer);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdInsertDebugUtilsLabelEXT(VkCommandBuffer commandBuffer, const VkDebugUtilsLabelEXT *pLabelInfo)
{
   VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);

   radv_write_user_event_marker(cmd_buffer, UserEventTrigger, pLabelInfo->pLabelName);

   device->layer_dispatch.rgp.CmdInsertDebugUtilsLabelEXT(commandBuffer, pLabelInfo);
}

/* Pipelines */
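/* Map a Mesa shader stage to the RGP hardware stage it executes as. The
 * mapping depends on the pipeline configuration: VS and TES may run as LS,
 * ES, GS (NGG) or VS depending on tessellation/geometry/NGG usage, while
 * task, compute and all ray tracing stages run on the HW compute stage.
 */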
static enum rgp_hardware_stages
radv_get_rgp_shader_stage(struct radv_shader *shader)
{
   switch (shader->info.stage) {
   case MESA_SHADER_VERTEX:
      if (shader->info.vs.as_ls)
         return RGP_HW_STAGE_LS;
      else if (shader->info.vs.as_es)
         return RGP_HW_STAGE_ES;
      else if (shader->info.is_ngg)
         return RGP_HW_STAGE_GS;
      else
         return RGP_HW_STAGE_VS;
   case MESA_SHADER_TESS_CTRL:
      return RGP_HW_STAGE_HS;
   case MESA_SHADER_TESS_EVAL:
      if (shader->info.tes.as_es)
         return RGP_HW_STAGE_ES;
      else if (shader->info.is_ngg)
         return RGP_HW_STAGE_GS;
      else
         return RGP_HW_STAGE_VS;
   case MESA_SHADER_MESH:
   case MESA_SHADER_GEOMETRY:
      return RGP_HW_STAGE_GS;
   case MESA_SHADER_FRAGMENT:
      return RGP_HW_STAGE_PS;
   case MESA_SHADER_TASK:
   case MESA_SHADER_COMPUTE:
   case MESA_SHADER_RAYGEN:
   case MESA_SHADER_CLOSEST_HIT:
   case MESA_SHADER_ANY_HIT:
   case MESA_SHADER_INTERSECTION:
   case MESA_SHADER_MISS:
   case MESA_SHADER_CALLABLE:
      return RGP_HW_STAGE_CS;
   default:
      unreachable("invalid mesa shader stage");
   }
}

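/* Fill one RGP shader record from a compiled radv_shader. The shader hash is
 * derived from the shader pointer, and the LDS size is converted from HW
 * allocation granules to bytes (GFX11+ uses a fixed 1024-byte granule for
 * fragment shaders).
 */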
static void
radv_fill_code_object_record(struct radv_device *device, struct rgp_shader_data *shader_data,
                             struct radv_shader *shader, uint64_t va)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);
   unsigned lds_increment = pdev->info.gfx_level >= GFX11 && shader->info.stage == MESA_SHADER_FRAGMENT
                               ? 1024
                               : pdev->info.lds_encode_granularity;

   memset(shader_data->rt_shader_name, 0, sizeof(shader_data->rt_shader_name));
   shader_data->hash[0] = (uint64_t)(uintptr_t)shader;
   shader_data->hash[1] = (uint64_t)(uintptr_t)shader >> 32;
   shader_data->code_size = shader->code_size;
   shader_data->code = shader->code;
   shader_data->vgpr_count = shader->config.num_vgprs;
   shader_data->sgpr_count = shader->config.num_sgprs;
   shader_data->scratch_memory_size = shader->config.scratch_bytes_per_wave;
   shader_data->lds_size = shader->config.lds_size * lds_increment;
   shader_data->wavefront_size = shader->info.wave_size;
   shader_data->base_address = va & 0xffffffffffff;
   shader_data->elf_symbol_offset = 0;
   shader_data->hw_stage = radv_get_rgp_shader_stage(shader);
   shader_data->is_combined = false;
}

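/* Build an RGP code object record covering all shaders of a graphics or
 * compute pipeline and append it to the device-wide list that is dumped
 * into the RGP capture.
 */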
static VkResult
radv_add_code_object(struct radv_device *device, struct radv_pipeline *pipeline)
{
   struct ac_sqtt *sqtt = &device->sqtt;
   struct rgp_code_object *code_object = &sqtt->rgp_code_object;
   struct rgp_code_object_record *record;

   record = malloc(sizeof(struct rgp_code_object_record));
   if (!record)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   record->shader_stages_mask = 0;
   record->num_shaders_combined = 0;
   record->pipeline_hash[0] = pipeline->pipeline_hash;
   record->pipeline_hash[1] = pipeline->pipeline_hash;
   record->is_rt = false;

   for (unsigned i = 0; i < MESA_VULKAN_SHADER_STAGES; i++) {
      struct radv_shader *shader = pipeline->shaders[i];

      if (!shader)
         continue;

      radv_fill_code_object_record(device, &record->shader_data[i], shader, radv_sqtt_shader_get_va_reloc(pipeline, i));

      record->shader_stages_mask |= (1 << i);
      record->num_shaders_combined++;
   }

   simple_mtx_lock(&code_object->lock);
   list_addtail(&record->list, &code_object->record);
   code_object->record_count++;
   simple_mtx_unlock(&code_object->lock);

   return VK_SUCCESS;
}

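/* Like radv_add_code_object() but for a single ray tracing shader: every RT
 * stage gets its own record, named rgen_N/chit_N/miss_N/etc, so RGP can
 * present the individual shaders of an RT pipeline.
 */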
static VkResult
radv_add_rt_record(struct radv_device *device, struct rgp_code_object *code_object,
                   struct radv_ray_tracing_pipeline *pipeline, struct radv_shader *shader, uint32_t stack_size,
                   uint32_t index, uint64_t hash)
{
   struct rgp_code_object_record *record = malloc(sizeof(struct rgp_code_object_record));
   if (!record)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   struct rgp_shader_data *shader_data = &record->shader_data[shader->info.stage];

   record->shader_stages_mask = 0;
   record->num_shaders_combined = 0;
   record->pipeline_hash[0] = hash;
   record->pipeline_hash[1] = hash;

   radv_fill_code_object_record(device, shader_data, shader, shader->va);
   shader_data->rt_stack_size = stack_size;

   record->shader_stages_mask |= (1 << shader->info.stage);
   record->is_rt = true;
   switch (shader->info.stage) {
   case MESA_SHADER_RAYGEN:
      snprintf(shader_data->rt_shader_name, sizeof(shader_data->rt_shader_name), "rgen_%d", index);
      break;
   case MESA_SHADER_CLOSEST_HIT:
      snprintf(shader_data->rt_shader_name, sizeof(shader_data->rt_shader_name), "chit_%d", index);
      break;
   case MESA_SHADER_MISS:
      snprintf(shader_data->rt_shader_name, sizeof(shader_data->rt_shader_name), "miss_%d", index);
      break;
   case MESA_SHADER_INTERSECTION:
      snprintf(shader_data->rt_shader_name, sizeof(shader_data->rt_shader_name), "traversal");
      break;
   case MESA_SHADER_CALLABLE:
      snprintf(shader_data->rt_shader_name, sizeof(shader_data->rt_shader_name), "call_%d", index);
      break;
   case MESA_SHADER_COMPUTE:
      snprintf(shader_data->rt_shader_name, sizeof(shader_data->rt_shader_name), "_amdgpu_cs_main");
      break;
   default:
      unreachable("invalid rt stage");
   }
   record->num_shaders_combined = 1;

   simple_mtx_lock(&code_object->lock);
   list_addtail(&record->list, &code_object->record);
   code_object->record_count++;
   simple_mtx_unlock(&code_object->lock);

   return VK_SUCCESS;
}

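/* RT pipelines are reported as several pseudo pipelines, so derive a unique
 * per-stage hash by mixing the pipeline hash with the stage index.
 */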
static void
compute_unique_rt_sha(uint64_t pipeline_hash, unsigned index, unsigned char sha1[SHA1_DIGEST_LENGTH])
{
   struct mesa_sha1 ctx;
   _mesa_sha1_init(&ctx);
   _mesa_sha1_update(&ctx, &pipeline_hash, sizeof(pipeline_hash));
   _mesa_sha1_update(&ctx, &index, sizeof(index));
   _mesa_sha1_final(&ctx, sha1);
}

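/* Register one RT stage: the PSO correlation, loader event and code object
 * record are all keyed by the first 8 bytes of the per-stage SHA-1.
 */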
static VkResult
radv_register_rt_stage(struct radv_device *device, struct radv_ray_tracing_pipeline *pipeline, uint32_t index,
                       uint32_t stack_size, struct radv_shader *shader)
{
   unsigned char sha1[SHA1_DIGEST_LENGTH];

   compute_unique_rt_sha(pipeline->base.base.pipeline_hash, index, sha1);

   /* The ac_sqtt helpers return bool, not VkResult. */
   if (!ac_sqtt_add_pso_correlation(&device->sqtt, *(uint64_t *)sha1, pipeline->base.base.pipeline_hash))
      return VK_ERROR_OUT_OF_HOST_MEMORY;
   if (!ac_sqtt_add_code_object_loader_event(&device->sqtt, *(uint64_t *)sha1, shader->va))
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   return radv_add_rt_record(device, &device->sqtt.rgp_code_object, pipeline, shader, stack_size, index,
                             *(uint64_t *)sha1);
}

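/* Register every stage of an RT pipeline, then the combined traversal shader
 * (which gets the worst-case any-hit + intersection stack size) and finally
 * the raygen prolog.
 */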
static VkResult
radv_register_rt_pipeline(struct radv_device *device, struct radv_ray_tracing_pipeline *pipeline)
{
   VkResult result = VK_SUCCESS;

   uint32_t max_any_hit_stack_size = 0;
   uint32_t max_intersection_stack_size = 0;

   for (unsigned i = 0; i < pipeline->stage_count; i++) {
      struct radv_ray_tracing_stage *stage = &pipeline->stages[i];
      if (stage->stage == MESA_SHADER_ANY_HIT)
         max_any_hit_stack_size = MAX2(max_any_hit_stack_size, stage->stack_size);
      else if (stage->stage == MESA_SHADER_INTERSECTION)
         max_intersection_stack_size = MAX2(max_intersection_stack_size, stage->stack_size);

      if (!stage->shader)
         continue;

      result = radv_register_rt_stage(device, pipeline, i, stage->stack_size, stage->shader);
      if (result != VK_SUCCESS)
         return result;
   }

   uint32_t idx = pipeline->stage_count;

   /* Combined traversal shader */
   if (pipeline->base.base.shaders[MESA_SHADER_INTERSECTION]) {
      result = radv_register_rt_stage(device, pipeline, idx++, max_any_hit_stack_size + max_intersection_stack_size,
                                      pipeline->base.base.shaders[MESA_SHADER_INTERSECTION]);
      if (result != VK_SUCCESS)
         return result;
   }

   /* Prolog */
   result = radv_register_rt_stage(device, pipeline, idx++, 0, pipeline->prolog);

   return result;
}

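/* Register a graphics or compute pipeline: one PSO correlation entry, one
 * loader event at the lowest shader VA and one code object record for the
 * whole pipeline.
 */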
static VkResult
radv_register_pipeline(struct radv_device *device, struct radv_pipeline *pipeline)
{
   uint64_t base_va = ~0ull;

   /* The ac_sqtt helpers return bool, not VkResult. */
   if (!ac_sqtt_add_pso_correlation(&device->sqtt, pipeline->pipeline_hash, pipeline->pipeline_hash))
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   /* Find the lowest shader BO VA. */
   for (unsigned i = 0; i < MESA_VULKAN_SHADER_STAGES; i++) {
      struct radv_shader *shader = pipeline->shaders[i];
      uint64_t va;

      if (!shader)
         continue;

      va = radv_sqtt_shader_get_va_reloc(pipeline, i);
      base_va = MIN2(base_va, va);
   }

   if (!ac_sqtt_add_code_object_loader_event(&device->sqtt, pipeline->pipeline_hash, base_va))
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   return radv_add_code_object(device, pipeline);
}

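/* Drop the PSO correlation, loader event and code object records matching
 * the given hash; called when a pipeline is destroyed.
 */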
static void
radv_unregister_records(struct radv_device *device, uint64_t hash)
{
   struct ac_sqtt *sqtt = &device->sqtt;
   struct rgp_pso_correlation *pso_correlation = &sqtt->rgp_pso_correlation;
   struct rgp_loader_events *loader_events = &sqtt->rgp_loader_events;
   struct rgp_code_object *code_object = &sqtt->rgp_code_object;

   /* Destroy the PSO correlation record. */
   simple_mtx_lock(&pso_correlation->lock);
   list_for_each_entry_safe (struct rgp_pso_correlation_record, record, &pso_correlation->record, list) {
      if (record->pipeline_hash[0] == hash) {
         pso_correlation->record_count--;
         list_del(&record->list);
         free(record);
         break;
      }
   }
   simple_mtx_unlock(&pso_correlation->lock);

   /* Destroy the code object loader record. */
   simple_mtx_lock(&loader_events->lock);
   list_for_each_entry_safe (struct rgp_loader_events_record, record, &loader_events->record, list) {
      if (record->code_object_hash[0] == hash) {
         loader_events->record_count--;
         list_del(&record->list);
         free(record);
         break;
      }
   }
   simple_mtx_unlock(&loader_events->lock);

   /* Destroy the code object record. */
   simple_mtx_lock(&code_object->lock);
   list_for_each_entry_safe (struct rgp_code_object_record, record, &code_object->record, list) {
      if (record->pipeline_hash[0] == hash) {
         code_object->record_count--;
         list_del(&record->list);
         free(record);
         break;
      }
   }
   simple_mtx_unlock(&code_object->lock);
}

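/* Pipeline creation wrappers: create the pipelines through the next layer,
 * then register each one with SQTT. Graphics and RT pipeline libraries are
 * skipped since they cannot be bound directly; graphics shaders are first
 * relocated into a dedicated allocation for reporting. On failure, every
 * pipeline created by the call is destroyed again.
 */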
VKAPI_ATTR VkResult VKAPI_CALL
sqtt_CreateGraphicsPipelines(VkDevice _device, VkPipelineCache pipelineCache, uint32_t count,
                             const VkGraphicsPipelineCreateInfo *pCreateInfos, const VkAllocationCallbacks *pAllocator,
                             VkPipeline *pPipelines)
{
   VK_FROM_HANDLE(radv_device, device, _device);
   VkResult result;

   result = device->layer_dispatch.rgp.CreateGraphicsPipelines(_device, pipelineCache, count, pCreateInfos, pAllocator,
                                                               pPipelines);
   if (result != VK_SUCCESS)
      return result;

   for (unsigned i = 0; i < count; i++) {
      VK_FROM_HANDLE(radv_pipeline, pipeline, pPipelines[i]);

      if (!pipeline)
         continue;

      const VkPipelineCreateFlagBits2KHR create_flags = vk_graphics_pipeline_create_flags(&pCreateInfos[i]);
      if (create_flags & VK_PIPELINE_CREATE_2_LIBRARY_BIT_KHR)
         continue;

      result = radv_sqtt_reloc_graphics_shaders(device, radv_pipeline_to_graphics(pipeline));
      if (result != VK_SUCCESS)
         goto fail;

      result = radv_register_pipeline(device, pipeline);
      if (result != VK_SUCCESS)
         goto fail;
   }

   return VK_SUCCESS;

fail:
   for (unsigned i = 0; i < count; i++) {
      sqtt_DestroyPipeline(_device, pPipelines[i], pAllocator);
      pPipelines[i] = VK_NULL_HANDLE;
   }
   return result;
}

VKAPI_ATTR VkResult VKAPI_CALL
sqtt_CreateComputePipelines(VkDevice _device, VkPipelineCache pipelineCache, uint32_t count,
                            const VkComputePipelineCreateInfo *pCreateInfos, const VkAllocationCallbacks *pAllocator,
                            VkPipeline *pPipelines)
{
   VK_FROM_HANDLE(radv_device, device, _device);
   VkResult result;

   result = device->layer_dispatch.rgp.CreateComputePipelines(_device, pipelineCache, count, pCreateInfos, pAllocator,
                                                              pPipelines);
   if (result != VK_SUCCESS)
      return result;

   for (unsigned i = 0; i < count; i++) {
      VK_FROM_HANDLE(radv_pipeline, pipeline, pPipelines[i]);

      if (!pipeline)
         continue;

      result = radv_register_pipeline(device, pipeline);
      if (result != VK_SUCCESS)
         goto fail;
   }

   return VK_SUCCESS;

fail:
   for (unsigned i = 0; i < count; i++) {
      sqtt_DestroyPipeline(_device, pPipelines[i], pAllocator);
      pPipelines[i] = VK_NULL_HANDLE;
   }
   return result;
}

VKAPI_ATTR VkResult VKAPI_CALL
sqtt_CreateRayTracingPipelinesKHR(VkDevice _device, VkDeferredOperationKHR deferredOperation,
                                  VkPipelineCache pipelineCache, uint32_t count,
                                  const VkRayTracingPipelineCreateInfoKHR *pCreateInfos,
                                  const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines)
{
   VK_FROM_HANDLE(radv_device, device, _device);
   VkResult result;

   result = device->layer_dispatch.rgp.CreateRayTracingPipelinesKHR(_device, deferredOperation, pipelineCache, count,
                                                                    pCreateInfos, pAllocator, pPipelines);
   if (result != VK_SUCCESS)
      return result;

   for (unsigned i = 0; i < count; i++) {
      VK_FROM_HANDLE(radv_pipeline, pipeline, pPipelines[i]);

      if (!pipeline)
         continue;

      const VkPipelineCreateFlagBits2KHR create_flags = vk_rt_pipeline_create_flags(&pCreateInfos[i]);
      if (create_flags & VK_PIPELINE_CREATE_2_LIBRARY_BIT_KHR)
         continue;

      result = radv_register_rt_pipeline(device, radv_pipeline_to_ray_tracing(pipeline));
      if (result != VK_SUCCESS)
         goto fail;
   }

   return VK_SUCCESS;

fail:
   for (unsigned i = 0; i < count; i++) {
      sqtt_DestroyPipeline(_device, pPipelines[i], pAllocator);
      pPipelines[i] = VK_NULL_HANDLE;
   }
   return result;
}

VKAPI_ATTR void VKAPI_CALL
sqtt_DestroyPipeline(VkDevice _device, VkPipeline _pipeline, const VkAllocationCallbacks *pAllocator)
{
   VK_FROM_HANDLE(radv_device, device, _device);
   VK_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);

   if (!_pipeline)
      return;

   /* Ray tracing pipelines have multiple records, each with its own hash. */
   if (pipeline->type == RADV_PIPELINE_RAY_TRACING) {
      /* We have one record for each stage, plus one for the traversal shader and one for the prolog. */
      uint32_t record_count = radv_pipeline_to_ray_tracing(pipeline)->stage_count + 2;
      unsigned char sha1[SHA1_DIGEST_LENGTH];
      for (uint32_t i = 0; i < record_count; ++i) {
         compute_unique_rt_sha(pipeline->pipeline_hash, i, sha1);
         radv_unregister_records(device, *(uint64_t *)sha1);
      }
   } else {
      radv_unregister_records(device, pipeline->pipeline_hash);
   }

   if (pipeline->type == RADV_PIPELINE_GRAPHICS) {
      struct radv_graphics_pipeline *graphics_pipeline = radv_pipeline_to_graphics(pipeline);
      struct radv_sqtt_shaders_reloc *reloc = graphics_pipeline->sqtt_shaders_reloc;

      radv_free_shader_memory(device, reloc->alloc);
      free(reloc);
   }

   device->layer_dispatch.rgp.DestroyPipeline(_device, _pipeline, pAllocator);
}

#undef API_MARKER
#undef API_MARKER_ALIAS