/*
 * Copyright © 2022 Friedrich Vock
 *
 * SPDX-License-Identifier: MIT
 */

#include "meta/radv_meta.h"
#include "util/u_process.h"
#include "radv_event.h"
#include "radv_rra.h"
#include "vk_acceleration_structure.h"
#include "vk_common_entrypoints.h"

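/* Present marks the frame boundary for RRA capture: if a capture was triggered,
 * dump all tracked acceleration structures to a .rra file before forwarding the
 * present, then reset the ray history buffer and free the bookkeeping of
 * acceleration structures the application has already destroyed.
 */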
VKAPI_ATTR VkResult VKAPI_CALL
rra_QueuePresentKHR(VkQueue _queue, const VkPresentInfoKHR *pPresentInfo)
{
   VK_FROM_HANDLE(radv_queue, queue, _queue);
   struct radv_device *device = radv_queue_device(queue);

   if (device->rra_trace.triggered) {
      device->rra_trace.triggered = false;

      if (_mesa_hash_table_num_entries(device->rra_trace.accel_structs) == 0) {
         fprintf(stderr, "radv: No acceleration structures captured, not saving RRA trace.\n");
      } else {
         char filename[2048];
         time_t t = time(NULL);
         struct tm now = *localtime(&t);
         snprintf(filename, sizeof(filename), "/tmp/%s_%04d.%02d.%02d_%02d.%02d.%02d.rra", util_get_process_name(),
                  1900 + now.tm_year, now.tm_mon + 1, now.tm_mday, now.tm_hour, now.tm_min, now.tm_sec);

         VkResult result = radv_rra_dump_trace(_queue, filename);
         if (result == VK_SUCCESS)
            fprintf(stderr, "radv: RRA capture saved to '%s'\n", filename);
         else
            fprintf(stderr, "radv: Failed to save RRA capture!\n");
      }
   }

   VkResult result = device->layer_dispatch.rra.QueuePresentKHR(_queue, pPresentInfo);
   if (result != VK_SUCCESS && result != VK_SUBOPTIMAL_KHR)
      return result;

   VkDevice _device = radv_device_to_handle(device);
   radv_rra_trace_clear_ray_history(_device, &device->rra_trace);

   if (device->rra_trace.triggered && device->rra_trace.ray_history_buffer) {
      result = device->layer_dispatch.rra.DeviceWaitIdle(_device);
      if (result != VK_SUCCESS)
         return result;

      struct radv_ray_history_header *header = device->rra_trace.ray_history_data;
      header->offset = sizeof(struct radv_ray_history_header);
   }

   if (!device->rra_trace.copy_after_build)
      return VK_SUCCESS;

   struct hash_table *accel_structs = device->rra_trace.accel_structs;

   hash_table_foreach (accel_structs, entry) {
      struct radv_rra_accel_struct_data *data = entry->data;
      if (!data->is_dead)
         continue;

      radv_destroy_rra_accel_struct_data(_device, data);
      _mesa_hash_table_remove(accel_structs, entry);
   }

   return VK_SUCCESS;
}

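/* Create a shadow buffer that acceleration structure contents can be copied
 * into, backed by memory from the memory type index selected for RRA copies
 * and allocated with a device address.
 */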
static VkResult
rra_init_accel_struct_data_buffer(VkDevice vk_device, struct radv_rra_accel_struct_buffer *buffer, uint32_t size)
{
   VK_FROM_HANDLE(radv_device, device, vk_device);

   buffer->ref_cnt = 1;

   VkBufferCreateInfo buffer_create_info = {
      .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
      .size = size,
   };

   VkResult result = radv_create_buffer(device, &buffer_create_info, NULL, &buffer->buffer, true);
   if (result != VK_SUCCESS)
      return result;

   VkMemoryRequirements requirements;
   vk_common_GetBufferMemoryRequirements(vk_device, buffer->buffer, &requirements);

   VkMemoryAllocateFlagsInfo flags_info = {
      .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO,
      .flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT,
   };

   VkMemoryAllocateInfo alloc_info = {
      .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
      .pNext = &flags_info,
      .allocationSize = requirements.size,
      .memoryTypeIndex = device->rra_trace.copy_memory_index,
   };
   result = radv_alloc_memory(device, &alloc_info, NULL, &buffer->memory, true);
   if (result != VK_SUCCESS)
      goto fail_buffer;

   result = vk_common_BindBufferMemory(vk_device, buffer->buffer, buffer->memory, 0);
   if (result != VK_SUCCESS)
      goto fail_memory;

   return result;
fail_memory:
   radv_FreeMemory(vk_device, buffer->memory, NULL);
   buffer->memory = VK_NULL_HANDLE;
fail_buffer:
   radv_DestroyBuffer(vk_device, buffer->buffer, NULL);
   buffer->buffer = VK_NULL_HANDLE;
   return result;
}

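/* Intercept acceleration structure creation to start tracking it: allocate the
 * per-structure bookkeeping, create an event that write commands signal once
 * the structure contents are valid, and register the structure (and its VA, if
 * the backing buffer is already bound) in the trace's hash tables.
 */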
VKAPI_ATTR VkResult VKAPI_CALL
rra_CreateAccelerationStructureKHR(VkDevice _device, const VkAccelerationStructureCreateInfoKHR *pCreateInfo,
                                   const VkAllocationCallbacks *pAllocator,
                                   VkAccelerationStructureKHR *pAccelerationStructure)
{
   VK_FROM_HANDLE(radv_device, device, _device);
   VK_FROM_HANDLE(radv_buffer, buffer, pCreateInfo->buffer);

   VkResult result = device->layer_dispatch.rra.CreateAccelerationStructureKHR(_device, pCreateInfo, pAllocator,
                                                                               pAccelerationStructure);

   if (result != VK_SUCCESS)
      return result;

   VK_FROM_HANDLE(vk_acceleration_structure, structure, *pAccelerationStructure);
   simple_mtx_lock(&device->rra_trace.data_mtx);

   struct radv_rra_accel_struct_data *data = calloc(1, sizeof(struct radv_rra_accel_struct_data));
   if (!data) {
      result = VK_ERROR_OUT_OF_HOST_MEMORY;
      goto fail_as;
   }

   data->va = buffer->bo ? vk_acceleration_structure_get_va(structure) : 0;
   data->type = pCreateInfo->type;
   data->is_dead = false;

   VkEventCreateInfo eventCreateInfo = {
      .sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO,
   };

   result = radv_create_event(device, &eventCreateInfo, NULL, &data->build_event, true);
   if (result != VK_SUCCESS)
      goto fail_data;

   _mesa_hash_table_insert(device->rra_trace.accel_structs, structure, data);

   if (data->va)
      _mesa_hash_table_u64_insert(device->rra_trace.accel_struct_vas, data->va, structure);

   goto exit;
fail_data:
   free(data);
fail_as:
   device->layer_dispatch.rra.DestroyAccelerationStructureKHR(_device, *pAccelerationStructure, pAllocator);
   *pAccelerationStructure = VK_NULL_HANDLE;
exit:
   simple_mtx_unlock(&device->rra_trace.data_mtx);
   return result;
}

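/* Common bookkeeping for every command that writes an acceleration structure
 * (builds, copies, deserialization): signal the structure's build event behind
 * a barrier, record its VA, and, when copy_after_build is enabled, snapshot the
 * structure contents into a shadow buffer referenced by the command buffer.
 */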
static void
handle_accel_struct_write(VkCommandBuffer commandBuffer, VkAccelerationStructureKHR accelerationStructure,
                          uint64_t size)
{
   VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   VK_FROM_HANDLE(vk_acceleration_structure, accel_struct, accelerationStructure);

   size = MIN2(size, accel_struct->size);

   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   VkDevice _device = radv_device_to_handle(device);

   struct hash_entry *entry = _mesa_hash_table_search(device->rra_trace.accel_structs, accel_struct);
   struct radv_rra_accel_struct_data *data = entry->data;

   VkMemoryBarrier2 barrier = {
      .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2,
      .srcStageMask = VK_PIPELINE_STAGE_2_ACCELERATION_STRUCTURE_BUILD_BIT_KHR,
      .srcAccessMask = VK_ACCESS_2_ACCELERATION_STRUCTURE_WRITE_BIT_KHR,
      .dstStageMask = VK_PIPELINE_STAGE_2_ALL_TRANSFER_BIT,
      .dstAccessMask = VK_ACCESS_2_TRANSFER_READ_BIT,
   };

   VkDependencyInfo dependencyInfo = {
      .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
      .memoryBarrierCount = 1,
      .pMemoryBarriers = &barrier,
   };

   radv_CmdPipelineBarrier2(commandBuffer, &dependencyInfo);

   vk_common_CmdSetEvent(commandBuffer, data->build_event, 0);

   if (!data->va) {
      data->va = vk_acceleration_structure_get_va(accel_struct);
      _mesa_hash_table_u64_insert(device->rra_trace.accel_struct_vas, data->va, accel_struct);
   }

   if (data->size < size) {
      data->size = size;

      if (device->rra_trace.copy_after_build) {
         if (data->buffer)
            radv_rra_accel_struct_buffer_unref(device, data->buffer);

         data->buffer = calloc(1, sizeof(struct radv_rra_accel_struct_buffer));
         /* Bail out on host allocation failure so the copy below is skipped. */
         if (!data->buffer)
            return;

         if (rra_init_accel_struct_data_buffer(_device, data->buffer, size) != VK_SUCCESS)
            return;
      }
   }

   if (!data->buffer)
      return;

   if (!_mesa_set_search(cmd_buffer->accel_struct_buffers, data->buffer)) {
      radv_rra_accel_struct_buffer_ref(data->buffer);
      _mesa_set_add(cmd_buffer->accel_struct_buffers, data->buffer);
   }

   VkBufferCopy2 region = {
      .sType = VK_STRUCTURE_TYPE_BUFFER_COPY_2,
      .srcOffset = accel_struct->offset,
      .size = size,
   };

   VkCopyBufferInfo2 copyInfo = {
      .sType = VK_STRUCTURE_TYPE_COPY_BUFFER_INFO_2,
      .srcBuffer = accel_struct->buffer,
      .dstBuffer = data->buffer->buffer,
      .regionCount = 1,
      .pRegions = &region,
   };

   radv_CmdCopyBuffer2(commandBuffer, &copyInfo);
}

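/* Forward the build, then record the write for every destination structure.
 * The size is recomputed via GetAccelerationStructureBuildSizesKHR instead of
 * trusting the size of the backing buffer (see the vkd3d-proton comment
 * below).
 */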
VKAPI_ATTR void VKAPI_CALL
rra_CmdBuildAccelerationStructuresKHR(VkCommandBuffer commandBuffer, uint32_t infoCount,
                                      const VkAccelerationStructureBuildGeometryInfoKHR *pInfos,
                                      const VkAccelerationStructureBuildRangeInfoKHR *const *ppBuildRangeInfos)
{
   VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);

   device->layer_dispatch.rra.CmdBuildAccelerationStructuresKHR(commandBuffer, infoCount, pInfos, ppBuildRangeInfos);

   simple_mtx_lock(&device->rra_trace.data_mtx);

   for (uint32_t i = 0; i < infoCount; ++i) {
      uint32_t *primitive_counts = alloca(pInfos[i].geometryCount * sizeof(uint32_t));
      for (uint32_t geometry_index = 0; geometry_index < pInfos[i].geometryCount; geometry_index++)
         primitive_counts[geometry_index] = ppBuildRangeInfos[i][geometry_index].primitiveCount;

      /* vkd3d-proton specifies the size of the backing buffer. This can cause false positives when removing aliasing
       * acceleration structures, because a buffer can be used by multiple acceleration structures. Therefore we need to
       * compute the actual size. */
      VkAccelerationStructureBuildSizesInfoKHR size_info = {
         .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR,
      };
      device->layer_dispatch.rra.GetAccelerationStructureBuildSizesKHR(radv_device_to_handle(device),
                                                                       VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR,
                                                                       pInfos + i, primitive_counts, &size_info);

      handle_accel_struct_write(commandBuffer, pInfos[i].dstAccelerationStructure, size_info.accelerationStructureSize);
   }

   simple_mtx_unlock(&device->rra_trace.data_mtx);
}

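/* Record a structure-to-structure copy as a write to the destination, using
 * the size already tracked for the source.
 */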
VKAPI_ATTR void VKAPI_CALL
rra_CmdCopyAccelerationStructureKHR(VkCommandBuffer commandBuffer, const VkCopyAccelerationStructureInfoKHR *pInfo)
{
   VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);

   device->layer_dispatch.rra.CmdCopyAccelerationStructureKHR(commandBuffer, pInfo);

   simple_mtx_lock(&device->rra_trace.data_mtx);

   VK_FROM_HANDLE(vk_acceleration_structure, src, pInfo->src);

   struct hash_entry *entry = _mesa_hash_table_search(device->rra_trace.accel_structs, src);
   struct radv_rra_accel_struct_data *data = entry->data;

   handle_accel_struct_write(commandBuffer, pInfo->dst, data->size);

   simple_mtx_unlock(&device->rra_trace.data_mtx);
}

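/* Record a deserialization from memory as a write covering the destination's
 * full size.
 */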
VKAPI_ATTR void VKAPI_CALL
rra_CmdCopyMemoryToAccelerationStructureKHR(VkCommandBuffer commandBuffer,
                                            const VkCopyMemoryToAccelerationStructureInfoKHR *pInfo)
{
   VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);

   device->layer_dispatch.rra.CmdCopyMemoryToAccelerationStructureKHR(commandBuffer, pInfo);

   simple_mtx_lock(&device->rra_trace.data_mtx);

   VK_FROM_HANDLE(vk_acceleration_structure, dst, pInfo->dst);
   handle_accel_struct_write(commandBuffer, pInfo->dst, dst->size);

   simple_mtx_unlock(&device->rra_trace.data_mtx);
}

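/* With copy_after_build, the bookkeeping (and the shadow copy) has to survive
 * until the trace is dumped at the next present, so the structure is only
 * marked dead here and reaped in rra_QueuePresentKHR; otherwise it can be
 * dropped immediately.
 */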
VKAPI_ATTR void VKAPI_CALL
rra_DestroyAccelerationStructureKHR(VkDevice _device, VkAccelerationStructureKHR _structure,
                                    const VkAllocationCallbacks *pAllocator)
{
   if (!_structure)
      return;

   VK_FROM_HANDLE(radv_device, device, _device);
   simple_mtx_lock(&device->rra_trace.data_mtx);

   VK_FROM_HANDLE(vk_acceleration_structure, structure, _structure);

   struct hash_entry *entry = _mesa_hash_table_search(device->rra_trace.accel_structs, structure);

   assert(entry);
   struct radv_rra_accel_struct_data *data = entry->data;

   if (device->rra_trace.copy_after_build)
      data->is_dead = true;
   else
      _mesa_hash_table_remove(device->rra_trace.accel_structs, entry);

   simple_mtx_unlock(&device->rra_trace.data_mtx);

   device->layer_dispatch.rra.DestroyAccelerationStructureKHR(_device, _structure, pAllocator);
}

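/* Forward the submit, then, while a capture is active, gather the ray history
 * entries recorded by the submitted command buffers. DeviceWaitIdle ensures the
 * GPU has finished writing the history buffer before the base index is
 * advanced for the next submit.
 */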
VKAPI_ATTR VkResult VKAPI_CALL
rra_QueueSubmit2KHR(VkQueue _queue, uint32_t submitCount, const VkSubmitInfo2 *pSubmits, VkFence _fence)
{
   VK_FROM_HANDLE(radv_queue, queue, _queue);
   struct radv_device *device = radv_queue_device(queue);

   VkResult result = device->layer_dispatch.rra.QueueSubmit2KHR(_queue, submitCount, pSubmits, _fence);
   if (result != VK_SUCCESS || !device->rra_trace.triggered)
      return result;

   uint32_t total_trace_count = 0;

   simple_mtx_lock(&device->rra_trace.data_mtx);

   for (uint32_t submit_index = 0; submit_index < submitCount; submit_index++) {
      for (uint32_t i = 0; i < pSubmits[submit_index].commandBufferInfoCount; i++) {
         VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, pSubmits[submit_index].pCommandBufferInfos[i].commandBuffer);
         uint32_t trace_count =
            util_dynarray_num_elements(&cmd_buffer->ray_history, struct radv_rra_ray_history_data *);
         if (!trace_count)
            continue;

         total_trace_count += trace_count;
         util_dynarray_append_dynarray(&device->rra_trace.ray_history, &cmd_buffer->ray_history);
      }
   }

   if (!total_trace_count) {
      simple_mtx_unlock(&device->rra_trace.data_mtx);
      return result;
   }

   result = device->layer_dispatch.rra.DeviceWaitIdle(radv_device_to_handle(device));

   struct radv_ray_history_header *header = device->rra_trace.ray_history_data;
   header->submit_base_index += total_trace_count;

   simple_mtx_unlock(&device->rra_trace.data_mtx);

   return result;
}