/*
 * Copyright © 2022 Friedrich Vock
 *
 * SPDX-License-Identifier: MIT
 */

#include "meta/radv_meta.h"
#include "util/u_process.h"
#include "radv_event.h"
#include "radv_rra.h"
#include "vk_acceleration_structure.h"
#include "vk_common_entrypoints.h"

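/* Present marks the capture boundary for RRA traces: if a capture was triggered
 * during the last frame, dump all tracked acceleration structures to a .rra file.
 * Afterwards, the ray history state is reset and acceleration structures that were
 * destroyed by the app (but kept alive for copy_after_build) are garbage-collected.
 */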
VKAPI_ATTR VkResult VKAPI_CALL
rra_QueuePresentKHR(VkQueue _queue, const VkPresentInfoKHR *pPresentInfo)
{
   VK_FROM_HANDLE(radv_queue, queue, _queue);
   struct radv_device *device = radv_queue_device(queue);

   if (device->rra_trace.triggered) {
      device->rra_trace.triggered = false;

      if (_mesa_hash_table_num_entries(device->rra_trace.accel_structs) == 0) {
         fprintf(stderr, "radv: No acceleration structures captured, not saving RRA trace.\n");
      } else {
         char filename[2048];
         time_t t = time(NULL);
         struct tm now = *localtime(&t);
         snprintf(filename, sizeof(filename), "/tmp/%s_%04d.%02d.%02d_%02d.%02d.%02d.rra", util_get_process_name(),
                  1900 + now.tm_year, now.tm_mon + 1, now.tm_mday, now.tm_hour, now.tm_min, now.tm_sec);

         VkResult result = radv_rra_dump_trace(_queue, filename);
         if (result == VK_SUCCESS)
            fprintf(stderr, "radv: RRA capture saved to '%s'\n", filename);
         else
            fprintf(stderr, "radv: Failed to save RRA capture!\n");
      }
   }

   VkResult result = device->layer_dispatch.rra.QueuePresentKHR(_queue, pPresentInfo);
   if (result != VK_SUCCESS && result != VK_SUBOPTIMAL_KHR)
      return result;

   VkDevice _device = radv_device_to_handle(device);
   radv_rra_trace_clear_ray_history(_device, &device->rra_trace);

   if (device->rra_trace.triggered && device->rra_trace.ray_history_buffer) {
      result = device->layer_dispatch.rra.DeviceWaitIdle(_device);
      if (result != VK_SUCCESS)
         return result;

      struct radv_ray_history_header *header = device->rra_trace.ray_history_data;
      header->offset = sizeof(struct radv_ray_history_header);
   }

   if (!device->rra_trace.copy_after_build)
      return VK_SUCCESS;

   struct hash_table *accel_structs = device->rra_trace.accel_structs;

   hash_table_foreach (accel_structs, entry) {
      struct radv_rra_accel_struct_data *data = entry->data;
      if (!data->is_dead)
         continue;

      radv_destroy_rra_accel_struct_data(_device, data);
      _mesa_hash_table_remove(accel_structs, entry);
   }

   return VK_SUCCESS;
}

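/* Allocates the shadow buffer (with dedicated memory taken from the trace's
 * copy_memory_index memory type) that acceleration structure contents are copied
 * into at build time. The buffer starts out with a reference count of 1, owned by
 * the per-structure tracking data.
 */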
static VkResult
rra_init_accel_struct_data_buffer(VkDevice vk_device, struct radv_rra_accel_struct_buffer *buffer, uint32_t size)
{
   VK_FROM_HANDLE(radv_device, device, vk_device);

   buffer->ref_cnt = 1;

   /* The shadow buffer is only ever written by transfer copies from the real
    * acceleration structure buffer, so it needs TRANSFER_DST usage.
    */
   VkBufferCreateInfo buffer_create_info = {
      .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
      .size = size,
      .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT,
   };

   VkResult result = radv_create_buffer(device, &buffer_create_info, NULL, &buffer->buffer, true);
   if (result != VK_SUCCESS)
      return result;

   VkMemoryRequirements requirements;
   vk_common_GetBufferMemoryRequirements(vk_device, buffer->buffer, &requirements);

   VkMemoryAllocateFlagsInfo flags_info = {
      .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO,
      .flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT,
   };

   VkMemoryAllocateInfo alloc_info = {
      .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
      .pNext = &flags_info,
      .allocationSize = requirements.size,
      .memoryTypeIndex = device->rra_trace.copy_memory_index,
   };
   result = radv_alloc_memory(device, &alloc_info, NULL, &buffer->memory, true);
   if (result != VK_SUCCESS)
      goto fail_buffer;

   result = vk_common_BindBufferMemory(vk_device, buffer->buffer, buffer->memory, 0);
   if (result != VK_SUCCESS)
      goto fail_memory;

   return result;
fail_memory:
   radv_FreeMemory(vk_device, buffer->memory, NULL);
   buffer->memory = VK_NULL_HANDLE;
fail_buffer:
   radv_DestroyBuffer(vk_device, buffer->buffer, NULL);
   buffer->buffer = VK_NULL_HANDLE;
   return result;
}

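/* Intercepts acceleration structure creation to register the structure with the
 * trace: tracking data keyed by handle in accel_structs, an entry keyed by VA in
 * accel_struct_vas (once the backing buffer is bound), and an event that is set on
 * the GPU when the structure has been built.
 */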
VKAPI_ATTR VkResult VKAPI_CALL
rra_CreateAccelerationStructureKHR(VkDevice _device, const VkAccelerationStructureCreateInfoKHR *pCreateInfo,
                                   const VkAllocationCallbacks *pAllocator,
                                   VkAccelerationStructureKHR *pAccelerationStructure)
{
   VK_FROM_HANDLE(radv_device, device, _device);
   VK_FROM_HANDLE(radv_buffer, buffer, pCreateInfo->buffer);

   VkResult result = device->layer_dispatch.rra.CreateAccelerationStructureKHR(_device, pCreateInfo, pAllocator,
                                                                               pAccelerationStructure);

   if (result != VK_SUCCESS)
      return result;

   VK_FROM_HANDLE(vk_acceleration_structure, structure, *pAccelerationStructure);
   simple_mtx_lock(&device->rra_trace.data_mtx);

   struct radv_rra_accel_struct_data *data = calloc(1, sizeof(struct radv_rra_accel_struct_data));
   if (!data) {
      result = VK_ERROR_OUT_OF_HOST_MEMORY;
      goto fail_as;
   }

   data->va = buffer->bo ? vk_acceleration_structure_get_va(structure) : 0;
   data->type = pCreateInfo->type;
   data->is_dead = false;

   VkEventCreateInfo eventCreateInfo = {
      .sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO,
   };

   result = radv_create_event(device, &eventCreateInfo, NULL, &data->build_event, true);
   if (result != VK_SUCCESS)
      goto fail_data;

   _mesa_hash_table_insert(device->rra_trace.accel_structs, structure, data);

   if (data->va)
      _mesa_hash_table_u64_insert(device->rra_trace.accel_struct_vas, data->va, structure);

   goto exit;
fail_data:
   free(data);
fail_as:
   device->layer_dispatch.rra.DestroyAccelerationStructureKHR(_device, *pAccelerationStructure, pAllocator);
   *pAccelerationStructure = VK_NULL_HANDLE;
exit:
   simple_mtx_unlock(&device->rra_trace.data_mtx);
   return result;
}

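/* Common bookkeeping after anything writes an acceleration structure (build or copy):
 * make the AS write visible to transfer reads, set the build event, record the
 * structure's VA once it is known, and, if copy_after_build is enabled, snapshot the
 * contents into the structure's shadow buffer, keeping that buffer referenced from the
 * command buffer's accel_struct_buffers set.
 */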
static void
handle_accel_struct_write(VkCommandBuffer commandBuffer, VkAccelerationStructureKHR accelerationStructure,
                          uint64_t size)
{
   VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   VK_FROM_HANDLE(vk_acceleration_structure, accel_struct, accelerationStructure);

   size = MIN2(size, accel_struct->size);

   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   VkDevice _device = radv_device_to_handle(device);

   struct hash_entry *entry = _mesa_hash_table_search(device->rra_trace.accel_structs, accel_struct);
   assert(entry);
   struct radv_rra_accel_struct_data *data = entry->data;

   VkMemoryBarrier2 barrier = {
      .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2,
      .srcStageMask = VK_PIPELINE_STAGE_2_ACCELERATION_STRUCTURE_BUILD_BIT_KHR,
      .srcAccessMask = VK_ACCESS_2_ACCELERATION_STRUCTURE_WRITE_BIT_KHR,
      .dstStageMask = VK_PIPELINE_STAGE_2_ALL_TRANSFER_BIT,
      .dstAccessMask = VK_ACCESS_2_TRANSFER_READ_BIT,
   };

   VkDependencyInfo dependencyInfo = {
      .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
      .memoryBarrierCount = 1,
      .pMemoryBarriers = &barrier,
   };

   radv_CmdPipelineBarrier2(commandBuffer, &dependencyInfo);

   vk_common_CmdSetEvent(commandBuffer, data->build_event, 0);

   if (!data->va) {
      data->va = vk_acceleration_structure_get_va(accel_struct);
      _mesa_hash_table_u64_insert(device->rra_trace.accel_struct_vas, data->va, accel_struct);
   }

   if (data->size < size) {
      data->size = size;

      if (device->rra_trace.copy_after_build) {
         if (data->buffer)
            radv_rra_accel_struct_buffer_unref(device, data->buffer);

         data->buffer = calloc(1, sizeof(struct radv_rra_accel_struct_buffer));
         if (!data->buffer)
            return;

         if (rra_init_accel_struct_data_buffer(_device, data->buffer, size) != VK_SUCCESS) {
            free(data->buffer);
            data->buffer = NULL;
            return;
         }
      }
   }

   if (!data->buffer)
      return;

   if (!_mesa_set_search(cmd_buffer->accel_struct_buffers, data->buffer)) {
      radv_rra_accel_struct_buffer_ref(data->buffer);
      _mesa_set_add(cmd_buffer->accel_struct_buffers, data->buffer);
   }

   VkBufferCopy2 region = {
      .sType = VK_STRUCTURE_TYPE_BUFFER_COPY_2,
      .srcOffset = accel_struct->offset,
      .size = size,
   };

   VkCopyBufferInfo2 copyInfo = {
      .sType = VK_STRUCTURE_TYPE_COPY_BUFFER_INFO_2,
      .srcBuffer = accel_struct->buffer,
      .dstBuffer = data->buffer->buffer,
      .regionCount = 1,
      .pRegions = &region,
   };

   radv_CmdCopyBuffer2(commandBuffer, &copyInfo);
}

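/* Records every destination structure of a build. The structure size is recomputed
 * from the build info instead of trusting the backing buffer size, since one buffer
 * can back multiple acceleration structures.
 */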
VKAPI_ATTR void VKAPI_CALL
rra_CmdBuildAccelerationStructuresKHR(VkCommandBuffer commandBuffer, uint32_t infoCount,
                                      const VkAccelerationStructureBuildGeometryInfoKHR *pInfos,
                                      const VkAccelerationStructureBuildRangeInfoKHR *const *ppBuildRangeInfos)
{
   VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);

   device->layer_dispatch.rra.CmdBuildAccelerationStructuresKHR(commandBuffer, infoCount, pInfos, ppBuildRangeInfos);

   simple_mtx_lock(&device->rra_trace.data_mtx);

   for (uint32_t i = 0; i < infoCount; ++i) {
      uint32_t *primitive_counts = alloca(pInfos[i].geometryCount * sizeof(uint32_t));
      for (uint32_t geometry_index = 0; geometry_index < pInfos[i].geometryCount; geometry_index++)
         primitive_counts[geometry_index] = ppBuildRangeInfos[i][geometry_index].primitiveCount;

      /* vkd3d-proton specifies the size of the backing buffer. This can cause false positives when removing
       * aliasing acceleration structures, because a buffer can be used by multiple acceleration structures.
       * Therefore we need to compute the actual size. */
      VkAccelerationStructureBuildSizesInfoKHR size_info = {
         .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR,
      };
      device->layer_dispatch.rra.GetAccelerationStructureBuildSizesKHR(radv_device_to_handle(device),
                                                                       VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR,
                                                                       pInfos + i, primitive_counts, &size_info);

      handle_accel_struct_write(commandBuffer, pInfos[i].dstAccelerationStructure,
                                size_info.accelerationStructureSize);
   }

   simple_mtx_unlock(&device->rra_trace.data_mtx);
}

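/* AS -> AS copies also write a destination structure, so they are tracked like
 * builds, reusing the size already recorded for the source structure.
 */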
VKAPI_ATTR void VKAPI_CALL
rra_CmdCopyAccelerationStructureKHR(VkCommandBuffer commandBuffer, const VkCopyAccelerationStructureInfoKHR *pInfo)
{
   VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);

   device->layer_dispatch.rra.CmdCopyAccelerationStructureKHR(commandBuffer, pInfo);

   simple_mtx_lock(&device->rra_trace.data_mtx);

   VK_FROM_HANDLE(vk_acceleration_structure, src, pInfo->src);

   struct hash_entry *entry = _mesa_hash_table_search(device->rra_trace.accel_structs, src);
   assert(entry);
   struct radv_rra_accel_struct_data *data = entry->data;

   handle_accel_struct_write(commandBuffer, pInfo->dst, data->size);

   simple_mtx_unlock(&device->rra_trace.data_mtx);
}

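/* Deserialization (memory -> acceleration structure) writes the destination
 * structure as well, so it is tracked like a build.
 */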
VKAPI_ATTR void VKAPI_CALL
rra_CmdCopyMemoryToAccelerationStructureKHR(VkCommandBuffer commandBuffer,
                                            const VkCopyMemoryToAccelerationStructureInfoKHR *pInfo)
{
   VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);

   device->layer_dispatch.rra.CmdCopyMemoryToAccelerationStructureKHR(commandBuffer, pInfo);

   simple_mtx_lock(&device->rra_trace.data_mtx);

   VK_FROM_HANDLE(vk_acceleration_structure, dst, pInfo->dst);
   handle_accel_struct_write(commandBuffer, pInfo->dst, dst->size);

   simple_mtx_unlock(&device->rra_trace.data_mtx);
}

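/* With copy_after_build enabled, the shadow copy may still have to be written out at
 * the next present, so the tracking data is only marked dead here and cleaned up in
 * rra_QueuePresentKHR; otherwise the entry can be removed immediately.
 */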
VKAPI_ATTR void VKAPI_CALL
rra_DestroyAccelerationStructureKHR(VkDevice _device, VkAccelerationStructureKHR _structure,
                                    const VkAllocationCallbacks *pAllocator)
{
319 {
320 if (!_structure)
321 return;
322
323 VK_FROM_HANDLE(radv_device, device, _device);
324 simple_mtx_lock(&device->rra_trace.data_mtx);
325
326 VK_FROM_HANDLE(vk_acceleration_structure, structure, _structure);
327
328 struct hash_entry *entry = _mesa_hash_table_search(device->rra_trace.accel_structs, structure);
329
330 assert(entry);
331 struct radv_rra_accel_struct_data *data = entry->data;
332
333 if (device->rra_trace.copy_after_build)
334 data->is_dead = true;
335 else
336 _mesa_hash_table_remove(device->rra_trace.accel_structs, entry);
337
338 simple_mtx_unlock(&device->rra_trace.data_mtx);
339
340 device->layer_dispatch.rra.DestroyAccelerationStructureKHR(_device, _structure, pAllocator);
341 }
342
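/* Submission is where ray history entries become visible to the trace: gather the
 * per-command-buffer ray history into the device-level trace, wait for the submit to
 * finish, and advance the ray history header's submit_base_index by the number of
 * collected traces.
 */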
VKAPI_ATTR VkResult VKAPI_CALL
rra_QueueSubmit2KHR(VkQueue _queue, uint32_t submitCount, const VkSubmitInfo2 *pSubmits, VkFence _fence)
{
   VK_FROM_HANDLE(radv_queue, queue, _queue);
   struct radv_device *device = radv_queue_device(queue);

   VkResult result = device->layer_dispatch.rra.QueueSubmit2KHR(_queue, submitCount, pSubmits, _fence);
   if (result != VK_SUCCESS || !device->rra_trace.triggered)
      return result;

   uint32_t total_trace_count = 0;

   simple_mtx_lock(&device->rra_trace.data_mtx);

   for (uint32_t submit_index = 0; submit_index < submitCount; submit_index++) {
      for (uint32_t i = 0; i < pSubmits[submit_index].commandBufferInfoCount; i++) {
         VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, pSubmits[submit_index].pCommandBufferInfos[i].commandBuffer);
         uint32_t trace_count =
            util_dynarray_num_elements(&cmd_buffer->ray_history, struct radv_rra_ray_history_data *);
         if (!trace_count)
            continue;

         total_trace_count += trace_count;
         util_dynarray_append_dynarray(&device->rra_trace.ray_history, &cmd_buffer->ray_history);
      }
   }

   if (!total_trace_count) {
      simple_mtx_unlock(&device->rra_trace.data_mtx);
      return result;
   }

   result = device->layer_dispatch.rra.DeviceWaitIdle(radv_device_to_handle(device));

   struct radv_ray_history_header *header = device->rra_trace.ray_history_data;
   header->submit_base_index += total_trace_count;

   simple_mtx_unlock(&device->rra_trace.data_mtx);

   return result;
}