/*
 * Copyright © 2022 Friedrich Vock
 *
 * SPDX-License-Identifier: MIT
 */

#include "radv_rra.h"
#include "bvh/bvh.h"
#include "util/half_float.h"
#include "amd_family.h"
#include "radv_device.h"
#include "radv_entrypoints.h"
#include "radv_physical_device.h"
#include "vk_acceleration_structure.h"
#include "vk_common_entrypoints.h"

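/* Viewed as little-endian ASCII bytes (0x41 0x4D 0x44 0x5F 0x52 0x44 0x46
 * 0x20), the magic below spells out "AMD_RDF ".
 */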
#define RRA_MAGIC 0x204644525F444D41

struct rra_file_header {
   uint64_t magic;
   uint32_t version;
   uint32_t unused;
   uint64_t chunk_descriptions_offset;
   uint64_t chunk_descriptions_size;
};

static_assert(sizeof(struct rra_file_header) == 32, "rra_file_header does not match RRA spec");

enum rra_chunk_version {
   RADV_RRA_ASIC_API_INFO_CHUNK_VERSION = 0x1,
   RADV_RRA_RAY_HISTORY_CHUNK_VERSION = 0x2,
   RADV_RRA_ACCEL_STRUCT_CHUNK_VERSION = 0xF0005,
};

enum rra_file_api {
   RADV_RRA_API_DX9,
   RADV_RRA_API_DX11,
   RADV_RRA_API_DX12,
   RADV_RRA_API_VULKAN,
   RADV_RRA_API_OPENGL,
   RADV_RRA_API_OPENCL,
   RADV_RRA_API_MANTLE,
   RADV_RRA_API_GENERIC,
};

struct rra_file_chunk_description {
   char name[16];
   uint32_t is_zstd_compressed;
   enum rra_chunk_version version;
   uint64_t header_offset;
   uint64_t header_size;
   uint64_t data_offset;
   uint64_t data_size;
   uint64_t unused;
};

static_assert(sizeof(struct rra_file_chunk_description) == 64, "rra_file_chunk_description does not match RRA spec");

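/* Node ids pack a node's GPU VA: the low 3 bits hold the node type and the
 * remaining bits hold (VA >> 3). Shifting left by 19 and then arithmetic
 * right by 16 recovers (node & ~7) << 3 while sign-extending from bit 47,
 * presumably so canonical upper-half GPU addresses survive the round trip.
 * Example: node = 0x20000000005 (type 5, box32) -> VA = 0x100000000000.
 */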
static uint64_t
node_to_addr(uint64_t node)
{
   node &= ~7ull;
   node <<= 19;
   return ((int64_t)node) >> 16;
}

static void
rra_dump_header(FILE *output, uint64_t chunk_descriptions_offset, uint64_t chunk_descriptions_size)
{
   struct rra_file_header header = {
      .magic = RRA_MAGIC,
      .version = 3,
      .chunk_descriptions_offset = chunk_descriptions_offset,
      .chunk_descriptions_size = chunk_descriptions_size,
   };
   fwrite(&header, sizeof(header), 1, output);
}

static void
rra_dump_chunk_description(uint64_t offset, uint64_t header_size, uint64_t data_size, const char *name,
                           enum rra_chunk_version version, FILE *output)
{
   struct rra_file_chunk_description chunk = {
      .version = version,
      .header_offset = offset,
      .header_size = header_size,
      .data_offset = offset + header_size,
      .data_size = data_size,
   };
   memcpy(chunk.name, name, strnlen(name, sizeof(chunk.name)));
   fwrite(&chunk, sizeof(struct rra_file_chunk_description), 1, output);
}

enum rra_memory_type {
   RRA_MEMORY_TYPE_UNKNOWN,
   RRA_MEMORY_TYPE_DDR,
   RRA_MEMORY_TYPE_DDR2,
   RRA_MEMORY_TYPE_DDR3,
   RRA_MEMORY_TYPE_DDR4,
   RRA_MEMORY_TYPE_DDR5,
   RRA_MEMORY_TYPE_GDDR3,
   RRA_MEMORY_TYPE_GDDR4,
   RRA_MEMORY_TYPE_GDDR5,
   RRA_MEMORY_TYPE_GDDR6,
   RRA_MEMORY_TYPE_HBM,
   RRA_MEMORY_TYPE_HBM2,
   RRA_MEMORY_TYPE_HBM3,
   RRA_MEMORY_TYPE_LPDDR4,
   RRA_MEMORY_TYPE_LPDDR5,
};

#define RRA_FILE_DEVICE_NAME_MAX_SIZE 256

struct rra_asic_info {
   uint64_t min_shader_clk_freq;
   uint64_t min_mem_clk_freq;
   char unused[8];
   uint64_t max_shader_clk_freq;
   uint64_t max_mem_clk_freq;
   uint32_t device_id;
   uint32_t rev_id;
   char unused2[80];
   uint64_t vram_size;
   uint32_t bus_width;
   char unused3[12];
   char device_name[RRA_FILE_DEVICE_NAME_MAX_SIZE];
   char unused4[16];
   uint32_t mem_ops_per_clk;
   uint32_t mem_type;
   char unused5[135];
   bool valid;
};

static_assert(sizeof(struct rra_asic_info) == 568, "rra_asic_info does not match RRA spec");

static uint32_t
amdgpu_vram_type_to_rra(uint32_t type)
{
   switch (type) {
   case AMD_VRAM_TYPE_UNKNOWN:
      return RRA_MEMORY_TYPE_UNKNOWN;
   case AMD_VRAM_TYPE_DDR2:
      return RRA_MEMORY_TYPE_DDR2;
   case AMD_VRAM_TYPE_DDR3:
      return RRA_MEMORY_TYPE_DDR3;
   case AMD_VRAM_TYPE_DDR4:
      return RRA_MEMORY_TYPE_DDR4;
   case AMD_VRAM_TYPE_DDR5:
      return RRA_MEMORY_TYPE_DDR5;
   case AMD_VRAM_TYPE_HBM:
      return RRA_MEMORY_TYPE_HBM;
   case AMD_VRAM_TYPE_GDDR3:
      return RRA_MEMORY_TYPE_GDDR3;
   case AMD_VRAM_TYPE_GDDR4:
      return RRA_MEMORY_TYPE_GDDR4;
   case AMD_VRAM_TYPE_GDDR5:
      return RRA_MEMORY_TYPE_GDDR5;
   case AMD_VRAM_TYPE_GDDR6:
      return RRA_MEMORY_TYPE_GDDR6;
   case AMD_VRAM_TYPE_LPDDR4:
      return RRA_MEMORY_TYPE_LPDDR4;
   case AMD_VRAM_TYPE_LPDDR5:
      return RRA_MEMORY_TYPE_LPDDR5;
   default:
      unreachable("invalid vram type");
   }
}

static void
rra_dump_asic_info(const struct radeon_info *gpu_info, FILE *output)
{
   struct rra_asic_info asic_info = {
      /* All frequencies are in Hz */
      .min_shader_clk_freq = 0,
      .max_shader_clk_freq = gpu_info->max_gpu_freq_mhz * 1000000,
      .min_mem_clk_freq = 0,
      .max_mem_clk_freq = gpu_info->memory_freq_mhz * 1000000,

      .vram_size = (uint64_t)gpu_info->vram_size_kb * 1024,

      .mem_type = amdgpu_vram_type_to_rra(gpu_info->vram_type),
      .mem_ops_per_clk = ac_memory_ops_per_clock(gpu_info->vram_type),
      .bus_width = gpu_info->memory_bus_width,

      .device_id = gpu_info->pci.dev,
      .rev_id = gpu_info->pci_rev_id,
   };

   strncpy(asic_info.device_name, gpu_info->marketing_name ? gpu_info->marketing_name : gpu_info->name,
           RRA_FILE_DEVICE_NAME_MAX_SIZE - 1);

   fwrite(&asic_info, sizeof(struct rra_asic_info), 1, output);
}

enum rra_bvh_type {
   RRA_BVH_TYPE_TLAS,
   RRA_BVH_TYPE_BLAS,
};

struct rra_accel_struct_chunk_header {
   /*
    * Declaring this as uint64_t would make the compiler insert padding to
    * satisfy alignment restrictions.
    */
   uint32_t virtual_address[2];
   uint32_t metadata_offset;
   uint32_t metadata_size;
   uint32_t header_offset;
   uint32_t header_size;
   enum rra_bvh_type bvh_type;
};

static_assert(sizeof(struct rra_accel_struct_chunk_header) == 28,
              "rra_accel_struct_chunk_header does not match RRA spec");

struct rra_accel_struct_post_build_info {
   uint32_t bvh_type : 1;
   uint32_t reserved1 : 5;
   uint32_t tri_compression_mode : 2;
   uint32_t fp16_interior_mode : 2;
   uint32_t reserved2 : 6;
   uint32_t build_flags : 16;
};

static_assert(sizeof(struct rra_accel_struct_post_build_info) == 4,
              "rra_accel_struct_post_build_info does not match RRA spec");

struct rra_accel_struct_header {
   struct rra_accel_struct_post_build_info post_build_info;
   /*
    * Size of the internal acceleration structure metadata in the
    * proprietary drivers. Seems to always be 128.
    */
   uint32_t metadata_size;
   uint32_t file_size;
   uint32_t primitive_count;
   uint32_t active_primitive_count;
   uint32_t unused1;
   uint32_t geometry_description_count;
   VkGeometryTypeKHR geometry_type;
   uint32_t internal_nodes_offset;
   uint32_t leaf_nodes_offset;
   uint32_t geometry_infos_offset;
   uint32_t leaf_ids_offset;
   uint32_t interior_fp32_node_count;
   uint32_t interior_fp16_node_count;
   uint32_t leaf_node_count;
   uint32_t rt_driver_interface_version;
   uint64_t unused2;
   uint32_t half_fp32_node_count;
   char unused3[44];
};

#define RRA_ROOT_NODE_OFFSET align(sizeof(struct rra_accel_struct_header), 64)
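/* With the 120-byte header asserted below, this aligns up to 128: node data
 * starts 128 bytes into the transcoded structure.
 */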

static_assert(sizeof(struct rra_accel_struct_header) == 120, "rra_accel_struct_header does not match RRA spec");

struct rra_accel_struct_metadata {
   uint64_t virtual_address;
   uint32_t byte_size;
   char unused[116];
};

static_assert(sizeof(struct rra_accel_struct_metadata) == 128, "rra_accel_struct_metadata does not match RRA spec");

struct rra_geometry_info {
   uint32_t primitive_count : 29;
   uint32_t flags : 3;
   uint32_t unknown;
   uint32_t leaf_node_list_offset;
};

static_assert(sizeof(struct rra_geometry_info) == 12, "rra_geometry_info does not match RRA spec");

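/* Per-chunk layout implied by the offset computations below:
 *
 *   [rra_accel_struct_metadata][parent id table][rra_accel_struct_header]
 *   [internal nodes][leaf nodes][geometry infos (BLAS only)][leaf node ids]
 *
 * internal_nodes_offset evaluates to 128, which matches both
 * sizeof(struct rra_accel_struct_metadata) and RRA_ROOT_NODE_OFFSET, i.e.
 * node data starting right after the 64-byte-aligned header.
 */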
static struct rra_accel_struct_header
rra_fill_accel_struct_header_common(struct radv_accel_struct_header *header, size_t parent_id_table_size,
                                    size_t leaf_node_data_size, size_t internal_node_data_size,
                                    uint64_t primitive_count)
{
   struct rra_accel_struct_header result = {
      .post_build_info =
         {
            .build_flags = header->build_flags,
            /* Seems to be no compression */
            .tri_compression_mode = 0,
         },
      .primitive_count = primitive_count,
      /* TODO: calculate active primitives */
      .active_primitive_count = primitive_count,
      .geometry_description_count = header->geometry_count,
      .interior_fp32_node_count = internal_node_data_size / sizeof(struct radv_bvh_box32_node),
      .leaf_node_count = primitive_count,
      .rt_driver_interface_version = 8 << 16,
   };

   result.metadata_size = sizeof(struct rra_accel_struct_metadata) + parent_id_table_size;
   result.file_size =
      result.metadata_size + sizeof(struct rra_accel_struct_header) + internal_node_data_size + leaf_node_data_size;

   result.internal_nodes_offset = sizeof(struct rra_accel_struct_metadata);
   result.leaf_nodes_offset = result.internal_nodes_offset + internal_node_data_size;
   result.geometry_infos_offset = result.leaf_nodes_offset + leaf_node_data_size;
   result.leaf_ids_offset = result.geometry_infos_offset;
   if (!header->instance_count)
      result.leaf_ids_offset += header->geometry_count * sizeof(struct rra_geometry_info);

   return result;
}

struct rra_box32_node {
   uint32_t children[4];
   float coords[4][2][3];
   uint32_t reserved[4];
};

struct rra_box16_node {
   uint32_t children[4];
   float16_t coords[4][2][3];
};

/*
 * RRA files contain this struct in place of hardware
 * instance nodes. They're named "instance desc" internally.
 */
struct rra_instance_node {
   float wto_matrix[12];
   uint32_t custom_instance_id : 24;
   uint32_t mask : 8;
   uint32_t sbt_offset : 24;
   uint32_t instance_flags : 8;
   uint64_t blas_va : 54;
   uint64_t hw_instance_flags : 10;
   uint32_t instance_id;
   uint32_t unused1;
   uint32_t blas_metadata_size;
   uint32_t unused2;
   float otw_matrix[12];
};

static_assert(sizeof(struct rra_instance_node) == 128, "rra_instance_node does not match RRA spec!");

/*
 * Format RRA uses for aabb nodes
 */
struct rra_aabb_node {
   float aabb[2][3];
   uint32_t unused1[6];
   uint32_t geometry_id : 28;
   uint32_t flags : 4;
   uint32_t primitive_id;
   uint32_t unused[2];
};

static_assert(sizeof(struct rra_aabb_node) == 64, "rra_aabb_node does not match RRA spec!");

struct rra_triangle_node {
   float coords[3][3];
   uint32_t reserved[3];
   uint32_t geometry_id : 28;
   uint32_t flags : 4;
   uint32_t triangle_id;
   uint32_t reserved2;
   uint32_t id;
};

static_assert(sizeof(struct rra_triangle_node) == 64, "rra_triangle_node does not match RRA spec!");

static void
rra_dump_tlas_header(struct radv_accel_struct_header *header, size_t parent_id_table_size, size_t leaf_node_data_size,
                     size_t internal_node_data_size, uint64_t primitive_count, FILE *output)
{
   struct rra_accel_struct_header file_header = rra_fill_accel_struct_header_common(
      header, parent_id_table_size, leaf_node_data_size, internal_node_data_size, primitive_count);
   file_header.post_build_info.bvh_type = RRA_BVH_TYPE_TLAS;
   file_header.geometry_type = VK_GEOMETRY_TYPE_INSTANCES_KHR;

   fwrite(&file_header, sizeof(struct rra_accel_struct_header), 1, output);
}

static void
rra_dump_blas_header(struct radv_accel_struct_header *header, size_t parent_id_table_size,
                     struct radv_accel_struct_geometry_info *geometry_infos, size_t leaf_node_data_size,
                     size_t internal_node_data_size, uint64_t primitive_count, FILE *output)
{
   struct rra_accel_struct_header file_header = rra_fill_accel_struct_header_common(
      header, parent_id_table_size, leaf_node_data_size, internal_node_data_size, primitive_count);
   file_header.post_build_info.bvh_type = RRA_BVH_TYPE_BLAS;
   file_header.geometry_type = header->geometry_count ? geometry_infos->type : VK_GEOMETRY_TYPE_TRIANGLES_KHR;

   fwrite(&file_header, sizeof(struct rra_accel_struct_header), 1, output);
}

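/* The parent id table is indexed back to front: one uint32_t per 64 bytes of
 * node data, with the entry for the node at RRA_ROOT_NODE_OFFSET stored last.
 * For example, with a 16-entry (64-byte) table, the root node maps to index
 * 15 and the node 64 bytes after it to index 14.
 */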
static uint32_t
rra_parent_table_index_from_offset(uint32_t offset, uint32_t parent_table_size)
{
   uint32_t max_parent_table_index = parent_table_size / sizeof(uint32_t) - 1;
   return max_parent_table_index - (offset - RRA_ROOT_NODE_OFFSET) / 64;
}

struct rra_validation_context {
   bool failed;
   char location[31];
};

static void PRINTFLIKE(2, 3) rra_validation_fail(struct rra_validation_context *ctx, const char *message, ...)
{
   if (!ctx->failed) {
      fprintf(stderr, "radv: rra: Validation failed at %s:\n", ctx->location);
      ctx->failed = true;
   }

   fprintf(stderr, "   ");

   va_list list;
   va_start(list, message);
   vfprintf(stderr, message, list);
   va_end(list);

   fprintf(stderr, "\n");
}

static bool
rra_validate_header(struct radv_rra_accel_struct_data *accel_struct, const struct radv_accel_struct_header *header)
{
   struct rra_validation_context ctx = {
      .location = "header",
   };

   if (accel_struct->type == VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR && header->instance_count > 0)
      rra_validation_fail(&ctx, "BLAS contains instances");

   if (header->bvh_offset >= accel_struct->size)
      rra_validation_fail(&ctx, "Invalid BVH offset %u", header->bvh_offset);

   if (header->instance_count * sizeof(struct radv_bvh_instance_node) >= accel_struct->size)
      rra_validation_fail(&ctx, "Too many instances");

   return ctx.failed;
}

static bool
is_internal_node(uint32_t type)
{
   return type == radv_bvh_node_box16 || type == radv_bvh_node_box32;
}

static const char *node_type_names[8] = {
   [radv_bvh_node_triangle + 0] = "triangle0",
   [radv_bvh_node_triangle + 1] = "triangle1",
   [radv_bvh_node_triangle + 2] = "triangle2",
   [radv_bvh_node_triangle + 3] = "triangle3",
   [radv_bvh_node_box16] = "box16",
   [radv_bvh_node_box32] = "box32",
   [radv_bvh_node_instance] = "instance",
   [radv_bvh_node_aabb] = "aabb",
};

static bool
rra_validate_node(struct hash_table_u64 *accel_struct_vas, uint8_t *data, void *node, uint32_t geometry_count,
                  uint32_t size, bool is_bottom_level, uint32_t depth)
{
   struct rra_validation_context ctx = {0};

   if (depth > 1024) {
      rra_validation_fail(&ctx, "depth > 1024");
      return true;
   }

   uint32_t cur_offset = (uint8_t *)node - data;
   snprintf(ctx.location, sizeof(ctx.location), "internal node (offset=%u)", cur_offset);

   /* The child ids are located at offset=0 for both box16 and box32 nodes. */
   uint32_t *children = node;
   for (uint32_t i = 0; i < 4; ++i) {
      if (children[i] == 0xFFFFFFFF)
         continue;

      uint32_t type = children[i] & 7;
      uint32_t offset = (children[i] & (~7u)) << 3;

      if (!is_internal_node(type) && is_bottom_level == (type == radv_bvh_node_instance))
         rra_validation_fail(&ctx,
                             is_bottom_level ? "%s node in BLAS (child index %u)" : "%s node in TLAS (child index %u)",
                             node_type_names[type], i);

      if (offset > size) {
         rra_validation_fail(&ctx, "Invalid child offset (child index %u)", i);
         continue;
      }

      struct rra_validation_context child_ctx = {0};
      snprintf(child_ctx.location, sizeof(child_ctx.location), "%s node (offset=%u)", node_type_names[type], offset);

      if (is_internal_node(type)) {
         ctx.failed |=
            rra_validate_node(accel_struct_vas, data, data + offset, geometry_count, size, is_bottom_level, depth + 1);
      } else if (type == radv_bvh_node_instance) {
         struct radv_bvh_instance_node *src = (struct radv_bvh_instance_node *)(data + offset);
         uint64_t blas_va = node_to_addr(src->bvh_ptr) - src->bvh_offset;
         if (!_mesa_hash_table_u64_search(accel_struct_vas, blas_va))
            rra_validation_fail(&child_ctx, "Invalid instance node pointer 0x%llx (offset: 0x%x)",
                                (unsigned long long)src->bvh_ptr, src->bvh_offset);
      } else if (type == radv_bvh_node_aabb) {
         struct radv_bvh_aabb_node *src = (struct radv_bvh_aabb_node *)(data + offset);
         if ((src->geometry_id_and_flags & 0xFFFFFFF) >= geometry_count)
            rra_validation_fail(&ctx, "geometry_id >= geometry_count");
      } else {
         struct radv_bvh_triangle_node *src = (struct radv_bvh_triangle_node *)(data + offset);
         if ((src->geometry_id_and_flags & 0xFFFFFFF) >= geometry_count)
            rra_validation_fail(&ctx, "geometry_id >= geometry_count");
      }

      ctx.failed |= child_ctx.failed;
   }
   return ctx.failed;
}

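/* Transcoding writes two streams into the destination buffer: internal (box)
 * nodes grow from dst_internal_offset and leaf nodes grow from
 * dst_leaf_offset, which starts where the internal node region ends. This
 * matches the file layout above, where internal_nodes_offset appears to be
 * required to precede leaf_nodes_offset.
 */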
struct rra_transcoding_context {
   const uint8_t *src;
   uint8_t *dst;
   uint32_t dst_leaf_offset;
   uint32_t dst_internal_offset;
   uint32_t *parent_id_table;
   uint32_t parent_id_table_size;
   uint32_t *leaf_node_ids;
   uint32_t *leaf_indices;
};

static void
rra_transcode_triangle_node(struct rra_transcoding_context *ctx, const struct radv_bvh_triangle_node *src)
{
   struct rra_triangle_node *dst = (struct rra_triangle_node *)(ctx->dst + ctx->dst_leaf_offset);
   ctx->dst_leaf_offset += sizeof(struct rra_triangle_node);

   for (int i = 0; i < 3; ++i)
      for (int j = 0; j < 3; ++j)
         dst->coords[i][j] = src->coords[i][j];
   dst->triangle_id = src->triangle_id;
   dst->geometry_id = src->geometry_id_and_flags & 0xfffffff;
   dst->flags = src->geometry_id_and_flags >> 28;
   dst->id = src->id;
}

static void
rra_transcode_aabb_node(struct rra_transcoding_context *ctx, const struct radv_bvh_aabb_node *src, radv_aabb bounds)
{
   struct rra_aabb_node *dst = (struct rra_aabb_node *)(ctx->dst + ctx->dst_leaf_offset);
   ctx->dst_leaf_offset += sizeof(struct rra_aabb_node);

   dst->aabb[0][0] = bounds.min.x;
   dst->aabb[0][1] = bounds.min.y;
   dst->aabb[0][2] = bounds.min.z;
   dst->aabb[1][0] = bounds.max.x;
   dst->aabb[1][1] = bounds.max.y;
   dst->aabb[1][2] = bounds.max.z;

   dst->geometry_id = src->geometry_id_and_flags & 0xfffffff;
   dst->flags = src->geometry_id_and_flags >> 28;
   dst->primitive_id = src->primitive_id;
}

static void
rra_transcode_instance_node(struct rra_transcoding_context *ctx, const struct radv_bvh_instance_node *src)
{
   uint64_t blas_va = node_to_addr(src->bvh_ptr) - src->bvh_offset;

   struct rra_instance_node *dst = (struct rra_instance_node *)(ctx->dst + ctx->dst_leaf_offset);
   ctx->dst_leaf_offset += sizeof(struct rra_instance_node);

   dst->custom_instance_id = src->custom_instance_and_mask & 0xffffff;
   dst->mask = src->custom_instance_and_mask >> 24;
   dst->sbt_offset = src->sbt_offset_and_flags & 0xffffff;
   dst->instance_flags = src->sbt_offset_and_flags >> 24;
   dst->blas_va = (blas_va + sizeof(struct rra_accel_struct_metadata)) >> 3;
   dst->instance_id = src->instance_id;
   dst->blas_metadata_size = sizeof(struct rra_accel_struct_metadata);

   memcpy(dst->wto_matrix, src->wto_matrix.values, sizeof(dst->wto_matrix));
   memcpy(dst->otw_matrix, src->otw_matrix.values, sizeof(dst->otw_matrix));
}

static uint32_t rra_transcode_node(struct rra_transcoding_context *ctx, uint32_t parent_id, uint32_t src_id,
                                   radv_aabb bounds);

static void
rra_transcode_box16_node(struct rra_transcoding_context *ctx, const struct radv_bvh_box16_node *src)
{
   uint32_t dst_offset = ctx->dst_internal_offset;
   ctx->dst_internal_offset += sizeof(struct rra_box16_node);
   struct rra_box16_node *dst = (struct rra_box16_node *)(ctx->dst + dst_offset);

   memcpy(dst->coords, src->coords, sizeof(dst->coords));

   for (uint32_t i = 0; i < 4; ++i) {
      if (src->children[i] == 0xffffffff) {
         dst->children[i] = 0xffffffff;
         continue;
      }

      radv_aabb bounds = {
         .min =
            {
               _mesa_half_to_float(src->coords[i][0][0]),
               _mesa_half_to_float(src->coords[i][0][1]),
               _mesa_half_to_float(src->coords[i][0][2]),
            },
         .max =
            {
               _mesa_half_to_float(src->coords[i][1][0]),
               _mesa_half_to_float(src->coords[i][1][1]),
               _mesa_half_to_float(src->coords[i][1][2]),
            },
      };

      dst->children[i] = rra_transcode_node(ctx, radv_bvh_node_box16 | (dst_offset >> 3), src->children[i], bounds);
   }
}

static void
rra_transcode_box32_node(struct rra_transcoding_context *ctx, const struct radv_bvh_box32_node *src)
{
   uint32_t dst_offset = ctx->dst_internal_offset;
   ctx->dst_internal_offset += sizeof(struct rra_box32_node);
   struct rra_box32_node *dst = (struct rra_box32_node *)(ctx->dst + dst_offset);

   memcpy(dst->coords, src->coords, sizeof(dst->coords));

   for (uint32_t i = 0; i < 4; ++i) {
      if (isnan(src->coords[i].min.x)) {
         dst->children[i] = 0xffffffff;
         continue;
      }

      dst->children[i] =
         rra_transcode_node(ctx, radv_bvh_node_box32 | (dst_offset >> 3), src->children[i], src->coords[i]);
   }
}

static uint32_t
get_geometry_id(const void *node, uint32_t node_type)
{
   if (node_type == radv_bvh_node_triangle) {
      const struct radv_bvh_triangle_node *triangle = node;
      return triangle->geometry_id_and_flags & 0xFFFFFFF;
   }

   if (node_type == radv_bvh_node_aabb) {
      const struct radv_bvh_aabb_node *aabb = node;
      return aabb->geometry_id_and_flags & 0xFFFFFFF;
   }

   return 0;
}

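/* Node ids encode (byte_offset >> 3) | node_type, so masking off the low
 * 3 type bits and shifting left by 3 recovers the byte offset. Example:
 * src_id = 0x15 -> type 5 (box32), offset (0x15 & ~7) << 3 = 0x80.
 */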
static uint32_t
rra_transcode_node(struct rra_transcoding_context *ctx, uint32_t parent_id, uint32_t src_id, radv_aabb bounds)
{
   uint32_t node_type = src_id & 7;
   uint32_t src_offset = (src_id & (~7u)) << 3;

   uint32_t dst_offset;

   const void *src_child_node = ctx->src + src_offset;
   if (is_internal_node(node_type)) {
      dst_offset = ctx->dst_internal_offset;
      if (node_type == radv_bvh_node_box32)
         rra_transcode_box32_node(ctx, src_child_node);
      else
         rra_transcode_box16_node(ctx, src_child_node);
   } else {
      dst_offset = ctx->dst_leaf_offset;

      if (node_type == radv_bvh_node_triangle)
         rra_transcode_triangle_node(ctx, src_child_node);
      else if (node_type == radv_bvh_node_aabb)
         rra_transcode_aabb_node(ctx, src_child_node, bounds);
      else if (node_type == radv_bvh_node_instance)
         rra_transcode_instance_node(ctx, src_child_node);
   }

   uint32_t parent_id_index = rra_parent_table_index_from_offset(dst_offset, ctx->parent_id_table_size);
   ctx->parent_id_table[parent_id_index] = parent_id;

   uint32_t dst_id = node_type | (dst_offset >> 3);
   if (!is_internal_node(node_type))
      ctx->leaf_node_ids[ctx->leaf_indices[get_geometry_id(src_child_node, node_type)]++] = dst_id;

   return dst_id;
}

struct rra_bvh_info {
   uint32_t leaf_nodes_size;
   uint32_t internal_nodes_size;
   struct rra_geometry_info *geometry_infos;
};

static void
rra_gather_bvh_info(const uint8_t *bvh, uint32_t node_id, struct rra_bvh_info *dst)
{
   uint32_t node_type = node_id & 7;

   switch (node_type) {
   case radv_bvh_node_box16:
      dst->internal_nodes_size += sizeof(struct rra_box16_node);
      break;
   case radv_bvh_node_box32:
      dst->internal_nodes_size += sizeof(struct rra_box32_node);
      break;
   case radv_bvh_node_instance:
      dst->leaf_nodes_size += sizeof(struct rra_instance_node);
      break;
   case radv_bvh_node_triangle:
      dst->leaf_nodes_size += sizeof(struct rra_triangle_node);
      break;
   case radv_bvh_node_aabb:
      dst->leaf_nodes_size += sizeof(struct rra_aabb_node);
      break;
   default:
      break;
   }

   const void *node = bvh + ((node_id & (~7u)) << 3);
   if (is_internal_node(node_type)) {
      /* The child ids are located at offset=0 for both box16 and box32 nodes. */
      const uint32_t *children = node;
      for (uint32_t i = 0; i < 4; i++)
         if (children[i] != 0xffffffff)
            rra_gather_bvh_info(bvh, children[i], dst);
   } else {
      dst->geometry_infos[get_geometry_id(node, node_type)].primitive_count++;
   }
}

static VkResult
rra_dump_acceleration_structure(struct radv_rra_accel_struct_data *accel_struct, uint8_t *data,
                                struct hash_table_u64 *accel_struct_vas, bool should_validate, FILE *output)
{
   struct radv_accel_struct_header *header = (struct radv_accel_struct_header *)data;

   bool is_tlas = header->instance_count > 0;

   uint64_t geometry_infos_offset = sizeof(struct radv_accel_struct_header);

   /* convert root node id to offset */
   uint32_t src_root_offset = (RADV_BVH_ROOT_NODE & ~7) << 3;

   if (should_validate) {
      if (rra_validate_header(accel_struct, header)) {
         return VK_ERROR_VALIDATION_FAILED_EXT;
      }
      if (rra_validate_node(accel_struct_vas, data + header->bvh_offset, data + header->bvh_offset + src_root_offset,
                            header->geometry_count, accel_struct->size, !is_tlas, 0)) {
         return VK_ERROR_VALIDATION_FAILED_EXT;
      }
   }

   VkResult result = VK_SUCCESS;

   struct rra_geometry_info *rra_geometry_infos = NULL;
   uint32_t *leaf_indices = NULL;
   uint32_t *node_parent_table = NULL;
   uint32_t *leaf_node_ids = NULL;
   uint8_t *dst_structure_data = NULL;

   rra_geometry_infos = calloc(header->geometry_count, sizeof(struct rra_geometry_info));
   if (!rra_geometry_infos) {
      result = VK_ERROR_OUT_OF_HOST_MEMORY;
      goto exit;
   }

   struct rra_bvh_info bvh_info = {
      .geometry_infos = rra_geometry_infos,
   };
   rra_gather_bvh_info(data + header->bvh_offset, RADV_BVH_ROOT_NODE, &bvh_info);

   /* leaf_indices holds one uint32_t running index per geometry. */
   leaf_indices = calloc(header->geometry_count, sizeof(uint32_t));
   if (!leaf_indices) {
      result = VK_ERROR_OUT_OF_HOST_MEMORY;
      goto exit;
   }

   uint64_t primitive_count = 0;

   struct radv_accel_struct_geometry_info *geometry_infos =
      (struct radv_accel_struct_geometry_info *)(data + geometry_infos_offset);

   for (uint32_t i = 0; i < header->geometry_count; ++i) {
      rra_geometry_infos[i].flags = geometry_infos[i].flags;
      rra_geometry_infos[i].leaf_node_list_offset = primitive_count * sizeof(uint32_t);
      leaf_indices[i] = primitive_count;
      primitive_count += rra_geometry_infos[i].primitive_count;
   }

   uint32_t node_parent_table_size =
      ((bvh_info.leaf_nodes_size + bvh_info.internal_nodes_size) / 64) * sizeof(uint32_t);

   node_parent_table = calloc(node_parent_table_size, 1);
   if (!node_parent_table) {
      result = VK_ERROR_OUT_OF_HOST_MEMORY;
      goto exit;
   }

   leaf_node_ids = calloc(primitive_count, sizeof(uint32_t));
   if (!leaf_node_ids) {
      result = VK_ERROR_OUT_OF_HOST_MEMORY;
      goto exit;
   }
   dst_structure_data = calloc(RRA_ROOT_NODE_OFFSET + bvh_info.internal_nodes_size + bvh_info.leaf_nodes_size, 1);
   if (!dst_structure_data) {
      result = VK_ERROR_OUT_OF_HOST_MEMORY;
      goto exit;
   }

   struct rra_transcoding_context ctx = {
      .src = data + header->bvh_offset,
      .dst = dst_structure_data,
      .dst_leaf_offset = RRA_ROOT_NODE_OFFSET + bvh_info.internal_nodes_size,
      .dst_internal_offset = RRA_ROOT_NODE_OFFSET,
      .parent_id_table = node_parent_table,
      .parent_id_table_size = node_parent_table_size,
      .leaf_node_ids = leaf_node_ids,
      .leaf_indices = leaf_indices,
   };

   rra_transcode_node(&ctx, 0xFFFFFFFF, RADV_BVH_ROOT_NODE, header->aabb);

   struct rra_accel_struct_chunk_header chunk_header = {
      .metadata_offset = 0,
      /*
       * RRA loads the part of the metadata that is used into a struct.
       * If the size is larger than just the "used" part, the loading
       * operation overwrites internal pointers with data from the file,
       * likely causing a crash.
       */
      .metadata_size = offsetof(struct rra_accel_struct_metadata, unused),
      .header_offset = sizeof(struct rra_accel_struct_metadata) + node_parent_table_size,
      .header_size = sizeof(struct rra_accel_struct_header),
      .bvh_type = is_tlas ? RRA_BVH_TYPE_TLAS : RRA_BVH_TYPE_BLAS,
   };

   /*
    * When associating TLASes with BLASes, acceleration structure VAs are
    * looked up in a hashmap. But due to the way BLAS VAs are stored for
    * each instance in the RRA file format (divided by 8, and limited to 54 bits),
    * the top bits are masked away.
    * In order to make sure BLASes can be found in the hashmap, we have
    * to replicate that mask here.
    */
   uint64_t va = accel_struct->va & 0x1FFFFFFFFFFFFFF;
   memcpy(chunk_header.virtual_address, &va, sizeof(uint64_t));

   struct rra_accel_struct_metadata rra_metadata = {
      .virtual_address = va,
      .byte_size = bvh_info.leaf_nodes_size + bvh_info.internal_nodes_size + sizeof(struct rra_accel_struct_header),
   };

   fwrite(&chunk_header, sizeof(struct rra_accel_struct_chunk_header), 1, output);
   fwrite(&rra_metadata, sizeof(struct rra_accel_struct_metadata), 1, output);

   /* Write node parent id data */
   fwrite(node_parent_table, 1, node_parent_table_size, output);

   if (is_tlas)
      rra_dump_tlas_header(header, node_parent_table_size, bvh_info.leaf_nodes_size, bvh_info.internal_nodes_size,
                           primitive_count, output);
   else
      rra_dump_blas_header(header, node_parent_table_size, geometry_infos, bvh_info.leaf_nodes_size,
                           bvh_info.internal_nodes_size, primitive_count, output);

   /* Write acceleration structure data */
   fwrite(dst_structure_data + RRA_ROOT_NODE_OFFSET, 1, bvh_info.internal_nodes_size + bvh_info.leaf_nodes_size,
          output);

   if (!is_tlas)
      fwrite(rra_geometry_infos, sizeof(struct rra_geometry_info), header->geometry_count, output);

   /* Write leaf node ids */
   uint32_t leaf_node_list_size = primitive_count * sizeof(uint32_t);
   fwrite(leaf_node_ids, 1, leaf_node_list_size, output);

exit:
   free(rra_geometry_infos);
   free(leaf_indices);
   free(dst_structure_data);
   free(node_parent_table);
   free(leaf_node_ids);

   return result;
}

VkResult
radv_rra_trace_init(struct radv_device *device)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);

   device->rra_trace.validate_as = debug_get_bool_option("RADV_RRA_TRACE_VALIDATE", false);
   device->rra_trace.copy_after_build = debug_get_bool_option("RADV_RRA_TRACE_COPY_AFTER_BUILD", true);
   device->rra_trace.accel_structs = _mesa_pointer_hash_table_create(NULL);
   device->rra_trace.accel_struct_vas = _mesa_hash_table_u64_create(NULL);
   simple_mtx_init(&device->rra_trace.data_mtx, mtx_plain);

   device->rra_trace.copy_memory_index =
      radv_find_memory_index(pdev, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
                                      VK_MEMORY_PROPERTY_HOST_CACHED_BIT);

   util_dynarray_init(&device->rra_trace.ray_history, NULL);

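   /* A ray history buffer that cannot hold at least one end-trace token is
    * useless; in that case ray history capture is skipped and only
    * acceleration structures are dumped.
    */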
   device->rra_trace.ray_history_buffer_size = debug_get_num_option("RADV_RRA_TRACE_HISTORY_SIZE", 100 * 1024 * 1024);
   if (device->rra_trace.ray_history_buffer_size <
       sizeof(struct radv_ray_history_header) + sizeof(struct radv_packed_end_trace_token))
      return VK_SUCCESS;

   device->rra_trace.ray_history_resolution_scale = debug_get_num_option("RADV_RRA_TRACE_RESOLUTION_SCALE", 1);
   device->rra_trace.ray_history_resolution_scale = MAX2(device->rra_trace.ray_history_resolution_scale, 1);

   VkBufferCreateInfo buffer_create_info = {
      .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
      .pNext =
         &(VkBufferUsageFlags2CreateInfoKHR){
            .sType = VK_STRUCTURE_TYPE_BUFFER_USAGE_FLAGS_2_CREATE_INFO_KHR,
            .usage = VK_BUFFER_USAGE_2_TRANSFER_SRC_BIT_KHR | VK_BUFFER_USAGE_2_SHADER_DEVICE_ADDRESS_BIT_KHR,
         },
      .size = device->rra_trace.ray_history_buffer_size,
   };

   VkDevice _device = radv_device_to_handle(device);
   VkResult result = radv_CreateBuffer(_device, &buffer_create_info, NULL, &device->rra_trace.ray_history_buffer);
   if (result != VK_SUCCESS)
      return result;

   VkMemoryRequirements requirements;
   vk_common_GetBufferMemoryRequirements(_device, device->rra_trace.ray_history_buffer, &requirements);

   VkMemoryAllocateInfo alloc_info = {
      .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
      .allocationSize = requirements.size,
      .memoryTypeIndex =
         radv_find_memory_index(pdev, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                                         VK_MEMORY_PROPERTY_HOST_COHERENT_BIT),
   };

   result = radv_AllocateMemory(_device, &alloc_info, NULL, &device->rra_trace.ray_history_memory);
   if (result != VK_SUCCESS)
      return result;

   result = vk_common_MapMemory(_device, device->rra_trace.ray_history_memory, 0, VK_WHOLE_SIZE, 0,
                                (void **)&device->rra_trace.ray_history_data);
   if (result != VK_SUCCESS)
      return result;

   result = vk_common_BindBufferMemory(_device, device->rra_trace.ray_history_buffer,
                                       device->rra_trace.ray_history_memory, 0);

   VkBufferDeviceAddressInfo addr_info = {
      .sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO,
      .buffer = device->rra_trace.ray_history_buffer,
   };
   device->rra_trace.ray_history_addr = radv_GetBufferDeviceAddress(_device, &addr_info);

   struct radv_ray_history_header *ray_history_header = device->rra_trace.ray_history_data;
   memset(ray_history_header, 0, sizeof(struct radv_ray_history_header));
   ray_history_header->offset = 1;

   return result;
}

void
radv_rra_trace_clear_ray_history(VkDevice _device, struct radv_rra_trace_data *data)
{
   util_dynarray_foreach (&data->ray_history, struct radv_rra_ray_history_data *, _entry) {
      struct radv_rra_ray_history_data *entry = *_entry;
      free(entry);
   }
   util_dynarray_clear(&data->ray_history);
}

void
radv_radv_rra_accel_struct_buffer_ref(struct radv_rra_accel_struct_buffer *buffer)
{
   assert(buffer->ref_cnt >= 1);
   p_atomic_inc(&buffer->ref_cnt);
}

void
radv_rra_accel_struct_buffer_unref(struct radv_device *device, struct radv_rra_accel_struct_buffer *buffer)
{
   if (p_atomic_dec_zero(&buffer->ref_cnt)) {
      VkDevice _device = radv_device_to_handle(device);
      radv_DestroyBuffer(_device, buffer->buffer, NULL);
      radv_FreeMemory(_device, buffer->memory, NULL);
   }
}

void
radv_rra_accel_struct_buffers_unref(struct radv_device *device, struct set *buffers)
{
   set_foreach_remove (buffers, entry)
      radv_rra_accel_struct_buffer_unref(device, (void *)entry->key);
}

void
radv_rra_trace_finish(VkDevice vk_device, struct radv_rra_trace_data *data)
{
   radv_DestroyBuffer(vk_device, data->ray_history_buffer, NULL);

   if (data->ray_history_memory)
      vk_common_UnmapMemory(vk_device, data->ray_history_memory);

   radv_FreeMemory(vk_device, data->ray_history_memory, NULL);

   radv_rra_trace_clear_ray_history(vk_device, data);
   util_dynarray_fini(&data->ray_history);

   if (data->accel_structs)
      hash_table_foreach (data->accel_structs, entry)
         radv_destroy_rra_accel_struct_data(vk_device, entry->data);

   simple_mtx_destroy(&data->data_mtx);
   _mesa_hash_table_destroy(data->accel_structs, NULL);
   _mesa_hash_table_u64_destroy(data->accel_struct_vas);
}

void
radv_destroy_rra_accel_struct_data(VkDevice _device, struct radv_rra_accel_struct_data *data)
{
   VK_FROM_HANDLE(radv_device, device, _device);

   if (data->buffer)
      radv_rra_accel_struct_buffer_unref(device, data->buffer);

   radv_DestroyEvent(_device, data->build_event, NULL);
   free(data);
}

static int
accel_struct_entry_cmp(const void *a, const void *b)
{
   struct hash_entry *entry_a = *(struct hash_entry *const *)a;
   struct hash_entry *entry_b = *(struct hash_entry *const *)b;
   const struct radv_rra_accel_struct_data *s_a = entry_a->data;
   const struct radv_rra_accel_struct_data *s_b = entry_b->data;

   return s_a->va > s_b->va ? 1 : s_a->va < s_b->va ? -1 : 0;
}

struct rra_copy_context {
   VkDevice device;
   VkQueue queue;

   VkCommandPool pool;
   VkCommandBuffer cmd_buffer;
   uint32_t family_index;

   VkDeviceMemory memory;
   VkBuffer buffer;
   void *mapped_data;

   struct hash_entry **entries;

   uint32_t min_size;
};

static VkResult
rra_copy_context_init(struct rra_copy_context *ctx)
{
   VK_FROM_HANDLE(radv_device, device, ctx->device);
   if (device->rra_trace.copy_after_build)
      return VK_SUCCESS;

   uint32_t max_size = ctx->min_size;
   uint32_t accel_struct_count = _mesa_hash_table_num_entries(device->rra_trace.accel_structs);
   for (unsigned i = 0; i < accel_struct_count; i++) {
      struct radv_rra_accel_struct_data *data = ctx->entries[i]->data;
      max_size = MAX2(max_size, data->size);
   }

   VkCommandPoolCreateInfo pool_info = {
      .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
      .queueFamilyIndex = ctx->family_index,
   };

   VkResult result = vk_common_CreateCommandPool(ctx->device, &pool_info, NULL, &ctx->pool);
   if (result != VK_SUCCESS)
      return result;

   VkCommandBufferAllocateInfo cmdbuf_alloc_info = {
      .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
      .commandPool = ctx->pool,
      .commandBufferCount = 1,
   };

   result = vk_common_AllocateCommandBuffers(ctx->device, &cmdbuf_alloc_info, &ctx->cmd_buffer);
   if (result != VK_SUCCESS)
      goto fail_pool;

   VkBufferCreateInfo buffer_create_info = {
      .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
      .pNext =
         &(VkBufferUsageFlags2CreateInfoKHR){
            .sType = VK_STRUCTURE_TYPE_BUFFER_USAGE_FLAGS_2_CREATE_INFO_KHR,
            .usage = VK_BUFFER_USAGE_2_TRANSFER_DST_BIT_KHR,
         },
      .size = max_size,
   };

   result = radv_CreateBuffer(ctx->device, &buffer_create_info, NULL, &ctx->buffer);
   if (result != VK_SUCCESS)
      goto fail_pool;

   VkMemoryRequirements requirements;
   vk_common_GetBufferMemoryRequirements(ctx->device, ctx->buffer, &requirements);

   VkMemoryAllocateInfo alloc_info = {
      .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
      .allocationSize = requirements.size,
      .memoryTypeIndex = device->rra_trace.copy_memory_index,
   };

   result = radv_AllocateMemory(ctx->device, &alloc_info, NULL, &ctx->memory);
   if (result != VK_SUCCESS)
      goto fail_buffer;

   result = vk_common_MapMemory(ctx->device, ctx->memory, 0, VK_WHOLE_SIZE, 0, (void **)&ctx->mapped_data);
   if (result != VK_SUCCESS)
      goto fail_memory;

   result = vk_common_BindBufferMemory(ctx->device, ctx->buffer, ctx->memory, 0);
   if (result != VK_SUCCESS)
      goto fail_memory;

   return result;
fail_memory:
   radv_FreeMemory(ctx->device, ctx->memory, NULL);
fail_buffer:
   radv_DestroyBuffer(ctx->device, ctx->buffer, NULL);
fail_pool:
   vk_common_DestroyCommandPool(ctx->device, ctx->pool, NULL);
   return result;
}

static void
rra_copy_context_finish(struct rra_copy_context *ctx)
{
   VK_FROM_HANDLE(radv_device, device, ctx->device);
   if (device->rra_trace.copy_after_build)
      return;

   vk_common_DestroyCommandPool(ctx->device, ctx->pool, NULL);
   radv_DestroyBuffer(ctx->device, ctx->buffer, NULL);
   vk_common_UnmapMemory(ctx->device, ctx->memory);
   radv_FreeMemory(ctx->device, ctx->memory, NULL);
}

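/* Two capture modes: if copy_after_build is set, each acceleration structure
 * already has a host-visible snapshot and only needs to be mapped. Otherwise
 * the live GPU buffer is copied into the staging buffer with a one-shot
 * command buffer and the staging mapping is returned instead.
 */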
static void *
rra_map_accel_struct_data(struct rra_copy_context *ctx, uint32_t i)
{
   struct radv_rra_accel_struct_data *data = ctx->entries[i]->data;
   if (radv_GetEventStatus(ctx->device, data->build_event) != VK_EVENT_SET)
      return NULL;

   if (data->buffer->memory) {
      void *mapped_data;
      vk_common_MapMemory(ctx->device, data->buffer->memory, 0, VK_WHOLE_SIZE, 0, &mapped_data);
      return mapped_data;
   }

   const struct vk_acceleration_structure *accel_struct = ctx->entries[i]->key;
   VkResult result;

   VkCommandBufferBeginInfo begin_info = {
      .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
   };
   result = radv_BeginCommandBuffer(ctx->cmd_buffer, &begin_info);
   if (result != VK_SUCCESS)
      return NULL;

   VkBufferCopy2 copy = {
      .sType = VK_STRUCTURE_TYPE_BUFFER_COPY_2,
      .srcOffset = accel_struct->offset,
      .size = accel_struct->size,
   };

   VkCopyBufferInfo2 copy_info = {
      .sType = VK_STRUCTURE_TYPE_COPY_BUFFER_INFO_2,
      .srcBuffer = accel_struct->buffer,
      .dstBuffer = ctx->buffer,
      .regionCount = 1,
      .pRegions = &copy,
   };

   radv_CmdCopyBuffer2(ctx->cmd_buffer, &copy_info);

   result = radv_EndCommandBuffer(ctx->cmd_buffer);
   if (result != VK_SUCCESS)
      return NULL;

   VkSubmitInfo submit_info = {
      .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
      .commandBufferCount = 1,
      .pCommandBuffers = &ctx->cmd_buffer,
   };

   result = vk_common_QueueSubmit(ctx->queue, 1, &submit_info, VK_NULL_HANDLE);
   if (result != VK_SUCCESS)
      return NULL;

   result = vk_common_QueueWaitIdle(ctx->queue);
   if (result != VK_SUCCESS)
      return NULL;

   return ctx->mapped_data;
}

static void
rra_unmap_accel_struct_data(struct rra_copy_context *ctx, uint32_t i)
{
   struct radv_rra_accel_struct_data *data = ctx->entries[i]->data;

   if (data->buffer && data->buffer->memory)
      vk_common_UnmapMemory(ctx->device, data->buffer->memory);
}

enum rra_ray_history_token_type {
   rra_ray_history_token_begin,
   rra_ray_history_token_tlas,
   rra_ray_history_token_blas,
   rra_ray_history_token_end,
   rra_ray_history_token_call,
   rra_ray_history_token_timestamp,
   rra_ray_history_token_ahit_status,
   rra_ray_history_token_call2,
   rra_ray_history_token_isec_status,
   rra_ray_history_token_end2,
   rra_ray_history_token_begin2,
   rra_ray_history_token_normal = 0xFFFF,
};

struct rra_ray_history_id_token {
   uint32_t id : 30;
   uint32_t reserved : 1;
   uint32_t has_control : 1;
};
static_assert(sizeof(struct rra_ray_history_id_token) == 4, "rra_ray_history_id_token does not match RRA expectations");

struct rra_ray_history_control_token {
   uint32_t type : 16;
   uint32_t length : 8;
   uint32_t data : 8;
};
static_assert(sizeof(struct rra_ray_history_control_token) == 4,
              "rra_ray_history_control_token does not match RRA expectations");

struct rra_ray_history_begin_token {
   uint32_t wave_id;
   uint32_t launch_ids[3];
   uint32_t accel_struct_lo;
   uint32_t accel_struct_hi;
   uint32_t ray_flags;
   uint32_t cull_mask : 8;
   uint32_t stb_offset : 4;
   uint32_t stb_stride : 4;
   uint32_t miss_index : 16;
   float origin[3];
   float tmin;
   float direction[3];
   float tmax;
};
static_assert(sizeof(struct rra_ray_history_begin_token) == 64,
              "rra_ray_history_begin_token does not match RRA expectations");

struct rra_ray_history_begin2_token {
   struct rra_ray_history_begin_token base;
   uint32_t call_instruction_id;
   uint32_t unique_wave_id;
   uint32_t parent_unique_wave_id;
};
static_assert(sizeof(struct rra_ray_history_begin2_token) == 76,
              "rra_ray_history_begin2_token does not match RRA expectations");

struct rra_ray_history_end_token {
   uint32_t primitive_index;
   uint32_t geometry_index;
};
static_assert(sizeof(struct rra_ray_history_end_token) == 8,
              "rra_ray_history_end_token does not match RRA expectations");

struct rra_ray_history_end2_token {
   struct rra_ray_history_end_token base;
   uint32_t instance_index : 24;
   uint32_t hit_kind : 8;
   uint32_t iteration_count;
   uint32_t candidate_instance_count;
   float t;
};
static_assert(sizeof(struct rra_ray_history_end2_token) == 24,
              "rra_ray_history_end2_token does not match RRA expectations");

struct rra_ray_history_tlas_token {
   uint64_t addr;
};
static_assert(sizeof(struct rra_ray_history_tlas_token) == 8,
              "rra_ray_history_tlas_token does not match RRA expectations");

struct rra_ray_history_blas_token {
   uint64_t addr;
};
static_assert(sizeof(struct rra_ray_history_blas_token) == 8,
              "rra_ray_history_blas_token does not match RRA expectations");

struct rra_ray_history_call_token {
   uint32_t addr[2];
};
static_assert(sizeof(struct rra_ray_history_call_token) == 8,
              "rra_ray_history_call_token does not match RRA expectations");

struct rra_ray_history_call2_token {
   struct rra_ray_history_call_token base;
   uint32_t sbt_index;
};
static_assert(sizeof(struct rra_ray_history_call2_token) == 12,
              "rra_ray_history_call2_token does not match RRA expectations");

struct rra_ray_history_isec_token {
   float t;
   uint32_t hit_kind;
};
static_assert(sizeof(struct rra_ray_history_isec_token) == 8,
              "rra_ray_history_isec_token does not match RRA expectations");

struct rra_ray_history_timestamp_token {
   uint64_t gpu_timestamp;
};
static_assert(sizeof(struct rra_ray_history_timestamp_token) == 8,
              "rra_ray_history_timestamp_token does not match RRA expectations");

VkResult
radv_rra_dump_trace(VkQueue vk_queue, char *filename)
{
   VK_FROM_HANDLE(radv_queue, queue, vk_queue);
   struct radv_device *device = radv_queue_device(queue);
   const struct radv_physical_device *pdev = radv_device_physical(device);
   VkDevice vk_device = radv_device_to_handle(device);

   VkResult result = vk_common_DeviceWaitIdle(vk_device);
   if (result != VK_SUCCESS)
      return result;

   uint64_t *accel_struct_offsets = NULL;
   uint64_t *ray_history_offsets = NULL;
   uint64_t *ray_history_sizes = NULL;
   struct hash_entry **hash_entries = NULL;
   FILE *file = NULL;

   uint32_t struct_count = _mesa_hash_table_num_entries(device->rra_trace.accel_structs);
   accel_struct_offsets = calloc(struct_count, sizeof(uint64_t));
   if (!accel_struct_offsets)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   uint32_t dispatch_count =
      util_dynarray_num_elements(&device->rra_trace.ray_history, struct radv_rra_ray_history_data *);
   ray_history_offsets = calloc(dispatch_count, sizeof(uint64_t));
   if (!ray_history_offsets) {
      result = VK_ERROR_OUT_OF_HOST_MEMORY;
      goto cleanup;
   }

   ray_history_sizes = calloc(dispatch_count, sizeof(uint64_t));
   if (!ray_history_sizes) {
      result = VK_ERROR_OUT_OF_HOST_MEMORY;
      goto cleanup;
   }

   hash_entries = malloc(sizeof(*hash_entries) * struct_count);
   if (!hash_entries) {
      result = VK_ERROR_OUT_OF_HOST_MEMORY;
      goto cleanup;
   }

   file = fopen(filename, "w");
   if (!file) {
      result = VK_ERROR_OUT_OF_HOST_MEMORY;
      goto cleanup;
   }

   /*
    * The header contents can only be determined after all acceleration
    * structures have been dumped. An empty struct is written instead
    * to keep offsets intact.
    */
   struct rra_file_header header = {0};
   fwrite(&header, sizeof(struct rra_file_header), 1, file);

   uint64_t api_info_offset = (uint64_t)ftell(file);
   uint64_t api = RADV_RRA_API_VULKAN;
   fwrite(&api, sizeof(uint64_t), 1, file);

   uint64_t asic_info_offset = (uint64_t)ftell(file);
   rra_dump_asic_info(&pdev->info, file);

   uint64_t written_accel_struct_count = 0;

   struct hash_entry *last_entry = NULL;
   for (unsigned i = 0; (last_entry = _mesa_hash_table_next_entry(device->rra_trace.accel_structs, last_entry)); ++i)
      hash_entries[i] = last_entry;

   qsort(hash_entries, struct_count, sizeof(*hash_entries), accel_struct_entry_cmp);

   struct rra_copy_context copy_ctx = {
      .device = vk_device,
      .queue = vk_queue,
      .entries = hash_entries,
      .family_index = queue->vk.queue_family_index,
      .min_size = device->rra_trace.ray_history_buffer_size,
   };

   result = rra_copy_context_init(&copy_ctx);
   if (result != VK_SUCCESS)
      goto cleanup;

   for (unsigned i = 0; i < struct_count; i++) {
      struct radv_rra_accel_struct_data *data = hash_entries[i]->data;
      void *mapped_data = rra_map_accel_struct_data(&copy_ctx, i);
      if (!mapped_data)
         continue;

      accel_struct_offsets[written_accel_struct_count] = (uint64_t)ftell(file);
      result = rra_dump_acceleration_structure(data, mapped_data, device->rra_trace.accel_struct_vas,
                                               device->rra_trace.validate_as, file);

      rra_unmap_accel_struct_data(&copy_ctx, i);

      if (result == VK_SUCCESS)
         written_accel_struct_count++;
   }

   uint64_t ray_history_offset = (uint64_t)ftell(file);

   if (dispatch_count) {
      uint32_t ray_history_index = 0xFFFFFFFF;
      struct radv_rra_ray_history_data *ray_history = NULL;

      uint8_t *history = device->rra_trace.ray_history_data;
      struct radv_ray_history_header *history_header = (void *)history;

      uint32_t history_buffer_size_mb = device->rra_trace.ray_history_buffer_size / 1024 / 1024;
      uint32_t history_size_mb = history_header->offset / 1024 / 1024;
      if (history_header->offset > device->rra_trace.ray_history_buffer_size) {
1448          fprintf(stderr, "radv: rra: The ray history buffer size (%u MB) is too small. %u MB is required.\n",
1449                  history_buffer_size_mb, history_size_mb);
1450       } else {
1451          fprintf(stderr, "radv: rra: Ray history buffer size = %u MB, ray history size = %u MB.\n",
1452                  history_buffer_size_mb, history_size_mb);
1453       }
1454 
1455       uint32_t history_size = MIN2(history_header->offset, device->rra_trace.ray_history_buffer_size);
1456 
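           /* Walk the packed token stream written by the trace shaders.
            * Tokens are variable-sized: a full end-trace token when the ray
            * hit something, otherwise truncated before primitive_id. Stop
            * once a full-sized token can no longer fit in the buffer. */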
1457       uint32_t token_size;
1458       for (uint32_t offset = sizeof(struct radv_ray_history_header);; offset += token_size) {
1459          if (offset + sizeof(struct radv_packed_end_trace_token) > history_size)
1460             break;
1461 
1462          struct radv_packed_end_trace_token *src = (void *)(history + offset);
1463          token_size = src->header.hit ? sizeof(struct radv_packed_end_trace_token)
1464                                       : offsetof(struct radv_packed_end_trace_token, primitive_id);
1465 
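              /* Tokens of one dispatch are contiguous; when the dispatch
               * index changes, start a new ray history blob by writing its
               * metadata. */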
1466          if (src->dispatch_index != ray_history_index) {
1467             ray_history_index = src->dispatch_index;
1468             assert(ray_history_index < dispatch_count);
1469             ray_history = *util_dynarray_element(&device->rra_trace.ray_history, struct radv_rra_ray_history_data *,
1470                                                  ray_history_index);
1471 
1472             assert(!ray_history_offsets[ray_history_index]);
1473             ray_history_offsets[ray_history_index] = (uint64_t)ftell(file);
1474             fwrite(&ray_history->metadata, sizeof(struct radv_rra_ray_history_metadata), 1, file);
1475          }
1476 
1477          uint32_t *dispatch_size = ray_history->metadata.dispatch_size.size;
1478 
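              /* Unflatten the 1D launch index into (x, y, z). For example,
               * with a 16x16x1 dispatch, launch_index 17 maps to
               * x = 17 % 16 = 1, y = (17 / 16) % 16 = 1, z = 0. */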
1479          uint32_t x = src->header.launch_index % dispatch_size[0];
1480          uint32_t y = (src->header.launch_index / dispatch_size[0]) % dispatch_size[1];
1481          uint32_t z = src->header.launch_index / (dispatch_size[0] * dispatch_size[1]);
1482 
1483          struct rra_ray_history_id_token begin_id = {
1484             .id = src->header.launch_index,
1485             .has_control = true,
1486          };
1487          struct rra_ray_history_control_token begin_control = {
1488             .type = rra_ray_history_token_begin,
1489             .length = sizeof(struct rra_ray_history_begin_token) / 4,
1490          };
1491          struct rra_ray_history_begin_token begin = {
1492             .wave_id = src->header.launch_index / 32,
1493             .launch_ids = {x, y, z},
1494             .accel_struct_lo = src->accel_struct_lo,
1495             .accel_struct_hi = src->accel_struct_hi & 0x1FFFFFF,
1496             .ray_flags = src->flags,
1497             .cull_mask = src->cull_mask,
1498             .stb_offset = src->sbt_offset,
1499             .stb_stride = src->sbt_stride,
1500             .miss_index = src->miss_index,
1501             .origin[0] = src->origin[0],
1502             .origin[1] = src->origin[1],
1503             .origin[2] = src->origin[2],
1504             .tmin = src->tmin,
1505             .direction[0] = src->direction[0],
1506             .direction[1] = src->direction[1],
1507             .direction[2] = src->direction[2],
1508             .tmax = src->tmax,
1509          };
1510          fwrite(&begin_id, sizeof(begin_id), 1, file);
1511          fwrite(&begin_control, sizeof(begin_control), 1, file);
1512          fwrite(&begin, sizeof(begin), 1, file);
1513          ray_history_sizes[ray_history_index] += sizeof(begin_id) + sizeof(begin_control) + sizeof(begin);
1514 
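              /* Emit one any-hit status token per recorded any-hit
               * invocation. Only a count is captured, so the tokens carry no
               * payload; the last one gets status 2 (presumably "accept"),
               * the rest 0. */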
1515          for (uint32_t i = 0; i < src->ahit_count; i++) {
1516             struct rra_ray_history_id_token ahit_status_id = {
1517                .id = src->header.launch_index,
1518                .has_control = true,
1519             };
1520             struct rra_ray_history_control_token ahit_status_control = {
1521                .type = rra_ray_history_token_ahit_status,
1522                .data = i == src->ahit_count - 1 ? 2 : 0,
1523             };
1524             fwrite(&ahit_status_id, sizeof(ahit_status_id), 1, file);
1525             fwrite(&ahit_status_control, sizeof(ahit_status_control), 1, file);
1526             ray_history_sizes[ray_history_index] += sizeof(ahit_status_id) + sizeof(ahit_status_control);
1527          }
1528 
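              /* Same pattern for intersection-shader invocations. */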
1529          for (uint32_t i = 0; i < src->isec_count; i++) {
1530             struct rra_ray_history_id_token isec_status_id = {
1531                .id = src->header.launch_index,
1532                .has_control = true,
1533             };
1534             struct rra_ray_history_control_token isec_status_control = {
1535                .type = rra_ray_history_token_isec_status,
1536                .data = i == src->isec_count - 1 ? 2 : 0,
1537             };
1538             fwrite(&isec_status_id, sizeof(isec_status_id), 1, file);
1539             fwrite(&isec_status_control, sizeof(isec_status_control), 1, file);
1540             ray_history_sizes[ray_history_index] += sizeof(isec_status_id) + sizeof(isec_status_control);
1541          }
1542 
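              /* Finish the ray with an End2 token. 0xFFFFFFFF in the
               * primitive and geometry indices denotes a miss; the fields
               * are overwritten below when the ray recorded a hit. */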
1543          struct rra_ray_history_id_token end_id = {
1544             .id = src->header.launch_index,
1545             .has_control = true,
1546          };
1547          struct rra_ray_history_control_token end_control = {
1548             .type = rra_ray_history_token_end2,
1549             .length = sizeof(struct rra_ray_history_end2_token) / 4,
1550          };
1551          struct rra_ray_history_end2_token end = {
1552             .base.primitive_index = 0xFFFFFFFF,
1553             .base.geometry_index = 0xFFFFFFFF,
1554             .iteration_count = src->iteration_count,
1555             .candidate_instance_count = src->instance_count,
1556          };
1557 
1558          if (src->header.hit) {
1559             end.base.primitive_index = src->primitive_id;
1560             end.base.geometry_index = src->geometry_id;
1561             end.instance_index = src->instance_id;
1562             end.hit_kind = src->hit_kind;
1563             end.t = src->t;
1564          }
1565 
1566          fwrite(&end_id, sizeof(end_id), 1, file);
1567          fwrite(&end_control, sizeof(end_control), 1, file);
1568          fwrite(&end, sizeof(end), 1, file);
1569          ray_history_sizes[ray_history_index] += sizeof(end_id) + sizeof(end_control) + sizeof(end);
1570       }
1571 
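           /* Dispatches that produced no tokens still need their metadata
            * written so every chunk description below points at valid
            * data. */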
1572       for (uint32_t i = 0; i < dispatch_count; i++) {
1573          if (ray_history_offsets[i])
1574             continue;
1575 
1576          ray_history = *util_dynarray_element(&device->rra_trace.ray_history, struct radv_rra_ray_history_data *, i);
1577          ray_history_offsets[i] = (uint64_t)ftell(file);
1578          fwrite(&ray_history->metadata, sizeof(struct radv_rra_ray_history_metadata), 1, file);
1579       }
1580 
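           /* Re-arm the capture buffer so a subsequent trace starts over;
            * the exact reset value follows the trace shaders' convention
            * for the write offset. */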
1581       history_header->offset = 1;
1582    }
1583 
1584    rra_copy_context_finish(&copy_ctx);
1585 
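        /* All payloads are written; now emit the chunk description table
         * that the file header will point at. A header_size of 0 means the
         * chunk has no separate header. */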
1586    uint64_t chunk_info_offset = (uint64_t)ftell(file);
1587    rra_dump_chunk_description(api_info_offset, 0, 8, "ApiInfo", RADV_RRA_ASIC_API_INFO_CHUNK_VERSION, file);
1588    rra_dump_chunk_description(asic_info_offset, 0, sizeof(struct rra_asic_info), "AsicInfo",
1589                               RADV_RRA_ASIC_API_INFO_CHUNK_VERSION, file);
1590 
1591    for (uint32_t i = 0; i < dispatch_count; i++) {
1592       rra_dump_chunk_description(ray_history_offsets[i], 0, sizeof(struct radv_rra_ray_history_metadata),
1593                                  "HistoryMetadata", RADV_RRA_RAY_HISTORY_CHUNK_VERSION, file);
1594       rra_dump_chunk_description(ray_history_offsets[i] + sizeof(struct radv_rra_ray_history_metadata), 0,
1595                                  ray_history_sizes[i], "HistoryTokensRaw", RADV_RRA_RAY_HISTORY_CHUNK_VERSION, file);
1596    }
1597 
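        /* Acceleration structure payload sizes are not tracked while
         * dumping; derive each from the distance to the next chunk's offset
         * (or to the start of the ray history data for the last one). */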
1598    for (uint32_t i = 0; i < written_accel_struct_count; ++i) {
1599       uint64_t accel_struct_size;
1600       if (i == written_accel_struct_count - 1)
1601          accel_struct_size = (uint64_t)(ray_history_offset - accel_struct_offsets[i]);
1602       else
1603          accel_struct_size = (uint64_t)(accel_struct_offsets[i + 1] - accel_struct_offsets[i]);
1604 
1605       rra_dump_chunk_description(accel_struct_offsets[i], sizeof(struct rra_accel_struct_chunk_header),
1606                                  accel_struct_size, "RawAccelStruct", RADV_RRA_ACCEL_STRUCT_CHUNK_VERSION, file);
1607    }
1608 
1609    uint64_t file_end = (uint64_t)ftell(file);
1610 
1611    /* All info is available, dump header now */
1612    fseek(file, 0, SEEK_SET);
1613    rra_dump_header(file, chunk_info_offset, file_end - chunk_info_offset);
1614 
1615    result = VK_SUCCESS;
1616 cleanup:
1617    if (file)
1618       fclose(file);
1619 
1620    free(hash_entries);
1621    free(ray_history_sizes);
1622    free(ray_history_offsets);
1623    free(accel_struct_offsets);
1624    return result;
1625 }
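
     /*
      * Usage sketch (hypothetical caller; in RADV the dump is actually
      * triggered elsewhere, e.g. by the driver's trace-trigger logic):
      *
      *    char filename[256];
      *    snprintf(filename, sizeof(filename), "/tmp/capture.rra");
      *    VkResult res = radv_rra_dump_trace(queue, filename);
      *    if (res != VK_SUCCESS)
      *       fprintf(stderr, "radv: rra: failed to dump trace\n");
      */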
1626