/*
 * Copyright © 2020 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "anv_private.h"

#include <math.h>

#include "util/u_debug.h"
#include "util/half_float.h"
#include "util/u_atomic.h"

#include "genxml/gen_macros.h"
#include "genxml/genX_pack.h"
#include "genxml/genX_rt_pack.h"

#include "ds/intel_tracepoints.h"

#if GFX_VERx10 >= 125
#include "grl/grl_structs.h"

/* Wait for the previous dispatches to finish and flush their data port
 * writes.
 */
#define ANV_GRL_FLUSH_FLAGS (ANV_PIPE_END_OF_PIPE_SYNC_BIT | \
                             ANV_PIPE_DATA_CACHE_FLUSH_BIT | \
                             ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT)

static const VkAccelerationStructureGeometryKHR *
get_geometry(const VkAccelerationStructureBuildGeometryInfoKHR *pInfo,
             uint32_t index)
{
   return pInfo->pGeometries ? &pInfo->pGeometries[index] :
                               pInfo->ppGeometries[index];
}

static size_t align_transient_size(size_t bytes)
{
   return align_uintptr(bytes, 64);
}

static size_t align_private_size(size_t bytes)
{
   return align_uintptr(bytes, 64);
}

static size_t get_scheduler_size(size_t num_builds)
{
   size_t scheduler_size = sizeof(union SchedulerUnion);
   /* add more memory for qnode creation stage if needed */
   if (num_builds > QNODE_GLOBAL_ROOT_BUFFER_MIN_ENTRIES_NUM) {
      scheduler_size += (num_builds - QNODE_GLOBAL_ROOT_BUFFER_MIN_ENTRIES_NUM) * 2 *
                        sizeof(struct QNodeGlobalRootBufferEntry);
   }

   return align_private_size(scheduler_size);
}

static size_t
get_batched_binnedsah_transient_mem_size(size_t num_builds)
{
   if (num_builds == 0)
      return 0;
   return num_builds * (sizeof(struct SAHBuildBuffersInfo) + sizeof(gpuva_t));
}

static size_t
get_batched_binnedsah_private_mem_size(size_t num_builds)
{
   if (num_builds == 0)
      return 0;

   size_t globals_size = align_private_size(num_builds * sizeof(struct SAHBuildGlobals));
   return globals_size + get_scheduler_size(num_builds);
}

static uint32_t
estimate_qbvh6_nodes(const uint32_t N)
{
   const uint32_t W = 6;
   const uint32_t N0 = N / 2 + N % 2; // lowest level with 2 leaves per QBVH6 node
   const uint32_t N1 = N0 / W + (N0 % W ? 1 : 0); // filled level
   const uint32_t N2 = N0 / W + (N1 % W ? 1 : 0); // filled level
   const uint32_t N3 = N0 / W + (N2 % W ? 1 : 0); // filled level
   const uint32_t N4 = N3; // overestimate remaining nodes
   return N0 + N1 + N2 + N3 + N4;
}
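
/* Worked example (added for clarity, not from the original source): for
 * N = 100 input primitives the formula above gives N0 = 50 and
 * N1 = N2 = N3 = N4 = 9, i.e. an estimate of 86 QBVH6 nodes. Note that the
 * upper levels deliberately reuse N0 / W, so this is an over-estimate rather
 * than an exact model of the tree.
 */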

/* Estimates the worst case number of QBVH6 nodes for a top-down BVH
 * build that is guaranteed to produce subtrees with N >= K primitives
 * from which a single QBVH6 node is created.
 */
static uint32_t
estimate_qbvh6_nodes_minK(const uint32_t N, uint32_t K)
{
   const uint32_t N0 = N / K + (N % K ? 1 : 0); // lowest level of nodes with K leaves minimally
   return N0 + estimate_qbvh6_nodes(N0);
}

static size_t
estimate_qbvh6_fatleafs(const size_t P)
{
   return P;
}

static size_t
estimate_qbvh6_nodes_worstcase(const size_t P)
{
   const size_t F = estimate_qbvh6_fatleafs(P);

   // worst-case each inner node having 5 fat-leaf children.
   // number of inner nodes is F/5 and number of fat-leaves is F
   return F + ceil(F/5.0);
}
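
/* Illustrative check (added for clarity, not from the original source): with
 * P = 100 primitives we get F = 100 fat leaves and
 * 100 + ceil(100 / 5.0) = 120 nodes in the worst case, i.e. roughly 1.2
 * nodes per fat leaf.
 */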

#define sizeof_PrimRef        32
#define sizeof_HwInstanceLeaf (GENX(RT_BVH_INSTANCE_LEAF_length) * 4)
#define sizeof_InternalNode   (GENX(RT_BVH_INTERNAL_NODE_length) * 4)
#define sizeof_Procedural     (GENX(RT_BVH_PROCEDURAL_LEAF_length) * 4)
#define sizeof_Quad           (GENX(RT_BVH_QUAD_LEAF_length) * 4)
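
/* Note (added for clarity): the GENX(*_length) values generated from genxml
 * are dword counts, so the "* 4" above converts each hardware structure size
 * to bytes, matching the hand-written 32-byte PrimRef.
 */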

static struct MKSizeEstimate
get_gpu_size_estimate(const VkAccelerationStructureBuildGeometryInfoKHR *pInfo,
                      const VkAccelerationStructureBuildRangeInfoKHR *pBuildRangeInfos,
                      const uint32_t *pMaxPrimitiveCounts)
{
   uint32_t num_triangles = 0, num_aabbs = 0, num_instances = 0;
   for (unsigned g = 0; g < pInfo->geometryCount; g++) {
      const VkAccelerationStructureGeometryKHR *pGeometry =
         get_geometry(pInfo, g);
      uint32_t prim_count = pBuildRangeInfos != NULL ?
         pBuildRangeInfos[g].primitiveCount : pMaxPrimitiveCounts[g];

      switch (pGeometry->geometryType) {
      case VK_GEOMETRY_TYPE_TRIANGLES_KHR:
         num_triangles += prim_count;
         break;
      case VK_GEOMETRY_TYPE_AABBS_KHR:
         num_aabbs += prim_count;
         break;
      case VK_GEOMETRY_TYPE_INSTANCES_KHR:
         num_instances += prim_count;
         break;
      default:
         unreachable("Unsupported geometry type");
      }
   }
   const uint32_t num_primitives = num_triangles + num_aabbs + num_instances;

   struct MKSizeEstimate est = {};

   uint64_t size = sizeof(BVHBase);
   size = align64(size, 64);

   /* Must immediately follow BVHBase because we use fixed offset to nodes. */
   est.node_data_start = size;

   switch (pInfo->type) {
   case VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR: {
      assert(num_triangles == 0 && num_aabbs == 0);

      est.numPrimitives = num_instances;
      est.numPrimitivesToSplit = 0;
      est.numBuildPrimitives = est.numPrimitives + est.numPrimitivesToSplit;

      est.min_primitives = est.numPrimitives;
      est.max_primitives = est.numPrimitives + est.numPrimitivesToSplit;

      unsigned int sizeInnerNodes =
         (unsigned int) estimate_qbvh6_nodes_worstcase(est.numBuildPrimitives) *
         sizeof_InternalNode;
      if (sizeInnerNodes == 0)
         sizeInnerNodes = sizeof_InternalNode;

      est.max_inner_nodes = sizeInnerNodes / sizeof_InternalNode;

      size += sizeInnerNodes;
      STATIC_ASSERT(sizeof_InternalNode % 64 == 0);

      est.leaf_data_start = size;
      size += est.numBuildPrimitives * sizeof_HwInstanceLeaf;
      STATIC_ASSERT(sizeof_HwInstanceLeaf % 64 == 0);

      est.leaf_data_size = est.numBuildPrimitives * sizeof_HwInstanceLeaf;

      break;
   }

   case VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR: {
      assert(num_instances == 0);

      /* RT: TODO */
      const float split_factor = 0.0f;
      uint32_t num_prims_to_split = 0;
      if (false)
         num_prims_to_split = num_triangles * (double)split_factor;

      const uint32_t num_build_triangles = num_triangles + num_prims_to_split;
      const uint32_t num_build_primitives = num_build_triangles + num_aabbs;

      est.numPrimitives = num_primitives;
      est.numTriangles = num_triangles;
      est.numProcedurals = num_aabbs;
      est.numMeshes = pInfo->geometryCount;
      est.numBuildPrimitives = num_build_primitives;
      est.numPrimitivesToSplit = num_prims_to_split;
      est.max_instance_leafs = 0;

      est.min_primitives = (size_t)(num_build_triangles * 0.5f + num_aabbs);
      est.max_primitives = num_build_triangles + num_aabbs;

      size_t nodeBytes = 0;
      nodeBytes += estimate_qbvh6_nodes_worstcase(num_build_triangles) * sizeof_InternalNode;
      nodeBytes += estimate_qbvh6_nodes_worstcase(num_aabbs) * sizeof_InternalNode;
      if (nodeBytes == 0) // for case with 0 primitives
         nodeBytes = sizeof_InternalNode;
      nodeBytes = MAX2(nodeBytes, 8 * (size_t)num_build_primitives); // for primref_index0/1 buffers

      est.max_inner_nodes = nodeBytes / sizeof_InternalNode;

      size += nodeBytes;
      STATIC_ASSERT(sizeof_InternalNode % 64 == 0);

      est.leaf_data_start = size;
      size += num_build_triangles * sizeof_Quad;
      STATIC_ASSERT(sizeof_Quad % 64 == 0);

      est.procedural_data_start = size;
      size += num_aabbs * sizeof_Procedural;
      STATIC_ASSERT(sizeof_Procedural % 64 == 0);

      est.leaf_data_size = num_build_triangles * sizeof_Quad +
                           num_aabbs * sizeof_Procedural;

      if (num_build_primitives == 0)
         size += MAX2(sizeof_Quad, sizeof_Procedural);
      break;
   }

   default:
      unreachable("Unsupported acceleration structure type");
   }

   size = align64(size, 64);
   est.instance_descs_start = size;
   size += sizeof(struct InstanceDesc) * num_instances;

   est.geo_meta_data_start = size;
   size += sizeof(struct GeoMetaData) * pInfo->geometryCount;
   size = align64(size, 64);

   assert(size == align64(size, 64));
   est.back_pointer_start = size;

   const bool alloc_backpointers = false; /* RT TODO */
   if (alloc_backpointers) {
      size += est.max_inner_nodes * sizeof(uint32_t);
      size = align64(size, 64);
   }

   assert(size < UINT32_MAX);
   est.sizeTotal = align64(size, 64);

   return est;
}
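
/* Summary of the BVH layout implied by the estimate above (comment added for
 * clarity): the BVHBase header, then internal nodes at node_data_start, then
 * instance/quad leaves at leaf_data_start (plus procedural leaves at
 * procedural_data_start for bottom-level builds), followed by InstanceDesc
 * entries, GeoMetaData entries and, when enabled, back pointers; sizeTotal
 * is the 64-byte aligned end of all of this.
 */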

struct scratch_layout {
   gpuva_t base;
   uint32_t total_size;

   gpuva_t primrefs;
   gpuva_t globals;
   gpuva_t leaf_index_buffers;
   uint32_t leaf_index_buffer_stride;

   /* new_sah */
   gpuva_t qnode_buffer;
   gpuva_t bvh2_buffer;
};

static size_t
get_bvh2_size(uint32_t num_primitivies)
{
   if (num_primitivies == 0)
      return 0;
   return sizeof(struct BVH2) +
          (2 * num_primitivies - 1) * sizeof(struct BVH2Node);
}
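
/* The (2 * N - 1) factor above is the standard bound for a binary BVH: a
 * BVH2 with N leaf nodes has at most N - 1 internal nodes, so 2N - 1 nodes
 * in total. (Comment added for clarity.)
 */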

static struct scratch_layout
get_gpu_scratch_layout(struct anv_address base,
                       struct MKSizeEstimate est,
                       enum anv_rt_bvh_build_method build_method)
{
   struct scratch_layout scratch = {
      .base = anv_address_physical(base),
   };
   gpuva_t current = anv_address_physical(base);

   scratch.globals = current;
   current += sizeof(struct Globals);

   scratch.primrefs = intel_canonical_address(current);
   current += est.numBuildPrimitives * sizeof_PrimRef;

   scratch.leaf_index_buffers = intel_canonical_address(current);
   current += est.numBuildPrimitives * sizeof(uint32_t) * 2;
   scratch.leaf_index_buffer_stride = sizeof(uint32_t);

   switch (build_method) {
   case ANV_BVH_BUILD_METHOD_TRIVIAL:
      break;

   case ANV_BVH_BUILD_METHOD_NEW_SAH: {
      size_t bvh2_size = get_bvh2_size(est.numBuildPrimitives);
      if (est.leaf_data_size < bvh2_size) {
         scratch.bvh2_buffer = intel_canonical_address(current);
         current += bvh2_size;
      }

      scratch.qnode_buffer = intel_canonical_address(current);
      current += 2 * sizeof(dword) * est.max_inner_nodes;
      break;
   }

   default:
      unreachable("invalid build");
   }

   assert((current - scratch.base) < UINT32_MAX);
   scratch.total_size = current - scratch.base;

   return scratch;
}
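
/* Resulting scratch buffer order (comment added for clarity): Globals, then
 * one PrimRef per build primitive, then two uint32_t leaf-index entries per
 * primitive, and for the NEW_SAH path an optional BVH2 area (only when it
 * does not fit into the destination's leaf data region, see
 * get_binnedsah_scratch_buffers below) followed by the qnode buffer.
 */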

static void
anv_get_gpu_acceleration_structure_size(
   UNUSED struct anv_device                   *device,
   VkAccelerationStructureBuildTypeKHR         buildType,
   const VkAccelerationStructureBuildGeometryInfoKHR* pBuildInfo,
   const uint32_t*                             pMaxPrimitiveCounts,
   VkAccelerationStructureBuildSizesInfoKHR*   pSizeInfo)
{

   struct MKSizeEstimate est = get_gpu_size_estimate(pBuildInfo, NULL,
                                                     pMaxPrimitiveCounts);
   struct scratch_layout scratch = get_gpu_scratch_layout(ANV_NULL_ADDRESS, est,
                                                          device->bvh_build_method);

   pSizeInfo->accelerationStructureSize = est.sizeTotal;
   pSizeInfo->buildScratchSize = scratch.total_size;
   pSizeInfo->updateScratchSize = scratch.total_size; /* TODO */
}

void
genX(GetAccelerationStructureBuildSizesKHR)(
    VkDevice                                    _device,
    VkAccelerationStructureBuildTypeKHR         buildType,
    const VkAccelerationStructureBuildGeometryInfoKHR* pBuildInfo,
    const uint32_t*                             pMaxPrimitiveCounts,
    VkAccelerationStructureBuildSizesInfoKHR*   pSizeInfo)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   assert(pSizeInfo->sType ==
          VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR);

   VkAccelerationStructureBuildSizesInfoKHR gpu_size_info;
   anv_get_gpu_acceleration_structure_size(device, buildType, pBuildInfo,
                                           pMaxPrimitiveCounts,
                                           &gpu_size_info);

   pSizeInfo->accelerationStructureSize =
      gpu_size_info.accelerationStructureSize;
   pSizeInfo->buildScratchSize = gpu_size_info.buildScratchSize;
   pSizeInfo->updateScratchSize = gpu_size_info.updateScratchSize;
}

void
genX(GetDeviceAccelerationStructureCompatibilityKHR)(
    VkDevice                                    _device,
    const VkAccelerationStructureVersionInfoKHR* pVersionInfo,
    VkAccelerationStructureCompatibilityKHR*    pCompatibility)
{
   ANV_FROM_HANDLE(anv_device, device, _device);

   if (memcmp(pVersionInfo->pVersionData,
              device->physical->rt_uuid,
              sizeof(device->physical->rt_uuid)) == 0) {
      *pCompatibility = VK_ACCELERATION_STRUCTURE_COMPATIBILITY_COMPATIBLE_KHR;
   } else {
      *pCompatibility = VK_ACCELERATION_STRUCTURE_COMPATIBILITY_INCOMPATIBLE_KHR;
   }
}

static inline uint8_t
vk_to_grl_GeometryFlags(VkGeometryFlagsKHR flags)
{
   uint8_t grl_flags = GEOMETRY_FLAG_NONE;
   unsigned mask = flags;
   while (mask) {
      int i = u_bit_scan(&mask);
      switch ((VkGeometryFlagBitsKHR)(1u << i)) {
      case VK_GEOMETRY_OPAQUE_BIT_KHR:
         grl_flags |= GEOMETRY_FLAG_OPAQUE;
         break;
      case VK_GEOMETRY_NO_DUPLICATE_ANY_HIT_INVOCATION_BIT_KHR:
         grl_flags |= GEOMETRY_FLAG_NO_DUPLICATE_ANYHIT_INVOCATION;
         break;
      default:
         unreachable("Unsupported acceleration structure build flag");
      }
   }
   return grl_flags;
}

static inline IndexFormat
vk_to_grl_IndexFormat(VkIndexType type)
{
   switch (type) {
   case VK_INDEX_TYPE_NONE_KHR:  return INDEX_FORMAT_NONE;
   case VK_INDEX_TYPE_UINT8_KHR: unreachable("No UINT8 support yet");
   case VK_INDEX_TYPE_UINT16:    return INDEX_FORMAT_R16_UINT;
   case VK_INDEX_TYPE_UINT32:    return INDEX_FORMAT_R32_UINT;
   default:
      unreachable("Unsupported index type");
   }
}

static inline VertexFormat
vk_to_grl_VertexFormat(VkFormat format)
{
   switch (format) {
   case VK_FORMAT_R32G32_SFLOAT:       return VERTEX_FORMAT_R32G32_FLOAT;
   case VK_FORMAT_R32G32B32_SFLOAT:    return VERTEX_FORMAT_R32G32B32_FLOAT;
   case VK_FORMAT_R16G16_SFLOAT:       return VERTEX_FORMAT_R16G16_FLOAT;
   case VK_FORMAT_R16G16B16A16_SFLOAT: return VERTEX_FORMAT_R16G16B16A16_FLOAT;
   case VK_FORMAT_R16G16_SNORM:        return VERTEX_FORMAT_R16G16_SNORM;
   case VK_FORMAT_R16G16B16A16_SNORM:  return VERTEX_FORMAT_R16G16B16A16_SNORM;
   case VK_FORMAT_R16G16B16A16_UNORM:  return VERTEX_FORMAT_R16G16B16A16_UNORM;
   case VK_FORMAT_R16G16_UNORM:        return VERTEX_FORMAT_R16G16_UNORM;
   /* case VK_FORMAT_R10G10B10A2_UNORM: return VERTEX_FORMAT_R10G10B10A2_UNORM; */
   case VK_FORMAT_R8G8B8A8_UNORM:      return VERTEX_FORMAT_R8G8B8A8_UNORM;
   case VK_FORMAT_R8G8_UNORM:          return VERTEX_FORMAT_R8G8_UNORM;
   case VK_FORMAT_R8G8B8A8_SNORM:      return VERTEX_FORMAT_R8G8B8A8_SNORM;
   case VK_FORMAT_R8G8_SNORM:          return VERTEX_FORMAT_R8G8_SNORM;
   default:
      unreachable("Unsupported vertex format");
   }
}

static struct Geo
vk_to_grl_Geo(const VkAccelerationStructureGeometryKHR *pGeometry,
              uint32_t prim_count,
              uint32_t transform_offset,
              uint32_t primitive_offset,
              uint32_t first_vertex)
{
   struct Geo geo = {
      .Flags = vk_to_grl_GeometryFlags(pGeometry->flags),
   };

   switch (pGeometry->geometryType) {
   case VK_GEOMETRY_TYPE_TRIANGLES_KHR: {
      const VkAccelerationStructureGeometryTrianglesDataKHR *vk_tri =
         &pGeometry->geometry.triangles;

      geo.Type = GEOMETRY_TYPE_TRIANGLES;

      geo.Desc.Triangles.pTransformBuffer =
         vk_tri->transformData.deviceAddress;
      geo.Desc.Triangles.pIndexBuffer =
         vk_tri->indexData.deviceAddress;
      geo.Desc.Triangles.pVertexBuffer =
         vk_tri->vertexData.deviceAddress;
      geo.Desc.Triangles.VertexBufferByteStride = vk_tri->vertexStride;

      if (geo.Desc.Triangles.pTransformBuffer)
         geo.Desc.Triangles.pTransformBuffer += transform_offset;

      if (vk_tri->indexType == VK_INDEX_TYPE_NONE_KHR) {
         geo.Desc.Triangles.IndexCount = 0;
         geo.Desc.Triangles.VertexCount = prim_count * 3;
         geo.Desc.Triangles.IndexFormat = INDEX_FORMAT_NONE;
         geo.Desc.Triangles.pVertexBuffer += primitive_offset;
      } else {
         geo.Desc.Triangles.IndexCount = prim_count * 3;
         geo.Desc.Triangles.VertexCount = vk_tri->maxVertex;
         geo.Desc.Triangles.IndexFormat =
            vk_to_grl_IndexFormat(vk_tri->indexType);
         geo.Desc.Triangles.pIndexBuffer += primitive_offset;
      }

      geo.Desc.Triangles.VertexFormat =
         vk_to_grl_VertexFormat(vk_tri->vertexFormat);
      geo.Desc.Triangles.pVertexBuffer += vk_tri->vertexStride * first_vertex;
      break;
   }

   case VK_GEOMETRY_TYPE_AABBS_KHR: {
      const VkAccelerationStructureGeometryAabbsDataKHR *vk_aabbs =
         &pGeometry->geometry.aabbs;
      geo.Type = GEOMETRY_TYPE_PROCEDURAL;
      geo.Desc.Procedural.pAABBs_GPUVA =
         vk_aabbs->data.deviceAddress + primitive_offset;
      geo.Desc.Procedural.AABBByteStride = vk_aabbs->stride;
      geo.Desc.Procedural.AABBCount = prim_count;
      break;
   }

   default:
      unreachable("Invalid geometry type");
   }

   return geo;
}

#include "grl/grl_metakernel_copy.h"
#include "grl/grl_metakernel_misc.h"
#include "grl/grl_metakernel_build_primref.h"
#include "grl/grl_metakernel_new_sah_builder.h"
#include "grl/grl_metakernel_build_leaf.h"

struct build_state {
   enum anv_rt_bvh_build_method build_method;

   struct MKSizeEstimate estimate;
   struct scratch_layout scratch;
   struct MKBuilderState state;

   struct anv_address bvh_addr;

   size_t geom_size_prefix_sum_buffer;
   size_t transient_size;

   uint32_t leaf_type;
   uint32_t leaf_size;

   uint32_t num_geometries;
   uint32_t num_instances;

   uint64_t instances_addr;
   bool array_of_instances_ptr;

   const VkAccelerationStructureGeometryKHR *vk_geoms;
};

static void
get_binnedsah_scratch_buffers(struct build_state *bs,
                              uint64_t *p_qnode_buffer,
                              uint64_t *p_primref_indices,
                              uint64_t *p_bvh2)
{
   if (bs->estimate.numBuildPrimitives == 0)
   {
      *p_bvh2 = 0;
      *p_qnode_buffer = 0;
      *p_primref_indices = 0;
      return;
   }

   size_t bvh2_size = get_bvh2_size(bs->estimate.numBuildPrimitives);
   if (bs->estimate.leaf_data_size < bvh2_size) {
      assert(bs->scratch.bvh2_buffer != 0);
      *p_bvh2 = bs->scratch.bvh2_buffer;
   } else {
      *p_bvh2 = intel_canonical_address(bs->state.bvh_buffer +
                                        bs->estimate.leaf_data_start);
   }

   assert(bs->scratch.qnode_buffer != 0);
   *p_qnode_buffer = bs->scratch.qnode_buffer;

   assert(bs->scratch.leaf_index_buffers != 0);
   *p_primref_indices = bs->scratch.leaf_index_buffers;
}
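
/* Note (added for clarity): when the intermediate BVH2 fits inside the
 * destination's leaf data area, it is built in place there and overwritten
 * once the real leaves are written at the end of the build; only when it is
 * larger than that area does it get its own region in the scratch buffer
 * (allocated in get_gpu_scratch_layout above).
 */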

static void
write_memory(struct anv_cmd_alloc alloc, size_t offset, const void *data, size_t data_len)
{
   assert((offset + data_len) < alloc.size);
   memcpy(alloc.map + offset, data, data_len);
}

static void
cmd_build_acceleration_structures(
   struct anv_cmd_buffer *cmd_buffer,
   uint32_t infoCount,
   const VkAccelerationStructureBuildGeometryInfoKHR *pInfos,
   const VkAccelerationStructureBuildRangeInfoKHR *const *ppBuildRangeInfos,
   const VkDeviceAddress *pIndirectDeviceAddresses,
   const uint32_t *pIndirectStrides,
   const uint32_t *const *ppMaxPrimitiveCounts)
{
   struct anv_device *device = cmd_buffer->device;
   VK_MULTIALLOC(ma);

   struct build_state *builds;
   vk_multialloc_add(&ma, &builds, struct build_state, infoCount);

   if (!vk_multialloc_zalloc(&ma,
                             &cmd_buffer->device->vk.alloc,
                             VK_SYSTEM_ALLOCATION_SCOPE_COMMAND)) {
      anv_batch_set_error(&cmd_buffer->batch, VK_ERROR_OUT_OF_HOST_MEMORY);
      return;
   }

   trace_intel_begin_as_build(&cmd_buffer->trace);

   /* TODO: Indirect */
   assert(ppBuildRangeInfos != NULL);

   size_t transient_mem_init_globals_size = 0;
   size_t transient_mem_init_globals_offset = 0;

   size_t transient_total = 0;

   size_t private_mem_total = 0;

   size_t num_trivial_builds = 0;
   size_t num_new_sah_builds = 0;

   /* Prepare a bunch of data for the kernels we have to run. */
   for (uint32_t i = 0; i < infoCount; i++) {
      struct build_state *bs = &builds[i];

      const VkAccelerationStructureBuildGeometryInfoKHR *pInfo = &pInfos[i];
      struct anv_address scratch_addr =
         anv_address_from_u64(pInfo->scratchData.deviceAddress);

      const VkAccelerationStructureBuildRangeInfoKHR *pBuildRangeInfos =
         ppBuildRangeInfos ? ppBuildRangeInfos[i] : NULL;
      const uint32_t *pMaxPrimitiveCounts =
         ppMaxPrimitiveCounts ? ppMaxPrimitiveCounts[i] : NULL;

      ANV_FROM_HANDLE(vk_acceleration_structure, dst_accel,
                      pInfo->dstAccelerationStructure);

      bs->build_method = device->bvh_build_method;

      bs->bvh_addr = anv_address_from_u64(vk_acceleration_structure_get_va(dst_accel));

      bs->estimate = get_gpu_size_estimate(pInfo, pBuildRangeInfos,
                                           pMaxPrimitiveCounts);
      bs->scratch = get_gpu_scratch_layout(scratch_addr, bs->estimate,
                                           bs->build_method);

      uint32_t leaf_size, leaf_type;

      switch (pInfo->type) {
      case VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR: {
         assert(pInfo->geometryCount == 1);

         const VkAccelerationStructureGeometryKHR *pGeometry =
            get_geometry(pInfo, 0);
         assert(pGeometry->geometryType == VK_GEOMETRY_TYPE_INSTANCES_KHR);

         const VkAccelerationStructureGeometryInstancesDataKHR *instances =
            &pGeometry->geometry.instances;

         bs->num_instances = pBuildRangeInfos[0].primitiveCount;
         bs->instances_addr = instances->data.deviceAddress;
         bs->array_of_instances_ptr = instances->arrayOfPointers;
         leaf_type = NODE_TYPE_INSTANCE;
         leaf_size = GENX(RT_BVH_INSTANCE_LEAF_length) * 4;
         break;
      }

      case VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR: {
         bs->num_geometries = pInfo->geometryCount;
         leaf_type = NODE_TYPE_QUAD;
         leaf_size = GENX(RT_BVH_QUAD_LEAF_length) * 4;
         break;
      }

      default:
         unreachable("Unsupported acceleration structure type");
      }

      size_t geom_struct_size = bs->num_geometries * sizeof(struct Geo);
      size_t geom_prefix_sum_size = align_uintptr(sizeof(uint32_t) * (bs->num_geometries + 1), 64);

      bs->transient_size = geom_prefix_sum_size + geom_struct_size;

      bs->geom_size_prefix_sum_buffer = transient_total + 0;

      bs->state = (struct MKBuilderState) {
         .geomDesc_buffer = bs->geom_size_prefix_sum_buffer +
                            geom_prefix_sum_size,
         .build_primref_buffer = bs->scratch.primrefs,
         .build_globals = bs->scratch.globals,
         .bvh_buffer = anv_address_physical(bs->bvh_addr),
         .leaf_type = leaf_type,
         .leaf_size = leaf_size,
      };

      transient_total += bs->transient_size;

      switch (device->bvh_build_method) {
      case ANV_BVH_BUILD_METHOD_TRIVIAL:
         num_trivial_builds++;
         break;
      case ANV_BVH_BUILD_METHOD_NEW_SAH:
         num_new_sah_builds++;
         break;
      default:
         unreachable("invalid BVH build method");
      }

      transient_mem_init_globals_size += sizeof(struct BatchedInitGlobalsData);
   }

   transient_total = align_transient_size(transient_total);
   transient_mem_init_globals_offset = transient_total;
   transient_total += align_transient_size(transient_mem_init_globals_size);

   size_t transient_mem_binnedsah_size = 0;
   size_t transient_mem_binnedsah_offset = 0;
   size_t private_mem_binnedsah_size = 0;
   size_t private_mem_binnedsah_offset = 0;

   transient_mem_binnedsah_size = get_batched_binnedsah_transient_mem_size(num_new_sah_builds);
   transient_mem_binnedsah_offset = transient_total;
   transient_total += align_transient_size(transient_mem_binnedsah_size);

   private_mem_binnedsah_size = get_batched_binnedsah_private_mem_size(num_new_sah_builds);
   private_mem_binnedsah_offset = private_mem_total;
   private_mem_total += align_private_size(private_mem_binnedsah_size);
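
   /* At this point the transient (mapped) buffer is laid out as: the
    * per-build prefix-sum + Geo descriptor arrays, then the
    * BatchedInitGlobalsData array, then the binned-SAH globals-pointer and
    * buffers-info area; the private (unmapped) buffer holds the binned-SAH
    * scheduler followed by its SAHBuildGlobals. (Comment added for clarity.)
    */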

   /* Allocate the required memory, unless we already have a suitable buffer */
   struct anv_cmd_alloc private_mem_alloc;
   if (private_mem_total > cmd_buffer->state.rt.build_priv_mem_size) {
      private_mem_alloc =
         anv_cmd_buffer_alloc_space(cmd_buffer, private_mem_total, 64,
                                    false /* mapped */);
      if (anv_cmd_alloc_is_empty(private_mem_alloc)) {
         anv_batch_set_error(&cmd_buffer->batch, VK_ERROR_OUT_OF_DEVICE_MEMORY);
         goto error;
      }

      cmd_buffer->state.rt.build_priv_mem_addr = private_mem_alloc.address;
      cmd_buffer->state.rt.build_priv_mem_size = private_mem_alloc.size;
   } else {
      private_mem_alloc = (struct anv_cmd_alloc) {
         .address = cmd_buffer->state.rt.build_priv_mem_addr,
         .map = anv_address_map(cmd_buffer->state.rt.build_priv_mem_addr),
         .size = cmd_buffer->state.rt.build_priv_mem_size,
      };
   }

   struct anv_cmd_alloc transient_mem_alloc =
      anv_cmd_buffer_alloc_space(cmd_buffer, transient_total, 64,
                                 true /* mapped */);
   if (transient_total > 0 && anv_cmd_alloc_is_empty(transient_mem_alloc)) {
      anv_batch_set_error(&cmd_buffer->batch, VK_ERROR_OUT_OF_DEVICE_MEMORY);
      goto error;
   }

   uint64_t private_base = anv_address_physical(private_mem_alloc.address);
   uint64_t transient_base = anv_address_physical(transient_mem_alloc.address);

   /* Prepare transient memory */
   for (uint32_t i = 0; i < infoCount; i++) {
      struct build_state *bs = &builds[i];

      const VkAccelerationStructureBuildGeometryInfoKHR *pInfo = &pInfos[i];

      const VkAccelerationStructureBuildRangeInfoKHR *pBuildRangeInfos =
         ppBuildRangeInfos ? ppBuildRangeInfos[i] : NULL;

      struct Geo *geos = transient_mem_alloc.map + bs->state.geomDesc_buffer;
      uint32_t *prefixes = transient_mem_alloc.map + bs->geom_size_prefix_sum_buffer;
      uint32_t prefix_sum = 0;
      for (unsigned g = 0; g < bs->num_geometries; g++) {
         const VkAccelerationStructureGeometryKHR *pGeometry = get_geometry(pInfo, g);
         uint32_t prim_count = pBuildRangeInfos[g].primitiveCount;
         geos[g] = vk_to_grl_Geo(pGeometry, prim_count,
                                 pBuildRangeInfos[g].transformOffset,
                                 pBuildRangeInfos[g].primitiveOffset,
                                 pBuildRangeInfos[g].firstVertex);

         prefixes[g] = prefix_sum;
         prefix_sum += prim_count;
      }

      prefixes[bs->num_geometries] = prefix_sum;
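
      /* Example (added for clarity): three geometries with primitive counts
       * {3, 5, 2} produce prefixes = {0, 3, 8, 10}, giving each geometry its
       * starting primitive offset plus the overall total in the final slot.
       */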

      bs->geom_size_prefix_sum_buffer =
         intel_canonical_address(bs->geom_size_prefix_sum_buffer +
                                 transient_base);
      bs->state.geomDesc_buffer =
         intel_canonical_address(bs->state.geomDesc_buffer +
                                 transient_base);

      struct BatchedInitGlobalsData data = {
         .p_build_globals = bs->scratch.globals,
         .p_bvh_buffer = anv_address_physical(bs->bvh_addr),

         .numPrimitives = 0,
         .numGeometries = bs->num_geometries,
         .numInstances = bs->num_instances,

         .instance_descs_start = bs->estimate.instance_descs_start,
         .geo_meta_data_start = bs->estimate.geo_meta_data_start,
         .node_data_start = bs->estimate.node_data_start,
         .leaf_data_start = bs->estimate.leaf_data_start,
         .procedural_data_start = bs->estimate.procedural_data_start,
         .back_pointer_start = bs->estimate.back_pointer_start,
         .sizeTotal = bs->estimate.sizeTotal,

         .leafType = bs->state.leaf_type,
         .leafSize = bs->state.leaf_size,
      };

      write_memory(transient_mem_alloc,
                   transient_mem_init_globals_offset + i * sizeof(data),
                   &data, sizeof(data));
   }

   genX(flush_pipeline_select_gpgpu)(cmd_buffer);

   /* Due to the nature of GRL and its heavy use of jumps/predication, we
    * cannot tell exactly in what order the CFE_STATE commands we insert are
    * going to be executed. So always use the largest possible size.
    */
   genX(cmd_buffer_ensure_cfe_state)(
      cmd_buffer,
      cmd_buffer->device->physical->max_grl_scratch_size);

   /* Round 1 : init_globals kernel */
   genX(grl_misc_batched_init_globals)(
      cmd_buffer,
      intel_canonical_address(transient_base +
                              transient_mem_init_globals_offset),
      infoCount);

   anv_add_pending_pipe_bits(cmd_buffer,
                             ANV_GRL_FLUSH_FLAGS,
                             "building accel struct");

   /* Round 2 : Copy instance/geometry data from the application provided
    * buffers into the acceleration structures.
    */
   for (uint32_t i = 0; i < infoCount; i++) {
      struct build_state *bs = &builds[i];

      /* Metadata */
      if (bs->num_instances) {
         assert(bs->num_geometries == 0);

         const uint64_t copy_size = bs->num_instances * sizeof(InstanceDesc);
         /* This must be calculated in the same way as
          * groupCountForGeoMetaDataCopySize
          */
         const uint32_t num_threads = (copy_size >> 8) + 3;
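
         /* Hedged note (added, not from the original source): the shift by 8
          * suggests each thread copies roughly 256 bytes, with the +3 as a
          * margin for the remainder; the value has to stay in sync with the
          * matching group-count computation on the GRL side.
          */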

         if (bs->array_of_instances_ptr) {
            genX(grl_misc_copy_instance_ptrs)(
               cmd_buffer,
               anv_address_physical(anv_address_add(bs->bvh_addr,
                                                    bs->estimate.instance_descs_start)),
               bs->instances_addr,
               copy_size, num_threads);
         } else {
            genX(grl_misc_copy_instances)(
               cmd_buffer,
               anv_address_physical(anv_address_add(bs->bvh_addr,
                                                    bs->estimate.instance_descs_start)),
               bs->instances_addr,
               copy_size, num_threads);
         }
      }

      if (bs->num_geometries) {
         assert(bs->num_instances == 0);
         const uint64_t copy_size = bs->num_geometries * sizeof(struct GeoMetaData);

         /* This must be calculated in the same way as
          * groupCountForGeoMetaDataCopySize
          */
         const uint32_t num_threads = (copy_size >> 6) + 1;

         genX(grl_misc_copy_geo_meta_data)(
            cmd_buffer,
            anv_address_physical(anv_address_add(bs->bvh_addr,
                                                 bs->estimate.geo_meta_data_start)),
            bs->state.geomDesc_buffer,
            copy_size,
            num_threads);
      }

      /* Primrefs */
      if (bs->num_instances) {
         if (bs->array_of_instances_ptr) {
            genX(grl_build_primref_buildPrimirefsFromInstancesArrOfPtrs)(
               cmd_buffer,
               bs->instances_addr,
               PREFIX_MK_SIZE(grl_build_primref, bs->estimate),
               PREFIX_MK_STATE(grl_build_primref, bs->state),
               false /* allowUpdate */);
         } else {
            genX(grl_build_primref_buildPrimirefsFromInstances)(
               cmd_buffer,
               bs->instances_addr,
               PREFIX_MK_SIZE(grl_build_primref, bs->estimate),
               PREFIX_MK_STATE(grl_build_primref, bs->state),
               false /* allowUpdate */);
         }
      }

      if (bs->num_geometries) {
         const VkAccelerationStructureBuildGeometryInfoKHR *pInfo = &pInfos[i];
         const VkAccelerationStructureBuildRangeInfoKHR *pBuildRangeInfos =
            ppBuildRangeInfos ? ppBuildRangeInfos[i] : NULL;

         assert(pInfo->geometryCount == bs->num_geometries);
         for (unsigned g = 0; g < pInfo->geometryCount; g++) {
            const VkAccelerationStructureGeometryKHR *pGeometry =
               get_geometry(pInfo, g);

            switch (pGeometry->geometryType) {
            case VK_GEOMETRY_TYPE_TRIANGLES_KHR:
               genX(grl_build_primref_primrefs_from_tris)(
                  cmd_buffer,
                  PREFIX_MK_STATE(grl_build_primref, bs->state),
                  PREFIX_MK_SIZE(grl_build_primref, bs->estimate),
                  bs->state.geomDesc_buffer + g * sizeof(struct Geo),
                  g,
                  vk_to_grl_GeometryFlags(pGeometry->flags),
                  /* TODO: Indirect */
                  pBuildRangeInfos[g].primitiveCount);
               break;

            case VK_GEOMETRY_TYPE_AABBS_KHR:
               genX(grl_build_primref_primrefs_from_proc)(
                  cmd_buffer,
                  PREFIX_MK_STATE(grl_build_primref, bs->state),
                  PREFIX_MK_SIZE(grl_build_primref, bs->estimate),
                  bs->state.geomDesc_buffer + g * sizeof(struct Geo),
                  g,
                  vk_to_grl_GeometryFlags(pGeometry->flags),
                  /* TODO: Indirect */
                  pBuildRangeInfos[g].primitiveCount);
               break;

            default:
               unreachable("Invalid geometry type");
            }
         }
      }
   }

   anv_add_pending_pipe_bits(cmd_buffer,
                             ANV_GRL_FLUSH_FLAGS,
                             "building accel struct");

   /* Dispatch trivial builds */
   if (num_trivial_builds) {
      for (uint32_t i = 0; i < infoCount; i++) {
         struct build_state *bs = &builds[i];

         if (bs->build_method != ANV_BVH_BUILD_METHOD_TRIVIAL)
            continue;

         genX(grl_new_sah_builder_single_pass_binsah)(
            cmd_buffer,
            bs->scratch.globals,
            bs->state.bvh_buffer,
            bs->state.build_primref_buffer,
            bs->scratch.leaf_index_buffers,
            false /* alloc_backpointers */);
      }
   }

   /* Dispatch new SAH builds */
   if (num_new_sah_builds) {
      size_t global_ptrs_offset  = transient_mem_binnedsah_offset;
      size_t buffers_info_offset = transient_mem_binnedsah_offset + sizeof(gpuva_t) * num_new_sah_builds;

      size_t scheduler_offset   = private_mem_binnedsah_offset;
      size_t sah_globals_offset = private_mem_binnedsah_offset + get_scheduler_size(num_new_sah_builds);

      struct SAHBuildArgsBatchable args = {
         .num_builds = infoCount,
         .p_globals_ptrs = intel_canonical_address(transient_base + global_ptrs_offset),
         .p_buffers_info = intel_canonical_address(transient_base + buffers_info_offset),
         .p_scheduler = intel_canonical_address(private_base + scheduler_offset),
         .p_sah_globals = intel_canonical_address(private_base + sah_globals_offset),
         .num_max_qnode_global_root_buffer_entries = MAX2(num_new_sah_builds, QNODE_GLOBAL_ROOT_BUFFER_MIN_ENTRIES_NUM),
      };

      for (uint32_t i = 0; i < infoCount; i++) {
         struct build_state *bs = &builds[i];

         if (bs->build_method != ANV_BVH_BUILD_METHOD_NEW_SAH)
            continue;

         uint64_t p_build_primref_index_buffers;
         uint64_t p_bvh2;
         uint64_t p_qnode_child_buffer;

         get_binnedsah_scratch_buffers(bs,
                                       &p_qnode_child_buffer,
                                       &p_build_primref_index_buffers,
                                       &p_bvh2);

         struct SAHBuildBuffersInfo buffers = {
            .p_primref_index_buffers = bs->scratch.leaf_index_buffers,
            .p_bvh_base = bs->state.bvh_buffer,
            .p_primrefs_buffer = bs->state.build_primref_buffer,
            .p_bvh2 = p_bvh2,
            .p_qnode_root_buffer = p_qnode_child_buffer,
            .sah_globals_flags = 0,
         };

         write_memory(transient_mem_alloc, buffers_info_offset, &buffers, sizeof(buffers));
         buffers_info_offset += sizeof(buffers);

         write_memory(transient_mem_alloc, global_ptrs_offset, &bs->state.build_globals,
                      sizeof(bs->state.build_globals));
         global_ptrs_offset += sizeof(bs->state.build_globals);
      }

      genX(grl_new_sah_builder_new_sah_build_batchable)(
         cmd_buffer, PREFIX_MK_SAH_BUILD_ARGS_BATCHABLE(grl_new_sah_builder, args));
   }

   if (num_new_sah_builds == 0)
      anv_add_pending_pipe_bits(cmd_buffer,
                                ANV_GRL_FLUSH_FLAGS,
                                "building accel struct");

   /* Finally write the leaves. */
   for (uint32_t i = 0; i < infoCount; i++) {
      struct build_state *bs = &builds[i];

      if (bs->num_instances) {
         assert(bs->num_geometries == 0);
         if (bs->array_of_instances_ptr) {
            genX(grl_leaf_builder_buildLeafDXR_instances_pointers)(cmd_buffer,
               PREFIX_MK_STATE(grl_leaf_builder, bs->state),
               bs->scratch.leaf_index_buffers,
               bs->instances_addr,
               bs->scratch.leaf_index_buffer_stride,
               0 /* offset */,
               bs->estimate.numBuildPrimitives);
         } else {
            genX(grl_leaf_builder_buildLeafDXR_instances)(cmd_buffer,
               PREFIX_MK_STATE(grl_leaf_builder, bs->state),
               bs->scratch.leaf_index_buffers,
               bs->instances_addr,
               bs->scratch.leaf_index_buffer_stride,
               0 /* offset */,
               bs->estimate.numBuildPrimitives);
         }
      }

      if (bs->num_geometries) {
         assert(bs->num_instances == 0);
         const uint64_t p_numPrimitives =
            bs->state.build_globals + offsetof(struct Globals, numPrimitives);

         assert(bs->estimate.numProcedurals == 0 ||
                bs->estimate.numTriangles == 0);
         if (bs->estimate.numProcedurals) {
            genX(grl_leaf_builder_buildLeafDXR_procedurals)(
               cmd_buffer,
               PREFIX_MK_STATE(grl_leaf_builder, bs->state),
               bs->scratch.leaf_index_buffers,
               bs->scratch.leaf_index_buffer_stride,
               0 /* offset */,
               p_numPrimitives);
         } else {
            genX(grl_leaf_builder_buildLeafDXR_quads)(
               cmd_buffer,
               PREFIX_MK_STATE(grl_leaf_builder, bs->state),
               bs->scratch.leaf_index_buffers,
               bs->scratch.leaf_index_buffer_stride,
               0 /* offset */,
               p_numPrimitives,
               false /* allow_updates */);
         }
      }
   }

   anv_add_pending_pipe_bits(cmd_buffer,
                             ANV_GRL_FLUSH_FLAGS,
                             "building accel struct");

   trace_intel_end_as_build(&cmd_buffer->trace);

 error:
   vk_free(&cmd_buffer->device->vk.alloc, builds);
}

void
genX(CmdBuildAccelerationStructuresKHR)(
    VkCommandBuffer                             commandBuffer,
    uint32_t                                    infoCount,
    const VkAccelerationStructureBuildGeometryInfoKHR* pInfos,
    const VkAccelerationStructureBuildRangeInfoKHR* const* ppBuildRangeInfos)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);

   if (anv_batch_has_error(&cmd_buffer->batch))
      return;

   cmd_build_acceleration_structures(cmd_buffer, infoCount, pInfos,
                                     ppBuildRangeInfos, NULL, NULL, NULL);
}

void
genX(CmdBuildAccelerationStructuresIndirectKHR)(
    VkCommandBuffer                             commandBuffer,
    uint32_t                                    infoCount,
    const VkAccelerationStructureBuildGeometryInfoKHR* pInfos,
    const VkDeviceAddress*                      pIndirectDeviceAddresses,
    const uint32_t*                             pIndirectStrides,
    const uint32_t* const*                      ppMaxPrimitiveCounts)
{
   unreachable("Unimplemented");
}

void
genX(CmdCopyAccelerationStructureKHR)(
    VkCommandBuffer                             commandBuffer,
    const VkCopyAccelerationStructureInfoKHR*   pInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(vk_acceleration_structure, src_accel, pInfo->src);
   ANV_FROM_HANDLE(vk_acceleration_structure, dst_accel, pInfo->dst);

   assert(pInfo->mode == VK_COPY_ACCELERATION_STRUCTURE_MODE_COMPACT_KHR ||
          pInfo->mode == VK_COPY_ACCELERATION_STRUCTURE_MODE_CLONE_KHR);

   if (pInfo->mode == VK_COPY_ACCELERATION_STRUCTURE_MODE_CLONE_KHR) {
      uint64_t src_size_addr =
         vk_acceleration_structure_get_va(src_accel) +
         offsetof(struct BVHBase, Meta.allocationSize);
      genX(grl_copy_clone_indirect)(
         cmd_buffer,
         vk_acceleration_structure_get_va(dst_accel),
         vk_acceleration_structure_get_va(src_accel),
         src_size_addr);
   } else {
      genX(grl_copy_compact)(
         cmd_buffer,
         vk_acceleration_structure_get_va(dst_accel),
         vk_acceleration_structure_get_va(src_accel));
   }

   anv_add_pending_pipe_bits(cmd_buffer,
                             ANV_PIPE_END_OF_PIPE_SYNC_BIT,
                             "after copy acceleration struct");
}

void
genX(CmdCopyAccelerationStructureToMemoryKHR)(
    VkCommandBuffer                             commandBuffer,
    const VkCopyAccelerationStructureToMemoryInfoKHR* pInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(vk_acceleration_structure, src_accel, pInfo->src);
   struct anv_device *device = cmd_buffer->device;
   uint64_t src_size_addr =
      vk_acceleration_structure_get_va(src_accel) +
      offsetof(struct BVHBase, Meta.allocationSize);

   assert(pInfo->mode == VK_COPY_ACCELERATION_STRUCTURE_MODE_SERIALIZE_KHR);

   genX(grl_copy_serialize_indirect)(
      cmd_buffer,
      pInfo->dst.deviceAddress,
      vk_acceleration_structure_get_va(src_accel),
      anv_address_physical(device->rt_uuid_addr),
      src_size_addr);

   anv_add_pending_pipe_bits(cmd_buffer,
                             ANV_PIPE_END_OF_PIPE_SYNC_BIT,
                             "after copy acceleration struct");
}

void
genX(CmdCopyMemoryToAccelerationStructureKHR)(
    VkCommandBuffer                             commandBuffer,
    const VkCopyMemoryToAccelerationStructureInfoKHR* pInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(vk_acceleration_structure, dst_accel, pInfo->dst);

   assert(pInfo->mode == VK_COPY_ACCELERATION_STRUCTURE_MODE_DESERIALIZE_KHR);

   uint64_t src_size_addr = pInfo->src.deviceAddress +
      offsetof(struct SerializationHeader, DeserializedSizeInBytes);
   genX(grl_copy_deserialize_indirect)(
      cmd_buffer,
      vk_acceleration_structure_get_va(dst_accel),
      pInfo->src.deviceAddress,
      src_size_addr);

   anv_add_pending_pipe_bits(cmd_buffer,
                             ANV_PIPE_END_OF_PIPE_SYNC_BIT,
                             "after copy acceleration struct");
}

/* TODO: Host commands */

VkResult
genX(BuildAccelerationStructuresKHR)(
    VkDevice                                    _device,
    VkDeferredOperationKHR                      deferredOperation,
    uint32_t                                    infoCount,
    const VkAccelerationStructureBuildGeometryInfoKHR* pInfos,
    const VkAccelerationStructureBuildRangeInfoKHR* const* ppBuildRangeInfos)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   unreachable("Unimplemented");
   return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT);
}

VkResult
genX(CopyAccelerationStructureKHR)(
    VkDevice                                    _device,
    VkDeferredOperationKHR                      deferredOperation,
    const VkCopyAccelerationStructureInfoKHR*   pInfo)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   unreachable("Unimplemented");
   return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT);
}

VkResult
genX(CopyAccelerationStructureToMemoryKHR)(
    VkDevice                                    _device,
    VkDeferredOperationKHR                      deferredOperation,
    const VkCopyAccelerationStructureToMemoryInfoKHR* pInfo)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   unreachable("Unimplemented");
   return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT);
}

VkResult
genX(CopyMemoryToAccelerationStructureKHR)(
    VkDevice                                    _device,
    VkDeferredOperationKHR                      deferredOperation,
    const VkCopyMemoryToAccelerationStructureInfoKHR* pInfo)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   unreachable("Unimplemented");
   return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT);
}

VkResult
genX(WriteAccelerationStructuresPropertiesKHR)(
    VkDevice                                    _device,
    uint32_t                                    accelerationStructureCount,
    const VkAccelerationStructureKHR*           pAccelerationStructures,
    VkQueryType                                 queryType,
    size_t                                      dataSize,
    void*                                       pData,
    size_t                                      stride)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   unreachable("Unimplemented");
   return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT);
}

#endif /* GFX_VERx10 >= 125 */