/*
 * Copyright © 2022 Konstantin Seurer
 *
 * SPDX-License-Identifier: MIT
 */

#ifndef BVH_BUILD_HELPERS_H
#define BVH_BUILD_HELPERS_H

#include "bvh.h"

#define VK_FORMAT_UNDEFINED                  0
#define VK_FORMAT_R4G4_UNORM_PACK8           1
#define VK_FORMAT_R4G4B4A4_UNORM_PACK16      2
#define VK_FORMAT_B4G4R4A4_UNORM_PACK16      3
#define VK_FORMAT_R5G6B5_UNORM_PACK16        4
#define VK_FORMAT_B5G6R5_UNORM_PACK16        5
#define VK_FORMAT_R5G5B5A1_UNORM_PACK16      6
#define VK_FORMAT_B5G5R5A1_UNORM_PACK16      7
#define VK_FORMAT_A1R5G5B5_UNORM_PACK16      8
#define VK_FORMAT_R8_UNORM                   9
#define VK_FORMAT_R8_SNORM                   10
#define VK_FORMAT_R8_USCALED                 11
#define VK_FORMAT_R8_SSCALED                 12
#define VK_FORMAT_R8_UINT                    13
#define VK_FORMAT_R8_SINT                    14
#define VK_FORMAT_R8_SRGB                    15
#define VK_FORMAT_R8G8_UNORM                 16
#define VK_FORMAT_R8G8_SNORM                 17
#define VK_FORMAT_R8G8_USCALED               18
#define VK_FORMAT_R8G8_SSCALED               19
#define VK_FORMAT_R8G8_UINT                  20
#define VK_FORMAT_R8G8_SINT                  21
#define VK_FORMAT_R8G8_SRGB                  22
#define VK_FORMAT_R8G8B8_UNORM               23
#define VK_FORMAT_R8G8B8_SNORM               24
#define VK_FORMAT_R8G8B8_USCALED             25
#define VK_FORMAT_R8G8B8_SSCALED             26
#define VK_FORMAT_R8G8B8_UINT                27
#define VK_FORMAT_R8G8B8_SINT                28
#define VK_FORMAT_R8G8B8_SRGB                29
#define VK_FORMAT_B8G8R8_UNORM               30
#define VK_FORMAT_B8G8R8_SNORM               31
#define VK_FORMAT_B8G8R8_USCALED             32
#define VK_FORMAT_B8G8R8_SSCALED             33
#define VK_FORMAT_B8G8R8_UINT                34
#define VK_FORMAT_B8G8R8_SINT                35
#define VK_FORMAT_B8G8R8_SRGB                36
#define VK_FORMAT_R8G8B8A8_UNORM             37
#define VK_FORMAT_R8G8B8A8_SNORM             38
#define VK_FORMAT_R8G8B8A8_USCALED           39
#define VK_FORMAT_R8G8B8A8_SSCALED           40
#define VK_FORMAT_R8G8B8A8_UINT              41
#define VK_FORMAT_R8G8B8A8_SINT              42
#define VK_FORMAT_R8G8B8A8_SRGB              43
#define VK_FORMAT_B8G8R8A8_UNORM             44
#define VK_FORMAT_B8G8R8A8_SNORM             45
#define VK_FORMAT_B8G8R8A8_USCALED           46
#define VK_FORMAT_B8G8R8A8_SSCALED           47
#define VK_FORMAT_B8G8R8A8_UINT              48
#define VK_FORMAT_B8G8R8A8_SINT              49
#define VK_FORMAT_B8G8R8A8_SRGB              50
#define VK_FORMAT_A8B8G8R8_UNORM_PACK32      51
#define VK_FORMAT_A8B8G8R8_SNORM_PACK32      52
#define VK_FORMAT_A8B8G8R8_USCALED_PACK32    53
#define VK_FORMAT_A8B8G8R8_SSCALED_PACK32    54
#define VK_FORMAT_A8B8G8R8_UINT_PACK32       55
#define VK_FORMAT_A8B8G8R8_SINT_PACK32       56
#define VK_FORMAT_A8B8G8R8_SRGB_PACK32       57
#define VK_FORMAT_A2R10G10B10_UNORM_PACK32   58
#define VK_FORMAT_A2R10G10B10_SNORM_PACK32   59
#define VK_FORMAT_A2R10G10B10_USCALED_PACK32 60
#define VK_FORMAT_A2R10G10B10_SSCALED_PACK32 61
#define VK_FORMAT_A2R10G10B10_UINT_PACK32    62
#define VK_FORMAT_A2R10G10B10_SINT_PACK32    63
#define VK_FORMAT_A2B10G10R10_UNORM_PACK32   64
#define VK_FORMAT_A2B10G10R10_SNORM_PACK32   65
#define VK_FORMAT_A2B10G10R10_USCALED_PACK32 66
#define VK_FORMAT_A2B10G10R10_SSCALED_PACK32 67
#define VK_FORMAT_A2B10G10R10_UINT_PACK32    68
#define VK_FORMAT_A2B10G10R10_SINT_PACK32    69
#define VK_FORMAT_R16_UNORM                  70
#define VK_FORMAT_R16_SNORM                  71
#define VK_FORMAT_R16_USCALED                72
#define VK_FORMAT_R16_SSCALED                73
#define VK_FORMAT_R16_UINT                   74
#define VK_FORMAT_R16_SINT                   75
#define VK_FORMAT_R16_SFLOAT                 76
#define VK_FORMAT_R16G16_UNORM               77
#define VK_FORMAT_R16G16_SNORM               78
#define VK_FORMAT_R16G16_USCALED             79
#define VK_FORMAT_R16G16_SSCALED             80
#define VK_FORMAT_R16G16_UINT                81
#define VK_FORMAT_R16G16_SINT                82
#define VK_FORMAT_R16G16_SFLOAT              83
#define VK_FORMAT_R16G16B16_UNORM            84
#define VK_FORMAT_R16G16B16_SNORM            85
#define VK_FORMAT_R16G16B16_USCALED          86
#define VK_FORMAT_R16G16B16_SSCALED          87
#define VK_FORMAT_R16G16B16_UINT             88
#define VK_FORMAT_R16G16B16_SINT             89
#define VK_FORMAT_R16G16B16_SFLOAT           90
#define VK_FORMAT_R16G16B16A16_UNORM         91
#define VK_FORMAT_R16G16B16A16_SNORM         92
#define VK_FORMAT_R16G16B16A16_USCALED       93
#define VK_FORMAT_R16G16B16A16_SSCALED       94
#define VK_FORMAT_R16G16B16A16_UINT          95
#define VK_FORMAT_R16G16B16A16_SINT          96
#define VK_FORMAT_R16G16B16A16_SFLOAT        97
#define VK_FORMAT_R32_UINT                   98
#define VK_FORMAT_R32_SINT                   99
#define VK_FORMAT_R32_SFLOAT                 100
#define VK_FORMAT_R32G32_UINT                101
#define VK_FORMAT_R32G32_SINT                102
#define VK_FORMAT_R32G32_SFLOAT              103
#define VK_FORMAT_R32G32B32_UINT             104
#define VK_FORMAT_R32G32B32_SINT             105
#define VK_FORMAT_R32G32B32_SFLOAT           106
#define VK_FORMAT_R32G32B32A32_UINT          107
#define VK_FORMAT_R32G32B32A32_SINT          108
#define VK_FORMAT_R32G32B32A32_SFLOAT        109
#define VK_FORMAT_R64_UINT                   110
#define VK_FORMAT_R64_SINT                   111
#define VK_FORMAT_R64_SFLOAT                 112
#define VK_FORMAT_R64G64_UINT                113
#define VK_FORMAT_R64G64_SINT                114
#define VK_FORMAT_R64G64_SFLOAT              115
#define VK_FORMAT_R64G64B64_UINT             116
#define VK_FORMAT_R64G64B64_SINT             117
#define VK_FORMAT_R64G64B64_SFLOAT           118
#define VK_FORMAT_R64G64B64A64_UINT          119
#define VK_FORMAT_R64G64B64A64_SINT          120
#define VK_FORMAT_R64G64B64A64_SFLOAT        121

#define VK_INDEX_TYPE_UINT16    0
#define VK_INDEX_TYPE_UINT32    1
#define VK_INDEX_TYPE_NONE_KHR  1000165000
#define VK_INDEX_TYPE_UINT8_EXT 1000265000

#define VK_GEOMETRY_TYPE_TRIANGLES_KHR 0
#define VK_GEOMETRY_TYPE_AABBS_KHR     1
#define VK_GEOMETRY_TYPE_INSTANCES_KHR 2

#define VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT_KHR 1
#define VK_GEOMETRY_INSTANCE_TRIANGLE_FLIP_FACING_BIT_KHR         2
#define VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR                 4
#define VK_GEOMETRY_INSTANCE_FORCE_NO_OPAQUE_BIT_KHR              8

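/* Pointer-emulation helpers: TYPE() declares a scalar buffer_reference wrapper
 * for a given type and alignment, REF() names that wrapper type and DEREF()
 * loads/stores through it. SIZEOF() computes the referenced type's size via
 * buffer_reference pointer arithmetic, and INDEX() combines OFFSET() and REF()
 * for array-style access into a raw device address.
 */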
#define TYPE(type, align)                                                                                              \
   layout(buffer_reference, buffer_reference_align = align, scalar) buffer type##_ref                                  \
   {                                                                                                                   \
      type value;                                                                                                      \
   };

#define REF(type)  type##_ref
#define VOID_REF   uint64_t
#define NULL       0
#define DEREF(var) var.value

#define SIZEOF(type) uint32_t(uint64_t(REF(type)(uint64_t(0)) + 1))

#define OFFSET(ptr, offset) (uint64_t(ptr) + offset)

#define INFINITY (1.0 / 0.0)
#define NAN      (0.0 / 0.0)

#define INDEX(type, ptr, index) REF(type)(OFFSET(ptr, (index)*SIZEOF(type)))

TYPE(int8_t, 1);
TYPE(uint8_t, 1);
TYPE(int16_t, 2);
TYPE(uint16_t, 2);
TYPE(int32_t, 4);
TYPE(uint32_t, 4);
TYPE(int64_t, 8);
TYPE(uint64_t, 8);

TYPE(float, 4);

TYPE(vec2, 4);
TYPE(vec3, 4);
TYPE(vec4, 4);

TYPE(uvec4, 16);

TYPE(VOID_REF, 8);

/* copied from u_math.h */
uint32_t
align(uint32_t value, uint32_t alignment)
{
   return (value + alignment - 1) & ~(alignment - 1);
}

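/* Map a float to a signed integer (and back) such that integer ordering matches
 * the floating-point ordering. This allows float keys to be compared and
 * reduced with integer operations and atomics.
 */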
int32_t
to_emulated_float(float f)
{
   int32_t bits = floatBitsToInt(f);
   return f < 0 ? -2147483648 - bits : bits;
}

float
from_emulated_float(int32_t bits)
{
   return intBitsToFloat(bits < 0 ? -2147483648 - bits : bits);
}

TYPE(radv_aabb, 4);

struct key_id_pair {
   uint32_t id;
   uint32_t key;
};
TYPE(key_id_pair, 4);

TYPE(radv_accel_struct_serialization_header, 8);
TYPE(radv_accel_struct_header, 8);
TYPE(radv_bvh_triangle_node, 4);
TYPE(radv_bvh_aabb_node, 4);
TYPE(radv_bvh_instance_node, 8);
TYPE(radv_bvh_box16_node, 4);
TYPE(radv_bvh_box32_node, 4);

TYPE(radv_ir_header, 4);
TYPE(radv_ir_node, 4);
TYPE(radv_ir_box_node, 4);

TYPE(radv_global_sync_data, 4);

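/* Hardware BVH node IDs store the node offset in units of 8 bytes in the upper
 * bits and the node type in the lower 3 bits. Offsets must be 64-byte aligned
 * so that both fields fit in one 32-bit value.
 */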
uint32_t
id_to_offset(uint32_t id)
{
   return (id & (~7u)) << 3;
}

uint32_t
id_to_type(uint32_t id)
{
   return id & 7u;
}

uint32_t
pack_node_id(uint32_t offset, uint32_t type)
{
   return (offset >> 3) | type;
}

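/* Convert between 64-bit device addresses and 8-byte-granular node pointers:
 * the pointer stores (address >> 3) in 45 bits with the node type in the low
 * 3 bits, and node_to_addr() sign-extends bit 47 to recover a canonical
 * address.
 */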
uint64_t
node_to_addr(uint64_t node)
{
   node &= ~7ul;
   node <<= 19;
   return int64_t(node) >> 16;
}

uint64_t
addr_to_node(uint64_t addr)
{
   return (addr >> 3) & ((1ul << 45) - 1);
}

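/* IR node IDs store the byte offset directly, with the IR node type in the low
 * 2 bits. */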
uint32_t
ir_id_to_offset(uint32_t id)
{
   return id & (~3u);
}

uint32_t
ir_id_to_type(uint32_t id)
{
   return id & 3u;
}

uint32_t
pack_ir_node_id(uint32_t offset, uint32_t type)
{
   return offset | type;
}

uint32_t
ir_type_to_bvh_type(uint32_t type)
{
   switch (type) {
   case radv_ir_node_triangle:
      return radv_bvh_node_triangle;
   case radv_ir_node_internal:
      return radv_bvh_node_box32;
   case radv_ir_node_instance:
      return radv_bvh_node_instance;
   case radv_ir_node_aabb:
      return radv_bvh_node_aabb;
   }
   /* unreachable in valid nodes */
   return RADV_BVH_INVALID_NODE;
}

float
aabb_surface_area(radv_aabb aabb)
{
   vec3 diagonal = aabb.max - aabb.min;
   return 2 * diagonal.x * diagonal.y + 2 * diagonal.y * diagonal.z + 2 * diagonal.x * diagonal.z;
}

/* Just a wrapper for 3 uints. */
struct triangle_indices {
   uint32_t index[3];
};

triangle_indices
load_indices(VOID_REF indices, uint32_t index_format, uint32_t global_id)
{
   triangle_indices result;

   uint32_t index_base = global_id * 3;

   switch (index_format) {
   case VK_INDEX_TYPE_UINT16: {
      result.index[0] = DEREF(INDEX(uint16_t, indices, index_base + 0));
      result.index[1] = DEREF(INDEX(uint16_t, indices, index_base + 1));
      result.index[2] = DEREF(INDEX(uint16_t, indices, index_base + 2));
      break;
   }
   case VK_INDEX_TYPE_UINT32: {
      result.index[0] = DEREF(INDEX(uint32_t, indices, index_base + 0));
      result.index[1] = DEREF(INDEX(uint32_t, indices, index_base + 1));
      result.index[2] = DEREF(INDEX(uint32_t, indices, index_base + 2));
      break;
   }
   case VK_INDEX_TYPE_NONE_KHR: {
      result.index[0] = index_base + 0;
      result.index[1] = index_base + 1;
      result.index[2] = index_base + 2;
      break;
   }
   case VK_INDEX_TYPE_UINT8_EXT: {
      result.index[0] = DEREF(INDEX(uint8_t, indices, index_base + 0));
      result.index[1] = DEREF(INDEX(uint8_t, indices, index_base + 1));
      result.index[2] = DEREF(INDEX(uint8_t, indices, index_base + 2));
      break;
   }
   }

   return result;
}

/* Just a wrapper for 3 vec4s. */
struct triangle_vertices {
   vec4 vertex[3];
};

TYPE(float16_t, 2);

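/* Decode one triangle's vertices to vec4. Components missing from the vertex
 * format default to (0, 0, 0, 1). SNORM values are clamped to -1.0 because the
 * most negative representable integer would otherwise map slightly below -1.
 */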
triangle_vertices
load_vertices(VOID_REF vertices, triangle_indices indices, uint32_t vertex_format, uint32_t stride)
{
   triangle_vertices result;

   for (uint32_t i = 0; i < 3; i++) {
      VOID_REF vertex_ptr = OFFSET(vertices, indices.index[i] * stride);
      vec4 vertex = vec4(0.0, 0.0, 0.0, 1.0);

      switch (vertex_format) {
      case VK_FORMAT_R32G32_SFLOAT:
         vertex.x = DEREF(INDEX(float, vertex_ptr, 0));
         vertex.y = DEREF(INDEX(float, vertex_ptr, 1));
         break;
      case VK_FORMAT_R32G32B32_SFLOAT:
      case VK_FORMAT_R32G32B32A32_SFLOAT:
         vertex.x = DEREF(INDEX(float, vertex_ptr, 0));
         vertex.y = DEREF(INDEX(float, vertex_ptr, 1));
         vertex.z = DEREF(INDEX(float, vertex_ptr, 2));
         break;
      case VK_FORMAT_R16G16_SFLOAT:
         vertex.x = DEREF(INDEX(float16_t, vertex_ptr, 0));
         vertex.y = DEREF(INDEX(float16_t, vertex_ptr, 1));
         break;
      case VK_FORMAT_R16G16B16_SFLOAT:
      case VK_FORMAT_R16G16B16A16_SFLOAT:
         vertex.x = DEREF(INDEX(float16_t, vertex_ptr, 0));
         vertex.y = DEREF(INDEX(float16_t, vertex_ptr, 1));
         vertex.z = DEREF(INDEX(float16_t, vertex_ptr, 2));
         break;
      case VK_FORMAT_R16G16_SNORM:
         vertex.x = max(-1.0, DEREF(INDEX(int16_t, vertex_ptr, 0)) / float(0x7FFF));
         vertex.y = max(-1.0, DEREF(INDEX(int16_t, vertex_ptr, 1)) / float(0x7FFF));
         break;
      case VK_FORMAT_R16G16B16A16_SNORM:
         vertex.x = max(-1.0, DEREF(INDEX(int16_t, vertex_ptr, 0)) / float(0x7FFF));
         vertex.y = max(-1.0, DEREF(INDEX(int16_t, vertex_ptr, 1)) / float(0x7FFF));
         vertex.z = max(-1.0, DEREF(INDEX(int16_t, vertex_ptr, 2)) / float(0x7FFF));
         break;
      case VK_FORMAT_R8G8_SNORM:
         vertex.x = max(-1.0, DEREF(INDEX(int8_t, vertex_ptr, 0)) / float(0x7F));
         vertex.y = max(-1.0, DEREF(INDEX(int8_t, vertex_ptr, 1)) / float(0x7F));
         break;
      case VK_FORMAT_R8G8B8A8_SNORM:
         vertex.x = max(-1.0, DEREF(INDEX(int8_t, vertex_ptr, 0)) / float(0x7F));
         vertex.y = max(-1.0, DEREF(INDEX(int8_t, vertex_ptr, 1)) / float(0x7F));
         vertex.z = max(-1.0, DEREF(INDEX(int8_t, vertex_ptr, 2)) / float(0x7F));
         break;
      case VK_FORMAT_R16G16_UNORM:
         vertex.x = DEREF(INDEX(uint16_t, vertex_ptr, 0)) / float(0xFFFF);
         vertex.y = DEREF(INDEX(uint16_t, vertex_ptr, 1)) / float(0xFFFF);
         break;
      case VK_FORMAT_R16G16B16A16_UNORM:
         vertex.x = DEREF(INDEX(uint16_t, vertex_ptr, 0)) / float(0xFFFF);
         vertex.y = DEREF(INDEX(uint16_t, vertex_ptr, 1)) / float(0xFFFF);
         vertex.z = DEREF(INDEX(uint16_t, vertex_ptr, 2)) / float(0xFFFF);
         break;
      case VK_FORMAT_R8G8_UNORM:
         vertex.x = DEREF(INDEX(uint8_t, vertex_ptr, 0)) / float(0xFF);
         vertex.y = DEREF(INDEX(uint8_t, vertex_ptr, 1)) / float(0xFF);
         break;
      case VK_FORMAT_R8G8B8A8_UNORM:
         vertex.x = DEREF(INDEX(uint8_t, vertex_ptr, 0)) / float(0xFF);
         vertex.y = DEREF(INDEX(uint8_t, vertex_ptr, 1)) / float(0xFF);
         vertex.z = DEREF(INDEX(uint8_t, vertex_ptr, 2)) / float(0xFF);
         break;
      case VK_FORMAT_A2B10G10R10_UNORM_PACK32: {
         uint32_t data = DEREF(REF(uint32_t)(vertex_ptr));
         vertex.x = float(data & 0x3FF) / 0x3FF;
         vertex.y = float((data >> 10) & 0x3FF) / 0x3FF;
         vertex.z = float((data >> 20) & 0x3FF) / 0x3FF;
         break;
      }
      }

      result.vertex[i] = vertex;
   }

   return result;
}

/* A GLSL-adapted copy of VkAccelerationStructureInstanceKHR. */
struct AccelerationStructureInstance {
   mat3x4 transform;
   uint32_t custom_instance_and_mask;
   uint32_t sbt_offset_and_flags;
   uint64_t accelerationStructureReference;
};
TYPE(AccelerationStructureInstance, 8);

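/* Write one triangle leaf node to dst_ptr and accumulate its bounds. Returns
 * whether the triangle is active; with ALWAYS_ACTIVE, inactive triangles are
 * still written but reported as invalid.
 */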
bool
build_triangle(inout radv_aabb bounds, VOID_REF dst_ptr, radv_bvh_geometry_data geom_data, uint32_t global_id)
{
   bool is_valid = true;
   triangle_indices indices = load_indices(geom_data.indices, geom_data.index_format, global_id);

   triangle_vertices vertices = load_vertices(geom_data.data, indices, geom_data.vertex_format, geom_data.stride);

   /* An inactive triangle is one for which the first (X) component of any vertex is NaN. If any
    * other vertex component is NaN, and the first is not, the behavior is undefined. If the vertex
    * format does not have a NaN representation, then all triangles are considered active.
    */
   if (isnan(vertices.vertex[0].x) || isnan(vertices.vertex[1].x) || isnan(vertices.vertex[2].x))
#if ALWAYS_ACTIVE
      is_valid = false;
#else
      return false;
#endif

   if (geom_data.transform != NULL) {
      mat4 transform = mat4(1.0);

      for (uint32_t col = 0; col < 4; col++)
         for (uint32_t row = 0; row < 3; row++)
            transform[col][row] = DEREF(INDEX(float, geom_data.transform, col + row * 4));

      for (uint32_t i = 0; i < 3; i++)
         vertices.vertex[i] = transform * vertices.vertex[i];
   }

   REF(radv_bvh_triangle_node) node = REF(radv_bvh_triangle_node)(dst_ptr);

   bounds.min = vec3(INFINITY);
   bounds.max = vec3(-INFINITY);

   for (uint32_t coord = 0; coord < 3; coord++)
      for (uint32_t comp = 0; comp < 3; comp++) {
         DEREF(node).coords[coord][comp] = vertices.vertex[coord][comp];
         bounds.min[comp] = min(bounds.min[comp], vertices.vertex[coord][comp]);
         bounds.max[comp] = max(bounds.max[comp], vertices.vertex[coord][comp]);
      }

   DEREF(node).triangle_id = global_id;
   DEREF(node).geometry_id_and_flags = geom_data.geometry_id;
   DEREF(node).id = 9;

   return is_valid;
}

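/* Write one AABB leaf node from the source min/max pair. Returns whether the
 * AABB is active; with ALWAYS_ACTIVE, inactive AABBs are still written but
 * reported as invalid.
 */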
bool
build_aabb(inout radv_aabb bounds, VOID_REF src_ptr, VOID_REF dst_ptr, uint32_t geometry_id, uint32_t global_id)
{
   bool is_valid = true;
   REF(radv_bvh_aabb_node) node = REF(radv_bvh_aabb_node)(dst_ptr);

   for (uint32_t vec = 0; vec < 2; vec++)
      for (uint32_t comp = 0; comp < 3; comp++) {
         float coord = DEREF(INDEX(float, src_ptr, comp + vec * 3));

         if (vec == 0)
            bounds.min[comp] = coord;
         else
            bounds.max[comp] = coord;
      }

   /* An inactive AABB is one for which the minimum X coordinate is NaN. If any other component is
    * NaN, and the first is not, the behavior is undefined.
    */
   if (isnan(bounds.min.x))
#if ALWAYS_ACTIVE
      is_valid = false;
#else
      return false;
#endif

   DEREF(node).primitive_id = global_id;
   DEREF(node).geometry_id_and_flags = geometry_id;

   return is_valid;
}

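/* Transform the BLAS bounds into the parent coordinate system. Accumulating the
 * per-component min/max contributions of the affine matrix is equivalent to
 * transforming all eight corners of the box, but cheaper.
 */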
radv_aabb
calculate_instance_node_bounds(radv_accel_struct_header header, mat3x4 otw_matrix)
{
   radv_aabb aabb;
   for (uint32_t comp = 0; comp < 3; ++comp) {
      aabb.min[comp] = otw_matrix[comp][3];
      aabb.max[comp] = otw_matrix[comp][3];
      for (uint32_t col = 0; col < 3; ++col) {
         aabb.min[comp] +=
            min(otw_matrix[comp][col] * header.aabb.min[col], otw_matrix[comp][col] * header.aabb.max[col]);
         aabb.max[comp] +=
            max(otw_matrix[comp][col] * header.aabb.min[col], otw_matrix[comp][col] * header.aabb.max[col]);
      }
   }
   return aabb;
}

uint32_t
encode_sbt_offset_and_flags(uint32_t src)
{
   uint32_t flags = src >> 24;
   uint32_t ret = src & 0xffffffu;
   if ((flags & VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR) != 0)
      ret |= RADV_INSTANCE_FORCE_OPAQUE;
   if ((flags & VK_GEOMETRY_INSTANCE_FORCE_NO_OPAQUE_BIT_KHR) == 0)
      ret |= RADV_INSTANCE_NO_FORCE_NOT_OPAQUE;
   if ((flags & VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT_KHR) != 0)
      ret |= RADV_INSTANCE_TRIANGLE_FACING_CULL_DISABLE;
   if ((flags & VK_GEOMETRY_INSTANCE_TRIANGLE_FLIP_FACING_BIT_KHR) != 0)
      ret |= RADV_INSTANCE_TRIANGLE_FLIP_FACING;
   return ret;
}

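/* Write one instance leaf node. The instance transform is stored row-major (as
 * in VkTransformMatrixKHR) while GLSL matrices are column-major, so the matrix
 * as read is effectively transposed; the transpose/inverse/transpose sequence
 * therefore yields the world-to-object matrix in the same layout as the input.
 */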
bool
build_instance(inout radv_aabb bounds, VOID_REF src_ptr, VOID_REF dst_ptr, uint32_t global_id)
{
   REF(radv_bvh_instance_node) node = REF(radv_bvh_instance_node)(dst_ptr);

   AccelerationStructureInstance instance = DEREF(REF(AccelerationStructureInstance)(src_ptr));

   /* An inactive instance is one whose acceleration structure handle is VK_NULL_HANDLE. Since the
    * active terminology is only relevant for BVH updates, which we do not implement, we can also
    * skip instances with mask == 0.
    */
   if (instance.accelerationStructureReference == 0 || instance.custom_instance_and_mask < (1u << 24u))
      return false;

   radv_accel_struct_header instance_header =
      DEREF(REF(radv_accel_struct_header)(instance.accelerationStructureReference));

   DEREF(node).bvh_ptr = addr_to_node(instance.accelerationStructureReference + instance_header.bvh_offset);
   DEREF(node).bvh_offset = instance_header.bvh_offset;

   mat4 transform = mat4(instance.transform);
   mat4 inv_transform = transpose(inverse(transpose(transform)));
   DEREF(node).wto_matrix = mat3x4(inv_transform);
   DEREF(node).otw_matrix = mat3x4(transform);

   bounds = calculate_instance_node_bounds(instance_header, mat3x4(transform));

   DEREF(node).custom_instance_and_mask = instance.custom_instance_and_mask;
   DEREF(node).sbt_offset_and_flags = encode_sbt_offset_and_flags(instance.sbt_offset_and_flags);
   DEREF(node).instance_id = global_id;

   return true;
}

/** Compute ceiling of integer quotient of A divided by B.
    From macros.h */
#define DIV_ROUND_UP(A, B) (((A) + (B)-1) / (B))

#ifdef USE_GLOBAL_SYNC

/* There might be more invocations available than tasks to do.
 * In that case, the fetched task index is greater than the
 * counter offset for the next phase. To avoid out-of-bounds
 * accesses, phases are skipped until the task index is
 * in-bounds again. */
uint32_t num_tasks_to_skip = 0;
uint32_t phase_index = 0;
bool should_skip = false;
shared uint32_t global_task_index;

shared uint32_t shared_phase_index;

uint32_t
task_count(REF(radv_ir_header) header)
{
   uint32_t phase_index = DEREF(header).sync_data.phase_index;
   return DEREF(header).sync_data.task_counts[phase_index & 1];
}

/* Sets the task count for the next phase. */
void
set_next_task_count(REF(radv_ir_header) header, uint32_t new_count)
{
   uint32_t phase_index = DEREF(header).sync_data.phase_index;
   DEREF(header).sync_data.task_counts[(phase_index + 1) & 1] = new_count;
}

/*
 * This function has two main objectives:
 * Firstly, it partitions pending work among free invocations.
 * Secondly, it guarantees global synchronization between different phases.
 *
 * After every call to fetch_task, a new task index is returned.
 * fetch_task will also set num_tasks_to_skip. Use should_execute_phase
 * to determine if the current phase should be executed or skipped.
 *
 * Since tasks are assigned per-workgroup, there is a possibility of the task index being
 * greater than the total task count.
 */
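/* Illustrative usage sketch (the exact phase bodies depend on the caller): the
 * shader keeps a task_index variable, typically seeded with
 * fetch_task(header, false), and then runs each phase body inside the PHASE()
 * loop defined below, which re-fetches a task index between iterations.
 */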
uint32_t
fetch_task(REF(radv_ir_header) header, bool did_work)
{
   /* Perform a memory + control barrier for all buffer writes for the entire workgroup.
    * This guarantees that once the workgroup leaves the PHASE loop, all invocations have finished
    * and their results are written to memory. */
   controlBarrier(gl_ScopeWorkgroup, gl_ScopeDevice, gl_StorageSemanticsBuffer,
                  gl_SemanticsAcquireRelease | gl_SemanticsMakeAvailable | gl_SemanticsMakeVisible);
   if (gl_LocalInvocationIndex == 0) {
      if (did_work)
         atomicAdd(DEREF(header).sync_data.task_done_counter, 1);
      global_task_index = atomicAdd(DEREF(header).sync_data.task_started_counter, 1);

      do {
         /* Perform a memory barrier to refresh the current phase's end counter, in case
          * another workgroup changed it. */
         memoryBarrier(gl_ScopeDevice, gl_StorageSemanticsBuffer,
                       gl_SemanticsAcquireRelease | gl_SemanticsMakeAvailable | gl_SemanticsMakeVisible);

         /* The first invocation of the first workgroup in a new phase is responsible for initiating
          * the switch to a new phase. It is only possible to switch to a new phase if all tasks of
          * the previous phase have been completed. Switching to a new phase and incrementing the
          * phase end counter in turn notifies all invocations for that phase that it is safe to
          * execute.
          */
         if (global_task_index == DEREF(header).sync_data.current_phase_end_counter &&
             DEREF(header).sync_data.task_done_counter == DEREF(header).sync_data.current_phase_end_counter) {
            if (DEREF(header).sync_data.next_phase_exit_flag != 0) {
               DEREF(header).sync_data.phase_index = TASK_INDEX_INVALID;
               memoryBarrier(gl_ScopeDevice, gl_StorageSemanticsBuffer,
                             gl_SemanticsAcquireRelease | gl_SemanticsMakeAvailable | gl_SemanticsMakeVisible);
            } else {
               atomicAdd(DEREF(header).sync_data.phase_index, 1);
               DEREF(header).sync_data.current_phase_start_counter = DEREF(header).sync_data.current_phase_end_counter;
               /* Ensure the changes to the phase index and start/end counter are visible to other
                * workgroups waiting in the loop. */
               memoryBarrier(gl_ScopeDevice, gl_StorageSemanticsBuffer,
                             gl_SemanticsAcquireRelease | gl_SemanticsMakeAvailable | gl_SemanticsMakeVisible);
               atomicAdd(DEREF(header).sync_data.current_phase_end_counter,
                         DIV_ROUND_UP(task_count(header), gl_WorkGroupSize.x));
            }
            break;
         }

         /* If other invocations have finished all nodes, break out; there is no work to do. */
         if (DEREF(header).sync_data.phase_index == TASK_INDEX_INVALID) {
            break;
         }
      } while (global_task_index >= DEREF(header).sync_data.current_phase_end_counter);

      shared_phase_index = DEREF(header).sync_data.phase_index;
   }

   barrier();
   if (DEREF(header).sync_data.phase_index == TASK_INDEX_INVALID)
      return TASK_INDEX_INVALID;

   num_tasks_to_skip = shared_phase_index - phase_index;

   uint32_t local_task_index = global_task_index - DEREF(header).sync_data.current_phase_start_counter;
   return local_task_index * gl_WorkGroupSize.x + gl_LocalInvocationID.x;
}

bool
should_execute_phase()
{
   if (num_tasks_to_skip > 0) {
      /* Skip to next phase. */
      ++phase_index;
      --num_tasks_to_skip;
      return false;
   }
   return true;
}

#define PHASE(header)                                                                                                  \
   for (; task_index != TASK_INDEX_INVALID && should_execute_phase(); task_index = fetch_task(header, true))
#endif /* USE_GLOBAL_SYNC */

#endif /* BVH_BUILD_HELPERS_H */