xref: /aosp_15_r20/external/mesa3d/src/intel/vulkan/grl/gpu/qbvh6.h (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 //
2 // Copyright (C) 2009-2021 Intel Corporation
3 //
4 // SPDX-License-Identifier: MIT
5 //
6 //
7 
8 #pragma once
9 
10 #include "GRLGen12.h"
11 
12 #include "shared.h"
13 #include "quad.h"
14 
15 /* ====== GENERAL BVH config ====== */
16 
// Number of child slots in a QBVH node; sizes the per-child arrays in
// struct QBVH_AABB and bounds the per-child loops in this file.
17 #define BVH_NODE_N6 6
// 8 == 1 << BVH_NODE_N_LOG. NOTE(review): not referenced in this part of the
// file; presumably the padded/SIMD width of a node - confirm against users.
18 #define BVH_NODE_N 8
19 #define BVH_NODE_N_LOG 3
20 
// NOTE(review): not referenced in this part of the file - confirm meaning with the SAH builder.
21 #define SAH_LOG_BLOCK_SHIFT 2
// Both leaf-size limits alias the node width (6).
22 #define BVH_LEAF_N_MIN BVH_NODE_N6
23 #define BVH_LEAF_N_MAX BVH_NODE_N6
24 
// Instance mask passed by the non-instance node setters (all bits set).
25 #define BVH_NODE_DEFAULT_MASK 0xff
// An instance whose mask equals this value is treated as degenerated/inactive.
26 #define BVH_NODE_DEGENERATED_MASK 0x00
27 
28 /* ====== QUANTIZATION config ====== */
29 
// Child bounds are stored as 8-bit values clamped to [QUANT_MIN, QUANT_MAX].
30 #define QUANT_BITS 8
31 #define QUANT_MIN 0
32 #define QUANT_MAX 255
// Mantissa threshold: when frexp() of the node extent returns a mantissa above
// this value, the node exponent is bumped by one.
33 #define QUANT_MAX_MANT (255.0f / 256.0f)
34 
// NOTE(review): not referenced in this part of the file.
35 #define NO_NODE_OFFSET 0
36 
37 /* ======================================================================= */
38 /* ============================== BVH BASE =============================== */
39 /* ======================================================================= */
40 
setBVHBaseBounds(struct BVHBase * base,struct AABB * aabb)41 GRL_INLINE void setBVHBaseBounds(struct BVHBase *base, struct AABB *aabb)
42 {
43     base->Meta.bounds.lower[0] = aabb->lower.x;
44     base->Meta.bounds.lower[1] = aabb->lower.y;
45     base->Meta.bounds.lower[2] = aabb->lower.z;
46 
47     base->Meta.bounds.upper[0] = aabb->upper.x;
48     base->Meta.bounds.upper[1] = aabb->upper.y;
49     base->Meta.bounds.upper[2] = aabb->upper.z;
50 }
51 
BVHBase_nodeData(struct BVHBase * bvh)52 GRL_INLINE global struct QBVHNodeN *BVHBase_nodeData(struct BVHBase *bvh)
53 {
54     return (global struct QBVHNodeN *)((void *)bvh + BVH_ROOT_NODE_OFFSET);
55 }
56 
BVHBase_rootNode(struct BVHBase * bvh)57 GRL_INLINE global struct QBVHNodeN *BVHBase_rootNode(struct BVHBase *bvh)
58 {
59     return (global struct QBVHNodeN *)((void *)bvh + BVH_ROOT_NODE_OFFSET);
60 }
61 
BVHBase_quadLeaves(struct BVHBase * bvh)62 GRL_INLINE global struct Quad *BVHBase_quadLeaves(struct BVHBase *bvh)
63 {
64     return (global struct Quad *)((void *)bvh + 64 * (ulong)bvh->quadLeafStart);
65 }
66 
BVHBase_numNodes(struct BVHBase * bvh)67 GRL_INLINE uint64_t BVHBase_numNodes(struct BVHBase *bvh)
68 {
69     return bvh->nodeDataCur - BVH_ROOT_NODE_OFFSET / 64;
70 }
71 
BVHBase_numQuads(struct BVHBase * bvh)72 GRL_INLINE uint64_t BVHBase_numQuads(struct BVHBase *bvh)
73 {
74     return bvh->quadLeafCur - bvh->quadLeafStart;
75 }
76 
BVHBase_numProcedurals(struct BVHBase * bvh)77 GRL_INLINE uint64_t BVHBase_numProcedurals(struct BVHBase *bvh)
78 {
79     return bvh->proceduralDataCur - bvh->proceduralDataStart;
80 }
81 
BVHBase_numInstances(struct BVHBase * bvh)82 GRL_INLINE uint64_t BVHBase_numInstances(struct BVHBase *bvh)
83 {
84     return bvh->instanceLeafEnd - bvh->instanceLeafStart;
85 }
86 
87 /* =================================================================== */
88 /* ============================== QBVH =============================== */
89 /* =================================================================== */
90 
// Relative error margin (FLT_EPSILON) used in this file to pad boxes
// (conservativeAABB) and to build the "1 + ulp" / "1 - ulp" scale factors
// applied before quantizing child bounds.
91 __constant const float ulp = FLT_EPSILON;
92 
conservativeAABB(struct AABB * aabb)93 GRL_INLINE struct AABB conservativeAABB(struct AABB *aabb)
94 {
95     struct AABB box;
96     const float4 v4 = max(fabs(aabb->lower), fabs(aabb->upper));
97     const float v = ulp * max(v4.x, max(v4.y, v4.z));
98     box.lower = aabb->lower - (float4)v;
99     box.upper = aabb->upper + (float4)v;
100     return box;
101 }
102 
conservativeAABB3f(struct AABB3f * aabb3d)103 GRL_INLINE struct AABB3f conservativeAABB3f(struct AABB3f* aabb3d)
104 {
105     struct AABB aabb4d = AABBfromAABB3f(*aabb3d);
106     struct AABB box = conservativeAABB(&aabb4d);
107     return AABB3fFromAABB(box);
108 }
109 
// Quantized child bounds of one QBVH node: one byte per child slot for each of
// the six box planes (36 bytes in total). Values are relative to the node's
// origin/exponent (see extractAABB_QBVHNodeN). Code in this file treats a slot
// with lower_x > upper_x as empty, and marks empty slots with lower = 0x80,
// upper = 0.
110 struct QBVH_AABB
111 {
112     uchar lower_x[BVH_NODE_N6];
113     uchar upper_x[BVH_NODE_N6];
114     uchar lower_y[BVH_NODE_N6];
115     uchar upper_y[BVH_NODE_N6];
116     uchar lower_z[BVH_NODE_N6];
117     uchar upper_z[BVH_NODE_N6];
118 };
119 
// 64-byte internal BVH node with up to BVH_NODE_N6 quantized children.
// The trailing "N bytes" comments give the running size after each group of fields.
120 struct QBVHNodeN
121 {
    // Origin of the quantization grid (written from the lower corner of the padded node box).
122     float lower[3];
    // Signed offset from this node to its children, in units of 64 bytes
    // (see extractQBVHNodeN_offset / QBVHNodeN_childrenPointer).
123     int offset;
124     // 16 bytes
    // Node type tag (the setters store NODE_TYPE_* values here).
125     uchar type;
126     uchar pad;
127     // 18 bytes
    // Per-axis exponent of the quantization grid; dequantization uses exp - 8.
128     char exp[3];
    // Instance mask of the node.
129     uchar instMask;
130     // 22 bytes
    // Per-child byte: bits 0..1 are read by QBVHNodeN_blockIncr,
    // bits 2..5 by QBVHNodeN_startPrim.
131     uchar childData[6];
132     // 28 bytes
133     struct QBVH_AABB qbounds; // + 36 bytes
134                               // 64 bytes
135 };
136 
QBVHNodeN_blockIncr(struct QBVHNodeN * This,uint childID)137 GRL_INLINE uint QBVHNodeN_blockIncr(struct QBVHNodeN *This, uint childID)
138 {
139     return This->childData[childID] & 0x3;
140 }
141 
QBVHNodeN_startPrim(struct QBVHNodeN * This,uint childID)142 GRL_INLINE uint QBVHNodeN_startPrim(struct QBVHNodeN *This, uint childID)
143 {
144     return (This->childData[childID] >> 2) & 0xF;
145 }
146 
initQBVHNodeN(struct QBVHNodeN * qnode)147 GRL_INLINE void initQBVHNodeN(struct QBVHNodeN *qnode)
148 {
149     uint *ptr = (uint *)qnode;
150     for (uint i = 0; i < 16; i++)
151         ptr[i] = 0;
152 }
153 
extractAABB_QBVHNodeN(struct QBVHNodeN * qnode,uint i)154 GRL_INLINE struct AABB extractAABB_QBVHNodeN(struct QBVHNodeN *qnode, uint i)
155 {
156     struct AABB aabb;
157     const float4 base = (float4)(qnode->lower[0], qnode->lower[1], qnode->lower[2], 0.0f);
158     const int4 lower_i = (int4)(qnode->qbounds.lower_x[i], qnode->qbounds.lower_y[i], qnode->qbounds.lower_z[i], 0);
159     const int4 upper_i = (int4)(qnode->qbounds.upper_x[i], qnode->qbounds.upper_y[i], qnode->qbounds.upper_z[i], 0);
160     const int4 exp_i = (int4)(qnode->exp[0], qnode->exp[1], qnode->exp[2], 0.0f);
161     aabb.lower = base + bitShiftLdexp4(convert_float4_rtn(lower_i), exp_i - 8);
162     aabb.upper = base + bitShiftLdexp4(convert_float4_rtp(upper_i), exp_i - 8);
163     return aabb;
164 }
165 
getAABB_QBVHNodeN(struct QBVHNodeN * qnode)166 GRL_INLINE struct AABB getAABB_QBVHNodeN(struct QBVHNodeN *qnode)
167 {
168     struct AABB aabb;
169 #if 0
170   AABB_init(&aabb);
171   for (uint i = 0; i < BVH_NODE_N6; i++)
172   {
173     struct AABB v = extractAABB_QBVHNodeN(qnode, i);
174     AABB_extend(&aabb, &v);
175   }
176 #else
177     uint lower_x = qnode->qbounds.lower_x[0];
178     uint lower_y = qnode->qbounds.lower_y[0];
179     uint lower_z = qnode->qbounds.lower_z[0];
180 
181     uint upper_x = qnode->qbounds.upper_x[0];
182     uint upper_y = qnode->qbounds.upper_y[0];
183     uint upper_z = qnode->qbounds.upper_z[0];
184 
185     for (uint i = 1; i < BVH_NODE_N6; i++)
186     {
187         uint lx = qnode->qbounds.lower_x[i];
188         uint ly = qnode->qbounds.lower_y[i];
189         uint lz = qnode->qbounds.lower_z[i];
190 
191         uint ux = qnode->qbounds.upper_x[i];
192         uint uy = qnode->qbounds.upper_y[i];
193         uint uz = qnode->qbounds.upper_z[i];
194 
195         bool valid = lx <= ux;
196         if (valid)
197         {
198             lower_x = min(lower_x, lx);
199             lower_y = min(lower_y, ly);
200             lower_z = min(lower_z, lz);
201 
202             upper_x = max(upper_x, ux);
203             upper_y = max(upper_y, uy);
204             upper_z = max(upper_z, uz);
205         }
206     }
207 
208     const float4 base = (float4)(qnode->lower[0], qnode->lower[1], qnode->lower[2], 0.0f);
209     const int4 lower_i = (int4)(lower_x, lower_y, lower_z, 0);
210     const int4 upper_i = (int4)(upper_x, upper_y, upper_z, 0);
211     const int4 exp_i = (int4)(qnode->exp[0], qnode->exp[1], qnode->exp[2], 0.0f);
212     aabb.lower = base + bitShiftLdexp4(convert_float4_rtn(lower_i), exp_i - 8);
213     aabb.upper = base + bitShiftLdexp4(convert_float4_rtp(upper_i), exp_i - 8);
214 #endif
215     return aabb;
216 }
217 
InternalNode_getAABB3f(struct InternalNode * node)218 GRL_INLINE struct AABB3f InternalNode_getAABB3f(struct InternalNode* node)
219 {
220     return AABB3fFromAABB(getAABB_QBVHNodeN((struct QBVHNodeN*)node));
221 }
222 
getNumChildren_QBVHNodeN(struct QBVHNodeN * qnode)223 GRL_INLINE uint getNumChildren_QBVHNodeN(struct QBVHNodeN *qnode)
224 {
225     uint children = 0;
226     for (uint i = 0; i < BVH_NODE_N6; i++)
227     {
228         uint lx = qnode->qbounds.lower_x[i];
229         uint ux = qnode->qbounds.upper_x[i];
230         bool valid = lx <= ux;
231         if (valid)
232             children++;
233     }
234     return children;
235 }
236 
extractQBVHNodeN_offset(struct QBVHNodeN * qnode)237 GRL_INLINE long extractQBVHNodeN_offset(struct QBVHNodeN *qnode)
238 {
239     return ((long)qnode->offset) << 6;
240 }
241 
QBVHNodeN_childrenPointer(struct QBVHNodeN * qnode)242 GRL_INLINE void *QBVHNodeN_childrenPointer(struct QBVHNodeN *qnode)
243 {
244     const int offset = qnode->offset;
245     return (void *)(qnode + offset);
246 }
247 
subgroup_setQBVHNodeN_setFields_reduced_bounds(const int offset,const uint type,struct AABB * input_aabb,const uint numChildren,const uchar mask,struct QBVHNodeN * qbvh_node,const bool degenerated,struct AABB reduced_aabb)248 GRL_INLINE void subgroup_setQBVHNodeN_setFields_reduced_bounds(const int offset, const uint type, struct AABB* input_aabb, const uint numChildren, const uchar mask, struct QBVHNodeN* qbvh_node, const bool degenerated, struct AABB reduced_aabb)
249 {
250     const uint subgroupLocalID = get_sub_group_local_id();
251     const uint k = subgroupLocalID;
252     const float up = 1.0f + ulp;
253     const float down = 1.0f - ulp;
254 
255     struct AABB aabb = reduced_aabb; // needs to execute with full subgroup width
256     aabb = AABB_sub_group_broadcast(&aabb, 0);
257 
258     if (subgroupLocalID < BVH_NODE_N6)
259     {
260         struct AABB conservative_aabb = conservativeAABB(&aabb);
261         const float3 len = AABB_size(&conservative_aabb).xyz * up;
262         int3 exp;
263         const float3 mant = frexp_vec3(len, &exp);
264         const float3 org = conservative_aabb.lower.xyz;
265 
266         exp += (mant > (float3)QUANT_MAX_MANT ? (int3)1 : (int3)0);
267 
268         qbvh_node->offset = offset;
269         qbvh_node->type = type;
270 
271         qbvh_node->lower[0] = org.x;
272         qbvh_node->lower[1] = org.y;
273         qbvh_node->lower[2] = org.z;
274 
275         qbvh_node->exp[0] = exp.x;
276         qbvh_node->exp[1] = exp.y;
277         qbvh_node->exp[2] = exp.z;
278 
279         qbvh_node->instMask = mask;
280 
281         uchar3 lower_uchar = (uchar3)(0x80);
282         uchar3 upper_uchar = (uchar3)(0);
283 
284         if (subgroupLocalID < numChildren)
285         {
286             struct AABB child_aabb = conservativeAABB(input_aabb);
287 
288             float3 lower = floor(bitShiftLdexp3((child_aabb.lower.xyz - org) * down, -exp + 8));
289             lower = clamp(lower, (float)(QUANT_MIN), (float)(QUANT_MAX));
290             float3 upper = ceil(bitShiftLdexp3((child_aabb.upper.xyz - org) * up, -exp + 8));
291             upper = clamp(upper, (float)(QUANT_MIN), (float)(QUANT_MAX));
292 
293             lower_uchar = convert_uchar3_rtn(lower);
294             upper_uchar = convert_uchar3_rtp(upper);
295 
296             if (degenerated)
297             {
298                 lower_uchar = upper_uchar = 0;
299             }
300         }
301 
302         qbvh_node->qbounds.lower_x[k] = lower_uchar.x;
303         qbvh_node->qbounds.lower_y[k] = lower_uchar.y;
304         qbvh_node->qbounds.lower_z[k] = lower_uchar.z;
305         qbvh_node->qbounds.upper_x[k] = upper_uchar.x;
306         qbvh_node->qbounds.upper_y[k] = upper_uchar.y;
307         qbvh_node->qbounds.upper_z[k] = upper_uchar.z;
308 
309         qbvh_node->childData[k] = (type == NODE_TYPE_INSTANCE) ? 2 : 1;
310 
311 #if ENABLE_CONVERSION_CHECKS == 1
312 
313         if (!(exp.x >= -128 && exp.x <= 127))
314             printf("exp_x error \n");
315         if (!(exp.y >= -128 && exp.y <= 127))
316             printf("exp_y error \n");
317         if (!(exp.z >= -128 && exp.z <= 127))
318             printf("exp_z error \n");
319 
320         struct AABB child_qaabb = extractAABB_QBVHNodeN(qbvh_node, k);
321         if (!AABB_subset(&child_aabb, &child_qaabb))
322         {
323             uint3 lower_i = convert_uint3(lower_uchar);
324             uint3 upper_i = convert_uint3(upper_uchar);
325 
326             printf("\n ERROR %d\n", k);
327             printf("lower %f upper %f \n lower_i %d  upper_i %d \n", lower, upper, lower_i, upper_i);
328             printf("%i uncompressed \n", k);
329             AABB_print(&child_aabb);
330             printf("%i compressed \n", k);
331             AABB_print(&child_qaabb);
332 
333             printf("%i uncompressed (as int) \n", k);
334             AABB_printasInt(&child_aabb);
335             printf("%i compressed (as int) \n", k);
336             AABB_printasInt(&child_qaabb);
337 
338             int4 e0 = child_aabb.lower < child_qaabb.lower;
339             int4 e1 = child_aabb.upper > child_qaabb.upper;
340             printf("e0 %d e1 %d \n", e0, e1);
341         }
342 #endif
343     }
344 }
345 
subgroup_setQBVHNodeN_setFields(const int offset,const uint type,struct AABB * input_aabb,const uint numChildren,const uchar mask,struct QBVHNodeN * qbvh_node,const bool degenerated)346 GRL_INLINE void subgroup_setQBVHNodeN_setFields(const int offset, const uint type, struct AABB* input_aabb, const uint numChildren, const uchar mask, struct QBVHNodeN* qbvh_node, const bool degenerated)
347 {
348     struct AABB aabb = AABB_sub_group_reduce_N6(input_aabb);
349     subgroup_setQBVHNodeN_setFields_reduced_bounds(offset, type, input_aabb, numChildren, mask, qbvh_node, degenerated, aabb);
350 }
351 
subgroup_setQBVHNodeN_setFields_2xSIMD8_in_SIMD16(const int offset,const uint type,struct AABB * input_aabb,const uint numChildren,const uchar mask,struct QBVHNodeN * qbvh_node,const bool degenerated,bool active_lane)352 GRL_INLINE void subgroup_setQBVHNodeN_setFields_2xSIMD8_in_SIMD16(const int offset, const uint type, struct AABB* input_aabb, const uint numChildren, const uchar mask, struct QBVHNodeN* qbvh_node, const bool degenerated, bool active_lane)
353 {
354     const uint lane = get_sub_group_local_id() % 8;
355     const uint node_in_sg = get_sub_group_local_id() / 8;
356     const uint k = lane;
357     const float up = 1.0f + ulp;
358     const float down = 1.0f - ulp;
359 
360     struct AABB aabb = AABB_sub_group_reduce_N6(input_aabb); // needs to execute with full subgroup width
361     aabb = AABB_sub_group_shuffle(&aabb, node_in_sg * 8);
362 
363     if (lane < BVH_NODE_N6 && active_lane)
364     {
365         struct AABB conservative_aabb = conservativeAABB(&aabb);
366         const float3 len = AABB_size(&conservative_aabb).xyz * up;
367         int3 exp;
368         const float3 mant = frexp_vec3(len, &exp);
369         const float3 org = conservative_aabb.lower.xyz;
370 
371         exp += (mant > (float3)QUANT_MAX_MANT ? (int3)1 : (int3)0);
372 
373         qbvh_node->offset = offset;
374         qbvh_node->type = type;
375 
376         qbvh_node->lower[0] = org.x;
377         qbvh_node->lower[1] = org.y;
378         qbvh_node->lower[2] = org.z;
379 
380         qbvh_node->exp[0] = exp.x;
381         qbvh_node->exp[1] = exp.y;
382         qbvh_node->exp[2] = exp.z;
383 
384         qbvh_node->instMask = mask;
385 
386         uchar3 lower_uchar = (uchar3)(0x80);
387         uchar3 upper_uchar = (uchar3)(0);
388 
389         if (lane < numChildren)
390         {
391             struct AABB child_aabb = conservativeAABB(input_aabb);
392 
393             float3 lower = floor(bitShiftLdexp3((child_aabb.lower.xyz - org) * down, -exp + 8));
394             lower = clamp(lower, (float)(QUANT_MIN), (float)(QUANT_MAX));
395             float3 upper = ceil(bitShiftLdexp3((child_aabb.upper.xyz - org) * up, -exp + 8));
396             upper = clamp(upper, (float)(QUANT_MIN), (float)(QUANT_MAX));
397 
398             lower_uchar = convert_uchar3_rtn(lower);
399             upper_uchar = convert_uchar3_rtp(upper);
400 
401             if (degenerated)
402             {
403                 lower_uchar = upper_uchar = 0;
404             }
405         }
406 
407         qbvh_node->qbounds.lower_x[k] = lower_uchar.x;
408         qbvh_node->qbounds.lower_y[k] = lower_uchar.y;
409         qbvh_node->qbounds.lower_z[k] = lower_uchar.z;
410         qbvh_node->qbounds.upper_x[k] = upper_uchar.x;
411         qbvh_node->qbounds.upper_y[k] = upper_uchar.y;
412         qbvh_node->qbounds.upper_z[k] = upper_uchar.z;
413 
414         qbvh_node->childData[k] = (type == NODE_TYPE_INSTANCE) ? 2 : 1;
415 
416 #if ENABLE_CONVERSION_CHECKS == 1
417 
418         if (!(exp.x >= -128 && exp.x <= 127))
419             printf("exp_x error \n");
420         if (!(exp.y >= -128 && exp.y <= 127))
421             printf("exp_y error \n");
422         if (!(exp.z >= -128 && exp.z <= 127))
423             printf("exp_z error \n");
424 
425         struct AABB child_qaabb = extractAABB_QBVHNodeN(qbvh_node, k);
426         if (!AABB_subset(&child_aabb, &child_qaabb))
427         {
428             uint3 lower_i = convert_uint3(lower_uchar);
429             uint3 upper_i = convert_uint3(upper_uchar);
430 
431             printf("\n ERROR %d\n", k);
432             printf("lower %f upper %f \n lower_i %d  upper_i %d \n", lower, upper, lower_i, upper_i);
433             printf("%i uncompressed \n", k);
434             AABB_print(&child_aabb);
435             printf("%i compressed \n", k);
436             AABB_print(&child_qaabb);
437 
438             printf("%i uncompressed (as int) \n", k);
439             AABB_printasInt(&child_aabb);
440             printf("%i compressed (as int) \n", k);
441             AABB_printasInt(&child_qaabb);
442 
443             int4 e0 = child_aabb.lower < child_qaabb.lower;
444             int4 e1 = child_aabb.upper > child_qaabb.upper;
445             printf("e0 %d e1 %d \n", e0, e1);
446         }
447 #endif
448     }
449 }
450 
subgroup_setInstanceQBVHNodeN(const int offset,struct AABB * input_aabb,const uint numChildren,struct QBVHNodeN * qbvh_node,const uint instMask)451 GRL_INLINE void subgroup_setInstanceQBVHNodeN(const int offset, struct AABB *input_aabb, const uint numChildren, struct QBVHNodeN *qbvh_node, const uint instMask)
452 {
453     const uint subgroupLocalID = get_sub_group_local_id();
454 
455     // for degenerated (or inactive) instance ignore this box in exp, origin calculation and make its box be a point in the node origin.
456     // if it becomes non_degenerated on update, tree topology will be equivalent to what it would be if we would account this degenerated node here.
457     bool degenerated = (instMask == BVH_NODE_DEGENERATED_MASK);
458 
459     struct AABB aabb;
460     AABB_init(&aabb);
461 
462     // if every child is degenerated (or inactive) instance, we need to init aabb with origin point
463     uchar commonMask = sub_group_reduce_or_N6(instMask);
464     if (subgroupLocalID < numChildren && (!degenerated || commonMask == BVH_NODE_DEGENERATED_MASK))
465         aabb = *input_aabb;
466 
467     subgroup_setQBVHNodeN_setFields(offset, NODE_TYPE_INSTANCE, &aabb, numChildren, commonMask, qbvh_node, degenerated);
468 }
469 
470 
471 // return true if is degenerated
subgroup_setInstanceBox_2xSIMD8_in_SIMD16(struct AABB * input_aabb,const uint numChildren,uchar * mask,const uint instMask,bool active_lane)472 GRL_INLINE bool subgroup_setInstanceBox_2xSIMD8_in_SIMD16(struct AABB* input_aabb, const uint numChildren, uchar* mask, const uint instMask, bool active_lane)
473 {
474     const uint lane = get_sub_group_local_id() % 8;
475 
476     // for degenerated (or inactive) instance ignore this box in exp, origin calculation and make its box be a point in the node origin.
477     // if it becomes non_degenerated on update, tree topology will be equivalent to what it would be if we would account this degenerated node here.
478     bool degenerated = (instMask == BVH_NODE_DEGENERATED_MASK);
479 
480     // if every child is degenerated (or inactive) instance, we need to init aabb with origin point
481     uchar commonMask = sub_group_reduce_or_N6_2xSIMD8_in_SIMD16(instMask);
482     if (active_lane)
483         *mask = commonMask;
484 
485     if (active_lane && (degenerated && commonMask != BVH_NODE_DEGENERATED_MASK))
486         AABB_init(input_aabb);
487 
488     return active_lane ? degenerated : false;
489 }
490 
subgroup_setInstanceQBVHNodeN_x2(const int offset,struct AABB * input_aabb,const uint numChildren,struct QBVHNodeN * qbvh_node,const uint instMask,bool active_lane)491 GRL_INLINE void subgroup_setInstanceQBVHNodeN_x2(const int offset, struct AABB* input_aabb, const uint numChildren, struct QBVHNodeN* qbvh_node, const uint instMask, bool active_lane)
492 {
493     const uint lane = get_sub_group_local_id() % 8;
494 
495     // for degenerated (or inactive) instance ignore this box in exp, origin calculation and make its box be a point in the node origin.
496     // if it becomes non_degenerated on update, tree topology will be equivalent to what it would be if we would account this degenerated node here.
497     bool degenerated = (instMask == BVH_NODE_DEGENERATED_MASK);
498 
499     struct AABB aabb;
500     AABB_init(&aabb);
501 
502     // if every child is degenerated (or inactive) instance, we need to init aabb with origin point
503     uchar commonMask = sub_group_reduce_or_N6_2xSIMD8_in_SIMD16(instMask);
504     if (lane < numChildren && (!degenerated || commonMask == BVH_NODE_DEGENERATED_MASK))
505         aabb = *input_aabb;
506 
507     subgroup_setQBVHNodeN_setFields_2xSIMD8_in_SIMD16(offset, NODE_TYPE_INSTANCE, &aabb, numChildren, commonMask, qbvh_node, degenerated, active_lane);
508 }
509 
510 
subgroup_setQBVHNodeN(const int offset,const uint type,struct AABB * input_aabb,const uint numChildren,struct QBVHNodeN * qbvh_node,uint mask)511 GRL_INLINE void subgroup_setQBVHNodeN(const int offset, const uint type, struct AABB* input_aabb, const uint numChildren, struct QBVHNodeN* qbvh_node, uint mask)
512 {
513     const uint subgroupLocalID = get_sub_group_local_id();
514 
515     struct AABB aabb;
516     AABB_init(&aabb);
517 
518     if (subgroupLocalID < numChildren)
519         aabb = *input_aabb;
520 
521     subgroup_setQBVHNodeN_setFields(offset, type, &aabb, numChildren, mask, qbvh_node, false);
522 }
523 
524 
subgroup_setQBVHNodeN_x2(const int offset,const uint type,struct AABB * input_aabb,const uint numChildren,struct QBVHNodeN * qbvh_node,bool active_lane)525 GRL_INLINE void subgroup_setQBVHNodeN_x2(const int offset, const uint type, struct AABB* input_aabb, const uint numChildren, struct QBVHNodeN* qbvh_node, bool active_lane)
526 {
527     const uint lane = get_sub_group_local_id() % 8;
528 
529     struct AABB aabb;
530     AABB_init(&aabb);
531 
532     if (lane < numChildren)
533         aabb = *input_aabb;
534 
535     subgroup_setQBVHNodeN_setFields_2xSIMD8_in_SIMD16(offset, type, &aabb, numChildren, BVH_NODE_DEFAULT_MASK, qbvh_node, false, active_lane);
536 }
537 
538 
subgroup_QBVHNodeN_setBounds(uniform struct QBVHNodeN * qbvh_node,uniform struct AABB reduced_bounds,varying struct AABB input_aabb,uniform uint numChildren,varying ushort lane)539 GRL_INLINE void subgroup_QBVHNodeN_setBounds( uniform struct QBVHNodeN* qbvh_node,
540                                               uniform struct AABB reduced_bounds,
541                                               varying struct AABB input_aabb,
542                                               uniform uint numChildren,
543                                               varying ushort lane )
544 {
545     const float up = 1.0f + ulp;
546     const float down = 1.0f - ulp;
547 
548     int3 exp;
549 
550     struct AABB conservative_aabb = conservativeAABB( &reduced_bounds);
551     const float3 len = AABB_size( &conservative_aabb ).xyz * up;
552     const float3 mant = frexp_vec3( len, &exp );
553     const float3 org = conservative_aabb.lower.xyz;
554 
555     exp += (mant > ( float3 )QUANT_MAX_MANT ? (int3)1 : (int3)0);
556 
557     qbvh_node->lower[0] = org.x;
558     qbvh_node->lower[1] = org.y;
559     qbvh_node->lower[2] = org.z;
560 
561     qbvh_node->exp[0] = exp.x;
562     qbvh_node->exp[1] = exp.y;
563     qbvh_node->exp[2] = exp.z;
564 
565     qbvh_node->instMask = 0xff;
566 
567     uchar3 lower_uchar = 0x80;
568     uchar3 upper_uchar = 0;
569 
570     if ( lane < BVH_NODE_N6 )
571     {
572         ushort k = lane;
573         if( lane < numChildren )
574         {
575             struct AABB child_aabb = conservativeAABB( &input_aabb ); // conservative ???
576 
577             float3 lower = floor( bitShiftLdexp3( (child_aabb.lower.xyz - org) * down, -exp + 8 ) );
578             lower = clamp( lower, (float)(QUANT_MIN), (float)(QUANT_MAX) );
579             float3 upper = ceil( bitShiftLdexp3( (child_aabb.upper.xyz - org) * up, -exp + 8 ) );
580             upper = clamp( upper, (float)(QUANT_MIN), (float)(QUANT_MAX) );
581 
582             lower_uchar = convert_uchar3_rtn( lower );
583             upper_uchar = convert_uchar3_rtp( upper );
584         }
585 
586         qbvh_node->qbounds.lower_x[k] = lower_uchar.x;
587         qbvh_node->qbounds.lower_y[k] = lower_uchar.y;
588         qbvh_node->qbounds.lower_z[k] = lower_uchar.z;
589         qbvh_node->qbounds.upper_x[k] = upper_uchar.x;
590         qbvh_node->qbounds.upper_y[k] = upper_uchar.y;
591         qbvh_node->qbounds.upper_z[k] = upper_uchar.z;
592     }
593 
594 }
595 
QBVHNodeN_setBounds(struct QBVHNodeN * qbvh_node,struct AABB * input_aabb,const uint numChildren)596 GRL_INLINE void QBVHNodeN_setBounds(struct QBVHNodeN *qbvh_node, struct AABB *input_aabb, const uint numChildren)
597 {
598     const float up = 1.0f + ulp;
599     const float down = 1.0f - ulp;
600 
601     int3 exp;
602     struct AABB aabb;
603     AABB_init(&aabb);
604     for (uint i = 0; i < numChildren; i++)
605         AABB_extend(&aabb, &input_aabb[i]);
606 
607     struct AABB conservative_aabb = conservativeAABB(&aabb);
608     const float3 len = AABB_size(&conservative_aabb).xyz * up;
609     const float3 mant = frexp_vec3(len, &exp);
610     const float3 org = conservative_aabb.lower.xyz;
611 
612     exp += (mant > (float3)QUANT_MAX_MANT ? (int3)1 : (int3)0);
613 
614     qbvh_node->lower[0] = org.x;
615     qbvh_node->lower[1] = org.y;
616     qbvh_node->lower[2] = org.z;
617 
618     qbvh_node->exp[0] = exp.x;
619     qbvh_node->exp[1] = exp.y;
620     qbvh_node->exp[2] = exp.z;
621 
622     qbvh_node->instMask = 0xff;
623 
624     for (uint k = 0; k < numChildren; k++)
625     {
626         struct AABB child_aabb = conservativeAABB(&input_aabb[k]); // conservative ???
627 
628         float3 lower = floor(bitShiftLdexp3((child_aabb.lower.xyz - org) * down, -exp + 8));
629         lower = clamp(lower, (float)(QUANT_MIN), (float)(QUANT_MAX));
630         float3 upper = ceil(bitShiftLdexp3((child_aabb.upper.xyz - org) * up, -exp + 8));
631         upper = clamp(upper, (float)(QUANT_MIN), (float)(QUANT_MAX));
632 
633         uchar3 lower_uchar = convert_uchar3_rtn(lower);
634         uchar3 upper_uchar = convert_uchar3_rtp(upper);
635 
636         qbvh_node->qbounds.lower_x[k] = lower_uchar.x;
637         qbvh_node->qbounds.lower_y[k] = lower_uchar.y;
638         qbvh_node->qbounds.lower_z[k] = lower_uchar.z;
639         qbvh_node->qbounds.upper_x[k] = upper_uchar.x;
640         qbvh_node->qbounds.upper_y[k] = upper_uchar.y;
641         qbvh_node->qbounds.upper_z[k] = upper_uchar.z;
642 
643 #if ENABLE_CONVERSION_CHECKS == 1
644         if (!(exp.x >= -128 && exp.x <= 127))
645             printf("exp_x error \n");
646         if (!(exp.y >= -128 && exp.y <= 127))
647             printf("exp_y error \n");
648         if (!(exp.z >= -128 && exp.z <= 127))
649             printf("exp_z error \n");
650 
651         struct AABB child_qaabb = extractAABB_QBVHNodeN(qbvh_node, k);
652         if (!AABB_subset(&child_aabb, &child_qaabb))
653         {
654             uint3 lower_i = convert_uint3(lower_uchar);
655             uint3 upper_i = convert_uint3(upper_uchar);
656 
657             printf("\n ERROR %d\n", k);
658             printf("lower %f upper %f \n lower_i %d  upper_i %d \n", lower, upper, lower_i, upper_i);
659             printf("%i uncompressed \n", k);
660             AABB_print(&child_aabb);
661             printf("%i compressed \n", k);
662             AABB_print(&child_qaabb);
663 
664             printf("%i uncompressed (as int) \n", k);
665             AABB_printasInt(&child_aabb);
666             printf("%i compressed (as int) \n", k);
667             AABB_printasInt(&child_qaabb);
668 
669             int4 e0 = child_aabb.lower < child_qaabb.lower;
670             int4 e1 = child_aabb.upper > child_qaabb.upper;
671             printf("e0 %d e1 %d \n", e0, e1);
672         }
673 #endif
674     }
675     for (uint k = numChildren; k < BVH_NODE_N6; k++)
676     {
677         qbvh_node->qbounds.lower_x[k] = 0x80;
678         qbvh_node->qbounds.lower_y[k] = 0x80;
679         qbvh_node->qbounds.lower_z[k] = 0x80;
680         qbvh_node->qbounds.upper_x[k] = 0;
681         qbvh_node->qbounds.upper_y[k] = 0;
682         qbvh_node->qbounds.upper_z[k] = 0;
683     }
684 }
685 
QBVHNodeN_setChildren(struct QBVHNodeN * qbvh_node,const int offset,const uint numChildren)686 GRL_INLINE void QBVHNodeN_setChildren(struct QBVHNodeN *qbvh_node, const int offset, const uint numChildren)
687 {
688     qbvh_node->offset = offset;
689     for (uint k = 0; k < BVH_NODE_N6; k++)
690         qbvh_node->childData[k] = 1;
691 }
692 
QBVHNodeN_setChildIncr1(struct QBVHNodeN * qbvh_node)693 GRL_INLINE void QBVHNodeN_setChildIncr1(struct QBVHNodeN *qbvh_node)
694 {
695     for (uint k = 0; k < BVH_NODE_N6; k++)
696         qbvh_node->childData[k] = 1;
697 }
698 
SUBGROUP_QBVHNodeN_setChildIncr1(struct QBVHNodeN * qbvh_node)699 GRL_INLINE void SUBGROUP_QBVHNodeN_setChildIncr1(struct QBVHNodeN *qbvh_node)
700 {
701     if( get_sub_group_local_id() < BVH_NODE_N6 )
702         qbvh_node->childData[get_sub_group_local_id()] = 1;
703 }
704 
705 
QBVHNodeN_setChildIncr2(struct QBVHNodeN * qbvh_node)706 GRL_INLINE void QBVHNodeN_setChildIncr2(struct QBVHNodeN *qbvh_node)
707 {
708     for (uint k = 0; k < BVH_NODE_N6; k++)
709         qbvh_node->childData[k] = 2;
710 }
711 
QBVHNodeN_setType(struct QBVHNodeN * qbvh_node,const uint type)712 GRL_INLINE void QBVHNodeN_setType(struct QBVHNodeN *qbvh_node, const uint type)
713 {
714     qbvh_node->type = type;
715 }
716 
setQBVHNodeN(const int offset,const uint type,struct AABB * input_aabb,const uint numChildren,struct QBVHNodeN * qbvh_node)717 GRL_INLINE void setQBVHNodeN(const int offset, const uint type, struct AABB *input_aabb, const uint numChildren, struct QBVHNodeN *qbvh_node)
718 {
719     QBVHNodeN_setType(qbvh_node, type);
720     QBVHNodeN_setChildren(qbvh_node, offset, numChildren);
721     QBVHNodeN_setBounds(qbvh_node, input_aabb, numChildren);
722 }
723 
printQBVHNodeN(struct QBVHNodeN * qnode)724 GRL_INLINE void printQBVHNodeN(struct QBVHNodeN *qnode)
725 {
726     printf(" offset %d type %d \n", qnode->offset, (int)qnode->type);
727     printf(" lower %f %f %f \n", qnode->lower[0], qnode->lower[1], qnode->lower[2]);
728     printf(" exp %d %d %d \n", (int)qnode->exp[0], (int)qnode->exp[1], (int)qnode->exp[2]);
729     printf(" instMask %d \n", qnode->instMask);
730 
731     struct AABB aabb0 = extractAABB_QBVHNodeN(qnode, 0);
732     struct AABB aabb1 = extractAABB_QBVHNodeN(qnode, 1);
733     struct AABB aabb2 = extractAABB_QBVHNodeN(qnode, 2);
734     struct AABB aabb3 = extractAABB_QBVHNodeN(qnode, 3);
735     struct AABB aabb4 = extractAABB_QBVHNodeN(qnode, 4);
736     struct AABB aabb5 = extractAABB_QBVHNodeN(qnode, 5);
737 
738     printf(" lower_x %d %d %d %d %d %d %f %f %f %f %f %f\n", qnode->qbounds.lower_x[0], qnode->qbounds.lower_x[1], qnode->qbounds.lower_x[2], qnode->qbounds.lower_x[3], qnode->qbounds.lower_x[4], qnode->qbounds.lower_x[5], aabb0.lower.x, aabb1.lower.x, aabb2.lower.x, aabb3.lower.x, aabb4.lower.x, aabb5.lower.x);
739     printf(" upper_x %d %d %d %d %d %d %f %f %f %f %f %f\n", qnode->qbounds.upper_x[0], qnode->qbounds.upper_x[1], qnode->qbounds.upper_x[2], qnode->qbounds.upper_x[3], qnode->qbounds.upper_x[4], qnode->qbounds.upper_x[5], aabb0.upper.x, aabb1.upper.x, aabb2.upper.x, aabb3.upper.x, aabb4.upper.x, aabb5.upper.x);
740 
741     printf(" lower_y %d %d %d %d %d %d %f %f %f %f %f %f\n", qnode->qbounds.lower_y[0], qnode->qbounds.lower_y[1], qnode->qbounds.lower_y[2], qnode->qbounds.lower_y[3], qnode->qbounds.lower_y[4], qnode->qbounds.lower_y[5], aabb0.lower.y, aabb1.lower.y, aabb2.lower.y, aabb3.lower.y, aabb4.lower.y, aabb5.lower.y);
742     printf(" upper_y %d %d %d %d %d %d %f %f %f %f %f %f\n", qnode->qbounds.upper_y[0], qnode->qbounds.upper_y[1], qnode->qbounds.upper_y[2], qnode->qbounds.upper_y[3], qnode->qbounds.upper_y[4], qnode->qbounds.upper_y[5], aabb0.upper.y, aabb1.upper.y, aabb2.upper.y, aabb3.upper.y, aabb4.upper.y, aabb5.upper.y);
743 
744     printf(" lower_z %d %d %d %d %d %d %f %f %f %f %f %f\n", qnode->qbounds.lower_z[0], qnode->qbounds.lower_z[1], qnode->qbounds.lower_z[2], qnode->qbounds.lower_z[3], qnode->qbounds.lower_z[4], qnode->qbounds.lower_z[5], aabb0.lower.z, aabb1.lower.z, aabb2.lower.z, aabb3.lower.z, aabb4.lower.z, aabb5.lower.z);
745     printf(" upper_z %d %d %d %d %d %d %f %f %f %f %f %f\n", qnode->qbounds.upper_z[0], qnode->qbounds.upper_z[1], qnode->qbounds.upper_z[2], qnode->qbounds.upper_z[3], qnode->qbounds.upper_z[4], qnode->qbounds.upper_z[5], aabb0.upper.z, aabb1.upper.z, aabb2.upper.z, aabb3.upper.z, aabb4.upper.z, aabb5.upper.z);
746 }
747 
encodeOffset(global char * bvh_mem,global void * parent,int global_child_offset)748 GRL_INLINE int encodeOffset(global char *bvh_mem, global void *parent, int global_child_offset)
749 {
750     long global_parent_offset = (long)parent - (long)bvh_mem;
751     global_parent_offset = global_parent_offset & (~(64 - 1));        // FIXME: (sw) this should not be necessary?
752     int relative_offset = global_child_offset - global_parent_offset; // FIXME: this limits BVH size to 4GB
753     //if ((int)relative_offset <= 0) printf("relative offset <= 0 %d global_child_offset %d global_parent_offset %d \n", relative_offset,global_child_offset,global_parent_offset);
754     return relative_offset;
755 }
756 
QBVH6Node_set_offset(struct QBVHNodeN * qnode,void * children)757 GRL_INLINE void QBVH6Node_set_offset(struct QBVHNodeN *qnode, void *children)
758 {
759     int ofs = (struct QBVHNodeN *)children - qnode;
760     qnode->offset = ofs;
761 }
762 
QBVH6Node_set_type(struct QBVHNodeN * qnode,uint type)763 GRL_INLINE void QBVH6Node_set_type(struct QBVHNodeN *qnode, uint type)
764 {
765     qnode->type = type;
766 }
767 
sortBVHChildrenIDs(uint input)768 GRL_INLINE uint sortBVHChildrenIDs(uint input)
769 {
770 #if BVH_NODE_N == 8
771     return sort8_descending(input);
772 #else
773     return sort4_descending(input);
774 #endif
775 }
776 
// Selects how compute_xfm_bbox applies the clip box.
enum XFM_BOX_OPTION {
    // The clip box is not used.
    XFM_BOX_NO_CLIP = 0,

    // Use the clip box; for a not-refined node, compute the bbox from the
    // children and transform it after extending them to one box.
    XFM_BOX_NOT_REFINED_CLIPPED = 1,

    // Use the clip box; for a not-refined node, just transform the clip box
    // and do not take the children boxes into account.
    XFM_BOX_NOT_REFINED_TAKE_CLIPBOX = 2,
};
782 
// Non-zero compiles in the debug printf calls guarded by `#if DEB_PRINTFS`.
#define DEB_PRINTFS 0

// FINE_TRANSFORM_NODE_BOX may be defined before this point; it defaults to 0.
#if !defined(FINE_TRANSFORM_NODE_BOX)
#define FINE_TRANSFORM_NODE_BOX 0
#endif
compute_xfm_bbox(const float * xfm,InternalNode * pnode,enum XFM_BOX_OPTION clipOpt,const AABB3f * clipBox,float matrixTransformOverhead)788 GRL_INLINE struct AABB3f GRL_OVERLOADABLE compute_xfm_bbox(const float* xfm, InternalNode* pnode, enum XFM_BOX_OPTION clipOpt, const AABB3f* clipBox, float matrixTransformOverhead)
789 {
790     AABB3f childrenbox;
791 #if FINE_TRANSFORM_NODE_BOX
792     struct AffineSpace3f axfm = AffineSpace3f_load_row_major(xfm);
793     bool computeFine = matrixTransformOverhead < 0.6f;
794     computeFine = sub_group_any(computeFine);
795     if (computeFine)
796     {
797         bool clip = clipOpt != XFM_BOX_NO_CLIP;
798         InternalNode node = *pnode;
799 
800 #if DEB_PRINTFS
801         if (InternalNode_IsChildValid(&node, 5) && !InternalNode_IsChildValid(&node, 4))
802             printf("child 5 valid && child 4 invalid\n");
803         if (InternalNode_IsChildValid(&node, 4) && !InternalNode_IsChildValid(&node, 3))
804             printf("child 4 valid && child 3 invalid\n");
805         if (InternalNode_IsChildValid(&node, 3) && !InternalNode_IsChildValid(&node, 2))
806             printf("child 3 valid && child 2 invalid\n");
807         if (InternalNode_IsChildValid(&node, 2) && !InternalNode_IsChildValid(&node, 1))
808             printf("child 2 valid && child 1 invalid\n");
809         if (InternalNode_IsChildValid(&node, 1) && !InternalNode_IsChildValid(&node, 0))
810             printf("child 1 valid && child 0 invalid\n");
811 #endif
812 
813 #if DEB_PRINTFS
814         printf("F");
815 #endif
816         AABB3f child_bounds0 = InternalNode_GetChildAABB(&node, 0);
817         AABB3f child_bounds1 = InternalNode_GetChildAABB(&node, 1);
818         AABB3f child_bounds2 = InternalNode_GetChildAABB(&node, 2);
819         AABB3f child_bounds3 = InternalNode_GetChildAABB(&node, 3);
820         AABB3f child_bounds4 = InternalNode_GetChildAABB(&node, 4);
821         AABB3f child_bounds5 = InternalNode_GetChildAABB(&node, 5);
822 
823         // we bravely assumme we will have at least 2 children here.
824         if(!InternalNode_IsChildValid(&node, 2)) child_bounds2 = child_bounds0;
825         if(!InternalNode_IsChildValid(&node, 3)) child_bounds3 = child_bounds0;
826         if(!InternalNode_IsChildValid(&node, 4)) child_bounds4 = child_bounds0;
827         if(!InternalNode_IsChildValid(&node, 5)) child_bounds5 = child_bounds0;
828 
829         if (clip)
830         {
831             AABB3f_trim_upper(&child_bounds0, clipBox->upper);
832             AABB3f_trim_upper(&child_bounds1, clipBox->upper);
833             AABB3f_trim_upper(&child_bounds2, clipBox->upper);
834             AABB3f_trim_upper(&child_bounds3, clipBox->upper);
835             AABB3f_trim_upper(&child_bounds4, clipBox->upper);
836             AABB3f_trim_upper(&child_bounds5, clipBox->upper);
837         }
838 
839         child_bounds0 = transform_aabb(child_bounds0, xfm);
840         child_bounds1 = transform_aabb(child_bounds1, xfm);
841         child_bounds2 = transform_aabb(child_bounds2, xfm);
842         child_bounds3 = transform_aabb(child_bounds3, xfm);
843         child_bounds4 = transform_aabb(child_bounds4, xfm);
844         child_bounds5 = transform_aabb(child_bounds5, xfm);
845 
846         AABB3f_extend(&child_bounds0, &child_bounds1);
847         AABB3f_extend(&child_bounds2, &child_bounds3);
848         AABB3f_extend(&child_bounds4, &child_bounds5);
849         AABB3f_extend(&child_bounds0, &child_bounds2);
850         AABB3f_extend(&child_bounds0, &child_bounds4);
851 
852         return child_bounds0;
853     }
854 #endif
855 
856 #if DEB_PRINTFS
857     printf("0");
858 #endif
859 
860     struct AABB3f child_bounds;
861 
862     if (clipOpt != XFM_BOX_NOT_REFINED_TAKE_CLIPBOX)
863     {
864         // XFM_BOX_NOT_REFINED_CLIPPED || XFM_BOX_NO_CLIP
865         child_bounds = InternalNode_getAABB3f(pnode);
866         if (clipOpt != XFM_BOX_NO_CLIP)
867         {
868             AABB3f_intersect(&child_bounds, *clipBox);
869         }
870     }
871     else
872     {
873         //XFM_BOX_NOT_REFINED_TAKE_CLIPBOX
874         child_bounds = *clipBox;
875     }
876 
877     child_bounds = transform_aabb(child_bounds, xfm);
878     //child_bounds = conservativeAABB3f(&child_bounds);
879     return child_bounds;
880 }
881 
compute_xfm_bbox(struct AffineSpace3f xfm,InternalNode * pnode,bool clip,AABB3f * clipBox,float matOverhead)882 GRL_INLINE AABB3f GRL_OVERLOADABLE compute_xfm_bbox(struct AffineSpace3f xfm, InternalNode* pnode, bool clip, AABB3f* clipBox, float matOverhead)
883 {
884     float transform[12];
885     load_row_major_from_AffineSpace3f(xfm, transform);
886     return compute_xfm_bbox(transform, pnode, clip, clipBox, matOverhead);
887 }
888 
compute_refit_structs_compacted_size(BVHBase * base)889 GRL_INLINE uint64_t compute_refit_structs_compacted_size(BVHBase* base)
890 {
891     uint dataSize = 0;
892 
893     if (BVHBase_HasBackPointers(base))
894     {
895         const uint fatleafEntrySize = (base->fatLeafCount * sizeof(LeafTableEntry) + 63) & ~63;
896         const uint innerEntrySize = (base->innerCount * sizeof(InnerNodeTableEntry) + 63) & ~63;
897 
898         // New atomic update
899         if(base->quadIndicesDataStart > base->backPointerDataStart)
900         {
901             uint numQuads = BVHBase_GetNumQuads(base);
902 
903             const uint quadTableMainBufferSize = (numQuads + 255) & ~255;
904             const uint quadLeftoversSize = (base->quadLeftoversCountNewAtomicUpdate + 255) & ~255;
905             const uint quadTableEntriesSize = (((quadTableMainBufferSize + quadLeftoversSize) * sizeof(LeafTableEntry) + 63) & ~63);
906 
907             const uint quadIndicesDataSize = (numQuads * sizeof(QuadDataIndices) + 63) & ~63;
908 
909             dataSize += quadTableEntriesSize + quadIndicesDataSize;
910         }
911 
912         dataSize +=
913             ((BVHBase_GetNumInternalNodes(base) * sizeof(uint) + 63) & ~63)
914             + fatleafEntrySize + innerEntrySize;
915     }
916 
917     return (uint64_t)dataSize;
918 }
919 
compute_compacted_size(BVHBase * base)920 GRL_INLINE uint64_t compute_compacted_size(BVHBase* base)
921 {
922     uint64_t size = sizeof(BVHBase);
923     size += BVHBase_GetNumHWInstanceLeaves(base) * sizeof(HwInstanceLeaf);
924     size += BVHBase_GetNumProcedurals(base) * sizeof(ProceduralLeaf);
925     size += BVHBase_GetNumQuads(base) * sizeof(QuadLeaf);
926     size += compute_refit_structs_compacted_size(base);
927     size += BVHBase_GetNumInternalNodes(base) * sizeof(InternalNode);
928     size += sizeof(InstanceDesc) * base->Meta.instanceCount;
929     size += (sizeof(GeoMetaData) * base->Meta.geoCount + 63) & ~63; // align to 64
930     size = (size + 63) & ~63;
931 
932     return size;
933 }
934