1 //
2 // Copyright (C) 2009-2021 Intel Corporation
3 //
4 // SPDX-License-Identifier: MIT
5 //
6 //
7
8 #pragma once
9
10 #include "GRLGen12.h"
11
12 #include "shared.h"
13 #include "quad.h"
14
/* ====== GENERAL BVH config ====== */

#define BVH_NODE_N6 6          /* fat leaf / QBVH6 node arity: up to 6 children per node */
#define BVH_NODE_N 8           /* wide-node arity used by the sorting network below */
#define BVH_NODE_N_LOG 3       /* log2(BVH_NODE_N) */

#define SAH_LOG_BLOCK_SHIFT 2  /* SAH cost blocks of 4 primitives (1 << 2) */
#define BVH_LEAF_N_MIN BVH_NODE_N6  /* min primitives per leaf */
#define BVH_LEAF_N_MAX BVH_NODE_N6  /* max primitives per leaf */

#define BVH_NODE_DEFAULT_MASK 0xff      /* instance mask: all ray groups hit */
#define BVH_NODE_DEGENERATED_MASK 0x00  /* instance mask for degenerated/inactive instances */

/* ====== QUANTIZATION config ====== */

#define QUANT_BITS 8                      /* child bounds quantized to 8-bit grid */
#define QUANT_MIN 0                       /* smallest quantized coordinate */
#define QUANT_MAX 255                     /* largest quantized coordinate */
#define QUANT_MAX_MANT (255.0f / 256.0f)  /* largest mantissa representable by the 8-bit grid */

#define NO_NODE_OFFSET 0  /* sentinel meaning "no child node" */

/* ======================================================================= */
/* ============================== BVH BASE =============================== */
/* ======================================================================= */
40
/* Copies a world-space AABB into the BVH header's metadata bounds. */
GRL_INLINE void setBVHBaseBounds(struct BVHBase *base, struct AABB *aabb)
{
    base->Meta.bounds.lower[0] = aabb->lower.s0;
    base->Meta.bounds.lower[1] = aabb->lower.s1;
    base->Meta.bounds.lower[2] = aabb->lower.s2;

    base->Meta.bounds.upper[0] = aabb->upper.s0;
    base->Meta.bounds.upper[1] = aabb->upper.s1;
    base->Meta.bounds.upper[2] = aabb->upper.s2;
}
51
/* Returns the start of the internal node array (located at a fixed byte
 * offset from the BVH base). */
GRL_INLINE global struct QBVHNodeN *BVHBase_nodeData(struct BVHBase *bvh)
{
    global char *raw = (global char *)bvh;
    return (global struct QBVHNodeN *)(raw + BVH_ROOT_NODE_OFFSET);
}
56
/* The root node is simply the first entry of the node array. */
GRL_INLINE global struct QBVHNodeN *BVHBase_rootNode(struct BVHBase *bvh)
{
    return (global struct QBVHNodeN *)((global char *)bvh + BVH_ROOT_NODE_OFFSET);
}
61
/* Returns the quad-leaf array; quadLeafStart counts 64-byte blocks from
 * the BVH base. */
GRL_INLINE global struct Quad *BVHBase_quadLeaves(struct BVHBase *bvh)
{
    const ulong byteOffset = 64 * (ulong)bvh->quadLeafStart;
    return (global struct Quad *)((global char *)bvh + byteOffset);
}
66
/* Number of allocated internal nodes. nodeDataCur counts 64-byte blocks
 * from the BVH base, so the root offset is converted to blocks as well.
 * (Parentheses make the original precedence-dependent expression explicit.) */
GRL_INLINE uint64_t BVHBase_numNodes(struct BVHBase *bvh)
{
    const uint64_t rootBlock = BVH_ROOT_NODE_OFFSET / 64;
    return bvh->nodeDataCur - rootBlock;
}
71
/* Number of quad leaves currently written (allocation cursor minus start). */
GRL_INLINE uint64_t BVHBase_numQuads(struct BVHBase *bvh)
{
    const uint64_t cur = bvh->quadLeafCur;
    const uint64_t start = bvh->quadLeafStart;
    return cur - start;
}
76
/* Number of procedural leaves currently written. */
GRL_INLINE uint64_t BVHBase_numProcedurals(struct BVHBase *bvh)
{
    const uint64_t cur = bvh->proceduralDataCur;
    const uint64_t start = bvh->proceduralDataStart;
    return cur - start;
}
81
/* Number of instance leaves (end minus start of the instance-leaf range). */
GRL_INLINE uint64_t BVHBase_numInstances(struct BVHBase *bvh)
{
    const uint64_t end = bvh->instanceLeafEnd;
    const uint64_t start = bvh->instanceLeafStart;
    return end - start;
}
86
87 /* =================================================================== */
88 /* ============================== QBVH =============================== */
89 /* =================================================================== */
90
/* One float machine epsilon; used below to pad boxes so quantized bounds stay conservative. */
__constant const float ulp = FLT_EPSILON;
92
/* Pads a box by ulp * (largest absolute coordinate) on every side so the
 * result strictly contains the input even after rounding. */
GRL_INLINE struct AABB conservativeAABB(struct AABB *aabb)
{
    const float4 absMax = max(fabs(aabb->lower), fabs(aabb->upper));
    const float pad = ulp * max(absMax.x, max(absMax.y, absMax.z));

    struct AABB padded;
    padded.lower = aabb->lower - (float4)pad;
    padded.upper = aabb->upper + (float4)pad;
    return padded;
}
102
/* 3-float variant: widens to AABB, pads conservatively, narrows back. */
GRL_INLINE struct AABB3f conservativeAABB3f(struct AABB3f* aabb3d)
{
    struct AABB wide = AABBfromAABB3f(*aabb3d);
    struct AABB padded = conservativeAABB(&wide);
    return AABB3fFromAABB(padded);
}
109
/* Quantized per-child bounds of a QBVH6 node: one 8-bit coordinate per
 * axis per child, relative to the node origin and per-axis exponent
 * stored in QBVHNodeN. An invalid child slot stores lower > upper. */
struct QBVH_AABB
{
    uchar lower_x[BVH_NODE_N6];
    uchar upper_x[BVH_NODE_N6];
    uchar lower_y[BVH_NODE_N6];
    uchar upper_y[BVH_NODE_N6];
    uchar lower_z[BVH_NODE_N6];
    uchar upper_z[BVH_NODE_N6];
};
119
/* 64-byte QBVH6 internal node. Child boxes are dequantized as
 * lower + q * 2^(exp - 8) per axis. */
struct QBVHNodeN
{
    float lower[3]; /* node origin: common lower corner of all children */
    int offset;     /* signed distance to first child, in 64-byte units */
    // 16 bytes
    uchar type;     /* node type (e.g. NODE_TYPE_INSTANCE) */
    uchar pad;
    // 18 bytes
    char exp[3];    /* per-axis quantization exponents */
    uchar instMask; /* OR of child instance masks */
    // 22 bytes
    uchar childData[6]; /* per child: bits 0-1 block increment, bits 2-5 start prim */
    // 28 bytes
    struct QBVH_AABB qbounds; // + 36 bytes
    // 64 bytes
};
136
/* Block increment of a child: low 2 bits of its childData byte
 * (number of 64-byte blocks to advance per child). */
GRL_INLINE uint QBVHNodeN_blockIncr(struct QBVHNodeN *This, uint childID)
{
    const uint data = This->childData[childID];
    return data & 0x3;
}
141
/* Start-primitive index of a child: bits 2..5 of its childData byte. */
GRL_INLINE uint QBVHNodeN_startPrim(struct QBVHNodeN *This, uint childID)
{
    const uint data = This->childData[childID];
    return (data >> 2) & 0xF;
}
146
/* Zeroes the full 64-byte node, written as 16 dwords. */
GRL_INLINE void initQBVHNodeN(struct QBVHNodeN *qnode)
{
    uint *dst = (uint *)qnode;
    for (uint dw = 0; dw < 16; ++dw)
        dst[dw] = 0u;
}
153
/* Dequantizes child slot i of a QBVH node back to a full-precision AABB:
 * child = origin + q * 2^(exp - 8), rounding lower down and upper up so
 * the result stays conservative. */
GRL_INLINE struct AABB extractAABB_QBVHNodeN(struct QBVHNodeN *qnode, uint i)
{
    struct AABB aabb;
    const float4 base = (float4)(qnode->lower[0], qnode->lower[1], qnode->lower[2], 0.0f);
    const int4 lower_i = (int4)(qnode->qbounds.lower_x[i], qnode->qbounds.lower_y[i], qnode->qbounds.lower_z[i], 0);
    const int4 upper_i = (int4)(qnode->qbounds.upper_x[i], qnode->qbounds.upper_y[i], qnode->qbounds.upper_z[i], 0);
    /* Fix: the w component of this int4 literal was the float 0.0f;
     * use an integer 0 to match the vector's element type. */
    const int4 exp_i = (int4)(qnode->exp[0], qnode->exp[1], qnode->exp[2], 0);
    aabb.lower = base + bitShiftLdexp4(convert_float4_rtn(lower_i), exp_i - 8);
    aabb.upper = base + bitShiftLdexp4(convert_float4_rtp(upper_i), exp_i - 8);
    return aabb;
}
165
/* Computes the overall AABB of a QBVH node by merging its valid children's
 * quantized bounds in integer space, then dequantizing once at the end.
 * A child slot is valid iff lower_x <= upper_x (empty slots store 0x80/0).
 * NOTE(review): the merge seeds from slot 0 unconditionally — assumes
 * slot 0 is always populated; confirm against node construction. */
GRL_INLINE struct AABB getAABB_QBVHNodeN(struct QBVHNodeN *qnode)
{
    struct AABB aabb;
#if 0
    /* Reference path: dequantize each child and merge in float space. */
    AABB_init(&aabb);
    for (uint i = 0; i < BVH_NODE_N6; i++)
    {
        struct AABB v = extractAABB_QBVHNodeN(qnode, i);
        AABB_extend(&aabb, &v);
    }
#else
    uint lower_x = qnode->qbounds.lower_x[0];
    uint lower_y = qnode->qbounds.lower_y[0];
    uint lower_z = qnode->qbounds.lower_z[0];

    uint upper_x = qnode->qbounds.upper_x[0];
    uint upper_y = qnode->qbounds.upper_y[0];
    uint upper_z = qnode->qbounds.upper_z[0];

    for (uint i = 1; i < BVH_NODE_N6; i++)
    {
        uint lx = qnode->qbounds.lower_x[i];
        uint ly = qnode->qbounds.lower_y[i];
        uint lz = qnode->qbounds.lower_z[i];

        uint ux = qnode->qbounds.upper_x[i];
        uint uy = qnode->qbounds.upper_y[i];
        uint uz = qnode->qbounds.upper_z[i];

        /* Skip unpopulated slots (quantized lower > upper). */
        bool valid = lx <= ux;
        if (valid)
        {
            lower_x = min(lower_x, lx);
            lower_y = min(lower_y, ly);
            lower_z = min(lower_z, lz);

            upper_x = max(upper_x, ux);
            upper_y = max(upper_y, uy);
            upper_z = max(upper_z, uz);
        }
    }

    const float4 base = (float4)(qnode->lower[0], qnode->lower[1], qnode->lower[2], 0.0f);
    const int4 lower_i = (int4)(lower_x, lower_y, lower_z, 0);
    const int4 upper_i = (int4)(upper_x, upper_y, upper_z, 0);
    /* Fix: integer 0 for the int4 literal's w component (was the float 0.0f). */
    const int4 exp_i = (int4)(qnode->exp[0], qnode->exp[1], qnode->exp[2], 0);
    /* Dequantize: round lower down, upper up, to stay conservative. */
    aabb.lower = base + bitShiftLdexp4(convert_float4_rtn(lower_i), exp_i - 8);
    aabb.upper = base + bitShiftLdexp4(convert_float4_rtp(upper_i), exp_i - 8);
#endif
    return aabb;
}
217
/* InternalNode and QBVHNodeN share the same 64-byte layout; merge the
 * node's child bounds and return them as a 3-float box. */
GRL_INLINE struct AABB3f InternalNode_getAABB3f(struct InternalNode* node)
{
    struct AABB merged = getAABB_QBVHNodeN((struct QBVHNodeN*)node);
    return AABB3fFromAABB(merged);
}
222
/* Counts populated child slots. A slot is valid iff its quantized
 * lower_x <= upper_x (empty slots store 0x80 > 0). */
GRL_INLINE uint getNumChildren_QBVHNodeN(struct QBVHNodeN *qnode)
{
    uint count = 0;
    for (uint slot = 0; slot < BVH_NODE_N6; ++slot)
    {
        const uint lo = qnode->qbounds.lower_x[slot];
        const uint hi = qnode->qbounds.upper_x[slot];
        count += (lo <= hi) ? 1 : 0;
    }
    return count;
}
236
/* Child offset in bytes: the stored offset counts 64-byte units. */
GRL_INLINE long extractQBVHNodeN_offset(struct QBVHNodeN *qnode)
{
    const long blocks = qnode->offset;
    return blocks << 6;
}
241
/* Pointer to the first child. Pointer arithmetic on QBVHNodeN* steps in
 * 64-byte (node-sized) strides, matching the 64-byte-unit offset. */
GRL_INLINE void *QBVHNodeN_childrenPointer(struct QBVHNodeN *qnode)
{
    return (void *)(qnode + qnode->offset);
}
247
/*
 * Subgroup-cooperative QBVH6 node writer (one child box per SIMD lane),
 * taking an already-reduced parent box.
 *
 *  offset       - child offset in 64-byte units, written to the node
 *  type         - node type byte
 *  input_aabb   - this lane's child AABB (lanes >= numChildren write empty slots)
 *  numChildren  - number of valid children (<= BVH_NODE_N6)
 *  mask         - instance mask to store
 *  qbvh_node    - destination node (all lanes point at the same node)
 *  degenerated  - this lane's child is degenerated: collapse its quantized
 *                 box to a point at the node origin
 *  reduced_aabb - subgroup-reduced union of the children (lane 0's value is used)
 *
 * Must be called with the full subgroup active: the broadcast below reads
 * lane 0 across all lanes before the lane mask is applied.
 */
GRL_INLINE void subgroup_setQBVHNodeN_setFields_reduced_bounds(const int offset, const uint type, struct AABB* input_aabb, const uint numChildren, const uchar mask, struct QBVHNodeN* qbvh_node, const bool degenerated, struct AABB reduced_aabb)
{
    const uint subgroupLocalID = get_sub_group_local_id();
    const uint k = subgroupLocalID;
    /* Multiplicative padding factors used to keep quantized bounds conservative. */
    const float up = 1.0f + ulp;
    const float down = 1.0f - ulp;

    struct AABB aabb = reduced_aabb; // needs to execute with full subgroup width
    aabb = AABB_sub_group_broadcast(&aabb, 0);

    if (subgroupLocalID < BVH_NODE_N6)
    {
        /* Derive the per-axis quantization grid: origin = padded lower corner,
         * exponent chosen so the padded extent fits in 8 bits. */
        struct AABB conservative_aabb = conservativeAABB(&aabb);
        const float3 len = AABB_size(&conservative_aabb).xyz * up;
        int3 exp;
        const float3 mant = frexp_vec3(len, &exp);
        const float3 org = conservative_aabb.lower.xyz;

        /* Bump the exponent when the mantissa exceeds the largest value the
         * 8-bit grid can represent (255/256). */
        exp += (mant > (float3)QUANT_MAX_MANT ? (int3)1 : (int3)0);

        qbvh_node->offset = offset;
        qbvh_node->type = type;

        qbvh_node->lower[0] = org.x;
        qbvh_node->lower[1] = org.y;
        qbvh_node->lower[2] = org.z;

        qbvh_node->exp[0] = exp.x;
        qbvh_node->exp[1] = exp.y;
        qbvh_node->exp[2] = exp.z;

        qbvh_node->instMask = mask;

        /* Default: invalid slot encoding (lower 0x80 > upper 0). */
        uchar3 lower_uchar = (uchar3)(0x80);
        uchar3 upper_uchar = (uchar3)(0);

        if (subgroupLocalID < numChildren)
        {
            struct AABB child_aabb = conservativeAABB(input_aabb);

            /* Quantize relative to the origin: scale by 2^(8-exp), floor the
             * lower and ceil the upper so the stored box contains the child. */
            float3 lower = floor(bitShiftLdexp3((child_aabb.lower.xyz - org) * down, -exp + 8));
            lower = clamp(lower, (float)(QUANT_MIN), (float)(QUANT_MAX));
            float3 upper = ceil(bitShiftLdexp3((child_aabb.upper.xyz - org) * up, -exp + 8));
            upper = clamp(upper, (float)(QUANT_MIN), (float)(QUANT_MAX));

            lower_uchar = convert_uchar3_rtn(lower);
            upper_uchar = convert_uchar3_rtp(upper);

            if (degenerated)
            {
                /* Degenerated child: collapse to a point at the node origin. */
                lower_uchar = upper_uchar = 0;
            }
        }

        qbvh_node->qbounds.lower_x[k] = lower_uchar.x;
        qbvh_node->qbounds.lower_y[k] = lower_uchar.y;
        qbvh_node->qbounds.lower_z[k] = lower_uchar.z;
        qbvh_node->qbounds.upper_x[k] = upper_uchar.x;
        qbvh_node->qbounds.upper_y[k] = upper_uchar.y;
        qbvh_node->qbounds.upper_z[k] = upper_uchar.z;

        /* Instance leaves are 2 blocks (128B); everything else advances 1 block. */
        qbvh_node->childData[k] = (type == NODE_TYPE_INSTANCE) ? 2 : 1;

#if ENABLE_CONVERSION_CHECKS == 1
        /* Debug-only validation that the quantized box contains the child. */
        if (!(exp.x >= -128 && exp.x <= 127))
            printf("exp_x error \n");
        if (!(exp.y >= -128 && exp.y <= 127))
            printf("exp_y error \n");
        if (!(exp.z >= -128 && exp.z <= 127))
            printf("exp_z error \n");

        struct AABB child_qaabb = extractAABB_QBVHNodeN(qbvh_node, k);
        if (!AABB_subset(&child_aabb, &child_qaabb))
        {
            uint3 lower_i = convert_uint3(lower_uchar);
            uint3 upper_i = convert_uint3(upper_uchar);

            printf("\n ERROR %d\n", k);
            printf("lower %f upper %f \n lower_i %d upper_i %d \n", lower, upper, lower_i, upper_i);
            printf("%i uncompressed \n", k);
            AABB_print(&child_aabb);
            printf("%i compressed \n", k);
            AABB_print(&child_qaabb);

            printf("%i uncompressed (as int) \n", k);
            AABB_printasInt(&child_aabb);
            printf("%i compressed (as int) \n", k);
            AABB_printasInt(&child_qaabb);

            int4 e0 = child_aabb.lower < child_qaabb.lower;
            int4 e1 = child_aabb.upper > child_qaabb.upper;
            printf("e0 %d e1 %d \n", e0, e1);
        }
#endif
    }
}
345
/* Convenience wrapper: reduce the per-lane child boxes across the
 * subgroup, then delegate to the shared node writer. Must run with the
 * full subgroup active. */
GRL_INLINE void subgroup_setQBVHNodeN_setFields(const int offset, const uint type, struct AABB* input_aabb, const uint numChildren, const uchar mask, struct QBVHNodeN* qbvh_node, const bool degenerated)
{
    struct AABB merged = AABB_sub_group_reduce_N6(input_aabb);
    subgroup_setQBVHNodeN_setFields_reduced_bounds(offset, type, input_aabb, numChildren, mask, qbvh_node, degenerated, merged);
}
351
/*
 * Variant of subgroup_setQBVHNodeN_setFields that builds TWO nodes per
 * SIMD16 subgroup: lanes 0-7 write one node, lanes 8-15 another. Each
 * 8-lane half behaves like an independent SIMD8 writer; `active_lane`
 * disables halves that have no node to write. Must run with the full
 * subgroup active (the reduce/shuffle below need every lane).
 * Parameters otherwise match subgroup_setQBVHNodeN_setFields_reduced_bounds.
 */
GRL_INLINE void subgroup_setQBVHNodeN_setFields_2xSIMD8_in_SIMD16(const int offset, const uint type, struct AABB* input_aabb, const uint numChildren, const uchar mask, struct QBVHNodeN* qbvh_node, const bool degenerated, bool active_lane)
{
    const uint lane = get_sub_group_local_id() % 8;      /* lane within the 8-wide half */
    const uint node_in_sg = get_sub_group_local_id() / 8; /* which of the two nodes */
    const uint k = lane;
    const float up = 1.0f + ulp;
    const float down = 1.0f - ulp;

    struct AABB aabb = AABB_sub_group_reduce_N6(input_aabb); // needs to execute with full subgroup width
    /* Pick up this half's reduced box from its first lane (0 or 8). */
    aabb = AABB_sub_group_shuffle(&aabb, node_in_sg * 8);

    if (lane < BVH_NODE_N6 && active_lane)
    {
        /* Quantization grid: origin = padded lower corner, per-axis exponent
         * sized so the padded extent fits the 8-bit grid. */
        struct AABB conservative_aabb = conservativeAABB(&aabb);
        const float3 len = AABB_size(&conservative_aabb).xyz * up;
        int3 exp;
        const float3 mant = frexp_vec3(len, &exp);
        const float3 org = conservative_aabb.lower.xyz;

        exp += (mant > (float3)QUANT_MAX_MANT ? (int3)1 : (int3)0);

        qbvh_node->offset = offset;
        qbvh_node->type = type;

        qbvh_node->lower[0] = org.x;
        qbvh_node->lower[1] = org.y;
        qbvh_node->lower[2] = org.z;

        qbvh_node->exp[0] = exp.x;
        qbvh_node->exp[1] = exp.y;
        qbvh_node->exp[2] = exp.z;

        qbvh_node->instMask = mask;

        /* Default: invalid slot encoding (lower 0x80 > upper 0). */
        uchar3 lower_uchar = (uchar3)(0x80);
        uchar3 upper_uchar = (uchar3)(0);

        if (lane < numChildren)
        {
            struct AABB child_aabb = conservativeAABB(input_aabb);

            /* Quantize: floor the lower, ceil the upper, clamp to [0,255]. */
            float3 lower = floor(bitShiftLdexp3((child_aabb.lower.xyz - org) * down, -exp + 8));
            lower = clamp(lower, (float)(QUANT_MIN), (float)(QUANT_MAX));
            float3 upper = ceil(bitShiftLdexp3((child_aabb.upper.xyz - org) * up, -exp + 8));
            upper = clamp(upper, (float)(QUANT_MIN), (float)(QUANT_MAX));

            lower_uchar = convert_uchar3_rtn(lower);
            upper_uchar = convert_uchar3_rtp(upper);

            if (degenerated)
            {
                /* Degenerated child: collapse to a point at the node origin. */
                lower_uchar = upper_uchar = 0;
            }
        }

        qbvh_node->qbounds.lower_x[k] = lower_uchar.x;
        qbvh_node->qbounds.lower_y[k] = lower_uchar.y;
        qbvh_node->qbounds.lower_z[k] = lower_uchar.z;
        qbvh_node->qbounds.upper_x[k] = upper_uchar.x;
        qbvh_node->qbounds.upper_y[k] = upper_uchar.y;
        qbvh_node->qbounds.upper_z[k] = upper_uchar.z;

        /* Instance leaves occupy 2 blocks; all other children 1 block. */
        qbvh_node->childData[k] = (type == NODE_TYPE_INSTANCE) ? 2 : 1;

#if ENABLE_CONVERSION_CHECKS == 1
        /* Debug-only validation that the quantized box contains the child. */
        if (!(exp.x >= -128 && exp.x <= 127))
            printf("exp_x error \n");
        if (!(exp.y >= -128 && exp.y <= 127))
            printf("exp_y error \n");
        if (!(exp.z >= -128 && exp.z <= 127))
            printf("exp_z error \n");

        struct AABB child_qaabb = extractAABB_QBVHNodeN(qbvh_node, k);
        if (!AABB_subset(&child_aabb, &child_qaabb))
        {
            uint3 lower_i = convert_uint3(lower_uchar);
            uint3 upper_i = convert_uint3(upper_uchar);

            printf("\n ERROR %d\n", k);
            printf("lower %f upper %f \n lower_i %d upper_i %d \n", lower, upper, lower_i, upper_i);
            printf("%i uncompressed \n", k);
            AABB_print(&child_aabb);
            printf("%i compressed \n", k);
            AABB_print(&child_qaabb);

            printf("%i uncompressed (as int) \n", k);
            AABB_printasInt(&child_aabb);
            printf("%i compressed (as int) \n", k);
            AABB_printasInt(&child_qaabb);

            int4 e0 = child_aabb.lower < child_qaabb.lower;
            int4 e1 = child_aabb.upper > child_qaabb.upper;
            printf("e0 %d e1 %d \n", e0, e1);
        }
#endif
    }
}
450
/*
 * Subgroup-cooperative writer for an instance node. Each lane contributes
 * one child instance box and that instance's mask; the node's mask is the
 * OR of all child masks. Degenerated instances (mask == 0) are excluded
 * from the bounds reduction unless every child is degenerated.
 */
GRL_INLINE void subgroup_setInstanceQBVHNodeN(const int offset, struct AABB *input_aabb, const uint numChildren, struct QBVHNodeN *qbvh_node, const uint instMask)
{
    const uint subgroupLocalID = get_sub_group_local_id();

    // for degenerated (or inactive) instance ignore this box in exp, origin calculation and make its box be a point in the node origin.
    // if it becomes non_degenerated on update, tree topology will be equivalent to what it would be if we would account this degenerated node here.
    bool degenerated = (instMask == BVH_NODE_DEGENERATED_MASK);

    struct AABB aabb;
    AABB_init(&aabb);

    // if every child is degenerated (or inactive) instance, we need to init aabb with origin point
    uchar commonMask = sub_group_reduce_or_N6(instMask);
    if (subgroupLocalID < numChildren && (!degenerated || commonMask == BVH_NODE_DEGENERATED_MASK))
        aabb = *input_aabb;

    subgroup_setQBVHNodeN_setFields(offset, NODE_TYPE_INSTANCE, &aabb, numChildren, commonMask, qbvh_node, degenerated);
}
469
470
471 // return true if is degenerated
// return true if is degenerated
/*
 * 2-nodes-per-SIMD16 helper: computes the combined instance mask for each
 * 8-lane half and clears this lane's box when its instance is degenerated
 * but at least one sibling is not (so the degenerated box is excluded from
 * the bounds reduction). Writes the combined mask through `mask` on active
 * lanes. Must run with the full subgroup active (the OR-reduce needs all lanes).
 */
GRL_INLINE bool subgroup_setInstanceBox_2xSIMD8_in_SIMD16(struct AABB* input_aabb, const uint numChildren, uchar* mask, const uint instMask, bool active_lane)
{
    const uint lane = get_sub_group_local_id() % 8;

    // for degenerated (or inactive) instance ignore this box in exp, origin calculation and make its box be a point in the node origin.
    // if it becomes non_degenerated on update, tree topology will be equivalent to what it would be if we would account this degenerated node here.
    bool degenerated = (instMask == BVH_NODE_DEGENERATED_MASK);

    // if every child is degenerated (or inactive) instance, we need to init aabb with origin point
    uchar commonMask = sub_group_reduce_or_N6_2xSIMD8_in_SIMD16(instMask);
    if (active_lane)
        *mask = commonMask;

    if (active_lane && (degenerated && commonMask != BVH_NODE_DEGENERATED_MASK))
        AABB_init(input_aabb);

    return active_lane ? degenerated : false;
}
490
/*
 * 2-nodes-per-SIMD16 variant of subgroup_setInstanceQBVHNodeN: lanes 0-7
 * and 8-15 each write one instance node. Degenerated instances (mask == 0)
 * are excluded from the bounds reduction unless every child in the half
 * is degenerated. Must run with the full subgroup active.
 */
GRL_INLINE void subgroup_setInstanceQBVHNodeN_x2(const int offset, struct AABB* input_aabb, const uint numChildren, struct QBVHNodeN* qbvh_node, const uint instMask, bool active_lane)
{
    const uint lane = get_sub_group_local_id() % 8;

    // for degenerated (or inactive) instance ignore this box in exp, origin calculation and make its box be a point in the node origin.
    // if it becomes non_degenerated on update, tree topology will be equivalent to what it would be if we would account this degenerated node here.
    bool degenerated = (instMask == BVH_NODE_DEGENERATED_MASK);

    struct AABB aabb;
    AABB_init(&aabb);

    // if every child is degenerated (or inactive) instance, we need to init aabb with origin point
    uchar commonMask = sub_group_reduce_or_N6_2xSIMD8_in_SIMD16(instMask);
    if (lane < numChildren && (!degenerated || commonMask == BVH_NODE_DEGENERATED_MASK))
        aabb = *input_aabb;

    subgroup_setQBVHNodeN_setFields_2xSIMD8_in_SIMD16(offset, NODE_TYPE_INSTANCE, &aabb, numChildren, commonMask, qbvh_node, degenerated, active_lane);
}
509
510
/* Subgroup-cooperative writer for a generic (non-instance-specific) node:
 * lanes beyond numChildren contribute an empty (inverted) box so they are
 * ignored by the bounds reduction. */
GRL_INLINE void subgroup_setQBVHNodeN(const int offset, const uint type, struct AABB* input_aabb, const uint numChildren, struct QBVHNodeN* qbvh_node, uint mask)
{
    const uint lane = get_sub_group_local_id();

    struct AABB childBox;
    AABB_init(&childBox);
    if (lane < numChildren)
        childBox = *input_aabb;

    subgroup_setQBVHNodeN_setFields(offset, type, &childBox, numChildren, mask, qbvh_node, false);
}
523
524
/* 2-nodes-per-SIMD16 variant of subgroup_setQBVHNodeN; uses the default
 * (all-hit) instance mask. Lanes past numChildren in each 8-wide half
 * contribute an empty box. */
GRL_INLINE void subgroup_setQBVHNodeN_x2(const int offset, const uint type, struct AABB* input_aabb, const uint numChildren, struct QBVHNodeN* qbvh_node, bool active_lane)
{
    const uint lane = get_sub_group_local_id() % 8;

    struct AABB childBox;
    AABB_init(&childBox);
    if (lane < numChildren)
        childBox = *input_aabb;

    subgroup_setQBVHNodeN_setFields_2xSIMD8_in_SIMD16(offset, type, &childBox, numChildren, BVH_NODE_DEFAULT_MASK, qbvh_node, false, active_lane);
}
537
538
/*
 * Writes ONLY the bounds-related fields of a node (origin, exponents,
 * instMask, quantized child slots) — offset/type/childData are untouched.
 * One child box per lane; `reduced_bounds` is the uniform union of all
 * children. Lanes >= numChildren write the invalid-slot encoding.
 */
GRL_INLINE void subgroup_QBVHNodeN_setBounds( uniform struct QBVHNodeN* qbvh_node,
                                              uniform struct AABB reduced_bounds,
                                              varying struct AABB input_aabb,
                                              uniform uint numChildren,
                                              varying ushort lane )
{
    /* Padding factors keeping quantized bounds conservative. */
    const float up = 1.0f + ulp;
    const float down = 1.0f - ulp;

    int3 exp;

    /* Quantization grid: origin = padded lower corner; exponent sized so
     * the padded extent fits the 8-bit grid. */
    struct AABB conservative_aabb = conservativeAABB( &reduced_bounds);
    const float3 len = AABB_size( &conservative_aabb ).xyz * up;
    const float3 mant = frexp_vec3( len, &exp );
    const float3 org = conservative_aabb.lower.xyz;

    exp += (mant > ( float3 )QUANT_MAX_MANT ? (int3)1 : (int3)0);

    qbvh_node->lower[0] = org.x;
    qbvh_node->lower[1] = org.y;
    qbvh_node->lower[2] = org.z;

    qbvh_node->exp[0] = exp.x;
    qbvh_node->exp[1] = exp.y;
    qbvh_node->exp[2] = exp.z;

    qbvh_node->instMask = 0xff;

    /* Default: invalid slot encoding (lower 0x80 > upper 0). */
    uchar3 lower_uchar = 0x80;
    uchar3 upper_uchar = 0;

    if ( lane < BVH_NODE_N6 )
    {
        ushort k = lane;
        if( lane < numChildren )
        {
            struct AABB child_aabb = conservativeAABB( &input_aabb ); // conservative ???

            /* Quantize: floor the lower, ceil the upper, clamp to [0,255]. */
            float3 lower = floor( bitShiftLdexp3( (child_aabb.lower.xyz - org) * down, -exp + 8 ) );
            lower = clamp( lower, (float)(QUANT_MIN), (float)(QUANT_MAX) );
            float3 upper = ceil( bitShiftLdexp3( (child_aabb.upper.xyz - org) * up, -exp + 8 ) );
            upper = clamp( upper, (float)(QUANT_MIN), (float)(QUANT_MAX) );

            lower_uchar = convert_uchar3_rtn( lower );
            upper_uchar = convert_uchar3_rtp( upper );
        }

        qbvh_node->qbounds.lower_x[k] = lower_uchar.x;
        qbvh_node->qbounds.lower_y[k] = lower_uchar.y;
        qbvh_node->qbounds.lower_z[k] = lower_uchar.z;
        qbvh_node->qbounds.upper_x[k] = upper_uchar.x;
        qbvh_node->qbounds.upper_y[k] = upper_uchar.y;
        qbvh_node->qbounds.upper_z[k] = upper_uchar.z;
    }

}
595
/*
 * Single-threaded (scalar) version of the bounds writer: input_aabb is an
 * ARRAY of numChildren boxes. Merges them, derives the quantization grid,
 * then quantizes each child; remaining slots get the invalid encoding.
 * Does not touch offset/type/childData.
 */
GRL_INLINE void QBVHNodeN_setBounds(struct QBVHNodeN *qbvh_node, struct AABB *input_aabb, const uint numChildren)
{
    /* Padding factors keeping quantized bounds conservative. */
    const float up = 1.0f + ulp;
    const float down = 1.0f - ulp;

    int3 exp;
    /* Union of all children. */
    struct AABB aabb;
    AABB_init(&aabb);
    for (uint i = 0; i < numChildren; i++)
        AABB_extend(&aabb, &input_aabb[i]);

    /* Quantization grid: origin = padded lower corner; exponent sized so
     * the padded extent fits the 8-bit grid. */
    struct AABB conservative_aabb = conservativeAABB(&aabb);
    const float3 len = AABB_size(&conservative_aabb).xyz * up;
    const float3 mant = frexp_vec3(len, &exp);
    const float3 org = conservative_aabb.lower.xyz;

    exp += (mant > (float3)QUANT_MAX_MANT ? (int3)1 : (int3)0);

    qbvh_node->lower[0] = org.x;
    qbvh_node->lower[1] = org.y;
    qbvh_node->lower[2] = org.z;

    qbvh_node->exp[0] = exp.x;
    qbvh_node->exp[1] = exp.y;
    qbvh_node->exp[2] = exp.z;

    qbvh_node->instMask = 0xff;

    for (uint k = 0; k < numChildren; k++)
    {
        struct AABB child_aabb = conservativeAABB(&input_aabb[k]); // conservative ???

        /* Quantize: floor the lower, ceil the upper, clamp to [0,255]. */
        float3 lower = floor(bitShiftLdexp3((child_aabb.lower.xyz - org) * down, -exp + 8));
        lower = clamp(lower, (float)(QUANT_MIN), (float)(QUANT_MAX));
        float3 upper = ceil(bitShiftLdexp3((child_aabb.upper.xyz - org) * up, -exp + 8));
        upper = clamp(upper, (float)(QUANT_MIN), (float)(QUANT_MAX));

        uchar3 lower_uchar = convert_uchar3_rtn(lower);
        uchar3 upper_uchar = convert_uchar3_rtp(upper);

        qbvh_node->qbounds.lower_x[k] = lower_uchar.x;
        qbvh_node->qbounds.lower_y[k] = lower_uchar.y;
        qbvh_node->qbounds.lower_z[k] = lower_uchar.z;
        qbvh_node->qbounds.upper_x[k] = upper_uchar.x;
        qbvh_node->qbounds.upper_y[k] = upper_uchar.y;
        qbvh_node->qbounds.upper_z[k] = upper_uchar.z;

#if ENABLE_CONVERSION_CHECKS == 1
        /* Debug-only validation that the quantized box contains the child. */
        if (!(exp.x >= -128 && exp.x <= 127))
            printf("exp_x error \n");
        if (!(exp.y >= -128 && exp.y <= 127))
            printf("exp_y error \n");
        if (!(exp.z >= -128 && exp.z <= 127))
            printf("exp_z error \n");

        struct AABB child_qaabb = extractAABB_QBVHNodeN(qbvh_node, k);
        if (!AABB_subset(&child_aabb, &child_qaabb))
        {
            uint3 lower_i = convert_uint3(lower_uchar);
            uint3 upper_i = convert_uint3(upper_uchar);

            printf("\n ERROR %d\n", k);
            printf("lower %f upper %f \n lower_i %d upper_i %d \n", lower, upper, lower_i, upper_i);
            printf("%i uncompressed \n", k);
            AABB_print(&child_aabb);
            printf("%i compressed \n", k);
            AABB_print(&child_qaabb);

            printf("%i uncompressed (as int) \n", k);
            AABB_printasInt(&child_aabb);
            printf("%i compressed (as int) \n", k);
            AABB_printasInt(&child_qaabb);

            int4 e0 = child_aabb.lower < child_qaabb.lower;
            int4 e1 = child_aabb.upper > child_qaabb.upper;
            printf("e0 %d e1 %d \n", e0, e1);
        }
#endif
    }
    /* Mark the unused tail slots invalid (lower 0x80 > upper 0). */
    for (uint k = numChildren; k < BVH_NODE_N6; k++)
    {
        qbvh_node->qbounds.lower_x[k] = 0x80;
        qbvh_node->qbounds.lower_y[k] = 0x80;
        qbvh_node->qbounds.lower_z[k] = 0x80;
        qbvh_node->qbounds.upper_x[k] = 0;
        qbvh_node->qbounds.upper_y[k] = 0;
        qbvh_node->qbounds.upper_z[k] = 0;
    }
}
685
/* Sets the child offset and a block increment of 1 for all slots.
 * NOTE(review): numChildren is currently unused — every slot gets the
 * same increment regardless. */
GRL_INLINE void QBVHNodeN_setChildren(struct QBVHNodeN *qbvh_node, const int offset, const uint numChildren)
{
    qbvh_node->offset = offset;
    for (uint slot = 0; slot < BVH_NODE_N6; ++slot)
        qbvh_node->childData[slot] = 1;
}
692
/* Sets every child's block increment to 1 (64-byte children). */
GRL_INLINE void QBVHNodeN_setChildIncr1(struct QBVHNodeN *qbvh_node)
{
    for (uint slot = 0; slot < BVH_NODE_N6; ++slot)
        qbvh_node->childData[slot] = 1;
}
698
/* Subgroup variant: each of the first BVH_NODE_N6 lanes writes one slot's
 * block increment (1 = 64-byte children). */
GRL_INLINE void SUBGROUP_QBVHNodeN_setChildIncr1(struct QBVHNodeN *qbvh_node)
{
    const uint lane = get_sub_group_local_id();
    if (lane < BVH_NODE_N6)
        qbvh_node->childData[lane] = 1;
}
704
705
/* Sets every child's block increment to 2 (128-byte children, e.g. instance leaves). */
GRL_INLINE void QBVHNodeN_setChildIncr2(struct QBVHNodeN *qbvh_node)
{
    for (uint slot = 0; slot < BVH_NODE_N6; ++slot)
        qbvh_node->childData[slot] = 2;
}
711
/* Stores the node type byte. */
GRL_INLINE void QBVHNodeN_setType(struct QBVHNodeN *qbvh_node, const uint type)
{
    qbvh_node->type = (uchar)type;
}
716
/* Scalar all-in-one node construction: type, child links, then quantized
 * bounds from the array of numChildren boxes. */
GRL_INLINE void setQBVHNodeN(const int offset, const uint type, struct AABB *input_aabb, const uint numChildren, struct QBVHNodeN *qbvh_node)
{
    QBVHNodeN_setType(qbvh_node, type);
    QBVHNodeN_setChildren(qbvh_node, offset, numChildren);
    QBVHNodeN_setBounds(qbvh_node, input_aabb, numChildren);
}
723
/* Debug dump of a node: header fields, then for each axis the raw
 * quantized bytes of all 6 slots followed by their dequantized floats. */
GRL_INLINE void printQBVHNodeN(struct QBVHNodeN *qnode)
{
    printf(" offset %d type %d \n", qnode->offset, (int)qnode->type);
    printf(" lower %f %f %f \n", qnode->lower[0], qnode->lower[1], qnode->lower[2]);
    printf(" exp %d %d %d \n", (int)qnode->exp[0], (int)qnode->exp[1], (int)qnode->exp[2]);
    printf(" instMask %d \n", qnode->instMask);

    /* Dequantize every slot (including invalid ones) for comparison. */
    struct AABB aabb0 = extractAABB_QBVHNodeN(qnode, 0);
    struct AABB aabb1 = extractAABB_QBVHNodeN(qnode, 1);
    struct AABB aabb2 = extractAABB_QBVHNodeN(qnode, 2);
    struct AABB aabb3 = extractAABB_QBVHNodeN(qnode, 3);
    struct AABB aabb4 = extractAABB_QBVHNodeN(qnode, 4);
    struct AABB aabb5 = extractAABB_QBVHNodeN(qnode, 5);

    printf(" lower_x %d %d %d %d %d %d %f %f %f %f %f %f\n", qnode->qbounds.lower_x[0], qnode->qbounds.lower_x[1], qnode->qbounds.lower_x[2], qnode->qbounds.lower_x[3], qnode->qbounds.lower_x[4], qnode->qbounds.lower_x[5], aabb0.lower.x, aabb1.lower.x, aabb2.lower.x, aabb3.lower.x, aabb4.lower.x, aabb5.lower.x);
    printf(" upper_x %d %d %d %d %d %d %f %f %f %f %f %f\n", qnode->qbounds.upper_x[0], qnode->qbounds.upper_x[1], qnode->qbounds.upper_x[2], qnode->qbounds.upper_x[3], qnode->qbounds.upper_x[4], qnode->qbounds.upper_x[5], aabb0.upper.x, aabb1.upper.x, aabb2.upper.x, aabb3.upper.x, aabb4.upper.x, aabb5.upper.x);

    printf(" lower_y %d %d %d %d %d %d %f %f %f %f %f %f\n", qnode->qbounds.lower_y[0], qnode->qbounds.lower_y[1], qnode->qbounds.lower_y[2], qnode->qbounds.lower_y[3], qnode->qbounds.lower_y[4], qnode->qbounds.lower_y[5], aabb0.lower.y, aabb1.lower.y, aabb2.lower.y, aabb3.lower.y, aabb4.lower.y, aabb5.lower.y);
    printf(" upper_y %d %d %d %d %d %d %f %f %f %f %f %f\n", qnode->qbounds.upper_y[0], qnode->qbounds.upper_y[1], qnode->qbounds.upper_y[2], qnode->qbounds.upper_y[3], qnode->qbounds.upper_y[4], qnode->qbounds.upper_y[5], aabb0.upper.y, aabb1.upper.y, aabb2.upper.y, aabb3.upper.y, aabb4.upper.y, aabb5.upper.y);

    printf(" lower_z %d %d %d %d %d %d %f %f %f %f %f %f\n", qnode->qbounds.lower_z[0], qnode->qbounds.lower_z[1], qnode->qbounds.lower_z[2], qnode->qbounds.lower_z[3], qnode->qbounds.lower_z[4], qnode->qbounds.lower_z[5], aabb0.lower.z, aabb1.lower.z, aabb2.lower.z, aabb3.lower.z, aabb4.lower.z, aabb5.lower.z);
    printf(" upper_z %d %d %d %d %d %d %f %f %f %f %f %f\n", qnode->qbounds.upper_z[0], qnode->qbounds.upper_z[1], qnode->qbounds.upper_z[2], qnode->qbounds.upper_z[3], qnode->qbounds.upper_z[4], qnode->qbounds.upper_z[5], aabb0.upper.z, aabb1.upper.z, aabb2.upper.z, aabb3.upper.z, aabb4.upper.z, aabb5.upper.z);
}
747
/* Encodes a child offset relative to its parent node: the parent's byte
 * offset (rounded down to its 64-byte node boundary) is subtracted from
 * the child's absolute byte offset. The int return limits relative
 * distances to +/-2GB (see FIXME below). */
GRL_INLINE int encodeOffset(global char *bvh_mem, global void *parent, int global_child_offset)
{
    long global_parent_offset = (long)parent - (long)bvh_mem;
    global_parent_offset = global_parent_offset & (~(64 - 1)); // FIXME: (sw) this should not be necessary?
    int relative_offset = global_child_offset - global_parent_offset; // FIXME: this limits BVH size to 4GB
    //if ((int)relative_offset <= 0) printf("relative offset <= 0 %d global_child_offset %d global_parent_offset %d \n", relative_offset,global_child_offset,global_parent_offset);
    return relative_offset;
}
756
/* Stores the child offset as a distance in 64-byte node units; the
 * difference of two QBVHNodeN pointers is already in node strides. */
GRL_INLINE void QBVH6Node_set_offset(struct QBVHNodeN *qnode, void *children)
{
    qnode->offset = (struct QBVHNodeN *)children - qnode;
}
762
/* Stores the node type byte. */
GRL_INLINE void QBVH6Node_set_type(struct QBVHNodeN *qnode, uint type)
{
    qnode->type = (uchar)type;
}
767
/* Sorts packed child IDs in descending order; the sorting-network width
 * is chosen at compile time from the configured node arity. */
GRL_INLINE uint sortBVHChildrenIDs(uint input)
{
#if BVH_NODE_N == 8
    return sort8_descending(input);
#else
    return sort4_descending(input);
#endif
}
776
/* Controls how compute_xfm_bbox clips/derives a transformed node box. */
enum XFM_BOX_OPTION {
    XFM_BOX_NO_CLIP = 0,
    XFM_BOX_NOT_REFINED_CLIPPED = 1, //<<use clipbox, for not refined, compute bbox from children, transform after extending to one box
    XFM_BOX_NOT_REFINED_TAKE_CLIPBOX = 2 //<<use clipbox, for not refined, just transform clipbox, don't take children boxes into account
};
782
#define DEB_PRINTFS 0  /* debug prints in the transform-box path */
/* Allow build systems to override; 0 = cheap coarse transformed box. */
#ifndef FINE_TRANSFORM_NODE_BOX
#define FINE_TRANSFORM_NODE_BOX 0
#endif
787
compute_xfm_bbox(const float * xfm,InternalNode * pnode,enum XFM_BOX_OPTION clipOpt,const AABB3f * clipBox,float matrixTransformOverhead)788 GRL_INLINE struct AABB3f GRL_OVERLOADABLE compute_xfm_bbox(const float* xfm, InternalNode* pnode, enum XFM_BOX_OPTION clipOpt, const AABB3f* clipBox, float matrixTransformOverhead)
789 {
790 AABB3f childrenbox;
791 #if FINE_TRANSFORM_NODE_BOX
792 struct AffineSpace3f axfm = AffineSpace3f_load_row_major(xfm);
793 bool computeFine = matrixTransformOverhead < 0.6f;
794 computeFine = sub_group_any(computeFine);
795 if (computeFine)
796 {
797 bool clip = clipOpt != XFM_BOX_NO_CLIP;
798 InternalNode node = *pnode;
799
800 #if DEB_PRINTFS
801 if (InternalNode_IsChildValid(&node, 5) && !InternalNode_IsChildValid(&node, 4))
802 printf("child 5 valid && child 4 invalid\n");
803 if (InternalNode_IsChildValid(&node, 4) && !InternalNode_IsChildValid(&node, 3))
804 printf("child 4 valid && child 3 invalid\n");
805 if (InternalNode_IsChildValid(&node, 3) && !InternalNode_IsChildValid(&node, 2))
806 printf("child 3 valid && child 2 invalid\n");
807 if (InternalNode_IsChildValid(&node, 2) && !InternalNode_IsChildValid(&node, 1))
808 printf("child 2 valid && child 1 invalid\n");
809 if (InternalNode_IsChildValid(&node, 1) && !InternalNode_IsChildValid(&node, 0))
810 printf("child 1 valid && child 0 invalid\n");
811 #endif
812
813 #if DEB_PRINTFS
814 printf("F");
815 #endif
816 AABB3f child_bounds0 = InternalNode_GetChildAABB(&node, 0);
817 AABB3f child_bounds1 = InternalNode_GetChildAABB(&node, 1);
818 AABB3f child_bounds2 = InternalNode_GetChildAABB(&node, 2);
819 AABB3f child_bounds3 = InternalNode_GetChildAABB(&node, 3);
820 AABB3f child_bounds4 = InternalNode_GetChildAABB(&node, 4);
821 AABB3f child_bounds5 = InternalNode_GetChildAABB(&node, 5);
822
823 // we bravely assumme we will have at least 2 children here.
824 if(!InternalNode_IsChildValid(&node, 2)) child_bounds2 = child_bounds0;
825 if(!InternalNode_IsChildValid(&node, 3)) child_bounds3 = child_bounds0;
826 if(!InternalNode_IsChildValid(&node, 4)) child_bounds4 = child_bounds0;
827 if(!InternalNode_IsChildValid(&node, 5)) child_bounds5 = child_bounds0;
828
829 if (clip)
830 {
831 AABB3f_trim_upper(&child_bounds0, clipBox->upper);
832 AABB3f_trim_upper(&child_bounds1, clipBox->upper);
833 AABB3f_trim_upper(&child_bounds2, clipBox->upper);
834 AABB3f_trim_upper(&child_bounds3, clipBox->upper);
835 AABB3f_trim_upper(&child_bounds4, clipBox->upper);
836 AABB3f_trim_upper(&child_bounds5, clipBox->upper);
837 }
838
839 child_bounds0 = transform_aabb(child_bounds0, xfm);
840 child_bounds1 = transform_aabb(child_bounds1, xfm);
841 child_bounds2 = transform_aabb(child_bounds2, xfm);
842 child_bounds3 = transform_aabb(child_bounds3, xfm);
843 child_bounds4 = transform_aabb(child_bounds4, xfm);
844 child_bounds5 = transform_aabb(child_bounds5, xfm);
845
846 AABB3f_extend(&child_bounds0, &child_bounds1);
847 AABB3f_extend(&child_bounds2, &child_bounds3);
848 AABB3f_extend(&child_bounds4, &child_bounds5);
849 AABB3f_extend(&child_bounds0, &child_bounds2);
850 AABB3f_extend(&child_bounds0, &child_bounds4);
851
852 return child_bounds0;
853 }
854 #endif
855
856 #if DEB_PRINTFS
857 printf("0");
858 #endif
859
860 struct AABB3f child_bounds;
861
862 if (clipOpt != XFM_BOX_NOT_REFINED_TAKE_CLIPBOX)
863 {
864 // XFM_BOX_NOT_REFINED_CLIPPED || XFM_BOX_NO_CLIP
865 child_bounds = InternalNode_getAABB3f(pnode);
866 if (clipOpt != XFM_BOX_NO_CLIP)
867 {
868 AABB3f_intersect(&child_bounds, *clipBox);
869 }
870 }
871 else
872 {
873 //XFM_BOX_NOT_REFINED_TAKE_CLIPBOX
874 child_bounds = *clipBox;
875 }
876
877 child_bounds = transform_aabb(child_bounds, xfm);
878 //child_bounds = conservativeAABB3f(&child_bounds);
879 return child_bounds;
880 }
881
compute_xfm_bbox(struct AffineSpace3f xfm,InternalNode * pnode,bool clip,AABB3f * clipBox,float matOverhead)882 GRL_INLINE AABB3f GRL_OVERLOADABLE compute_xfm_bbox(struct AffineSpace3f xfm, InternalNode* pnode, bool clip, AABB3f* clipBox, float matOverhead)
883 {
884 float transform[12];
885 load_row_major_from_AffineSpace3f(xfm, transform);
886 return compute_xfm_bbox(transform, pnode, clip, clipBox, matOverhead);
887 }
888
compute_refit_structs_compacted_size(BVHBase * base)889 GRL_INLINE uint64_t compute_refit_structs_compacted_size(BVHBase* base)
890 {
891 uint dataSize = 0;
892
893 if (BVHBase_HasBackPointers(base))
894 {
895 const uint fatleafEntrySize = (base->fatLeafCount * sizeof(LeafTableEntry) + 63) & ~63;
896 const uint innerEntrySize = (base->innerCount * sizeof(InnerNodeTableEntry) + 63) & ~63;
897
898 // New atomic update
899 if(base->quadIndicesDataStart > base->backPointerDataStart)
900 {
901 uint numQuads = BVHBase_GetNumQuads(base);
902
903 const uint quadTableMainBufferSize = (numQuads + 255) & ~255;
904 const uint quadLeftoversSize = (base->quadLeftoversCountNewAtomicUpdate + 255) & ~255;
905 const uint quadTableEntriesSize = (((quadTableMainBufferSize + quadLeftoversSize) * sizeof(LeafTableEntry) + 63) & ~63);
906
907 const uint quadIndicesDataSize = (numQuads * sizeof(QuadDataIndices) + 63) & ~63;
908
909 dataSize += quadTableEntriesSize + quadIndicesDataSize;
910 }
911
912 dataSize +=
913 ((BVHBase_GetNumInternalNodes(base) * sizeof(uint) + 63) & ~63)
914 + fatleafEntrySize + innerEntrySize;
915 }
916
917 return (uint64_t)dataSize;
918 }
919
compute_compacted_size(BVHBase * base)920 GRL_INLINE uint64_t compute_compacted_size(BVHBase* base)
921 {
922 uint64_t size = sizeof(BVHBase);
923 size += BVHBase_GetNumHWInstanceLeaves(base) * sizeof(HwInstanceLeaf);
924 size += BVHBase_GetNumProcedurals(base) * sizeof(ProceduralLeaf);
925 size += BVHBase_GetNumQuads(base) * sizeof(QuadLeaf);
926 size += compute_refit_structs_compacted_size(base);
927 size += BVHBase_GetNumInternalNodes(base) * sizeof(InternalNode);
928 size += sizeof(InstanceDesc) * base->Meta.instanceCount;
929 size += (sizeof(GeoMetaData) * base->Meta.geoCount + 63) & ~63; // align to 64
930 size = (size + 63) & ~63;
931
932 return size;
933 }
934