1/* 2 * Copyright © 2022 Friedrich Vock 3 * 4 * SPDX-License-Identifier: MIT 5 */ 6 7#version 460 8 9#extension GL_GOOGLE_include_directive : require 10 11#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require 12#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require 13#extension GL_EXT_shader_explicit_arithmetic_types_int32 : require 14#extension GL_EXT_shader_explicit_arithmetic_types_int64 : require 15#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require 16#extension GL_EXT_scalar_block_layout : require 17#extension GL_EXT_buffer_reference : require 18#extension GL_EXT_buffer_reference2 : require 19#extension GL_KHR_memory_scope_semantics : require 20 21layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in; 22 23#include "build_helpers.h" 24#include "build_interface.h" 25 26layout(push_constant) uniform CONSTS { 27 encode_args args; 28}; 29 30void set_parent(uint32_t child, uint32_t parent) 31{ 32 uint64_t addr = args.output_bvh - child / 8 * 4 - 4; 33 DEREF(REF(uint32_t)(addr)) = parent; 34} 35 36void 37main() 38{ 39 /* Revert the order so we start at the root */ 40 uint32_t global_id = DEREF(args.header).ir_internal_node_count - 1 - gl_GlobalInvocationID.x; 41 42 uint32_t output_leaf_node_size; 43 switch (args.geometry_type) { 44 case VK_GEOMETRY_TYPE_TRIANGLES_KHR: 45 output_leaf_node_size = SIZEOF(radv_bvh_triangle_node); 46 break; 47 case VK_GEOMETRY_TYPE_AABBS_KHR: 48 output_leaf_node_size = SIZEOF(radv_bvh_aabb_node); 49 break; 50 default: /* instances */ 51 output_leaf_node_size = SIZEOF(radv_bvh_instance_node); 52 break; 53 } 54 55 uint32_t intermediate_leaf_nodes_size = args.leaf_node_count * SIZEOF(radv_ir_node); 56 uint32_t dst_leaf_offset = 57 id_to_offset(RADV_BVH_ROOT_NODE) + SIZEOF(radv_bvh_box32_node); 58 uint32_t dst_internal_offset = dst_leaf_offset + args.leaf_node_count * output_leaf_node_size; 59 60 REF(radv_ir_box_node) intermediate_internal_nodes = 61 REF(radv_ir_box_node)OFFSET(args.intermediate_bvh, intermediate_leaf_nodes_size); 62 REF(radv_ir_box_node) src_node = INDEX(radv_ir_box_node, intermediate_internal_nodes, global_id); 63 radv_ir_box_node src = DEREF(src_node); 64 65 bool is_root_node = global_id == DEREF(args.header).ir_internal_node_count - 1; 66 67 for (;;) { 68 /* Make changes to the current node's BVH offset value visible. */ 69 memoryBarrier(gl_ScopeDevice, gl_StorageSemanticsBuffer, 70 gl_SemanticsAcquireRelease | gl_SemanticsMakeAvailable | gl_SemanticsMakeVisible); 71 72 uint32_t bvh_offset = is_root_node ? id_to_offset(RADV_BVH_ROOT_NODE) : DEREF(src_node).bvh_offset; 73 if (bvh_offset == RADV_UNKNOWN_BVH_OFFSET) 74 continue; 75 76 if (bvh_offset == RADV_NULL_BVH_OFFSET) 77 break; 78 79 REF(radv_bvh_box32_node) dst_node = REF(radv_bvh_box32_node)(OFFSET(args.output_bvh, bvh_offset)); 80 uint32_t node_id = pack_node_id(bvh_offset, radv_bvh_node_box32); 81 82 uint32_t found_child_count = 0; 83 uint32_t children[4] = {RADV_BVH_INVALID_NODE, RADV_BVH_INVALID_NODE, 84 RADV_BVH_INVALID_NODE, RADV_BVH_INVALID_NODE}; 85 86 for (uint32_t i = 0; i < 2; ++i) 87 if (src.children[i] != RADV_BVH_INVALID_NODE) 88 children[found_child_count++] = src.children[i]; 89 90 while (found_child_count < 4) { 91 int32_t collapsed_child_index = -1; 92 float largest_surface_area = -INFINITY; 93 94 for (int32_t i = 0; i < found_child_count; ++i) { 95 if (ir_id_to_type(children[i]) != radv_ir_node_internal) 96 continue; 97 98 radv_aabb bounds = 99 DEREF(REF(radv_ir_node)OFFSET(args.intermediate_bvh, 100 ir_id_to_offset(children[i]))).aabb; 101 102 float surface_area = aabb_surface_area(bounds); 103 if (surface_area > largest_surface_area) { 104 largest_surface_area = surface_area; 105 collapsed_child_index = i; 106 } 107 } 108 109 if (collapsed_child_index != -1) { 110 REF(radv_ir_box_node) child_node = 111 REF(radv_ir_box_node)OFFSET(args.intermediate_bvh, 112 ir_id_to_offset(children[collapsed_child_index])); 113 uint32_t grandchildren[2] = DEREF(child_node).children; 114 uint32_t valid_grandchild_count = 0; 115 116 if (grandchildren[1] != RADV_BVH_INVALID_NODE) 117 ++valid_grandchild_count; 118 119 if (grandchildren[0] != RADV_BVH_INVALID_NODE) 120 ++valid_grandchild_count; 121 else 122 grandchildren[0] = grandchildren[1]; 123 124 if (valid_grandchild_count > 1) 125 children[found_child_count++] = grandchildren[1]; 126 127 if (valid_grandchild_count > 0) 128 children[collapsed_child_index] = grandchildren[0]; 129 else { 130 found_child_count--; 131 children[collapsed_child_index] = children[found_child_count]; 132 } 133 134 DEREF(child_node).bvh_offset = RADV_NULL_BVH_OFFSET; 135 } else 136 break; 137 } 138 139 for (uint32_t i = 0; i < found_child_count; ++i) { 140 uint32_t type = ir_id_to_type(children[i]); 141 uint32_t offset = ir_id_to_offset(children[i]); 142 uint32_t dst_offset; 143 144 if (type == radv_ir_node_internal) { 145#if COMPACT 146 dst_offset = atomicAdd(DEREF(args.header).dst_node_offset, SIZEOF(radv_bvh_box32_node)); 147#else 148 uint32_t offset_in_internal_nodes = offset - intermediate_leaf_nodes_size; 149 uint32_t child_index = offset_in_internal_nodes / SIZEOF(radv_ir_box_node); 150 dst_offset = dst_internal_offset + child_index * SIZEOF(radv_bvh_box32_node); 151#endif 152 153 REF(radv_ir_box_node) child_node = REF(radv_ir_box_node)OFFSET(args.intermediate_bvh, offset); 154 DEREF(child_node).bvh_offset = dst_offset; 155 } else { 156 uint32_t child_index = offset / SIZEOF(radv_ir_node); 157 dst_offset = dst_leaf_offset + child_index * output_leaf_node_size; 158 } 159 160 radv_aabb child_aabb = 161 DEREF(REF(radv_ir_node)OFFSET(args.intermediate_bvh, offset)).aabb; 162 163 DEREF(dst_node).coords[i] = child_aabb; 164 165 uint32_t child_id = pack_node_id(dst_offset, ir_type_to_bvh_type(type)); 166 children[i] = child_id; 167 set_parent(child_id, node_id); 168 } 169 170 for (uint i = found_child_count; i < 4; ++i) { 171 for (uint comp = 0; comp < 3; ++comp) { 172 DEREF(dst_node).coords[i].min[comp] = NAN; 173 DEREF(dst_node).coords[i].max[comp] = NAN; 174 } 175 } 176 177 /* Make changes to the children's BVH offset value available to the other invocations. */ 178 memoryBarrier(gl_ScopeDevice, gl_StorageSemanticsBuffer, 179 gl_SemanticsAcquireRelease | gl_SemanticsMakeAvailable | gl_SemanticsMakeVisible); 180 181 DEREF(dst_node).children = children; 182 break; 183 } 184 185 if (is_root_node) { 186 REF(radv_accel_struct_header) header = REF(radv_accel_struct_header)(args.output_bvh - args.output_bvh_offset); 187 DEREF(header).aabb = src.base.aabb; 188 DEREF(header).bvh_offset = args.output_bvh_offset; 189 190 set_parent(RADV_BVH_ROOT_NODE, RADV_BVH_INVALID_NODE); 191 } 192} 193