//
// Copyright (C) 2009-2021 Intel Corporation
//
// SPDX-License-Identifier: MIT
//
//

module misc;

// Binds each OpenCL kernel function in misc.cl to the name used by the
// metakernels below.
kernel_module misc("misc.cl")
{
    kernel opencl_kernel_batched_init_globals                   < kernelFunction="batched_init_globals" >;
    kernel opencl_kernel_copy_instances                         < kernelFunction="copy_instances" >;
    kernel opencl_kernel_copy_instances_indirect                < kernelFunction="copy_instances_indirect" >;
    kernel opencl_kernel_copy_instance_ptrs                     < kernelFunction="copy_instance_ptrs" >;
    kernel opencl_kernel_copy_instance_ptrs_indirect            < kernelFunction="copy_instance_ptrs_indirect" >;
    kernel opencl_kernel_copy_instances_base_ptr                < kernelFunction="copy_instances_base_ptr" >;
    kernel opencl_kernel_copy_instances_base_ptr_indirect       < kernelFunction="copy_instances_base_ptr_indirect" >;
    kernel opencl_kernel_copy_instance_ptrs_base_ptr            < kernelFunction="copy_instance_ptrs_base_ptr" >;
    kernel opencl_kernel_copy_instance_ptrs_base_ptr_indirect   < kernelFunction="copy_instance_ptrs_base_ptr_indirect" >;
    kernel opencl_kernel_copy_geo_meta_data                     < kernelFunction="copy_geo_meta_data" >;
    kernel opencl_kernel_copy_geo_descs_indirect_build          < source="misc.cl", kernelFunction="copy_geo_descs_indirect_build" >;
    kernel opencl_kernel_copy_mock                              < kernelFunction="copy_mock" >;
    kernel opencl_kernel_memset                                 < kernelFunction="mem_set" >;
    kernel opencl_kernel_memset_size_ptr                        < kernelFunction="mem_set_size_ptr" >;
}

import struct MKBuilderState "structs.grl";
import struct MKSizeEstimate "structs.grl";


// Direct dispatch of batched_init_globals with dimensions (numWgs, 1, 1);
// p_data is forwarded to the kernel unchanged.
metakernel batched_init_globals(
    qword p_data,
    dword numWgs)
{
    dispatch opencl_kernel_batched_init_globals(numWgs, 1, 1) args(p_data);
}

// Direct dispatch of copy_instances with dimensions (numThreads, 1, 1).
// numThreads only sizes the dispatch; it is not passed to the kernel.
metakernel copy_instances(
    qword bvh_buffer,
    qword instanceDescsBuffer,
    qword totalSizeToCopy,
    dword numThreads)
{
    dispatch opencl_kernel_copy_instances(numThreads, 1, 1)
        args(bvh_buffer, instanceDescsBuffer, totalSizeToCopy);
}

// Indirect variant of copy_instances: the X dispatch dimension is derived
// from the dword read at indirectBuildRangeInfo instead of being supplied
// by the caller.
metakernel copy_instances_indirect(
    qword bvh_buffer,
    qword instanceDescsBuffer,
    qword indirectBuildRangeInfo)
{
    define shiftTwo REG2;
    shiftTwo = 2;
    define padThree REG3;
    padThree = 3;
    define groupCount REG0;

    // sizeof(InstanceDesc) == 64, matches DXR and Vulkan API definitions
    // groupCount = ((num_instances << log_2(64)) >> 8) + 3 = (num_instances >> 2) + 3
    groupCount = load_dword(indirectBuildRangeInfo);
    groupCount = groupCount >> shiftTwo;
    groupCount = groupCount + padThree;

    // Only the low dword of the register feeds the dispatch dimension.
    DISPATCHDIM_X = groupCount.lo;
    DISPATCHDIM_Y = 1;
    DISPATCHDIM_Z = 1;

    dispatch_indirect opencl_kernel_copy_instances_indirect
        args(bvh_buffer, instanceDescsBuffer, indirectBuildRangeInfo);
}

// Direct dispatch of copy_instance_ptrs with dimensions (numThreads, 1, 1).
// numThreads only sizes the dispatch; it is not passed to the kernel.
metakernel copy_instance_ptrs(
    qword bvh_buffer,
    qword instanceDescPtrsBuffer,
    qword totalSizeToCopy,
    dword numThreads)
{
    dispatch opencl_kernel_copy_instance_ptrs(numThreads, 1, 1)
        args(bvh_buffer, instanceDescPtrsBuffer, totalSizeToCopy);
}

// Indirect variant of copy_instance_ptrs: the X dispatch dimension is
// derived from the dword read at indirectBuildRangeInfo.
metakernel copy_instance_ptrs_indirect(
    qword bvh_buffer,
    qword instanceDescPtrsBuffer,
    qword indirectBuildRangeInfo)
{
    define shiftTwo REG2;
    shiftTwo = 2;
    define padThree REG3;
    padThree = 3;
    define groupCount REG0;

    // sizeof(InstanceDesc) == 64, matches DXR and Vulkan API definitions
    // groupCount = ((num_instances << log_2(64)) >> 8) + 3 = (num_instances >> 2) + 3
    groupCount = load_dword(indirectBuildRangeInfo);
    groupCount = groupCount >> shiftTwo;
    groupCount = groupCount + padThree;

    // Only the low dword of the register feeds the dispatch dimension.
    DISPATCHDIM_X = groupCount.lo;
    DISPATCHDIM_Y = 1;
    DISPATCHDIM_Z = 1;

    dispatch_indirect opencl_kernel_copy_instance_ptrs_indirect
        args(bvh_buffer, instanceDescPtrsBuffer, indirectBuildRangeInfo);
}

// Direct dispatch of copy_instances_base_ptr with dimensions
// (numThreads, 1, 1). numThreads is not passed to the kernel.
metakernel copy_instances_base_ptr(
    qword bvh_buffer,
    qword instanceDescsBuffer,
    qword totalSizeToCopy,
    dword numThreads)
{
    dispatch opencl_kernel_copy_instances_base_ptr(numThreads, 1, 1)
        args(bvh_buffer, instanceDescsBuffer, totalSizeToCopy);
}

// Indirect variant of copy_instances_base_ptr: the X dispatch dimension is
// derived from the dword read at indirectBuildRangeInfo.
metakernel copy_instances_base_ptr_indirect(
    qword bvh_buffer,
    qword instanceDescsBuffer,
    qword indirectBuildRangeInfo)
{
    define shiftTwo REG2;
    shiftTwo = 2;
    define padThree REG3;
    padThree = 3;
    define groupCount REG0;

    // sizeof(InstanceDesc) == 64, matches DXR and Vulkan API definitions
    // groupCount = ((num_instances << log_2(64)) >> 8) + 3 = (num_instances >> 2) + 3
    groupCount = load_dword(indirectBuildRangeInfo);
    groupCount = groupCount >> shiftTwo;
    groupCount = groupCount + padThree;

    // Only the low dword of the register feeds the dispatch dimension.
    DISPATCHDIM_X = groupCount.lo;
    DISPATCHDIM_Y = 1;
    DISPATCHDIM_Z = 1;

    dispatch_indirect opencl_kernel_copy_instances_base_ptr_indirect
        args(bvh_buffer, instanceDescsBuffer, indirectBuildRangeInfo);
}

// Direct dispatch of copy_instance_ptrs_base_ptr with dimensions
// (numThreads, 1, 1). numThreads is not passed to the kernel.
metakernel copy_instance_ptrs_base_ptr(
    qword bvh_buffer,
    qword instanceDescPtrsBuffer,
    qword totalSizeToCopy,
    dword numThreads)
{
    dispatch opencl_kernel_copy_instance_ptrs_base_ptr(numThreads, 1, 1)
        args(bvh_buffer, instanceDescPtrsBuffer, totalSizeToCopy);
}

// Indirect variant of copy_instance_ptrs_base_ptr: the X dispatch dimension
// is derived from the dword read at indirectBuildRangeInfo.
metakernel copy_instance_ptrs_base_ptr_indirect(
    qword bvh_buffer,
    qword instanceDescPtrsBuffer,
    qword indirectBuildRangeInfo)
{
    define shiftTwo REG2;
    shiftTwo = 2;
    define padThree REG3;
    padThree = 3;
    define groupCount REG0;

    // sizeof(InstanceDesc) == 64, matches DXR and Vulkan API definitions
    // groupCount = ((num_instances << log_2(64)) >> 8) + 3 = (num_instances >> 2) + 3
    groupCount = load_dword(indirectBuildRangeInfo);
    groupCount = groupCount >> shiftTwo;
    groupCount = groupCount + padThree;

    // Only the low dword of the register feeds the dispatch dimension.
    DISPATCHDIM_X = groupCount.lo;
    DISPATCHDIM_Y = 1;
    DISPATCHDIM_Z = 1;

    dispatch_indirect opencl_kernel_copy_instance_ptrs_base_ptr_indirect
        args(bvh_buffer, instanceDescPtrsBuffer, indirectBuildRangeInfo);
}

// Dispatches copy_geo_descs_indirect_build with an X dimension of
// numGeometries divided by 16, rounded up. All four parameters are
// forwarded to the kernel.
metakernel copy_geo_descs(
    qword private_dest,
    qword transient_src,
    qword indirectBuildRangeInfo,
    dword numGeometries)
{
    define groupCount (numGeometries + 16 - 1) / 16;

    dispatch opencl_kernel_copy_geo_descs_indirect_build(groupCount, 1, 1)
        args(private_dest, transient_src, indirectBuildRangeInfo, numGeometries);
}

// Direct dispatch of copy_geo_meta_data with dimensions (numThreads, 1, 1).
// numThreads only sizes the dispatch; it is not passed to the kernel.
metakernel copy_geo_meta_data(
    qword bvh_buffer,
    qword geomdesc_buffer,
    qword totalSizeToCopy,
    dword numThreads)
{
    dispatch opencl_kernel_copy_geo_meta_data(numThreads, 1, 1)
        args(bvh_buffer, geomdesc_buffer, totalSizeToCopy);
}


// Divisor used by copy_mock when turning a size into a dispatch dimension.
const COPY_MOCK_GROUP_SIZE = 16;

// Dispatches copy_mock with an X dimension of size divided by
// COPY_MOCK_GROUP_SIZE, rounded up.
metakernel copy_mock(
    qword dest,
    qword src,
    dword size)
{
    define groupCount (size + COPY_MOCK_GROUP_SIZE - 1) / COPY_MOCK_GROUP_SIZE;

    dispatch opencl_kernel_copy_mock(groupCount, 1, 1)
        args(dest, src, size);
}

// Dispatches mem_set with an X dimension of size divided by 32, rounded up.
metakernel memset(
    qword dest,
    dword byte,
    dword size)
{
    define groupCount (size + 32 - 1) / 32;

    dispatch opencl_kernel_memset(groupCount, 1, 1)
        args(dest, byte, size);
}

// Variant of memset whose size is read from memory: the dword at sizePtr is
// loaded and the X dispatch dimension is computed from it in registers.
metakernel memset_size_ptr(
    qword dest,
    dword byte,
    qword sizePtr)
{
    define sizeInBytes REG0;
    define k32 REG1;
    define k1 REG2;
    define k4 REG3;
    define groupsNeeded REG4;

    k32 = 32;
    k1 = 1;
    k4 = 4;

    sizeInBytes = load_dword(sizePtr);

    // groupsNeeded = (sizeInBytes + 32 - 1) >> 5, i.e. a round-up division by 32.
    // The shift by 5 is done as >> 4 followed by >> 1.
    // NOTE(review): presumably because shift amounts must be powers of two — confirm.
    groupsNeeded = sizeInBytes + k32;
    groupsNeeded = groupsNeeded - k1;
    groupsNeeded = groupsNeeded >> k4;
    groupsNeeded = groupsNeeded >> k1;

    // Only the low dword of the register feeds the dispatch dimension.
    DISPATCHDIM_X = groupsNeeded.lo;
    DISPATCHDIM_Y = 1;
    DISPATCHDIM_Z = 1;

    dispatch_indirect opencl_kernel_memset_size_ptr
        args(dest, byte, sizePtr);
}