//
// Copyright (C) 2009-2021 Intel Corporation
//
// SPDX-License-Identifier: MIT
//
//

module misc;

// OpenCL kernels from misc.cl that the metakernels in this module dispatch.
kernel_module misc("misc.cl")
{
    kernel opencl_kernel_batched_init_globals                  < kernelFunction="batched_init_globals" >;
    kernel opencl_kernel_copy_instances                        < kernelFunction="copy_instances" >;
    kernel opencl_kernel_copy_instances_indirect               < kernelFunction="copy_instances_indirect" >;
    kernel opencl_kernel_copy_instance_ptrs                    < kernelFunction="copy_instance_ptrs" >;
    kernel opencl_kernel_copy_instance_ptrs_indirect           < kernelFunction="copy_instance_ptrs_indirect" >;
    kernel opencl_kernel_copy_instances_base_ptr               < kernelFunction="copy_instances_base_ptr" >;
    kernel opencl_kernel_copy_instances_base_ptr_indirect      < kernelFunction="copy_instances_base_ptr_indirect" >;
    kernel opencl_kernel_copy_instance_ptrs_base_ptr           < kernelFunction="copy_instance_ptrs_base_ptr" >;
    kernel opencl_kernel_copy_instance_ptrs_base_ptr_indirect  < kernelFunction="copy_instance_ptrs_base_ptr_indirect" >;
    kernel opencl_kernel_copy_geo_meta_data                    < kernelFunction="copy_geo_meta_data" >;
    kernel opencl_kernel_copy_geo_descs_indirect_build         < source="misc.cl", kernelFunction="copy_geo_descs_indirect_build" >;
    kernel opencl_kernel_copy_mock                             < kernelFunction="copy_mock" >;
    kernel opencl_kernel_memset                                < kernelFunction="mem_set" >;
    kernel opencl_kernel_memset_size_ptr                       < kernelFunction="mem_set_size_ptr" >;
}

import struct MKBuilderState "structs.grl";
import struct MKSizeEstimate "structs.grl";

// Dispatches batched_init_globals over numWgs work groups, passing p_data through.
metakernel batched_init_globals(
    qword p_data,
    dword numWgs)
{
    dispatch opencl_kernel_batched_init_globals(numWgs,1,1) args(p_data);
}

// Dispatches copy_instances over numThreads work groups in X.
// The group count is supplied directly by the caller.
metakernel copy_instances(
    qword bvh_buffer,
    qword instanceDescsBuffer,
    qword totalSizeToCopy,
    dword numThreads)
{
    dispatch opencl_kernel_copy_instances (numThreads, 1, 1) args(
        bvh_buffer,
        instanceDescsBuffer,
        totalSizeToCopy);
}

// Indirect variant of copy_instances: the work-group count is computed in
// registers from the dword stored at indirectBuildRangeInfo rather than
// being supplied by the caller.
metakernel copy_instances_indirect( qword bvh_buffer, qword instanceDescsBuffer, qword indirectBuildRangeInfo )
{
    define num_groups REG0;
    define C_2 REG2;
    define C_3 REG3;

    C_2 = 2;
    C_3 = 3;

    // sizeof(InstanceDesc) == 64, matches DXR and Vulkan API definitions
    // num_groups = ((num_instances << log_2(64)) >> 8) + 3 = (num_instances >> 2) + 3
    num_groups = load_dword( indirectBuildRangeInfo );
    num_groups = num_groups >> C_2;
    num_groups = num_groups + C_3;

    // Only the low dword of the 64-bit register is used as the X dimension.
    DISPATCHDIM_X = num_groups.lo;
    DISPATCHDIM_Y = 1;
    DISPATCHDIM_Z = 1;

    dispatch_indirect opencl_kernel_copy_instances_indirect args(
        bvh_buffer,
        instanceDescsBuffer,
        indirectBuildRangeInfo);
}

// Dispatches copy_instance_ptrs over numThreads work groups in X.
// The group count is supplied directly by the caller.
metakernel copy_instance_ptrs(
    qword bvh_buffer,
    qword instanceDescPtrsBuffer,
    qword totalSizeToCopy,
    dword numThreads)
{
    dispatch opencl_kernel_copy_instance_ptrs (numThreads, 1, 1) args(
        bvh_buffer,
        instanceDescPtrsBuffer,
        totalSizeToCopy);
}

// Indirect variant of copy_instance_ptrs: the work-group count is computed in
// registers from the dword stored at indirectBuildRangeInfo.
metakernel copy_instance_ptrs_indirect( qword bvh_buffer, qword instanceDescPtrsBuffer, qword indirectBuildRangeInfo)
{
    define num_groups REG0;
    define C_2 REG2;
    define C_3 REG3;

    C_2 = 2;
    C_3 = 3;

    // sizeof(InstanceDesc) == 64, matches DXR and Vulkan API definitions
    // num_groups = ((num_instances << log_2(64)) >> 8) + 3 = (num_instances >> 2) + 3
    num_groups = load_dword( indirectBuildRangeInfo );
    num_groups = num_groups >> C_2;
    num_groups = num_groups + C_3;

    // Only the low dword of the 64-bit register is used as the X dimension.
    DISPATCHDIM_X = num_groups.lo;
    DISPATCHDIM_Y = 1;
    DISPATCHDIM_Z = 1;

    dispatch_indirect opencl_kernel_copy_instance_ptrs_indirect args(
        bvh_buffer,
        instanceDescPtrsBuffer,
        indirectBuildRangeInfo);
}

// Dispatches copy_instances_base_ptr over numThreads work groups in X.
// The group count is supplied directly by the caller.
metakernel copy_instances_base_ptr(
    qword bvh_buffer,
    qword instanceDescsBuffer,
    qword totalSizeToCopy,
    dword numThreads)
{
    dispatch opencl_kernel_copy_instances_base_ptr (numThreads, 1, 1) args(
        bvh_buffer,
        instanceDescsBuffer,
        totalSizeToCopy);
}

// Indirect variant of copy_instances_base_ptr: the work-group count is
// computed in registers from the dword stored at indirectBuildRangeInfo.
metakernel copy_instances_base_ptr_indirect( qword bvh_buffer, qword instanceDescsBuffer, qword indirectBuildRangeInfo)
{
    define num_groups REG0;
    define C_2 REG2;
    define C_3 REG3;

    C_2 = 2;
    C_3 = 3;

    // sizeof(InstanceDesc) == 64, matches DXR and Vulkan API definitions
    // num_groups = ((num_instances << log_2(64)) >> 8) + 3 = (num_instances >> 2) + 3
    num_groups = load_dword( indirectBuildRangeInfo );
    num_groups = num_groups >> C_2;
    num_groups = num_groups + C_3;

    // Only the low dword of the 64-bit register is used as the X dimension.
    DISPATCHDIM_X = num_groups.lo;
    DISPATCHDIM_Y = 1;
    DISPATCHDIM_Z = 1;

    dispatch_indirect opencl_kernel_copy_instances_base_ptr_indirect args(
        bvh_buffer,
        instanceDescsBuffer,
        indirectBuildRangeInfo);
}

// Dispatches copy_instance_ptrs_base_ptr over numThreads work groups in X.
// The group count is supplied directly by the caller.
metakernel copy_instance_ptrs_base_ptr(
    qword bvh_buffer,
    qword instanceDescPtrsBuffer,
    qword totalSizeToCopy,
    dword numThreads)
{
    dispatch opencl_kernel_copy_instance_ptrs_base_ptr (numThreads, 1, 1) args(
        bvh_buffer,
        instanceDescPtrsBuffer,
        totalSizeToCopy);
}

// Indirect variant of copy_instance_ptrs_base_ptr: the work-group count is
// computed in registers from the dword stored at indirectBuildRangeInfo.
metakernel copy_instance_ptrs_base_ptr_indirect( qword bvh_buffer, qword instanceDescPtrsBuffer, qword indirectBuildRangeInfo)
{
    define num_groups REG0;
    define C_2 REG2;
    define C_3 REG3;

    C_2 = 2;
    C_3 = 3;

    // sizeof(InstanceDesc) == 64, matches DXR and Vulkan API definitions
    // num_groups = ((num_instances << log_2(64)) >> 8) + 3 = (num_instances >> 2) + 3
    num_groups = load_dword( indirectBuildRangeInfo );
    num_groups = num_groups >> C_2;
    num_groups = num_groups + C_3;

    // Only the low dword of the 64-bit register is used as the X dimension.
    DISPATCHDIM_X = num_groups.lo;
    DISPATCHDIM_Y = 1;
    DISPATCHDIM_Z = 1;

    dispatch_indirect opencl_kernel_copy_instance_ptrs_base_ptr_indirect args(
        bvh_buffer,
        instanceDescPtrsBuffer,
        indirectBuildRangeInfo);
}

// Dispatches the copy_geo_descs_indirect_build kernel with
// ceil(numGeometries / 16) work groups in X.
metakernel copy_geo_descs(
    qword private_dest,
    qword transient_src,
    qword indirectBuildRangeInfo,
    dword numGeometries)
{
    // Round up so a partial final group of 16 still gets a work group.
    define num_groups (numGeometries + 16 - 1) / 16;
    dispatch opencl_kernel_copy_geo_descs_indirect_build(num_groups, 1, 1) args(
        private_dest,
        transient_src,
        indirectBuildRangeInfo,
        numGeometries);
}

// Dispatches copy_geo_meta_data over numThreads work groups in X.
// The group count is supplied directly by the caller.
metakernel copy_geo_meta_data(
    qword bvh_buffer,
    qword geomdesc_buffer,
    qword totalSizeToCopy,
    dword numThreads)
{
    dispatch opencl_kernel_copy_geo_meta_data (numThreads, 1, 1) args(
        bvh_buffer,
        geomdesc_buffer,
        totalSizeToCopy);
}

// Divisor used by copy_mock to turn a size into a work-group count.
const COPY_MOCK_GROUP_SIZE = 16;

// Dispatches copy_mock with ceil(size / COPY_MOCK_GROUP_SIZE) work groups in X.
metakernel copy_mock(
    qword dest,
    qword src,
    dword size)
{
    // Round up so a partial final group still gets a work group.
    define num_groups (size + COPY_MOCK_GROUP_SIZE - 1) / COPY_MOCK_GROUP_SIZE;
    dispatch opencl_kernel_copy_mock(num_groups, 1, 1) args(
        dest,
        src,
        size);
}

// Dispatches mem_set with ceil(size / 32) work groups in X.
metakernel memset(
    qword dest,
    dword byte,
    dword size)
{
    // Round up so a partial final group of 32 still gets a work group.
    define num_groups (size + 32 - 1) / 32;
    dispatch opencl_kernel_memset(num_groups, 1, 1) args(
        dest,
        byte,
        size);
}

// Variant of memset whose size is read from memory: the dword at sizePtr is
// loaded and the work-group count is derived from it in registers.
metakernel memset_size_ptr(
    qword dest,
    dword byte,
    qword sizePtr)
{
    define byteSize REG0;
    define C_32 REG1; C_32 = 32;
    define C_1  REG2; C_1  = 1;
    define C_4  REG3; C_4  = 4;
    define numGroupsRqd REG4;

    byteSize = load_dword(sizePtr);

    // numGroupsRqd = (byteSize + 32 - 1) >> 5, i.e. ceil(byteSize / 32),
    // the same rounding as the num_groups expression in memset.
    // The shift by 5 is issued as >> 4 followed by >> 1.
    // NOTE(review): presumably the shift amount must be a power of two - confirm.
    numGroupsRqd = byteSize + C_32;
    numGroupsRqd = numGroupsRqd - C_1;
    numGroupsRqd = numGroupsRqd >> C_4;
    numGroupsRqd = numGroupsRqd >> C_1;

    // Only the low dword of the 64-bit register is used as the X dimension.
    DISPATCHDIM_X = numGroupsRqd.lo;
    DISPATCHDIM_Y = 1;
    DISPATCHDIM_Z = 1;

    dispatch_indirect opencl_kernel_memset_size_ptr args(
        dest,
        byte,
        sizePtr);
}