xref: /aosp_15_r20/external/mesa3d/src/intel/vulkan/grl/gpu/misc.grl (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1//
2// Copyright (C) 2009-2021 Intel Corporation
3//
4// SPDX-License-Identifier: MIT
5//
6//
7
8module misc;
9
// Kernel bindings for misc.cl.
// Each entry gives a GRL kernel handle (used by the metakernels in this file)
// and the kernelFunction entry point it is bound to.
kernel_module misc("misc.cl")
{
    // Globals initialisation.
    kernel opencl_kernel_batched_init_globals < kernelFunction="batched_init_globals" >;

    // copy_instances / copy_instance_ptrs family, direct and indirect.
    kernel opencl_kernel_copy_instances < kernelFunction="copy_instances" >;
    kernel opencl_kernel_copy_instances_indirect < kernelFunction="copy_instances_indirect" >;
    kernel opencl_kernel_copy_instance_ptrs < kernelFunction="copy_instance_ptrs" >;
    kernel opencl_kernel_copy_instance_ptrs_indirect < kernelFunction="copy_instance_ptrs_indirect" >;

    // The same family in its *_base_ptr flavour.
    kernel opencl_kernel_copy_instances_base_ptr < kernelFunction="copy_instances_base_ptr" >;
    kernel opencl_kernel_copy_instances_base_ptr_indirect < kernelFunction="copy_instances_base_ptr_indirect" >;
    kernel opencl_kernel_copy_instance_ptrs_base_ptr < kernelFunction="copy_instance_ptrs_base_ptr" >;
    kernel opencl_kernel_copy_instance_ptrs_base_ptr_indirect < kernelFunction="copy_instance_ptrs_base_ptr_indirect" >;

    // Geometry metadata / descriptor copies.
    kernel opencl_kernel_copy_geo_meta_data < kernelFunction="copy_geo_meta_data" >;
    // NOTE(review): this is the only entry that also carries an explicit
    // source attribute; it names the same file as the module itself, so it
    // looks redundant -- confirm before removing.
    kernel opencl_kernel_copy_geo_descs_indirect_build < source="misc.cl", kernelFunction="copy_geo_descs_indirect_build" >;

    // Generic copy and fill helpers.
    kernel opencl_kernel_copy_mock < kernelFunction="copy_mock" >;
    kernel opencl_kernel_memset < kernelFunction="mem_set" >;
    kernel opencl_kernel_memset_size_ptr < kernelFunction="mem_set_size_ptr" >;
}
27
28import struct MKBuilderState "structs.grl";
29import struct MKSizeEstimate "structs.grl";
30
31
// Launches opencl_kernel_batched_init_globals with dispatch dimensions
// (numWgs, 1, 1), forwarding p_data as its only kernel argument.
metakernel batched_init_globals(qword p_data, dword numWgs)
{
    dispatch opencl_kernel_batched_init_globals(numWgs, 1, 1)
        args(p_data);
}
38
// Launches opencl_kernel_copy_instances with dispatch dimensions
// (numThreads, 1, 1).
//
// Kernel arguments, in order: bvh_buffer, instanceDescsBuffer,
// totalSizeToCopy.  numThreads only sizes the dispatch and is not passed on.
metakernel copy_instances(
    qword bvh_buffer,
    qword instanceDescsBuffer,
    qword totalSizeToCopy,
    dword numThreads)
{
    dispatch opencl_kernel_copy_instances(numThreads, 1, 1)
        args(bvh_buffer, instanceDescsBuffer, totalSizeToCopy);
}
50
// Indirect counterpart of copy_instances: the X dispatch dimension is
// computed in metakernel registers from the dword stored at
// indirectBuildRangeInfo, then opencl_kernel_copy_instances_indirect is
// launched with dispatch_indirect.
metakernel copy_instances_indirect(
    qword bvh_buffer,
    qword instanceDescsBuffer,
    qword indirectBuildRangeInfo)
{
    // Scratch registers: the running group count and the two constant operands.
    define group_count REG0;
    define shift_amt   REG2;
    define pad_groups  REG3;

    shift_amt  = 2;
    pad_groups = 3;

    // sizeof(InstanceDesc) == 64, matching the DXR and Vulkan API definitions, so
    //   group_count = ((num_instances << log2(64)) >> 8) + 3
    //               = (num_instances >> 2) + 3
    // with num_instances being the dword read from indirectBuildRangeInfo.
    group_count = load_dword(indirectBuildRangeInfo);
    group_count = group_count >> shift_amt;
    group_count = group_count + pad_groups;

    // X takes the .lo part of the group-count register; Y and Z are fixed at 1.
    DISPATCHDIM_X = group_count.lo;
    DISPATCHDIM_Y = 1;
    DISPATCHDIM_Z = 1;

    dispatch_indirect opencl_kernel_copy_instances_indirect
        args(bvh_buffer, instanceDescsBuffer, indirectBuildRangeInfo);
}
77
// Launches opencl_kernel_copy_instance_ptrs with dispatch dimensions
// (numThreads, 1, 1).
//
// Kernel arguments, in order: bvh_buffer, instanceDescPtrsBuffer,
// totalSizeToCopy.  numThreads only sizes the dispatch and is not passed on.
metakernel copy_instance_ptrs(
    qword bvh_buffer,
    qword instanceDescPtrsBuffer,
    qword totalSizeToCopy,
    dword numThreads)
{
    dispatch opencl_kernel_copy_instance_ptrs(numThreads, 1, 1)
        args(bvh_buffer, instanceDescPtrsBuffer, totalSizeToCopy);
}
89
// Indirect counterpart of copy_instance_ptrs: the X dispatch dimension is
// computed in metakernel registers from the dword stored at
// indirectBuildRangeInfo, then opencl_kernel_copy_instance_ptrs_indirect is
// launched with dispatch_indirect.
metakernel copy_instance_ptrs_indirect(
    qword bvh_buffer,
    qword instanceDescPtrsBuffer,
    qword indirectBuildRangeInfo)
{
    // Scratch registers: the running group count and the two constant operands.
    define group_count REG0;
    define shift_amt   REG2;
    define pad_groups  REG3;

    shift_amt  = 2;
    pad_groups = 3;

    // sizeof(InstanceDesc) == 64, matching the DXR and Vulkan API definitions, so
    //   group_count = ((num_instances << log2(64)) >> 8) + 3
    //               = (num_instances >> 2) + 3
    // with num_instances being the dword read from indirectBuildRangeInfo.
    group_count = load_dword(indirectBuildRangeInfo);
    group_count = group_count >> shift_amt;
    group_count = group_count + pad_groups;

    // X takes the .lo part of the group-count register; Y and Z are fixed at 1.
    DISPATCHDIM_X = group_count.lo;
    DISPATCHDIM_Y = 1;
    DISPATCHDIM_Z = 1;

    dispatch_indirect opencl_kernel_copy_instance_ptrs_indirect
        args(bvh_buffer, instanceDescPtrsBuffer, indirectBuildRangeInfo);
}
117
// Launches opencl_kernel_copy_instances_base_ptr with dispatch dimensions
// (numThreads, 1, 1).
//
// Kernel arguments, in order: bvh_buffer, instanceDescsBuffer,
// totalSizeToCopy.  numThreads only sizes the dispatch and is not passed on.
metakernel copy_instances_base_ptr(
    qword bvh_buffer,
    qword instanceDescsBuffer,
    qword totalSizeToCopy,
    dword numThreads)
{
    dispatch opencl_kernel_copy_instances_base_ptr(numThreads, 1, 1)
        args(bvh_buffer, instanceDescsBuffer, totalSizeToCopy);
}
129
// Indirect counterpart of copy_instances_base_ptr: the X dispatch dimension
// is computed in metakernel registers from the dword stored at
// indirectBuildRangeInfo, then opencl_kernel_copy_instances_base_ptr_indirect
// is launched with dispatch_indirect.
metakernel copy_instances_base_ptr_indirect(
    qword bvh_buffer,
    qword instanceDescsBuffer,
    qword indirectBuildRangeInfo)
{
    // Scratch registers: the running group count and the two constant operands.
    define group_count REG0;
    define shift_amt   REG2;
    define pad_groups  REG3;

    shift_amt  = 2;
    pad_groups = 3;

    // sizeof(InstanceDesc) == 64, matching the DXR and Vulkan API definitions, so
    //   group_count = ((num_instances << log2(64)) >> 8) + 3
    //               = (num_instances >> 2) + 3
    // with num_instances being the dword read from indirectBuildRangeInfo.
    group_count = load_dword(indirectBuildRangeInfo);
    group_count = group_count >> shift_amt;
    group_count = group_count + pad_groups;

    // X takes the .lo part of the group-count register; Y and Z are fixed at 1.
    DISPATCHDIM_X = group_count.lo;
    DISPATCHDIM_Y = 1;
    DISPATCHDIM_Z = 1;

    dispatch_indirect opencl_kernel_copy_instances_base_ptr_indirect
        args(bvh_buffer, instanceDescsBuffer, indirectBuildRangeInfo);
}
157
// Launches opencl_kernel_copy_instance_ptrs_base_ptr with dispatch dimensions
// (numThreads, 1, 1).
//
// Kernel arguments, in order: bvh_buffer, instanceDescPtrsBuffer,
// totalSizeToCopy.  numThreads only sizes the dispatch and is not passed on.
metakernel copy_instance_ptrs_base_ptr(
    qword bvh_buffer,
    qword instanceDescPtrsBuffer,
    qword totalSizeToCopy,
    dword numThreads)
{
    dispatch opencl_kernel_copy_instance_ptrs_base_ptr(numThreads, 1, 1)
        args(bvh_buffer, instanceDescPtrsBuffer, totalSizeToCopy);
}
169
// Indirect counterpart of copy_instance_ptrs_base_ptr: the X dispatch
// dimension is computed in metakernel registers from the dword stored at
// indirectBuildRangeInfo, then
// opencl_kernel_copy_instance_ptrs_base_ptr_indirect is launched with
// dispatch_indirect.
metakernel copy_instance_ptrs_base_ptr_indirect(
    qword bvh_buffer,
    qword instanceDescPtrsBuffer,
    qword indirectBuildRangeInfo)
{
    // Scratch registers: the running group count and the two constant operands.
    define group_count REG0;
    define shift_amt   REG2;
    define pad_groups  REG3;

    shift_amt  = 2;
    pad_groups = 3;

    // sizeof(InstanceDesc) == 64, matching the DXR and Vulkan API definitions, so
    //   group_count = ((num_instances << log2(64)) >> 8) + 3
    //               = (num_instances >> 2) + 3
    // with num_instances being the dword read from indirectBuildRangeInfo.
    group_count = load_dword(indirectBuildRangeInfo);
    group_count = group_count >> shift_amt;
    group_count = group_count + pad_groups;

    // X takes the .lo part of the group-count register; Y and Z are fixed at 1.
    DISPATCHDIM_X = group_count.lo;
    DISPATCHDIM_Y = 1;
    DISPATCHDIM_Z = 1;

    dispatch_indirect opencl_kernel_copy_instance_ptrs_base_ptr_indirect
        args(bvh_buffer, instanceDescPtrsBuffer, indirectBuildRangeInfo);
}
197
// Launches opencl_kernel_copy_geo_descs_indirect_build (note that the kernel
// handle carries an "_indirect_build" suffix that this metakernel's name does
// not) with one group per 16 geometries, rounded up.
//
// Kernel arguments, in order: private_dest, transient_src,
// indirectBuildRangeInfo, numGeometries.
metakernel copy_geo_descs(
    qword private_dest,
    qword transient_src,
    qword indirectBuildRangeInfo,
    dword numGeometries)
{
    // ceil(numGeometries / 16)
    define group_count (numGeometries + 16 - 1) / 16;

    dispatch opencl_kernel_copy_geo_descs_indirect_build(group_count, 1, 1)
        args(private_dest, transient_src, indirectBuildRangeInfo, numGeometries);
}
212
// Launches opencl_kernel_copy_geo_meta_data with dispatch dimensions
// (numThreads, 1, 1).
//
// Kernel arguments, in order: bvh_buffer, geomdesc_buffer, totalSizeToCopy.
// numThreads only sizes the dispatch and is not passed on.
metakernel copy_geo_meta_data(
    qword bvh_buffer,
    qword geomdesc_buffer,
    qword totalSizeToCopy,
    dword numThreads)
{
    dispatch opencl_kernel_copy_geo_meta_data(numThreads, 1, 1)
        args(bvh_buffer, geomdesc_buffer, totalSizeToCopy);
}
224
225
// Divisor used by copy_mock to turn its size argument into a group count.
const COPY_MOCK_GROUP_SIZE = 16;

// Launches opencl_kernel_copy_mock with ceil(size / COPY_MOCK_GROUP_SIZE)
// groups in X.
//
// Kernel arguments, in order: dest, src, size.
metakernel copy_mock(qword dest, qword src, dword size)
{
    define group_count (size + COPY_MOCK_GROUP_SIZE - 1) / COPY_MOCK_GROUP_SIZE;

    dispatch opencl_kernel_copy_mock(group_count, 1, 1)
        args(dest, src, size);
}
239
// Launches opencl_kernel_memset (bound to "mem_set" in misc.cl) with
// ceil(size / 32) groups in X.
//
// Kernel arguments, in order: dest, byte, size.
metakernel memset(qword dest, dword byte, dword size)
{
    // ceil(size / 32)
    define group_count (size + 32 - 1) / 32;

    dispatch opencl_kernel_memset(group_count, 1, 1)
        args(dest, byte, size);
}
251
// Variant of memset whose size is read from memory: the dword stored at
// sizePtr is loaded into a register, turned into a group count of
// (size + 32 - 1) >> 5, and opencl_kernel_memset_size_ptr (bound to
// "mem_set_size_ptr" in misc.cl) is launched with dispatch_indirect.
//
// Kernel arguments, in order: dest, byte, sizePtr.
metakernel memset_size_ptr(
    qword dest,
    dword byte,
    qword sizePtr)
{
    // Scratch registers: the loaded size, three constants, and the result.
    define fill_bytes  REG0;
    define k_32        REG1;
    k_32 = 32;
    define k_1         REG2;
    k_1 = 1;
    define k_4         REG3;
    k_4 = 4;
    define group_count REG4;

    fill_bytes = load_dword(sizePtr);

    // group_count = (fill_bytes + 32 - 1) >> 5.
    // The shift by 5 is issued as a shift by 4 followed by a shift by 1.
    // NOTE(review): presumably because register shift amounts are limited to
    // powers of two -- confirm against the metakernel/MI_MATH documentation.
    group_count = fill_bytes + k_32;
    group_count = group_count - k_1;
    group_count = group_count >> k_4;
    group_count = group_count >> k_1;

    // X takes the .lo part of the group-count register; Y and Z are fixed at 1.
    DISPATCHDIM_X = group_count.lo;
    DISPATCHDIM_Y = 1;
    DISPATCHDIM_Z = 1;

    dispatch_indirect opencl_kernel_memset_size_ptr
        args(dest, byte, sizePtr);
}
279