xref: /aosp_15_r20/external/OpenCL-CTS/test_conformance/subgroups/test_workitem.cpp (revision 6467f958c7de8070b317fc65bcb0f6472e388d82)
1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "procs.h"
17 #include "harness/conversions.h"
18 #include "harness/typeWrappers.h"
19 #include <CL/cl.h>
20 
21 struct get_test_data
22 {
23     cl_uint subGroupSize;
24     cl_uint maxSubGroupSize;
25     cl_uint numSubGroups;
26     cl_uint enqNumSubGroups;
27     cl_uint subGroupId;
28     cl_uint subGroupLocalId;
operator ==get_test_data29     bool operator==(get_test_data x)
30     {
31         return subGroupSize == x.subGroupSize
32             && maxSubGroupSize == x.maxSubGroupSize
33             && numSubGroups == x.numSubGroups && subGroupId == x.subGroupId
34             && subGroupLocalId == x.subGroupLocalId;
35     }
36 };
37 
check_group(const get_test_data * result,int nw,cl_uint ensg,size_t maxwgs)38 static int check_group(const get_test_data *result, int nw, cl_uint ensg,
39                        size_t maxwgs)
40 {
41     int first = -1;
42     int last = -1;
43     int i, j;
44     cl_uint hit[32];
45 
46     for (i = 0; i < nw; ++i)
47     {
48         if (result[i].subGroupId == 0 && result[i].subGroupLocalId == 0)
49             first = i;
50         if (result[i].subGroupId == result[0].numSubGroups - 1
51             && result[i].subGroupLocalId == 0)
52             last = i;
53         if (first != -1 && last != -1) break;
54     }
55 
56     if (first == -1 || last == -1)
57     {
58         log_error("ERROR: expected sub group id's are missing\n");
59         return -1;
60     }
61 
62     // Check them
63     if (result[first].subGroupSize == 0)
64     {
65         log_error("ERROR: get_sub_group_size() returned 0\n");
66         return -1;
67     }
68     if (result[first].maxSubGroupSize == 0
69         || result[first].maxSubGroupSize > maxwgs)
70     {
71         log_error(
72             "ERROR: get_max_subgroup_size() returned incorrect result: %u\n",
73             result[first].maxSubGroupSize);
74         return -1;
75     }
76     if (result[first].subGroupSize > result[first].maxSubGroupSize)
77     {
78         log_error("ERROR: get_sub_group_size() > get_max_sub_group_size()\n");
79         return -1;
80     }
81     if (result[last].subGroupSize > result[first].subGroupSize)
82     {
83         log_error("ERROR: last sub group larger than first sub group\n");
84         return -1;
85     }
86     if (result[first].numSubGroups == 0 || result[first].numSubGroups > ensg)
87     {
88         log_error(
89             "ERROR: get_num_sub_groups() returned incorrect result:  %u \n",
90             result[first].numSubGroups);
91         return -1;
92     }
93 
94     memset(hit, 0, sizeof(hit));
95     for (i = 0; i < nw; ++i)
96     {
97         if (result[i].maxSubGroupSize != result[first].maxSubGroupSize
98             || result[i].numSubGroups != result[first].numSubGroups)
99         {
100             log_error("ERROR: unexpected variation in get_*_sub_group_*()\n");
101             return -1;
102         }
103         if (result[i].subGroupId >= result[first].numSubGroups)
104         {
105             log_error(
106                 "ERROR: get_sub_group_id() returned out of range value: %u\n",
107                 result[i].subGroupId);
108             return -1;
109         }
110         if (result[i].enqNumSubGroups != ensg)
111         {
112             log_error("ERROR: get_enqueued_num_sub_groups() returned incorrect "
113                       "value: %u\n",
114                       result[i].enqNumSubGroups);
115             return -1;
116         }
117         if (result[first].numSubGroups > 1)
118         {
119             if (result[i].subGroupId < result[first].numSubGroups - 1)
120             {
121                 if (result[i].subGroupSize != result[first].subGroupSize)
122                 {
123                     log_error(
124                         "ERROR: unexpected variation in get_*_sub_group_*()\n");
125                     return -1;
126                 }
127                 if (result[i].subGroupLocalId >= result[first].subGroupSize)
128                 {
129                     log_error("ERROR: get_sub_group_local_id() returned out of "
130                               "bounds value: %u \n",
131                               result[i].subGroupLocalId);
132                     return -1;
133                 }
134             }
135             else
136             {
137                 if (result[i].subGroupSize != result[last].subGroupSize)
138                 {
139                     log_error(
140                         "ERROR: unexpected variation in get_*_sub_group_*()\n");
141                     return -1;
142                 }
143                 if (result[i].subGroupLocalId >= result[last].subGroupSize)
144                 {
145                     log_error("ERROR: get_sub_group_local_id() returned out of "
146                               "bounds value: %u \n",
147                               result[i].subGroupLocalId);
148                     return -1;
149                 }
150             }
151         }
152         else
153         {
154             if (result[i].subGroupSize != result[first].subGroupSize)
155             {
156                 log_error(
157                     "ERROR: unexpected variation in get_*_sub_group_*()\n");
158                 return -1;
159             }
160             if (result[i].subGroupLocalId >= result[first].subGroupSize)
161             {
162                 log_error("ERROR: get_sub_group_local_id() returned out of "
163                           "bounds value: %u \n",
164                           result[i].subGroupLocalId);
165                 return -1;
166             }
167         }
168 
169         j = (result[first].subGroupSize + 31) / 32 * result[i].subGroupId
170             + (result[i].subGroupLocalId >> 5);
171         if (j < static_cast<int>(sizeof(hit) / 4))
172         {
173             cl_uint b = 1U << (result[i].subGroupLocalId & 0x1fU);
174             if ((hit[j] & b) != 0)
175             {
176                 log_error("ERROR: get_sub_group_local_id() repeated a result "
177                           "in the same sub group\n");
178                 return -1;
179             }
180             hit[j] |= b;
181         }
182     }
183 
184     return 0;
185 }
186 
test_work_item_functions(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements,bool useCoreSubgroups)187 int test_work_item_functions(cl_device_id device, cl_context context,
188                              cl_command_queue queue, int num_elements,
189                              bool useCoreSubgroups)
190 {
191     static const size_t lsize = 200;
192     int error;
193     int i, j, k, q, r, nw;
194     size_t maxwgs;
195     cl_uint ensg;
196     size_t global;
197     size_t local;
198     get_test_data result[lsize * 6];
199     clProgramWrapper program;
200     clKernelWrapper kernel;
201     clMemWrapper out;
202     std::stringstream kernel_sstr;
203     if (useCoreSubgroups)
204     {
205         kernel_sstr << "#pragma OPENCL EXTENSION cl_khr_subgroups : enable\n";
206     }
207     kernel_sstr
208         << "\n"
209            "\n"
210            "typedef struct {\n"
211            "    uint subGroupSize;\n"
212            "    uint maxSubGroupSize;\n"
213            "    uint numSubGroups;\n"
214            "    uint enqNumSubGroups;\n"
215            "    uint subGroupId;\n"
216            "    uint subGroupLocalId;\n"
217            "} get_test_data;\n"
218            "\n"
219            "__kernel void get_test( __global get_test_data *outData )\n"
220            "{\n"
221            "    int gid = get_global_id( 0 );\n"
222            "    outData[gid].subGroupSize = get_sub_group_size();\n"
223            "    outData[gid].maxSubGroupSize = get_max_sub_group_size();\n"
224            "    outData[gid].numSubGroups = get_num_sub_groups();\n"
225            "    outData[gid].enqNumSubGroups = get_enqueued_num_sub_groups();\n"
226            "    outData[gid].subGroupId = get_sub_group_id();\n"
227            "    outData[gid].subGroupLocalId = get_sub_group_local_id();\n"
228            "}";
229     const std::string &kernel_str = kernel_sstr.str();
230     const char *kernel_src = kernel_str.c_str();
231     error = create_single_kernel_helper(context, &program, &kernel, 1,
232                                         &kernel_src, "get_test");
233     if (error != 0) return error;
234 
235     error = get_max_allowed_work_group_size(context, kernel, &local, NULL);
236     if (error != 0) return error;
237 
238     maxwgs = local;
239 
240     // Limit it a bit so we have muliple work groups
241     // Ideally this will still be large enough to give us multiple subgroups
242     if (local > lsize) local = lsize;
243 
244     // Create our buffer
245     out = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(result), NULL,
246                          &error);
247     test_error(error, "clCreateBuffer failed");
248 
249     // Set argument
250     error = clSetKernelArg(kernel, 0, sizeof(out), &out);
251     test_error(error, "clSetKernelArg failed");
252 
253     global = local * 5;
254 
255     // Non-uniform work-groups are an optional feature from 3.0 onward.
256     cl_bool device_supports_non_uniform_wg = CL_TRUE;
257     if (get_device_cl_version(device) >= Version(3, 0))
258     {
259         error = clGetDeviceInfo(
260             device, CL_DEVICE_NON_UNIFORM_WORK_GROUP_SUPPORT, sizeof(cl_bool),
261             &device_supports_non_uniform_wg, nullptr);
262         test_error(error, "clGetDeviceInfo failed");
263     }
264 
265     if (device_supports_non_uniform_wg)
266     {
267         // Make sure we have a flexible range
268         global += 3 * local / 4;
269     }
270 
271     // Collect the data
272     memset((void *)&result, 0xf0, sizeof(result));
273 
274     error = clEnqueueWriteBuffer(queue, out, CL_FALSE, 0, sizeof(result),
275                                  (void *)&result, 0, NULL, NULL);
276     test_error(error, "clEnqueueWriteBuffer failed");
277 
278     error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0,
279                                    NULL, NULL);
280     test_error(error, "clEnqueueNDRangeKernel failed");
281 
282     error = clEnqueueReadBuffer(queue, out, CL_FALSE, 0, sizeof(result),
283                                 (void *)&result, 0, NULL, NULL);
284     test_error(error, "clEnqueueReadBuffer failed");
285 
286     error = clFinish(queue);
287     test_error(error, "clFinish failed");
288 
289     nw = (int)local;
290     ensg = result[0].enqNumSubGroups;
291 
292     // Check the first group
293     error = check_group(result, nw, ensg, maxwgs);
294     if (error) return error;
295 
296     q = (int)global / nw;
297     r = (int)global % nw;
298 
299     // Check the remaining work groups including the last if it is the same size
300     for (k = 1; k < q; ++k)
301     {
302         for (j = 0; j < nw; ++j)
303         {
304             i = k * nw + j;
305             if (!(result[i] == result[i - nw]))
306             {
307                 log_error("ERROR: sub group mapping is not identical for all "
308                           "work groups\n");
309                 return -1;
310             }
311         }
312     }
313 
314     // Check the last group if it wasn't the same size
315     if (r != 0)
316     {
317         error = check_group(result + q * nw, r, ensg, maxwgs);
318         if (error) return error;
319     }
320 
321     return 0;
322 }
323 
test_work_item_functions_core(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)324 int test_work_item_functions_core(cl_device_id device, cl_context context,
325                                   cl_command_queue queue, int num_elements)
326 {
327     return test_work_item_functions(device, context, queue, num_elements, true);
328 }
329 
test_work_item_functions_ext(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)330 int test_work_item_functions_ext(cl_device_id device, cl_context context,
331                                  cl_command_queue queue, int num_elements)
332 {
333     bool hasExtension = is_extension_available(device, "cl_khr_subgroups");
334 
335     if (!hasExtension)
336     {
337         log_info(
338             "Device does not support 'cl_khr_subgroups'. Skipping the test.\n");
339         return TEST_SKIPPED_ITSELF;
340     }
341 
342     return test_work_item_functions(device, context, queue, num_elements,
343                                     false);
344 }
345