1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "procs.h"
17 #include "harness/conversions.h"
18 #include "harness/typeWrappers.h"
19 #include <CL/cl.h>
20
21 struct get_test_data
22 {
23 cl_uint subGroupSize;
24 cl_uint maxSubGroupSize;
25 cl_uint numSubGroups;
26 cl_uint enqNumSubGroups;
27 cl_uint subGroupId;
28 cl_uint subGroupLocalId;
operator ==get_test_data29 bool operator==(get_test_data x)
30 {
31 return subGroupSize == x.subGroupSize
32 && maxSubGroupSize == x.maxSubGroupSize
33 && numSubGroups == x.numSubGroups && subGroupId == x.subGroupId
34 && subGroupLocalId == x.subGroupLocalId;
35 }
36 };
37
check_group(const get_test_data * result,int nw,cl_uint ensg,size_t maxwgs)38 static int check_group(const get_test_data *result, int nw, cl_uint ensg,
39 size_t maxwgs)
40 {
41 int first = -1;
42 int last = -1;
43 int i, j;
44 cl_uint hit[32];
45
46 for (i = 0; i < nw; ++i)
47 {
48 if (result[i].subGroupId == 0 && result[i].subGroupLocalId == 0)
49 first = i;
50 if (result[i].subGroupId == result[0].numSubGroups - 1
51 && result[i].subGroupLocalId == 0)
52 last = i;
53 if (first != -1 && last != -1) break;
54 }
55
56 if (first == -1 || last == -1)
57 {
58 log_error("ERROR: expected sub group id's are missing\n");
59 return -1;
60 }
61
62 // Check them
63 if (result[first].subGroupSize == 0)
64 {
65 log_error("ERROR: get_sub_group_size() returned 0\n");
66 return -1;
67 }
68 if (result[first].maxSubGroupSize == 0
69 || result[first].maxSubGroupSize > maxwgs)
70 {
71 log_error(
72 "ERROR: get_max_subgroup_size() returned incorrect result: %u\n",
73 result[first].maxSubGroupSize);
74 return -1;
75 }
76 if (result[first].subGroupSize > result[first].maxSubGroupSize)
77 {
78 log_error("ERROR: get_sub_group_size() > get_max_sub_group_size()\n");
79 return -1;
80 }
81 if (result[last].subGroupSize > result[first].subGroupSize)
82 {
83 log_error("ERROR: last sub group larger than first sub group\n");
84 return -1;
85 }
86 if (result[first].numSubGroups == 0 || result[first].numSubGroups > ensg)
87 {
88 log_error(
89 "ERROR: get_num_sub_groups() returned incorrect result: %u \n",
90 result[first].numSubGroups);
91 return -1;
92 }
93
94 memset(hit, 0, sizeof(hit));
95 for (i = 0; i < nw; ++i)
96 {
97 if (result[i].maxSubGroupSize != result[first].maxSubGroupSize
98 || result[i].numSubGroups != result[first].numSubGroups)
99 {
100 log_error("ERROR: unexpected variation in get_*_sub_group_*()\n");
101 return -1;
102 }
103 if (result[i].subGroupId >= result[first].numSubGroups)
104 {
105 log_error(
106 "ERROR: get_sub_group_id() returned out of range value: %u\n",
107 result[i].subGroupId);
108 return -1;
109 }
110 if (result[i].enqNumSubGroups != ensg)
111 {
112 log_error("ERROR: get_enqueued_num_sub_groups() returned incorrect "
113 "value: %u\n",
114 result[i].enqNumSubGroups);
115 return -1;
116 }
117 if (result[first].numSubGroups > 1)
118 {
119 if (result[i].subGroupId < result[first].numSubGroups - 1)
120 {
121 if (result[i].subGroupSize != result[first].subGroupSize)
122 {
123 log_error(
124 "ERROR: unexpected variation in get_*_sub_group_*()\n");
125 return -1;
126 }
127 if (result[i].subGroupLocalId >= result[first].subGroupSize)
128 {
129 log_error("ERROR: get_sub_group_local_id() returned out of "
130 "bounds value: %u \n",
131 result[i].subGroupLocalId);
132 return -1;
133 }
134 }
135 else
136 {
137 if (result[i].subGroupSize != result[last].subGroupSize)
138 {
139 log_error(
140 "ERROR: unexpected variation in get_*_sub_group_*()\n");
141 return -1;
142 }
143 if (result[i].subGroupLocalId >= result[last].subGroupSize)
144 {
145 log_error("ERROR: get_sub_group_local_id() returned out of "
146 "bounds value: %u \n",
147 result[i].subGroupLocalId);
148 return -1;
149 }
150 }
151 }
152 else
153 {
154 if (result[i].subGroupSize != result[first].subGroupSize)
155 {
156 log_error(
157 "ERROR: unexpected variation in get_*_sub_group_*()\n");
158 return -1;
159 }
160 if (result[i].subGroupLocalId >= result[first].subGroupSize)
161 {
162 log_error("ERROR: get_sub_group_local_id() returned out of "
163 "bounds value: %u \n",
164 result[i].subGroupLocalId);
165 return -1;
166 }
167 }
168
169 j = (result[first].subGroupSize + 31) / 32 * result[i].subGroupId
170 + (result[i].subGroupLocalId >> 5);
171 if (j < static_cast<int>(sizeof(hit) / 4))
172 {
173 cl_uint b = 1U << (result[i].subGroupLocalId & 0x1fU);
174 if ((hit[j] & b) != 0)
175 {
176 log_error("ERROR: get_sub_group_local_id() repeated a result "
177 "in the same sub group\n");
178 return -1;
179 }
180 hit[j] |= b;
181 }
182 }
183
184 return 0;
185 }
186
test_work_item_functions(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements,bool useCoreSubgroups)187 int test_work_item_functions(cl_device_id device, cl_context context,
188 cl_command_queue queue, int num_elements,
189 bool useCoreSubgroups)
190 {
191 static const size_t lsize = 200;
192 int error;
193 int i, j, k, q, r, nw;
194 size_t maxwgs;
195 cl_uint ensg;
196 size_t global;
197 size_t local;
198 get_test_data result[lsize * 6];
199 clProgramWrapper program;
200 clKernelWrapper kernel;
201 clMemWrapper out;
202 std::stringstream kernel_sstr;
203 if (useCoreSubgroups)
204 {
205 kernel_sstr << "#pragma OPENCL EXTENSION cl_khr_subgroups : enable\n";
206 }
207 kernel_sstr
208 << "\n"
209 "\n"
210 "typedef struct {\n"
211 " uint subGroupSize;\n"
212 " uint maxSubGroupSize;\n"
213 " uint numSubGroups;\n"
214 " uint enqNumSubGroups;\n"
215 " uint subGroupId;\n"
216 " uint subGroupLocalId;\n"
217 "} get_test_data;\n"
218 "\n"
219 "__kernel void get_test( __global get_test_data *outData )\n"
220 "{\n"
221 " int gid = get_global_id( 0 );\n"
222 " outData[gid].subGroupSize = get_sub_group_size();\n"
223 " outData[gid].maxSubGroupSize = get_max_sub_group_size();\n"
224 " outData[gid].numSubGroups = get_num_sub_groups();\n"
225 " outData[gid].enqNumSubGroups = get_enqueued_num_sub_groups();\n"
226 " outData[gid].subGroupId = get_sub_group_id();\n"
227 " outData[gid].subGroupLocalId = get_sub_group_local_id();\n"
228 "}";
229 const std::string &kernel_str = kernel_sstr.str();
230 const char *kernel_src = kernel_str.c_str();
231 error = create_single_kernel_helper(context, &program, &kernel, 1,
232 &kernel_src, "get_test");
233 if (error != 0) return error;
234
235 error = get_max_allowed_work_group_size(context, kernel, &local, NULL);
236 if (error != 0) return error;
237
238 maxwgs = local;
239
240 // Limit it a bit so we have muliple work groups
241 // Ideally this will still be large enough to give us multiple subgroups
242 if (local > lsize) local = lsize;
243
244 // Create our buffer
245 out = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(result), NULL,
246 &error);
247 test_error(error, "clCreateBuffer failed");
248
249 // Set argument
250 error = clSetKernelArg(kernel, 0, sizeof(out), &out);
251 test_error(error, "clSetKernelArg failed");
252
253 global = local * 5;
254
255 // Non-uniform work-groups are an optional feature from 3.0 onward.
256 cl_bool device_supports_non_uniform_wg = CL_TRUE;
257 if (get_device_cl_version(device) >= Version(3, 0))
258 {
259 error = clGetDeviceInfo(
260 device, CL_DEVICE_NON_UNIFORM_WORK_GROUP_SUPPORT, sizeof(cl_bool),
261 &device_supports_non_uniform_wg, nullptr);
262 test_error(error, "clGetDeviceInfo failed");
263 }
264
265 if (device_supports_non_uniform_wg)
266 {
267 // Make sure we have a flexible range
268 global += 3 * local / 4;
269 }
270
271 // Collect the data
272 memset((void *)&result, 0xf0, sizeof(result));
273
274 error = clEnqueueWriteBuffer(queue, out, CL_FALSE, 0, sizeof(result),
275 (void *)&result, 0, NULL, NULL);
276 test_error(error, "clEnqueueWriteBuffer failed");
277
278 error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0,
279 NULL, NULL);
280 test_error(error, "clEnqueueNDRangeKernel failed");
281
282 error = clEnqueueReadBuffer(queue, out, CL_FALSE, 0, sizeof(result),
283 (void *)&result, 0, NULL, NULL);
284 test_error(error, "clEnqueueReadBuffer failed");
285
286 error = clFinish(queue);
287 test_error(error, "clFinish failed");
288
289 nw = (int)local;
290 ensg = result[0].enqNumSubGroups;
291
292 // Check the first group
293 error = check_group(result, nw, ensg, maxwgs);
294 if (error) return error;
295
296 q = (int)global / nw;
297 r = (int)global % nw;
298
299 // Check the remaining work groups including the last if it is the same size
300 for (k = 1; k < q; ++k)
301 {
302 for (j = 0; j < nw; ++j)
303 {
304 i = k * nw + j;
305 if (!(result[i] == result[i - nw]))
306 {
307 log_error("ERROR: sub group mapping is not identical for all "
308 "work groups\n");
309 return -1;
310 }
311 }
312 }
313
314 // Check the last group if it wasn't the same size
315 if (r != 0)
316 {
317 error = check_group(result + q * nw, r, ensg, maxwgs);
318 if (error) return error;
319 }
320
321 return 0;
322 }
323
test_work_item_functions_core(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)324 int test_work_item_functions_core(cl_device_id device, cl_context context,
325 cl_command_queue queue, int num_elements)
326 {
327 return test_work_item_functions(device, context, queue, num_elements, true);
328 }
329
test_work_item_functions_ext(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)330 int test_work_item_functions_ext(cl_device_id device, cl_context context,
331 cl_command_queue queue, int num_elements)
332 {
333 bool hasExtension = is_extension_available(device, "cl_khr_subgroups");
334
335 if (!hasExtension)
336 {
337 log_info(
338 "Device does not support 'cl_khr_subgroups'. Skipping the test.\n");
339 return TEST_SKIPPED_ITSELF;
340 }
341
342 return test_work_item_functions(device, context, queue, num_elements,
343 false);
344 }
345