1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16
17 #include <memory>
18
19 #include "testBase.h"
20 #include "harness/conversions.h"
21
22 // clang-format off
23 const char *atomic_index_source =
24 "#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n"
25 "// Counter keeps track of which index in counts we are using.\n"
26 "// We get that value, increment it, and then set that index in counts to our thread ID.\n"
27 "// At the end of this we should have all thread IDs in some random location in counts\n"
28 "// exactly once. If atom_add failed then we will write over various thread IDs and we\n"
29 "// will be missing some.\n"
30 "\n"
31 "__kernel void add_index_test(__global int *counter, __global int *counts) {\n"
32 " int tid = get_global_id(0);\n"
33 " \n"
34 " int counter_to_use = atom_add(counter, 1);\n"
35 " counts[counter_to_use] = tid;\n"
36 "}";
37 // clang-format on
38
test_atomic_add_index(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)39 int test_atomic_add_index(cl_device_id deviceID, cl_context context,
40 cl_command_queue queue, int num_elements)
41 {
42 clProgramWrapper program;
43 clKernelWrapper kernel;
44 clMemWrapper counter, counters;
45 size_t numGlobalThreads, numLocalThreads;
46 int fail = 0, err;
47
48 /* Check if atomics are supported. */
49 if (!is_extension_available(deviceID, "cl_khr_global_int32_base_atomics"))
50 {
51 log_info("Base atomics not supported "
52 "(cl_khr_global_int32_base_atomics). Skipping test.\n");
53 return 0;
54 }
55
56 //===== add_index test
57 // The index test replicates what particles does.
58 // It uses one memory location to keep track of the current index and then
59 // each thread does an atomic add to it to get its new location. The threads
60 // then write to their assigned location. At the end we check to make sure
61 // that each thread's ID shows up exactly once in the output.
62
63 numGlobalThreads = 2048;
64
65 if (create_single_kernel_helper(context, &program, &kernel, 1,
66 &atomic_index_source, "add_index_test"))
67 return -1;
68
69 if (get_max_common_work_group_size(context, kernel, numGlobalThreads,
70 &numLocalThreads))
71 return -1;
72
73 log_info("Execute global_threads:%d local_threads:%d\n",
74 (int)numGlobalThreads, (int)numLocalThreads);
75
76 // Create the counter that will keep track of where each thread writes.
77 counter = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int) * 1,
78 NULL, NULL);
79 // Create the counters that will hold the results of each thread writing
80 // its ID into a (hopefully) unique location.
81 counters = clCreateBuffer(context, CL_MEM_READ_WRITE,
82 sizeof(cl_int) * numGlobalThreads, NULL, NULL);
83
84 // Reset all those locations to -1 to indciate they have not been used.
85 cl_int *values = (cl_int *)malloc(sizeof(cl_int) * numGlobalThreads);
86 if (values == NULL)
87 {
88 log_error(
89 "add_index_test FAILED to allocate memory for initial values.\n");
90 fail = 1;
91 }
92 else
93 {
94 memset(values, -1, numLocalThreads);
95 unsigned int i = 0;
96 for (i = 0; i < numGlobalThreads; i++) values[i] = -1;
97 int init = 0;
98 err = clEnqueueWriteBuffer(queue, counters, true, 0,
99 numGlobalThreads * sizeof(cl_int), values, 0,
100 NULL, NULL);
101 err |= clEnqueueWriteBuffer(queue, counter, true, 0, 1 * sizeof(cl_int),
102 &init, 0, NULL, NULL);
103 if (err)
104 {
105 log_error(
106 "add_index_test FAILED to write initial values to arrays: %d\n",
107 err);
108 fail = 1;
109 }
110 else
111 {
112 err = clSetKernelArg(kernel, 0, sizeof(counter), &counter);
113 err |= clSetKernelArg(kernel, 1, sizeof(counters), &counters);
114 if (err)
115 {
116 log_error("add_index_test FAILED to set kernel arguments: %d\n",
117 err);
118 fail = 1;
119 }
120 else
121 {
122 err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL,
123 &numGlobalThreads,
124 &numLocalThreads, 0, NULL, NULL);
125 if (err)
126 {
127 log_error("add_index_test FAILED to execute kernel: %d\n",
128 err);
129 fail = 1;
130 }
131 else
132 {
133 err = clEnqueueReadBuffer(queue, counters, true, 0,
134 sizeof(cl_int) * numGlobalThreads,
135 values, 0, NULL, NULL);
136 if (err)
137 {
138 log_error(
139 "add_index_test FAILED to read back results: %d\n",
140 err);
141 fail = 1;
142 }
143 else
144 {
145 unsigned int looking_for, index;
146 for (looking_for = 0; looking_for < numGlobalThreads;
147 looking_for++)
148 {
149 int instances_found = 0;
150 for (index = 0; index < numGlobalThreads; index++)
151 {
152 if (values[index] == (int)looking_for)
153 instances_found++;
154 }
155 if (instances_found != 1)
156 {
157 log_error(
158 "add_index_test FAILED: wrong number of "
159 "instances (%d!=1) for counter %d.\n",
160 instances_found, looking_for);
161 fail = 1;
162 }
163 }
164 }
165 }
166 }
167 }
168 if (!fail)
169 {
170 log_info(
171 "add_index_test passed. Each thread used exactly one index.\n");
172 }
173 free(values);
174 }
175 return fail;
176 }
177
178 // clang-format off
179 const char *add_index_bin_kernel[] = {
180 "#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n"
181 "// This test assigns a bunch of values to bins and then tries to put them in the bins in parallel\n"
182 "// using an atomic add to keep track of the current location to write into in each bin.\n"
183 "// This is the same as the memory update for the particles demo.\n"
184 "\n"
185 "__kernel void add_index_bin_test(__global int *bin_counters, __global int *bins, __global int *bin_assignments, int max_counts_per_bin) {\n"
186 " int tid = get_global_id(0);\n"
187 "\n"
188 " int location = bin_assignments[tid];\n"
189 " int counter = atom_add(&bin_counters[location], 1);\n"
190 " bins[location*max_counts_per_bin + counter] = tid;\n"
191 "}" };
192 // clang-format on
193
194 // This test assigns a bunch of values to bins and then tries to put them in the
195 // bins in parallel using an atomic add to keep track of the current location to
196 // write into in each bin. This is the same as the memory update for the
197 // particles demo.
add_index_bin_test(size_t * global_threads,cl_command_queue queue,cl_context context,MTdata d)198 int add_index_bin_test(size_t *global_threads, cl_command_queue queue,
199 cl_context context, MTdata d)
200 {
201 int number_of_items = (int)global_threads[0];
202 size_t local_threads[1];
203 int divisor = 12;
204 int number_of_bins = number_of_items / divisor;
205 int max_counts_per_bin = divisor * 2;
206
207 int err;
208
209 clProgramWrapper program;
210 clKernelWrapper kernel;
211
212 // log_info("add_index_bin_test: %d items, into %d bins, with a max of %d
213 // items per bin (bins is %d long).\n",
214 // number_of_items, number_of_bins, max_counts_per_bin,
215 // number_of_bins*max_counts_per_bin);
216
217 //===== add_index_bin test
218 // The index test replicates what particles does.
219 err =
220 create_single_kernel_helper(context, &program, &kernel, 1,
221 add_index_bin_kernel, "add_index_bin_test");
222 test_error(err, "Unable to create testing kernel");
223
224 if (get_max_common_work_group_size(context, kernel, global_threads[0],
225 &local_threads[0]))
226 return -1;
227
228 log_info("Execute global_threads:%d local_threads:%d\n",
229 (int)global_threads[0], (int)local_threads[0]);
230
231 // Allocate our storage
232 clMemWrapper bin_counters =
233 clCreateBuffer(context, CL_MEM_READ_WRITE,
234 sizeof(cl_int) * number_of_bins, NULL, NULL);
235 clMemWrapper bins = clCreateBuffer(
236 context, CL_MEM_READ_WRITE,
237 sizeof(cl_int) * number_of_bins * max_counts_per_bin, NULL, NULL);
238 clMemWrapper bin_assignments =
239 clCreateBuffer(context, CL_MEM_READ_ONLY,
240 sizeof(cl_int) * number_of_items, NULL, NULL);
241
242 if (bin_counters == NULL)
243 {
244 log_error("add_index_bin_test FAILED to allocate bin_counters.\n");
245 return -1;
246 }
247 if (bins == NULL)
248 {
249 log_error("add_index_bin_test FAILED to allocate bins.\n");
250 return -1;
251 }
252 if (bin_assignments == NULL)
253 {
254 log_error("add_index_bin_test FAILED to allocate bin_assignments.\n");
255 return -1;
256 }
257
258 // Initialize our storage
259 std::unique_ptr<cl_int[]> l_bin_counts(new cl_int[number_of_bins]);
260 if (!l_bin_counts)
261 {
262 log_error("add_index_bin_test FAILED to allocate initial values for "
263 "bin_counters.\n");
264 return -1;
265 }
266 int i;
267 for (i = 0; i < number_of_bins; i++) l_bin_counts[i] = 0;
268 err = clEnqueueWriteBuffer(queue, bin_counters, true, 0,
269 sizeof(cl_int) * number_of_bins,
270 l_bin_counts.get(), 0, NULL, NULL);
271 if (err)
272 {
273 log_error("add_index_bin_test FAILED to set initial values for "
274 "bin_counters: %d\n",
275 err);
276 return -1;
277 }
278
279 std::unique_ptr<cl_int[]> values(
280 new cl_int[number_of_bins * max_counts_per_bin]);
281 if (!values)
282 {
283 log_error(
284 "add_index_bin_test FAILED to allocate initial values for bins.\n");
285 return -1;
286 }
287 for (i = 0; i < number_of_bins * max_counts_per_bin; i++) values[i] = -1;
288 err = clEnqueueWriteBuffer(queue, bins, true, 0,
289 sizeof(cl_int) * number_of_bins
290 * max_counts_per_bin,
291 values.get(), 0, NULL, NULL);
292 if (err)
293 {
294 log_error(
295 "add_index_bin_test FAILED to set initial values for bins: %d\n",
296 err);
297 return -1;
298 }
299
300 std::unique_ptr<cl_int[]> l_bin_assignments(new cl_int[number_of_items]);
301 if (!l_bin_assignments)
302 {
303 log_error("add_index_bin_test FAILED to allocate initial values for "
304 "l_bin_assignments.\n");
305 return -1;
306 }
307 for (i = 0; i < number_of_items; i++)
308 {
309 int bin = random_in_range(0, number_of_bins - 1, d);
310 while (l_bin_counts[bin] >= max_counts_per_bin)
311 {
312 bin = random_in_range(0, number_of_bins - 1, d);
313 }
314 if (bin >= number_of_bins)
315 log_error("add_index_bin_test internal error generating bin "
316 "assignments: bin %d >= number_of_bins %d.\n",
317 bin, number_of_bins);
318 if (l_bin_counts[bin] + 1 > max_counts_per_bin)
319 log_error(
320 "add_index_bin_test internal error generating bin assignments: "
321 "bin %d has more entries (%d) than max_counts_per_bin (%d).\n",
322 bin, l_bin_counts[bin], max_counts_per_bin);
323 l_bin_counts[bin]++;
324 l_bin_assignments[i] = bin;
325 // log_info("item %d assigned to bin %d (%d items)\n", i, bin,
326 // l_bin_counts[bin]);
327 }
328 err = clEnqueueWriteBuffer(queue, bin_assignments, true, 0,
329 sizeof(cl_int) * number_of_items,
330 l_bin_assignments.get(), 0, NULL, NULL);
331 if (err)
332 {
333 log_error("add_index_bin_test FAILED to set initial values for "
334 "bin_assignments: %d\n",
335 err);
336 return -1;
337 }
338 // Setup the kernel
339 err = clSetKernelArg(kernel, 0, sizeof(bin_counters), &bin_counters);
340 err |= clSetKernelArg(kernel, 1, sizeof(bins), &bins);
341 err |= clSetKernelArg(kernel, 2, sizeof(bin_assignments), &bin_assignments);
342 err |= clSetKernelArg(kernel, 3, sizeof(max_counts_per_bin),
343 &max_counts_per_bin);
344 if (err)
345 {
346 log_error("add_index_bin_test FAILED to set kernel arguments: %d\n",
347 err);
348 return -1;
349 }
350
351 err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_threads,
352 local_threads, 0, NULL, NULL);
353 if (err)
354 {
355 log_error("add_index_bin_test FAILED to execute kernel: %d\n", err);
356 return -1;
357 }
358
359 std::unique_ptr<cl_int[]> final_bin_assignments(
360 new cl_int[number_of_bins * max_counts_per_bin]);
361 if (!final_bin_assignments)
362 {
363 log_error("add_index_bin_test FAILED to allocate initial values for "
364 "final_bin_assignments.\n");
365 return -1;
366 }
367 err = clEnqueueReadBuffer(queue, bins, true, 0,
368 sizeof(cl_int) * number_of_bins
369 * max_counts_per_bin,
370 final_bin_assignments.get(), 0, NULL, NULL);
371 if (err)
372 {
373 log_error("add_index_bin_test FAILED to read back bins: %d\n", err);
374 return -1;
375 }
376
377 std::unique_ptr<cl_int[]> final_bin_counts(new cl_int[number_of_bins]);
378 if (!final_bin_counts)
379 {
380 log_error("add_index_bin_test FAILED to allocate initial values for "
381 "final_bin_counts.\n");
382 return -1;
383 }
384 err = clEnqueueReadBuffer(queue, bin_counters, true, 0,
385 sizeof(cl_int) * number_of_bins,
386 final_bin_counts.get(), 0, NULL, NULL);
387 if (err)
388 {
389 log_error("add_index_bin_test FAILED to read back bin_counters: %d\n",
390 err);
391 return -1;
392 }
393
394 // Verification.
395 int errors = 0;
396 int current_bin;
397 int search;
398 // Print out all the contents of the bins.
399 // for (current_bin=0; current_bin<number_of_bins; current_bin++)
400 // for (search=0; search<max_counts_per_bin; search++)
401 // log_info("[bin %d, entry %d] = %d\n", current_bin, search,
402 // final_bin_assignments[current_bin*max_counts_per_bin+search]);
403
404 // First verify that there are the correct number in each bin.
405 for (current_bin = 0; current_bin < number_of_bins; current_bin++)
406 {
407 int expected_number = l_bin_counts[current_bin];
408 int actual_number = final_bin_counts[current_bin];
409 if (expected_number != actual_number)
410 {
411 log_error("add_index_bin_test FAILED: bin %d reported %d entries "
412 "when %d were expected.\n",
413 current_bin, actual_number, expected_number);
414 errors++;
415 }
416 for (search = 0; search < expected_number; search++)
417 {
418 if (final_bin_assignments[current_bin * max_counts_per_bin + search]
419 == -1)
420 {
421 log_error("add_index_bin_test FAILED: bin %d had no entry at "
422 "position %d when it should have had %d entries.\n",
423 current_bin, search, expected_number);
424 errors++;
425 }
426 }
427 for (search = expected_number; search < max_counts_per_bin; search++)
428 {
429 if (final_bin_assignments[current_bin * max_counts_per_bin + search]
430 != -1)
431 {
432 log_error(
433 "add_index_bin_test FAILED: bin %d had an extra entry at "
434 "position %d when it should have had only %d entries.\n",
435 current_bin, search, expected_number);
436 errors++;
437 }
438 }
439 }
440 // Now verify that the correct ones are in each bin
441 int index;
442 for (index = 0; index < number_of_items; index++)
443 {
444 int expected_bin = l_bin_assignments[index];
445 int found_it = 0;
446 for (search = 0; search < l_bin_counts[expected_bin]; search++)
447 {
448 if (final_bin_assignments[expected_bin * max_counts_per_bin
449 + search]
450 == index)
451 {
452 found_it = 1;
453 }
454 }
455 if (found_it == 0)
456 {
457 log_error(
458 "add_index_bin_test FAILED: did not find item %d in bin %d.\n",
459 index, expected_bin);
460 errors++;
461 }
462 }
463
464 if (errors == 0)
465 {
466 log_info("add_index_bin_test passed. Each item was put in the correct "
467 "bin in parallel.\n");
468 return 0;
469 }
470 else
471 {
472 log_error("add_index_bin_test FAILED: %d errors.\n", errors);
473 return -1;
474 }
475 }
476
test_atomic_add_index_bin(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)477 int test_atomic_add_index_bin(cl_device_id deviceID, cl_context context,
478 cl_command_queue queue, int num_elements)
479 {
480 //===== add_index_bin test
481 size_t numGlobalThreads = 2048;
482 int iteration = 0;
483 int err, failed = 0;
484 MTdata d = init_genrand(gRandomSeed);
485
486 /* Check if atomics are supported. */
487 if (!is_extension_available(deviceID, "cl_khr_global_int32_base_atomics"))
488 {
489 log_info("Base atomics not supported "
490 "(cl_khr_global_int32_base_atomics). Skipping test.\n");
491 free_mtdata(d);
492 return 0;
493 }
494
495 for (iteration = 0; iteration < 10; iteration++)
496 {
497 log_info("add_index_bin_test with %d elements:\n",
498 (int)numGlobalThreads);
499 err = add_index_bin_test(&numGlobalThreads, queue, context, d);
500 if (err)
501 {
502 failed++;
503 break;
504 }
505 numGlobalThreads *= 2;
506 }
507 free_mtdata(d);
508 return failed;
509 }
510