xref: /aosp_15_r20/external/OpenCL-CTS/test_conformance/atomics/test_indexed_cases.cpp (revision 6467f958c7de8070b317fc65bcb0f6472e388d82)
1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 
17 #include <memory>
18 
19 #include "testBase.h"
20 #include "harness/conversions.h"
21 
22 // clang-format off
23 const char *atomic_index_source =
24     "#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n"
25     "// Counter keeps track of which index in counts we are using.\n"
26     "// We get that value, increment it, and then set that index in counts to our thread ID.\n"
27     "// At the end of this we should have all thread IDs in some random location in counts\n"
28     "// exactly once. If atom_add failed then we will write over various thread IDs and we\n"
29     "// will be missing some.\n"
30     "\n"
31     "__kernel void add_index_test(__global int *counter, __global int *counts) {\n"
32     "    int tid = get_global_id(0);\n"
33     "    \n"
34     "    int counter_to_use = atom_add(counter, 1);\n"
35     "    counts[counter_to_use] = tid;\n"
36     "}";
37 // clang-format on
38 
test_atomic_add_index(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)39 int test_atomic_add_index(cl_device_id deviceID, cl_context context,
40                           cl_command_queue queue, int num_elements)
41 {
42     clProgramWrapper program;
43     clKernelWrapper kernel;
44     clMemWrapper counter, counters;
45     size_t numGlobalThreads, numLocalThreads;
46     int fail = 0, err;
47 
48     /* Check if atomics are supported. */
49     if (!is_extension_available(deviceID, "cl_khr_global_int32_base_atomics"))
50     {
51         log_info("Base atomics not supported "
52                  "(cl_khr_global_int32_base_atomics). Skipping test.\n");
53         return 0;
54     }
55 
56     //===== add_index test
57     // The index test replicates what particles does.
58     // It uses one memory location to keep track of the current index and then
59     // each thread does an atomic add to it to get its new location. The threads
60     // then write to their assigned location. At the end we check to make sure
61     // that each thread's ID shows up exactly once in the output.
62 
63     numGlobalThreads = 2048;
64 
65     if (create_single_kernel_helper(context, &program, &kernel, 1,
66                                     &atomic_index_source, "add_index_test"))
67         return -1;
68 
69     if (get_max_common_work_group_size(context, kernel, numGlobalThreads,
70                                        &numLocalThreads))
71         return -1;
72 
73     log_info("Execute global_threads:%d local_threads:%d\n",
74              (int)numGlobalThreads, (int)numLocalThreads);
75 
76     // Create the counter that will keep track of where each thread writes.
77     counter = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int) * 1,
78                              NULL, NULL);
79     // Create the counters that will hold the results of each thread writing
80     // its ID into a (hopefully) unique location.
81     counters = clCreateBuffer(context, CL_MEM_READ_WRITE,
82                               sizeof(cl_int) * numGlobalThreads, NULL, NULL);
83 
84     // Reset all those locations to -1 to indciate they have not been used.
85     cl_int *values = (cl_int *)malloc(sizeof(cl_int) * numGlobalThreads);
86     if (values == NULL)
87     {
88         log_error(
89             "add_index_test FAILED to allocate memory for initial values.\n");
90         fail = 1;
91     }
92     else
93     {
94         memset(values, -1, numLocalThreads);
95         unsigned int i = 0;
96         for (i = 0; i < numGlobalThreads; i++) values[i] = -1;
97         int init = 0;
98         err = clEnqueueWriteBuffer(queue, counters, true, 0,
99                                    numGlobalThreads * sizeof(cl_int), values, 0,
100                                    NULL, NULL);
101         err |= clEnqueueWriteBuffer(queue, counter, true, 0, 1 * sizeof(cl_int),
102                                     &init, 0, NULL, NULL);
103         if (err)
104         {
105             log_error(
106                 "add_index_test FAILED to write initial values to arrays: %d\n",
107                 err);
108             fail = 1;
109         }
110         else
111         {
112             err = clSetKernelArg(kernel, 0, sizeof(counter), &counter);
113             err |= clSetKernelArg(kernel, 1, sizeof(counters), &counters);
114             if (err)
115             {
116                 log_error("add_index_test FAILED to set kernel arguments: %d\n",
117                           err);
118                 fail = 1;
119             }
120             else
121             {
122                 err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL,
123                                              &numGlobalThreads,
124                                              &numLocalThreads, 0, NULL, NULL);
125                 if (err)
126                 {
127                     log_error("add_index_test FAILED to execute kernel: %d\n",
128                               err);
129                     fail = 1;
130                 }
131                 else
132                 {
133                     err = clEnqueueReadBuffer(queue, counters, true, 0,
134                                               sizeof(cl_int) * numGlobalThreads,
135                                               values, 0, NULL, NULL);
136                     if (err)
137                     {
138                         log_error(
139                             "add_index_test FAILED to read back results: %d\n",
140                             err);
141                         fail = 1;
142                     }
143                     else
144                     {
145                         unsigned int looking_for, index;
146                         for (looking_for = 0; looking_for < numGlobalThreads;
147                              looking_for++)
148                         {
149                             int instances_found = 0;
150                             for (index = 0; index < numGlobalThreads; index++)
151                             {
152                                 if (values[index] == (int)looking_for)
153                                     instances_found++;
154                             }
155                             if (instances_found != 1)
156                             {
157                                 log_error(
158                                     "add_index_test FAILED: wrong number of "
159                                     "instances (%d!=1) for counter %d.\n",
160                                     instances_found, looking_for);
161                                 fail = 1;
162                             }
163                         }
164                     }
165                 }
166             }
167         }
168         if (!fail)
169         {
170             log_info(
171                 "add_index_test passed. Each thread used exactly one index.\n");
172         }
173         free(values);
174     }
175     return fail;
176 }
177 
178 // clang-format off
179 const char *add_index_bin_kernel[] = {
180     "#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n"
181     "// This test assigns a bunch of values to bins and then tries to put them in the bins in parallel\n"
182     "// using an atomic add to keep track of the current location to write into in each bin.\n"
183     "// This is the same as the memory update for the particles demo.\n"
184     "\n"
185     "__kernel void add_index_bin_test(__global int *bin_counters, __global int *bins, __global int *bin_assignments, int max_counts_per_bin) {\n"
186     "    int tid = get_global_id(0);\n"
187     "\n"
188     "    int location = bin_assignments[tid];\n"
189     "    int counter = atom_add(&bin_counters[location], 1);\n"
190     "    bins[location*max_counts_per_bin + counter] = tid;\n"
191     "}" };
192 // clang-format on
193 
194 // This test assigns a bunch of values to bins and then tries to put them in the
195 // bins in parallel using an atomic add to keep track of the current location to
196 // write into in each bin. This is the same as the memory update for the
197 // particles demo.
add_index_bin_test(size_t * global_threads,cl_command_queue queue,cl_context context,MTdata d)198 int add_index_bin_test(size_t *global_threads, cl_command_queue queue,
199                        cl_context context, MTdata d)
200 {
201     int number_of_items = (int)global_threads[0];
202     size_t local_threads[1];
203     int divisor = 12;
204     int number_of_bins = number_of_items / divisor;
205     int max_counts_per_bin = divisor * 2;
206 
207     int err;
208 
209     clProgramWrapper program;
210     clKernelWrapper kernel;
211 
212     //  log_info("add_index_bin_test: %d items, into %d bins, with a max of %d
213     //  items per bin (bins is %d long).\n",
214     //           number_of_items, number_of_bins, max_counts_per_bin,
215     //           number_of_bins*max_counts_per_bin);
216 
217     //===== add_index_bin test
218     // The index test replicates what particles does.
219     err =
220         create_single_kernel_helper(context, &program, &kernel, 1,
221                                     add_index_bin_kernel, "add_index_bin_test");
222     test_error(err, "Unable to create testing kernel");
223 
224     if (get_max_common_work_group_size(context, kernel, global_threads[0],
225                                        &local_threads[0]))
226         return -1;
227 
228     log_info("Execute global_threads:%d local_threads:%d\n",
229              (int)global_threads[0], (int)local_threads[0]);
230 
231     // Allocate our storage
232     clMemWrapper bin_counters =
233         clCreateBuffer(context, CL_MEM_READ_WRITE,
234                        sizeof(cl_int) * number_of_bins, NULL, NULL);
235     clMemWrapper bins = clCreateBuffer(
236         context, CL_MEM_READ_WRITE,
237         sizeof(cl_int) * number_of_bins * max_counts_per_bin, NULL, NULL);
238     clMemWrapper bin_assignments =
239         clCreateBuffer(context, CL_MEM_READ_ONLY,
240                        sizeof(cl_int) * number_of_items, NULL, NULL);
241 
242     if (bin_counters == NULL)
243     {
244         log_error("add_index_bin_test FAILED to allocate bin_counters.\n");
245         return -1;
246     }
247     if (bins == NULL)
248     {
249         log_error("add_index_bin_test FAILED to allocate bins.\n");
250         return -1;
251     }
252     if (bin_assignments == NULL)
253     {
254         log_error("add_index_bin_test FAILED to allocate bin_assignments.\n");
255         return -1;
256     }
257 
258     // Initialize our storage
259     std::unique_ptr<cl_int[]> l_bin_counts(new cl_int[number_of_bins]);
260     if (!l_bin_counts)
261     {
262         log_error("add_index_bin_test FAILED to allocate initial values for "
263                   "bin_counters.\n");
264         return -1;
265     }
266     int i;
267     for (i = 0; i < number_of_bins; i++) l_bin_counts[i] = 0;
268     err = clEnqueueWriteBuffer(queue, bin_counters, true, 0,
269                                sizeof(cl_int) * number_of_bins,
270                                l_bin_counts.get(), 0, NULL, NULL);
271     if (err)
272     {
273         log_error("add_index_bin_test FAILED to set initial values for "
274                   "bin_counters: %d\n",
275                   err);
276         return -1;
277     }
278 
279     std::unique_ptr<cl_int[]> values(
280         new cl_int[number_of_bins * max_counts_per_bin]);
281     if (!values)
282     {
283         log_error(
284             "add_index_bin_test FAILED to allocate initial values for bins.\n");
285         return -1;
286     }
287     for (i = 0; i < number_of_bins * max_counts_per_bin; i++) values[i] = -1;
288     err = clEnqueueWriteBuffer(queue, bins, true, 0,
289                                sizeof(cl_int) * number_of_bins
290                                    * max_counts_per_bin,
291                                values.get(), 0, NULL, NULL);
292     if (err)
293     {
294         log_error(
295             "add_index_bin_test FAILED to set initial values for bins: %d\n",
296             err);
297         return -1;
298     }
299 
300     std::unique_ptr<cl_int[]> l_bin_assignments(new cl_int[number_of_items]);
301     if (!l_bin_assignments)
302     {
303         log_error("add_index_bin_test FAILED to allocate initial values for "
304                   "l_bin_assignments.\n");
305         return -1;
306     }
307     for (i = 0; i < number_of_items; i++)
308     {
309         int bin = random_in_range(0, number_of_bins - 1, d);
310         while (l_bin_counts[bin] >= max_counts_per_bin)
311         {
312             bin = random_in_range(0, number_of_bins - 1, d);
313         }
314         if (bin >= number_of_bins)
315             log_error("add_index_bin_test internal error generating bin "
316                       "assignments: bin %d >= number_of_bins %d.\n",
317                       bin, number_of_bins);
318         if (l_bin_counts[bin] + 1 > max_counts_per_bin)
319             log_error(
320                 "add_index_bin_test internal error generating bin assignments: "
321                 "bin %d has more entries (%d) than max_counts_per_bin (%d).\n",
322                 bin, l_bin_counts[bin], max_counts_per_bin);
323         l_bin_counts[bin]++;
324         l_bin_assignments[i] = bin;
325         //     log_info("item %d assigned to bin %d (%d items)\n", i, bin,
326         //     l_bin_counts[bin]);
327     }
328     err = clEnqueueWriteBuffer(queue, bin_assignments, true, 0,
329                                sizeof(cl_int) * number_of_items,
330                                l_bin_assignments.get(), 0, NULL, NULL);
331     if (err)
332     {
333         log_error("add_index_bin_test FAILED to set initial values for "
334                   "bin_assignments: %d\n",
335                   err);
336         return -1;
337     }
338     // Setup the kernel
339     err = clSetKernelArg(kernel, 0, sizeof(bin_counters), &bin_counters);
340     err |= clSetKernelArg(kernel, 1, sizeof(bins), &bins);
341     err |= clSetKernelArg(kernel, 2, sizeof(bin_assignments), &bin_assignments);
342     err |= clSetKernelArg(kernel, 3, sizeof(max_counts_per_bin),
343                           &max_counts_per_bin);
344     if (err)
345     {
346         log_error("add_index_bin_test FAILED to set kernel arguments: %d\n",
347                   err);
348         return -1;
349     }
350 
351     err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_threads,
352                                  local_threads, 0, NULL, NULL);
353     if (err)
354     {
355         log_error("add_index_bin_test FAILED to execute kernel: %d\n", err);
356         return -1;
357     }
358 
359     std::unique_ptr<cl_int[]> final_bin_assignments(
360         new cl_int[number_of_bins * max_counts_per_bin]);
361     if (!final_bin_assignments)
362     {
363         log_error("add_index_bin_test FAILED to allocate initial values for "
364                   "final_bin_assignments.\n");
365         return -1;
366     }
367     err = clEnqueueReadBuffer(queue, bins, true, 0,
368                               sizeof(cl_int) * number_of_bins
369                                   * max_counts_per_bin,
370                               final_bin_assignments.get(), 0, NULL, NULL);
371     if (err)
372     {
373         log_error("add_index_bin_test FAILED to read back bins: %d\n", err);
374         return -1;
375     }
376 
377     std::unique_ptr<cl_int[]> final_bin_counts(new cl_int[number_of_bins]);
378     if (!final_bin_counts)
379     {
380         log_error("add_index_bin_test FAILED to allocate initial values for "
381                   "final_bin_counts.\n");
382         return -1;
383     }
384     err = clEnqueueReadBuffer(queue, bin_counters, true, 0,
385                               sizeof(cl_int) * number_of_bins,
386                               final_bin_counts.get(), 0, NULL, NULL);
387     if (err)
388     {
389         log_error("add_index_bin_test FAILED to read back bin_counters: %d\n",
390                   err);
391         return -1;
392     }
393 
394     // Verification.
395     int errors = 0;
396     int current_bin;
397     int search;
398     //  Print out all the contents of the bins.
399     //  for (current_bin=0; current_bin<number_of_bins; current_bin++)
400     //        for (search=0; search<max_counts_per_bin; search++)
401     //      log_info("[bin %d, entry %d] = %d\n", current_bin, search,
402     //      final_bin_assignments[current_bin*max_counts_per_bin+search]);
403 
404     // First verify that there are the correct number in each bin.
405     for (current_bin = 0; current_bin < number_of_bins; current_bin++)
406     {
407         int expected_number = l_bin_counts[current_bin];
408         int actual_number = final_bin_counts[current_bin];
409         if (expected_number != actual_number)
410         {
411             log_error("add_index_bin_test FAILED: bin %d reported %d entries "
412                       "when %d were expected.\n",
413                       current_bin, actual_number, expected_number);
414             errors++;
415         }
416         for (search = 0; search < expected_number; search++)
417         {
418             if (final_bin_assignments[current_bin * max_counts_per_bin + search]
419                 == -1)
420             {
421                 log_error("add_index_bin_test FAILED: bin %d had no entry at "
422                           "position %d when it should have had %d entries.\n",
423                           current_bin, search, expected_number);
424                 errors++;
425             }
426         }
427         for (search = expected_number; search < max_counts_per_bin; search++)
428         {
429             if (final_bin_assignments[current_bin * max_counts_per_bin + search]
430                 != -1)
431             {
432                 log_error(
433                     "add_index_bin_test FAILED: bin %d had an extra entry at "
434                     "position %d when it should have had only %d entries.\n",
435                     current_bin, search, expected_number);
436                 errors++;
437             }
438         }
439     }
440     // Now verify that the correct ones are in each bin
441     int index;
442     for (index = 0; index < number_of_items; index++)
443     {
444         int expected_bin = l_bin_assignments[index];
445         int found_it = 0;
446         for (search = 0; search < l_bin_counts[expected_bin]; search++)
447         {
448             if (final_bin_assignments[expected_bin * max_counts_per_bin
449                                       + search]
450                 == index)
451             {
452                 found_it = 1;
453             }
454         }
455         if (found_it == 0)
456         {
457             log_error(
458                 "add_index_bin_test FAILED: did not find item %d in bin %d.\n",
459                 index, expected_bin);
460             errors++;
461         }
462     }
463 
464     if (errors == 0)
465     {
466         log_info("add_index_bin_test passed. Each item was put in the correct "
467                  "bin in parallel.\n");
468         return 0;
469     }
470     else
471     {
472         log_error("add_index_bin_test FAILED: %d errors.\n", errors);
473         return -1;
474     }
475 }
476 
test_atomic_add_index_bin(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)477 int test_atomic_add_index_bin(cl_device_id deviceID, cl_context context,
478                               cl_command_queue queue, int num_elements)
479 {
480     //===== add_index_bin test
481     size_t numGlobalThreads = 2048;
482     int iteration = 0;
483     int err, failed = 0;
484     MTdata d = init_genrand(gRandomSeed);
485 
486     /* Check if atomics are supported. */
487     if (!is_extension_available(deviceID, "cl_khr_global_int32_base_atomics"))
488     {
489         log_info("Base atomics not supported "
490                  "(cl_khr_global_int32_base_atomics). Skipping test.\n");
491         free_mtdata(d);
492         return 0;
493     }
494 
495     for (iteration = 0; iteration < 10; iteration++)
496     {
497         log_info("add_index_bin_test with %d elements:\n",
498                  (int)numGlobalThreads);
499         err = add_index_bin_test(&numGlobalThreads, queue, context, d);
500         if (err)
501         {
502             failed++;
503             break;
504         }
505         numGlobalThreads *= 2;
506     }
507     free_mtdata(d);
508     return failed;
509 }
510