1 //
2 // Copyright (c) 2021 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17 
18 #include <stdio.h>
19 #include <iostream>
20 #include <string.h>
21 #include <sys/types.h>
22 #include <sys/stat.h>
23 
24 #include "procs.h"
25 #include <CL/cl_ext.h>
26 
27 /** @brief Gets the number of elements of type s in a fixed length array of s */
28 #define NELEMS(s) (sizeof(s) / sizeof((s)[0]))
29 #define test_error_ret_and_free(errCode, msg, retValue, ptr)                   \
30     {                                                                          \
31         auto errCodeResult = errCode;                                          \
32         if (errCodeResult != CL_SUCCESS)                                       \
33         {                                                                      \
34             print_error(errCodeResult, msg);                                   \
35             free(ptr);                                                         \
36             return retValue;                                                   \
37         }                                                                      \
38     }
39 
40 const char* wg_scan_local_work_group_size = R"(
41     bool is_zero_linear_id()
42     {
43         size_t linear_id;
44 #if __OPENCL_VERSION__ < CL_VERSION_2_0
45         linear_id = ((get_global_id(2) - get_global_offset(2)) * get_global_size(1) * get_global_size(0)) +
46                     ((get_global_id(1) - get_global_offset(1)) * get_global_size(0)) +
47                     (get_global_id(0) - get_global_offset(0));
48 #else
49         linear_id = get_global_linear_id();
50 #endif
51         return linear_id == 0;
52     }
53 
54     uint get_l_size(size_t dim)
55     {
56 #if __OPENCL_VERSION__ < CL_VERSION_2_0
57         return get_local_size(dim);
58 #else
59         return get_enqueued_local_size(dim);
60 #endif
61     }
62 
63     __kernel void test_wg_scan_local_work_group_size(global uint *output)
64     {
65         if(!is_zero_linear_id()) return;
66         for (uint i = 0; i < 3; i++)
67         {
68             output[i] = get_l_size(i);
69         }
70     }
71     __kernel void test_wg_scan_local_work_group_size_static_local(
72                                             global uint *output)
73     {
74         __local char c[LOCAL_MEM_SIZE];
75 
76         if(!is_zero_linear_id()) return;
77         for (uint i = 0; i < 3; i++)
78         {
79             output[i] = get_l_size(i);
80         }
81     }
82     __kernel void test_wg_scan_local_work_group_size_dynlocal(
83                                         global uint *output,
84                                         __local char * c)
85     {
86         if(!is_zero_linear_id()) return;
87         for (uint i = 0; i < 3; i++)
88         {
89             output[i] = get_l_size(i);
90         }
91     };)";
92 
is_prime(size_t a)93 bool is_prime(size_t a)
94 {
95     size_t c;
96 
97     for (c = 2; c < a; c++)
98     {
99         if (a % c == 0) return false;
100     }
101     return true;
102 }
103 
is_not_prime(size_t a)104 bool is_not_prime(size_t a) { return !is_prime(a); }
105 
is_not_even(size_t a)106 bool is_not_even(size_t a) { return (is_prime(a) || (a % 2 == 1)); }
107 
is_not_odd(size_t a)108 bool is_not_odd(size_t a) { return (is_prime(a) || (a % 2 == 0)); }
109 
110 #define NELEMS(s) (sizeof(s) / sizeof((s)[0]))
111 /* The value_range_nD contains numbers to be used for the experiments with 2D
112    and 3D global work sizes. This is because we need smaller numbers so that the
113    resulting number of work items is meaningful and does not become too large.
114    The cases here are: 64 that is a power of 2, 3 is an odd and small prime
115    number, 12 is a multiple of 4 but not a power of 2, 113 is a large prime
116    number
117    and 1 is to test the lack of this dimension if the others are present */
118 const size_t value_range_nD[] = { 64, 3, 12, 113, 1 };
119 const size_t basic_increment = 16;
120 const size_t primes_increment = 1;
121 enum num_dims
122 {
123     _1D = 1,
124     _2D = 2,
125     _3D = 3
126 };
127 
do_test(cl_device_id device,cl_context context,cl_command_queue queue,cl_kernel scan_kernel,int work_dim,size_t global_work_offset[3],size_t test_values[3],size_t dyn_mem_size)128 int do_test(cl_device_id device, cl_context context, cl_command_queue queue,
129             cl_kernel scan_kernel, int work_dim, size_t global_work_offset[3],
130             size_t test_values[3], size_t dyn_mem_size)
131 {
132     size_t local_work_size[] = { 1, 1, 1 };
133     size_t suggested_total_size;
134     size_t workgroupinfo_size;
135     cl_uint kernel_work_size[3] = { 0 };
136     clMemWrapper buffer;
137     cl_platform_id platform;
138 
139     int err = clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(platform),
140                               &platform, NULL);
141     test_error_ret(err, "clGetDeviceInfo failed", -1);
142     clGetKernelSuggestedLocalWorkSizeKHR_fn
143         clGetKernelSuggestedLocalWorkSizeKHR =
144             (clGetKernelSuggestedLocalWorkSizeKHR_fn)
145                 clGetExtensionFunctionAddressForPlatform(
146                     platform, "clGetKernelSuggestedLocalWorkSizeKHR");
147 
148     if (clGetKernelSuggestedLocalWorkSizeKHR == NULL)
149     {
150         log_info("Extension 'cl_khr_suggested_local_work_size' could not be "
151                  "found.\n");
152         return TEST_FAIL;
153     }
154 
155     /* Create the actual buffer, using local_buffer as the host pointer, and ask
156      * to copy that into the buffer */
157     buffer = clCreateBuffer(context, CL_MEM_READ_WRITE,
158                             sizeof(kernel_work_size), NULL, &err);
159     test_error_ret(err, "clCreateBuffer failed", -1);
160     err = clSetKernelArg(scan_kernel, 0, sizeof(buffer), &buffer);
161     test_error_ret(err, "clSetKernelArg failed", -1);
162     if (dyn_mem_size)
163     {
164         err = clSetKernelArg(scan_kernel, 1, dyn_mem_size, NULL);
165         test_error_ret(err, "clSetKernelArg failed", -1);
166     }
167     err = clGetKernelSuggestedLocalWorkSizeKHR(queue, scan_kernel, work_dim,
168                                                global_work_offset, test_values,
169                                                local_work_size);
170     test_error_ret(err, "clGetKernelSuggestedLocalWorkSizeKHR failed", -1);
171     suggested_total_size =
172         local_work_size[0] * local_work_size[1] * local_work_size[2];
173     err = clGetKernelWorkGroupInfo(
174         scan_kernel, device, CL_KERNEL_WORK_GROUP_SIZE,
175         sizeof(workgroupinfo_size), &workgroupinfo_size, NULL);
176     test_error_ret(err, "clGetKernelWorkGroupInfo failed", -1);
177     if (suggested_total_size > workgroupinfo_size)
178     {
179         std::cout << "The suggested work group size consist of "
180                   << suggested_total_size << " work items.\n"
181                   << "Work items are limited by " << workgroupinfo_size
182                   << std::endl;
183         std::cout << "Size from clGetKernelWorkGroupInfo: "
184                   << workgroupinfo_size;
185         std::cout << "\nSize from clGetKernelSuggestedLocalWorkSizeKHR: "
186                   << local_work_size[0] * local_work_size[1]
187                 * local_work_size[2]
188                   << std::endl;
189         return -1;
190     }
191 
192     err =
193         clEnqueueNDRangeKernel(queue, scan_kernel, work_dim, global_work_offset,
194                                test_values, // global work size
195                                NULL, 0, NULL, NULL);
196     test_error_ret(err, "clEnqueueNDRangeKernel failed", -1);
197     err = clEnqueueReadBuffer(queue, buffer, CL_NON_BLOCKING, 0,
198                               sizeof(kernel_work_size), kernel_work_size, 0,
199                               NULL, NULL);
200     test_error_ret(err, "clEnqueueReadBuffer failed", -1);
201     err = clFinish(queue);
202     test_error_ret(err, "clFinish failed", -1);
203 
204     if (kernel_work_size[0] != local_work_size[0]
205         || kernel_work_size[1] != local_work_size[1]
206         || kernel_work_size[2] != local_work_size[2])
207     {
208         std::cout
209             << "Kernel work size differs from local work size suggested:\n"
210             << "Kernel work size: (" << kernel_work_size[0] << ", "
211             << kernel_work_size[1] << ", " << kernel_work_size[2] << ")"
212             << "Local work size: (" << local_work_size[0] << ", "
213             << local_work_size[1] << ", " << local_work_size[2] << ")\n";
214         return -1;
215     }
216     return err;
217 }
218 
do_test_work_group_suggested_local_size(cl_device_id device,cl_context context,cl_command_queue queue,bool (* skip_cond)(size_t),size_t start,size_t end,size_t incr,cl_ulong max_local_mem_size,size_t global_work_offset[],num_dims dim)219 int do_test_work_group_suggested_local_size(
220     cl_device_id device, cl_context context, cl_command_queue queue,
221     bool (*skip_cond)(size_t), size_t start, size_t end, size_t incr,
222     cl_ulong max_local_mem_size, size_t global_work_offset[], num_dims dim)
223 {
224     int err;
225     size_t test_values[] = { 1, 1, 1 };
226     std::string kernel_names[6] = {
227         "test_wg_scan_local_work_group_size",
228         "test_wg_scan_local_work_group_size_static_local",
229         "test_wg_scan_local_work_group_size_static_local",
230         "test_wg_scan_local_work_group_size_static_local",
231         "test_wg_scan_local_work_group_size_static_local",
232         "test_wg_scan_local_work_group_size_dynlocal"
233     };
234     std::string str_local_mem_size[6] = {
235         "-DLOCAL_MEM_SIZE=1",     "-DLOCAL_MEM_SIZE=1024",
236         "-DLOCAL_MEM_SIZE=4096",  "-DLOCAL_MEM_SIZE=16384",
237         "-DLOCAL_MEM_SIZE=32768", "-DLOCAL_MEM_SIZE=1"
238     };
239     size_t local_mem_size[6] = { 1, 1024, 4096, 16384, 32768, 1 };
240     size_t dyn_mem_size[6] = { 0, 0, 0, 0, 0, 1024 };
241     cl_ulong kernel_local_mem_size;
242     for (int kernel_num = 0; kernel_num < 6; kernel_num++)
243     {
244         if (max_local_mem_size < local_mem_size[kernel_num]) continue;
245         clProgramWrapper scan_program;
246         clKernelWrapper scan_kernel;
247         // Create the kernel
248         err = create_single_kernel_helper(
249             context, &scan_program, &scan_kernel, 1,
250             &wg_scan_local_work_group_size, (kernel_names[kernel_num]).c_str(),
251             (str_local_mem_size[kernel_num]).c_str());
252         test_error_ret(err,
253                        ("create_single_kernel_helper failed for kernel "
254                         + kernel_names[kernel_num])
255                            .c_str(),
256                        -1);
257 
258         // Check if the local memory used by the kernel is going to exceed the
259         // max_local_mem_size
260         err = clGetKernelWorkGroupInfo(
261             scan_kernel, device, CL_KERNEL_LOCAL_MEM_SIZE,
262             sizeof(kernel_local_mem_size), &kernel_local_mem_size, NULL);
263         test_error_ret(err, "clGetKernelWorkGroupInfo failed", -1);
264         if (kernel_local_mem_size > max_local_mem_size) continue;
265         // return error if no number is found due to the skip condition
266         err = -1;
267         unsigned int j = 0;
268         size_t num_elems = NELEMS(value_range_nD);
269         for (size_t i = start; i < end; i += incr)
270         {
271             if (skip_cond(i)) continue;
272             err = 0;
273             test_values[0] = i;
274             if (dim == _2D) test_values[1] = value_range_nD[j++ % num_elems];
275             if (dim == _3D)
276             {
277                 test_values[1] = value_range_nD[j++ % num_elems];
278                 test_values[2] = value_range_nD[rand() % num_elems];
279             }
280             err |= do_test(device, context, queue, scan_kernel, dim,
281                            global_work_offset, test_values,
282                            dyn_mem_size[kernel_num]);
283             test_error_ret(
284                 err,
285                 ("do_test failed for kernel " + kernel_names[kernel_num])
286                     .c_str(),
287                 -1);
288         }
289     }
290     return err;
291 }
292 
test_work_group_suggested_local_size_1D(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)293 int test_work_group_suggested_local_size_1D(cl_device_id device,
294                                             cl_context context,
295                                             cl_command_queue queue, int n_elems)
296 {
297     if (!is_extension_available(device, "cl_khr_suggested_local_work_size"))
298     {
299         log_info("Device does not support 'cl_khr_suggested_local_work_size'. "
300                  "Skipping the test.\n");
301         return TEST_SKIPPED_ITSELF;
302     }
303     cl_ulong max_local_mem_size;
304     cl_int err =
305         clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_SIZE,
306                         sizeof(max_local_mem_size), &max_local_mem_size, NULL);
307     test_error_ret(err, "clGetDeviceInfo for CL_DEVICE_LOCAL_MEM_SIZE failed.",
308                    -1);
309 
310     size_t start, end, incr;
311     size_t global_work_offset[] = { 0, 0, 0 };
312     size_t max_work_items = 0;
313     clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE,
314                     sizeof(max_work_items), &max_work_items, NULL);
315 
316     // odds
317     start = 1;
318     end = max_work_items;
319     incr = basic_increment;
320     err = do_test_work_group_suggested_local_size(
321         device, context, queue, is_not_odd, start, end, incr,
322         max_local_mem_size, global_work_offset, _1D);
323     test_error_ret(
324         err, "test_work_group_suggested_local_size_1D for odds failed.", -1);
325     log_info("test_work_group_suggested_local_size_1D odds passed\n");
326 
327     // evens
328     start = 2;
329     end = max_work_items;
330     incr = basic_increment;
331     err = do_test_work_group_suggested_local_size(
332         device, context, queue, is_not_even, start, end, incr,
333         max_local_mem_size, global_work_offset, _1D);
334     test_error_ret(
335         err, "test_work_group_suggested_local_size_1D for evens failed.", -1);
336     log_info("test_work_group_suggested_local_size_1D evens passed\n");
337 
338     // primes
339     start = max_work_items + 1;
340     end = 2 * max_work_items;
341     incr = primes_increment;
342     err = do_test_work_group_suggested_local_size(
343         device, context, queue, is_not_prime, start, end, incr,
344         max_local_mem_size, global_work_offset, _1D);
345     test_error_ret(
346         err, "test_work_group_suggested_local_size_1D for primes failed.", -1);
347     log_info("test_work_group_suggested_local_size_1D primes passed\n");
348 
349     global_work_offset[0] = 10;
350     global_work_offset[1] = 10;
351     global_work_offset[2] = 10;
352     // odds
353     start = 1;
354     end = max_work_items;
355     incr = basic_increment;
356     err = do_test_work_group_suggested_local_size(
357         device, context, queue, is_not_odd, start, end, incr,
358         max_local_mem_size, global_work_offset, _1D);
359     test_error_ret(err,
360                    "test_work_group_suggested_local_size_1D for odds with "
361                    "global_work_offset failed.",
362                    -1);
363     log_info("test_work_group_suggested_local_size_1D odds with "
364              "global_work_offset passed\n");
365 
366     // evens
367     start = 2;
368     end = max_work_items;
369     incr = basic_increment;
370     err = do_test_work_group_suggested_local_size(
371         device, context, queue, is_not_even, start, end, incr,
372         max_local_mem_size, global_work_offset, _1D);
373     test_error_ret(err,
374                    "test_work_group_suggested_local_size_1D for evens with "
375                    "global_work_offset failed.",
376                    -1);
377     log_info("test_work_group_suggested_local_size_1D evens with "
378              "global_work_offset passed\n");
379 
380     // primes
381     start = max_work_items + 1;
382     end = 2 * max_work_items;
383     incr = primes_increment;
384     err = do_test_work_group_suggested_local_size(
385         device, context, queue, is_not_prime, start, end, incr,
386         max_local_mem_size, global_work_offset, _1D);
387     test_error_ret(err,
388                    "test_work_group_suggested_local_size_1D for primes with "
389                    "global_work_offset failed.",
390                    -1);
391     log_info("test_work_group_suggested_local_size_1D primes with "
392              "global_work_offset passed\n");
393 
394     return err;
395 }
396 
test_work_group_suggested_local_size_2D(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)397 int test_work_group_suggested_local_size_2D(cl_device_id device,
398                                             cl_context context,
399                                             cl_command_queue queue, int n_elems)
400 {
401     if (!is_extension_available(device, "cl_khr_suggested_local_work_size"))
402     {
403         log_info("Device does not support 'cl_khr_suggested_local_work_size'. "
404                  "Skipping the test.\n");
405         return TEST_SKIPPED_ITSELF;
406     }
407     cl_long max_local_mem_size;
408     cl_int err =
409         clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_SIZE,
410                         sizeof(max_local_mem_size), &max_local_mem_size, NULL);
411     test_error_ret(err, "clGetDeviceInfo for CL_DEVICE_LOCAL_MEM_SIZE failed.",
412                    -1);
413 
414     size_t start, end, incr;
415     size_t global_work_offset[] = { 0, 0, 0 };
416     size_t max_work_items = 0;
417     clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE,
418                     sizeof(max_work_items), &max_work_items, NULL);
419 
420     // odds
421     start = 1;
422     end = max_work_items;
423     incr = basic_increment;
424     err = do_test_work_group_suggested_local_size(
425         device, context, queue, is_not_odd, start, end, incr,
426         max_local_mem_size, global_work_offset, _2D);
427     test_error_ret(
428         err, "test_work_group_suggested_local_size_2D for odds failed.", -1);
429     log_info("test_work_group_suggested_local_size_2D odds passed\n");
430 
431     // evens
432     start = 2;
433     end = max_work_items;
434     incr = basic_increment;
435     err = do_test_work_group_suggested_local_size(
436         device, context, queue, is_not_even, start, end, incr,
437         max_local_mem_size, global_work_offset, _2D);
438     test_error_ret(
439         err, "test_work_group_suggested_local_size_2D for evens failed.", -1);
440     log_info("test_work_group_suggested_local_size_2D evens passed\n");
441 
442     // primes
443     start = max_work_items + 1;
444     end = max_work_items + max_work_items / 4;
445     incr = primes_increment;
446     err = do_test_work_group_suggested_local_size(
447         device, context, queue, is_not_prime, start, end, incr,
448         max_local_mem_size, global_work_offset, _2D);
449     test_error_ret(
450         err, "test_work_group_suggested_local_size_2D for primes failed.", -1);
451     log_info("test_work_group_suggested_local_size_2D primes passed\n");
452 
453     global_work_offset[0] = 10;
454     global_work_offset[1] = 10;
455     global_work_offset[2] = 10;
456 
457     // odds
458     start = 1;
459     end = max_work_items;
460     incr = basic_increment;
461     err = do_test_work_group_suggested_local_size(
462         device, context, queue, is_not_odd, start, end, incr,
463         max_local_mem_size, global_work_offset, _2D);
464     test_error_ret(err,
465                    "test_work_group_suggested_local_size_2D for odds with "
466                    "global_work_offset failed.",
467                    -1);
468     log_info("test_work_group_suggested_local_size_2D odds with "
469              "global_work_offset passed\n");
470 
471     // evens
472     start = 2;
473     end = max_work_items;
474     incr = basic_increment;
475     err = do_test_work_group_suggested_local_size(
476         device, context, queue, is_not_even, start, end, incr,
477         max_local_mem_size, global_work_offset, _2D);
478     test_error_ret(err,
479                    "test_work_group_suggested_local_size_2D for evens with "
480                    "global_work_offset failed.",
481                    -1);
482     log_info("test_work_group_suggested_local_size_2D evens with "
483              "global_work_offset passed\n");
484 
485     // primes
486     start = max_work_items + 1;
487     end = max_work_items + max_work_items / 4;
488     incr = primes_increment;
489     err = do_test_work_group_suggested_local_size(
490         device, context, queue, is_not_prime, start, end, incr,
491         max_local_mem_size, global_work_offset, _2D);
492     test_error_ret(err,
493                    "test_work_group_suggested_local_size_2D for primes with "
494                    "global_work_offset failed.",
495                    -1);
496     log_info("test_work_group_suggested_local_size_2D primes with "
497              "global_work_offset passed\n");
498 
499     return err;
500 }
501 
test_work_group_suggested_local_size_3D(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)502 int test_work_group_suggested_local_size_3D(cl_device_id device,
503                                             cl_context context,
504                                             cl_command_queue queue, int n_elems)
505 {
506     if (!is_extension_available(device, "cl_khr_suggested_local_work_size"))
507     {
508         log_info("Device does not support 'cl_khr_suggested_local_work_size'. "
509                  "Skipping the test.\n");
510         return TEST_SKIPPED_ITSELF;
511     }
512     cl_long max_local_mem_size;
513     cl_int err =
514         clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_SIZE,
515                         sizeof(max_local_mem_size), &max_local_mem_size, NULL);
516     test_error_ret(err, "clGetDeviceInfo for CL_DEVICE_LOCAL_MEM_SIZE failed.",
517                    -1);
518 
519     size_t start, end, incr;
520     size_t global_work_offset[] = { 0, 0, 0 };
521     size_t max_work_items = 0;
522     clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE,
523                     sizeof(max_work_items), &max_work_items, NULL);
524 
525     // odds
526     start = 1;
527     end = max_work_items / 2;
528     incr = basic_increment;
529     err = do_test_work_group_suggested_local_size(
530         device, context, queue, is_not_odd, start, end, incr,
531         max_local_mem_size, global_work_offset, _3D);
532     test_error_ret(
533         err, "test_work_group_suggested_local_size_3D for odds failed.", -1);
534     log_info("test_work_group_suggested_local_size_3D odds passed\n");
535 
536     // evens
537     start = 2;
538     end = max_work_items / 2;
539     incr = basic_increment;
540     err = do_test_work_group_suggested_local_size(
541         device, context, queue, is_not_even, start, end, incr,
542         max_local_mem_size, global_work_offset, _3D);
543     test_error_ret(
544         err, "test_work_group_suggested_local_size_3D for evens failed.", -1);
545     log_info("test_work_group_suggested_local_size_3D evens passed\n");
546 
547     // primes
548     start = max_work_items + 1;
549     end = max_work_items + max_work_items / 4;
550     incr = primes_increment;
551     err = do_test_work_group_suggested_local_size(
552         device, context, queue, is_not_prime, start, end, incr,
553         max_local_mem_size, global_work_offset, _3D);
554     test_error_ret(
555         err, "test_work_group_suggested_local_size_3D for primes failed.", -1);
556     log_info("test_work_group_suggested_local_size_3D primes passed\n");
557 
558     global_work_offset[0] = 10;
559     global_work_offset[1] = 10;
560     global_work_offset[2] = 10;
561 
562     // odds
563     start = 1;
564     end = max_work_items / 2;
565     incr = basic_increment;
566     err = do_test_work_group_suggested_local_size(
567         device, context, queue, is_not_odd, start, end, incr,
568         max_local_mem_size, global_work_offset, _3D);
569     test_error_ret(err,
570                    "test_work_group_suggested_local_size_3D for odds with "
571                    "global_work_offset failed.",
572                    -1);
573     log_info("test_work_group_suggested_local_size_3D odds with "
574              "global_work_offset passed\n");
575 
576     // evens
577     start = 2;
578     end = max_work_items / 2;
579     incr = basic_increment;
580     err = do_test_work_group_suggested_local_size(
581         device, context, queue, is_not_even, start, end, incr,
582         max_local_mem_size, global_work_offset, _3D);
583     test_error_ret(err,
584                    "test_work_group_suggested_local_size_3D for evens with "
585                    "global_work_offset failed.",
586                    -1);
587     log_info("test_work_group_suggested_local_size_3D evens with "
588              "global_work_offset passed\n");
589 
590     // primes
591     start = max_work_items + 1;
592     end = max_work_items + max_work_items / 4;
593     incr = primes_increment;
594     err = do_test_work_group_suggested_local_size(
595         device, context, queue, is_not_prime, start, end, incr,
596         max_local_mem_size, global_work_offset, _3D);
597     test_error_ret(err,
598                    "test_work_group_suggested_local_size_3D for primes with "
599                    "global_work_offset failed.",
600                    -1);
601     log_info("test_work_group_suggested_local_size_3D primes with "
602              "global_work_offset passed\n");
603 
604     return err;
605 }
606