xref: /aosp_15_r20/external/OpenCL-CTS/test_conformance/math_brute_force/binary_i_float.cpp (revision 6467f958c7de8070b317fc65bcb0f6472e388d82)
1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 
17 #include "common.h"
18 #include "function_list.h"
19 #include "test_functions.h"
20 #include "utility.h"
21 
22 #include <climits>
23 #include <cstring>
24 
25 namespace {
26 
BuildKernelFn(cl_uint job_id,cl_uint thread_id UNUSED,void * p)27 cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
28 {
29     BuildKernelInfo &info = *(BuildKernelInfo *)p;
30     auto generator = [](const std::string &kernel_name, const char *builtin,
31                         cl_uint vector_size_index) {
32         return GetBinaryKernel(kernel_name, builtin, ParameterType::Float,
33                                ParameterType::Float, ParameterType::Int,
34                                vector_size_index);
35     };
36     return BuildKernels(info, job_id, generator);
37 }
38 
39 // Thread specific data for a worker thread
40 struct ThreadInfo
41 {
42     // Input and output buffers for the thread
43     clMemWrapper inBuf;
44     clMemWrapper inBuf2;
45     Buffers outBuf;
46 
47     float maxError; // max error value. Init to 0.
48     double
49         maxErrorValue; // position of the max error value (param 1).  Init to 0.
50     cl_int maxErrorValue2; // position of the max error value (param 2).  Init
51                            // to 0.
52     MTdataHolder d;
53 
54     // Per thread command queue to improve performance
55     clCommandQueueWrapper tQueue;
56 };
57 
58 struct TestInfo
59 {
60     size_t subBufferSize; // Size of the sub-buffer in elements
61     const Func *f; // A pointer to the function info
62 
63     // Programs for various vector sizes.
64     Programs programs;
65 
66     // Thread-specific kernels for each vector size:
67     // k[vector_size][thread_id]
68     KernelMatrix k;
69 
70     // Array of thread specific information
71     std::vector<ThreadInfo> tinfo;
72 
73     cl_uint threadCount; // Number of worker threads
74     cl_uint jobCount; // Number of jobs
75     cl_uint step; // step between each chunk and the next.
76     cl_uint scale; // stride between individual test values
77     float ulps; // max_allowed ulps
78     int ftz; // non-zero if running in flush to zero mode
79     bool relaxedMode; // True if test is running in relaxed mode, false
80                       // otherwise.
81     // no special values
82 };
83 
84 // A table of more difficult cases to get right
85 const float specialValues[] = {
86     -NAN,
87     -INFINITY,
88     -FLT_MAX,
89     MAKE_HEX_FLOAT(-0x1.000002p64f, -0x1000002L, 40),
90     MAKE_HEX_FLOAT(-0x1.0p64f, -0x1L, 64),
91     MAKE_HEX_FLOAT(-0x1.fffffep63f, -0x1fffffeL, 39),
92     MAKE_HEX_FLOAT(-0x1.000002p63f, -0x1000002L, 39),
93     MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63),
94     MAKE_HEX_FLOAT(-0x1.fffffep62f, -0x1fffffeL, 38),
95     MAKE_HEX_FLOAT(-0x1.000002p32f, -0x1000002L, 8),
96     MAKE_HEX_FLOAT(-0x1.0p32f, -0x1L, 32),
97     MAKE_HEX_FLOAT(-0x1.fffffep31f, -0x1fffffeL, 7),
98     MAKE_HEX_FLOAT(-0x1.000002p31f, -0x1000002L, 7),
99     MAKE_HEX_FLOAT(-0x1.0p31f, -0x1L, 31),
100     MAKE_HEX_FLOAT(-0x1.fffffep30f, -0x1fffffeL, 6),
101     -1000.f,
102     -100.f,
103     -4.0f,
104     -3.5f,
105     -3.0f,
106     MAKE_HEX_FLOAT(-0x1.800002p1f, -0x1800002L, -23),
107     -2.5f,
108     MAKE_HEX_FLOAT(-0x1.7ffffep1f, -0x17ffffeL, -23),
109     -2.0f,
110     MAKE_HEX_FLOAT(-0x1.800002p0f, -0x1800002L, -24),
111     -1.5f,
112     MAKE_HEX_FLOAT(-0x1.7ffffep0f, -0x17ffffeL, -24),
113     MAKE_HEX_FLOAT(-0x1.000002p0f, -0x1000002L, -24),
114     -1.0f,
115     MAKE_HEX_FLOAT(-0x1.fffffep-1f, -0x1fffffeL, -25),
116     MAKE_HEX_FLOAT(-0x1.000002p-1f, -0x1000002L, -25),
117     -0.5f,
118     MAKE_HEX_FLOAT(-0x1.fffffep-2f, -0x1fffffeL, -26),
119     MAKE_HEX_FLOAT(-0x1.000002p-2f, -0x1000002L, -26),
120     -0.25f,
121     MAKE_HEX_FLOAT(-0x1.fffffep-3f, -0x1fffffeL, -27),
122     MAKE_HEX_FLOAT(-0x1.000002p-126f, -0x1000002L, -150),
123     -FLT_MIN,
124     MAKE_HEX_FLOAT(-0x0.fffffep-126f, -0x0fffffeL, -150),
125     MAKE_HEX_FLOAT(-0x0.000ffep-126f, -0x0000ffeL, -150),
126     MAKE_HEX_FLOAT(-0x0.0000fep-126f, -0x00000feL, -150),
127     MAKE_HEX_FLOAT(-0x0.00000ep-126f, -0x000000eL, -150),
128     MAKE_HEX_FLOAT(-0x0.00000cp-126f, -0x000000cL, -150),
129     MAKE_HEX_FLOAT(-0x0.00000ap-126f, -0x000000aL, -150),
130     MAKE_HEX_FLOAT(-0x0.000008p-126f, -0x0000008L, -150),
131     MAKE_HEX_FLOAT(-0x0.000006p-126f, -0x0000006L, -150),
132     MAKE_HEX_FLOAT(-0x0.000004p-126f, -0x0000004L, -150),
133     MAKE_HEX_FLOAT(-0x0.000002p-126f, -0x0000002L, -150),
134     -0.0f,
135 
136     +NAN,
137     +INFINITY,
138     +FLT_MAX,
139     MAKE_HEX_FLOAT(+0x1.000002p64f, +0x1000002L, 40),
140     MAKE_HEX_FLOAT(+0x1.0p64f, +0x1L, 64),
141     MAKE_HEX_FLOAT(+0x1.fffffep63f, +0x1fffffeL, 39),
142     MAKE_HEX_FLOAT(+0x1.000002p63f, +0x1000002L, 39),
143     MAKE_HEX_FLOAT(+0x1.0p63f, +0x1L, 63),
144     MAKE_HEX_FLOAT(+0x1.fffffep62f, +0x1fffffeL, 38),
145     MAKE_HEX_FLOAT(+0x1.000002p32f, +0x1000002L, 8),
146     MAKE_HEX_FLOAT(+0x1.0p32f, +0x1L, 32),
147     MAKE_HEX_FLOAT(+0x1.fffffep31f, +0x1fffffeL, 7),
148     MAKE_HEX_FLOAT(+0x1.000002p31f, +0x1000002L, 7),
149     MAKE_HEX_FLOAT(+0x1.0p31f, +0x1L, 31),
150     MAKE_HEX_FLOAT(+0x1.fffffep30f, +0x1fffffeL, 6),
151     +1000.f,
152     +100.f,
153     +4.0f,
154     +3.5f,
155     +3.0f,
156     MAKE_HEX_FLOAT(+0x1.800002p1f, +0x1800002L, -23),
157     2.5f,
158     MAKE_HEX_FLOAT(+0x1.7ffffep1f, +0x17ffffeL, -23),
159     +2.0f,
160     MAKE_HEX_FLOAT(+0x1.800002p0f, +0x1800002L, -24),
161     1.5f,
162     MAKE_HEX_FLOAT(+0x1.7ffffep0f, +0x17ffffeL, -24),
163     MAKE_HEX_FLOAT(+0x1.000002p0f, +0x1000002L, -24),
164     +1.0f,
165     MAKE_HEX_FLOAT(+0x1.fffffep-1f, +0x1fffffeL, -25),
166     MAKE_HEX_FLOAT(+0x1.000002p-1f, +0x1000002L, -25),
167     +0.5f,
168     MAKE_HEX_FLOAT(+0x1.fffffep-2f, +0x1fffffeL, -26),
169     MAKE_HEX_FLOAT(+0x1.000002p-2f, +0x1000002L, -26),
170     +0.25f,
171     MAKE_HEX_FLOAT(+0x1.fffffep-3f, +0x1fffffeL, -27),
172     MAKE_HEX_FLOAT(0x1.000002p-126f, 0x1000002L, -150),
173     +FLT_MIN,
174     MAKE_HEX_FLOAT(+0x0.fffffep-126f, +0x0fffffeL, -150),
175     MAKE_HEX_FLOAT(+0x0.000ffep-126f, +0x0000ffeL, -150),
176     MAKE_HEX_FLOAT(+0x0.0000fep-126f, +0x00000feL, -150),
177     MAKE_HEX_FLOAT(+0x0.00000ep-126f, +0x000000eL, -150),
178     MAKE_HEX_FLOAT(+0x0.00000cp-126f, +0x000000cL, -150),
179     MAKE_HEX_FLOAT(+0x0.00000ap-126f, +0x000000aL, -150),
180     MAKE_HEX_FLOAT(+0x0.000008p-126f, +0x0000008L, -150),
181     MAKE_HEX_FLOAT(+0x0.000006p-126f, +0x0000006L, -150),
182     MAKE_HEX_FLOAT(+0x0.000004p-126f, +0x0000004L, -150),
183     MAKE_HEX_FLOAT(+0x0.000002p-126f, +0x0000002L, -150),
184     +0.0f,
185 };
186 
187 constexpr size_t specialValuesCount =
188     sizeof(specialValues) / sizeof(specialValues[0]);
189 
// Integer edge cases, paired with every entry of the float table as the
// second (int) operand of the edge-case jobs. The second half is the first
// half negated (zero excluded).
const int specialValuesInt[] = {
    // zero and small positive values
    0, 1, 2, 3,
    // presumably chosen to sit around the float exponent limits
    126, 127, 128,
    // large positive values
    0x02000001, 0x04000001, 1465264071, 1488522147,

    // negated counterparts, same order
    -1, -2, -3,
    -126, -127, -128,
    -0x02000001, -0x04000001, -1465264071, -1488522147,
};
196 
197 constexpr size_t specialValuesIntCount =
198     sizeof(specialValuesInt) / sizeof(specialValuesInt[0]);
199 
Test(cl_uint job_id,cl_uint thread_id,void * data)200 cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
201 {
202     TestInfo *job = (TestInfo *)data;
203     size_t buffer_elements = job->subBufferSize;
204     size_t buffer_size = buffer_elements * sizeof(cl_float);
205     cl_uint base = job_id * (cl_uint)job->step;
206     ThreadInfo *tinfo = &(job->tinfo[thread_id]);
207     fptr func = job->f->func;
208     int ftz = job->ftz;
209     bool relaxedMode = job->relaxedMode;
210     float ulps = job->ulps;
211     MTdata d = tinfo->d;
212     cl_int error;
213     const char *name = job->f->name;
214     cl_uint *t = 0;
215     cl_float *r = 0;
216     cl_float *s = 0;
217     cl_int *s2 = 0;
218 
219     cl_event e[VECTOR_SIZE_COUNT];
220     cl_uint *out[VECTOR_SIZE_COUNT];
221     if (gHostFill)
222     {
223         // start the map of the output arrays
224         for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
225         {
226             out[j] = (cl_uint *)clEnqueueMapBuffer(
227                 tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
228                 buffer_size, 0, NULL, e + j, &error);
229             if (error || NULL == out[j])
230             {
231                 vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
232                            error);
233                 return error;
234             }
235         }
236 
237         // Get that moving
238         if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n");
239     }
240 
241     // Init input array
242     cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements;
243     cl_uint *p2 = (cl_uint *)gIn2 + thread_id * buffer_elements;
244     size_t idx = 0;
245     int totalSpecialValueCount = specialValuesCount * specialValuesIntCount;
246     int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements;
247 
248     if (job_id <= (cl_uint)lastSpecialJobIndex)
249     { // test edge cases
250         float *fp = (float *)p;
251         cl_int *ip2 = (cl_int *)p2;
252         uint32_t x, y;
253 
254         x = (job_id * buffer_elements) % specialValuesCount;
255         y = (job_id * buffer_elements) / specialValuesCount;
256 
257         for (; idx < buffer_elements; idx++)
258         {
259             fp[idx] = specialValues[x];
260             ip2[idx] = specialValuesInt[y];
261             ++x;
262             if (x >= specialValuesCount)
263             {
264                 x = 0;
265                 y++;
266                 if (y >= specialValuesIntCount) break;
267             }
268         }
269     }
270 
271     // Init any remaining values.
272     for (; idx < buffer_elements; idx++)
273     {
274         p[idx] = genrand_int32(d);
275         p2[idx] = genrand_int32(d);
276     }
277 
278     if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
279                                       buffer_size, p, 0, NULL, NULL)))
280     {
281         vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
282         return error;
283     }
284 
285     if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0,
286                                       buffer_size, p2, 0, NULL, NULL)))
287     {
288         vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
289         return error;
290     }
291 
292     for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
293     {
294         if (gHostFill)
295         {
296             // Wait for the map to finish
297             if ((error = clWaitForEvents(1, e + j)))
298             {
299                 vlog_error("Error: clWaitForEvents failed! err: %d\n", error);
300                 return error;
301             }
302             if ((error = clReleaseEvent(e[j])))
303             {
304                 vlog_error("Error: clReleaseEvent failed! err: %d\n", error);
305                 return error;
306             }
307         }
308 
309         // Fill the result buffer with garbage, so that old results don't carry
310         // over
311         uint32_t pattern = 0xffffdead;
312         if (gHostFill)
313         {
314             memset_pattern4(out[j], &pattern, buffer_size);
315             if ((error = clEnqueueUnmapMemObject(
316                      tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL)))
317             {
318                 vlog_error("Error: clEnqueueUnmapMemObject failed! err: %d\n",
319                            error);
320                 return error;
321             }
322         }
323         else
324         {
325             if ((error = clEnqueueFillBuffer(tinfo->tQueue, tinfo->outBuf[j],
326                                              &pattern, sizeof(pattern), 0,
327                                              buffer_size, 0, NULL, NULL)))
328             {
329                 vlog_error("Error: clEnqueueFillBuffer failed! err: %d\n",
330                            error);
331                 return error;
332             }
333         }
334 
335         // Run the kernel
336         size_t vectorCount =
337             (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
338         cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
339                                                  // own copy of the cl_kernel
340         cl_program program = job->programs[j];
341 
342         if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
343                                     &tinfo->outBuf[j])))
344         {
345             LogBuildError(program);
346             return error;
347         }
348         if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
349                                     &tinfo->inBuf)))
350         {
351             LogBuildError(program);
352             return error;
353         }
354         if ((error = clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2),
355                                     &tinfo->inBuf2)))
356         {
357             LogBuildError(program);
358             return error;
359         }
360 
361         if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
362                                             &vectorCount, NULL, 0, NULL, NULL)))
363         {
364             vlog_error("FAILED -- could not execute kernel\n");
365             return error;
366         }
367     }
368 
369     // Get that moving
370     if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 2 failed\n");
371 
372     if (gSkipCorrectnessTesting) return CL_SUCCESS;
373 
374     // Calculate the correctly rounded reference result
375     r = (float *)gOut_Ref + thread_id * buffer_elements;
376     s = (float *)gIn + thread_id * buffer_elements;
377     s2 = (cl_int *)gIn2 + thread_id * buffer_elements;
378     for (size_t j = 0; j < buffer_elements; j++)
379         r[j] = (float)func.f_fi(s[j], s2[j]);
380 
381     // Read the data back -- no need to wait for the first N-1 buffers but wait
382     // for the last buffer. This is an in order queue.
383     for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
384     {
385         cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
386         out[j] = (cl_uint *)clEnqueueMapBuffer(
387             tinfo->tQueue, tinfo->outBuf[j], blocking, CL_MAP_READ, 0,
388             buffer_size, 0, NULL, NULL, &error);
389         if (error || NULL == out[j])
390         {
391             vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
392                        error);
393             return error;
394         }
395     }
396 
397     // Verify data
398     t = (cl_uint *)r;
399     for (size_t j = 0; j < buffer_elements; j++)
400     {
401         for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
402         {
403             cl_uint *q = out[k];
404 
405             // If we aren't getting the correctly rounded result
406             if (t[j] != q[j])
407             {
408                 float test = ((float *)q)[j];
409                 double correct = func.f_fi(s[j], s2[j]);
410                 float err = Ulp_Error(test, correct);
411                 int fail = !(fabsf(err) <= ulps);
412 
413                 if (fail && (ftz || relaxedMode))
414                 {
415                     // retry per section 6.5.3.2
416                     if (IsFloatResultSubnormal(correct, ulps))
417                     {
418                         fail = fail && (test != 0.0f);
419                         if (!fail) err = 0.0f;
420                     }
421 
422                     // retry per section 6.5.3.3
423                     if (IsFloatSubnormal(s[j]))
424                     {
425                         double correct2, correct3;
426                         float err2, err3;
427                         correct2 = func.f_fi(0.0, s2[j]);
428                         correct3 = func.f_fi(-0.0, s2[j]);
429                         err2 = Ulp_Error(test, correct2);
430                         err3 = Ulp_Error(test, correct3);
431                         fail = fail
432                             && ((!(fabsf(err2) <= ulps))
433                                 && (!(fabsf(err3) <= ulps)));
434                         if (fabsf(err2) < fabsf(err)) err = err2;
435                         if (fabsf(err3) < fabsf(err)) err = err3;
436 
437                         // retry per section 6.5.3.4
438                         if (IsFloatResultSubnormal(correct2, ulps)
439                             || IsFloatResultSubnormal(correct3, ulps))
440                         {
441                             fail = fail && (test != 0.0f);
442                             if (!fail) err = 0.0f;
443                         }
444                     }
445                 }
446 
447                 if (fabsf(err) > tinfo->maxError)
448                 {
449                     tinfo->maxError = fabsf(err);
450                     tinfo->maxErrorValue = s[j];
451                     tinfo->maxErrorValue2 = s2[j];
452                 }
453                 if (fail)
454                 {
455                     vlog_error(
456                         "\nERROR: %s%s: %f ulp error at {%a (0x%8.8x), %d}: "
457                         "*%a (0x%8.8x) vs. %a (0x%8.8x) at index: %zu\n",
458                         name, sizeNames[k], err, s[j], ((uint32_t *)s)[j],
459                         s2[j], r[j], ((uint32_t *)r)[j], test,
460                         ((cl_uint *)&test)[0], j);
461                     return -1;
462                 }
463             }
464         }
465     }
466 
467     for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
468     {
469         if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
470                                              out[j], 0, NULL, NULL)))
471         {
472             vlog_error("Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n",
473                        j, error);
474             return error;
475         }
476     }
477 
478     if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 3 failed\n");
479 
480 
481     if (0 == (base & 0x0fffffff))
482     {
483         if (gVerboseBruteForce)
484         {
485             vlog("base:%14u step:%10u scale:%10u buf_elements:%10zu ulps:%5.3f "
486                  "ThreadCount:%2u\n",
487                  base, job->step, job->scale, buffer_elements, job->ulps,
488                  job->threadCount);
489         }
490         else
491         {
492             vlog(".");
493         }
494         fflush(stdout);
495     }
496 
497     return CL_SUCCESS;
498 }
499 
500 } // anonymous namespace
501 
TestFunc_Float_Float_Int(const Func * f,MTdata d,bool relaxedMode)502 int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode)
503 {
504     TestInfo test_info{};
505     cl_int error;
506     float maxError = 0.0f;
507     double maxErrorVal = 0.0;
508     cl_int maxErrorVal2 = 0;
509 
510     logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
511 
512     // Init test_info
513     test_info.threadCount = GetThreadCount();
514     test_info.subBufferSize = BUFFER_SIZE
515         / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
516     test_info.scale = getTestScale(sizeof(cl_float));
517 
518     test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
519     if (test_info.step / test_info.subBufferSize != test_info.scale)
520     {
521         // there was overflow
522         test_info.jobCount = 1;
523     }
524     else
525     {
526         test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
527     }
528 
529     test_info.f = f;
530     test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps;
531     test_info.ftz =
532         f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
533     test_info.relaxedMode = relaxedMode;
534 
535     test_info.tinfo.resize(test_info.threadCount);
536     for (cl_uint i = 0; i < test_info.threadCount; i++)
537     {
538         cl_buffer_region region = {
539             i * test_info.subBufferSize * sizeof(cl_float),
540             test_info.subBufferSize * sizeof(cl_float)
541         };
542         test_info.tinfo[i].inBuf =
543             clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
544                               CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
545         if (error || NULL == test_info.tinfo[i].inBuf)
546         {
547             vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
548                        "region {%zd, %zd}\n",
549                        region.origin, region.size);
550             return error;
551         }
552         cl_buffer_region region2 = { i * test_info.subBufferSize
553                                          * sizeof(cl_int),
554                                      test_info.subBufferSize * sizeof(cl_int) };
555         test_info.tinfo[i].inBuf2 =
556             clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY,
557                               CL_BUFFER_CREATE_TYPE_REGION, &region2, &error);
558         if (error || NULL == test_info.tinfo[i].inBuf2)
559         {
560             vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
561                        "region {%zd, %zd}\n",
562                        region.origin, region.size);
563             return error;
564         }
565 
566         for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
567         {
568             test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
569                 gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
570                 &region, &error);
571             if (error || NULL == test_info.tinfo[i].outBuf[j])
572             {
573                 vlog_error("Error: Unable to create sub-buffer of "
574                            "gOutBuffer[%d] for region {%zd, %zd}\n",
575                            (int)j, region.origin, region.size);
576                 return error;
577             }
578         }
579         test_info.tinfo[i].tQueue =
580             clCreateCommandQueue(gContext, gDevice, 0, &error);
581         if (NULL == test_info.tinfo[i].tQueue || error)
582         {
583             vlog_error("clCreateCommandQueue failed. (%d)\n", error);
584             return error;
585         }
586 
587         test_info.tinfo[i].d = MTdataHolder(genrand_int32(d));
588     }
589 
590     // Init the kernels
591     BuildKernelInfo build_info{ test_info.threadCount, test_info.k,
592                                 test_info.programs, f->nameInCode,
593                                 relaxedMode };
594     if ((error = ThreadPool_Do(BuildKernelFn,
595                                gMaxVectorSizeIndex - gMinVectorSizeIndex,
596                                &build_info)))
597         return error;
598 
599     // Run the kernels
600     if (!gSkipCorrectnessTesting)
601     {
602         error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
603         if (error) return error;
604 
605         // Accumulate the arithmetic errors
606         for (cl_uint i = 0; i < test_info.threadCount; i++)
607         {
608             if (test_info.tinfo[i].maxError > maxError)
609             {
610                 maxError = test_info.tinfo[i].maxError;
611                 maxErrorVal = test_info.tinfo[i].maxErrorValue;
612                 maxErrorVal2 = test_info.tinfo[i].maxErrorValue2;
613             }
614         }
615 
616         if (gWimpyMode)
617             vlog("Wimp pass");
618         else
619             vlog("passed");
620 
621         vlog("\t%8.2f @ {%a, %d}", maxError, maxErrorVal, maxErrorVal2);
622     }
623 
624     vlog("\n");
625 
626     return CL_SUCCESS;
627 }
628