xref: /aosp_15_r20/external/OpenCL-CTS/test_conformance/math_brute_force/macro_binary_float.cpp (revision 6467f958c7de8070b317fc65bcb0f6472e388d82)
1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 
17 #include "common.h"
18 #include "function_list.h"
19 #include "test_functions.h"
20 #include "utility.h"
21 
22 #include <cstring>
23 
24 namespace {
25 
BuildKernelFn(cl_uint job_id,cl_uint thread_id UNUSED,void * p)26 cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
27 {
28     BuildKernelInfo &info = *(BuildKernelInfo *)p;
29     auto generator = [](const std::string &kernel_name, const char *builtin,
30                         cl_uint vector_size_index) {
31         return GetBinaryKernel(kernel_name, builtin, ParameterType::Int,
32                                ParameterType::Float, ParameterType::Float,
33                                vector_size_index);
34     };
35     return BuildKernels(info, job_id, generator);
36 }
37 
38 // Thread specific data for a worker thread
39 struct ThreadInfo
40 {
41     // Input and output buffers for the thread
42     clMemWrapper inBuf;
43     clMemWrapper inBuf2;
44     Buffers outBuf;
45 
46     MTdataHolder d;
47 
48     // Per thread command queue to improve performance
49     clCommandQueueWrapper tQueue;
50 };
51 
52 struct TestInfo
53 {
54     size_t subBufferSize; // Size of the sub-buffer in elements
55     const Func *f; // A pointer to the function info
56 
57     // Programs for various vector sizes.
58     Programs programs;
59 
60     // Thread-specific kernels for each vector size:
61     // k[vector_size][thread_id]
62     KernelMatrix k;
63 
64     // Array of thread specific information
65     std::vector<ThreadInfo> tinfo;
66 
67     cl_uint threadCount; // Number of worker threads
68     cl_uint jobCount; // Number of jobs
69     cl_uint step; // step between each chunk and the next.
70     cl_uint scale; // stride between individual test values
71     int ftz; // non-zero if running in flush to zero mode
72     bool relaxedMode; // True if test is running in relaxed mode, false
73                       // otherwise.
74 };
75 
76 // A table of more difficult cases to get right
77 const float specialValues[] = {
78     -NAN,
79     -INFINITY,
80     -FLT_MAX,
81     MAKE_HEX_FLOAT(-0x1.000002p64f, -0x1000002L, 40),
82     MAKE_HEX_FLOAT(-0x1.0p64f, -0x1L, 64),
83     MAKE_HEX_FLOAT(-0x1.fffffep63f, -0x1fffffeL, 39),
84     MAKE_HEX_FLOAT(-0x1.000002p63f, -0x1000002L, 39),
85     MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63),
86     MAKE_HEX_FLOAT(-0x1.fffffep62f, -0x1fffffeL, 38),
87     MAKE_HEX_FLOAT(-0x1.000002p32f, -0x1000002L, 8),
88     MAKE_HEX_FLOAT(-0x1.0p32f, -0x1L, 32),
89     MAKE_HEX_FLOAT(-0x1.fffffep31f, -0x1fffffeL, 7),
90     MAKE_HEX_FLOAT(-0x1.000002p31f, -0x1000002L, 7),
91     MAKE_HEX_FLOAT(-0x1.0p31f, -0x1L, 31),
92     MAKE_HEX_FLOAT(-0x1.fffffep30f, -0x1fffffeL, 6),
93     -1000.f,
94     -100.f,
95     -4.0f,
96     -3.5f,
97     -3.0f,
98     MAKE_HEX_FLOAT(-0x1.800002p1f, -0x1800002L, -23),
99     -2.5f,
100     MAKE_HEX_FLOAT(-0x1.7ffffep1f, -0x17ffffeL, -23),
101     -2.0f,
102     MAKE_HEX_FLOAT(-0x1.800002p0f, -0x1800002L, -24),
103     -1.5f,
104     MAKE_HEX_FLOAT(-0x1.7ffffep0f, -0x17ffffeL, -24),
105     MAKE_HEX_FLOAT(-0x1.000002p0f, -0x1000002L, -24),
106     -1.0f,
107     MAKE_HEX_FLOAT(-0x1.fffffep-1f, -0x1fffffeL, -25),
108     MAKE_HEX_FLOAT(-0x1.000002p-1f, -0x1000002L, -25),
109     -0.5f,
110     MAKE_HEX_FLOAT(-0x1.fffffep-2f, -0x1fffffeL, -26),
111     MAKE_HEX_FLOAT(-0x1.000002p-2f, -0x1000002L, -26),
112     -0.25f,
113     MAKE_HEX_FLOAT(-0x1.fffffep-3f, -0x1fffffeL, -27),
114     MAKE_HEX_FLOAT(-0x1.000002p-126f, -0x1000002L, -150),
115     -FLT_MIN,
116     MAKE_HEX_FLOAT(-0x0.fffffep-126f, -0x0fffffeL, -150),
117     MAKE_HEX_FLOAT(-0x0.000ffep-126f, -0x0000ffeL, -150),
118     MAKE_HEX_FLOAT(-0x0.0000fep-126f, -0x00000feL, -150),
119     MAKE_HEX_FLOAT(-0x0.00000ep-126f, -0x000000eL, -150),
120     MAKE_HEX_FLOAT(-0x0.00000cp-126f, -0x000000cL, -150),
121     MAKE_HEX_FLOAT(-0x0.00000ap-126f, -0x000000aL, -150),
122     MAKE_HEX_FLOAT(-0x0.000008p-126f, -0x0000008L, -150),
123     MAKE_HEX_FLOAT(-0x0.000006p-126f, -0x0000006L, -150),
124     MAKE_HEX_FLOAT(-0x0.000004p-126f, -0x0000004L, -150),
125     MAKE_HEX_FLOAT(-0x0.000002p-126f, -0x0000002L, -150),
126     -0.0f,
127 
128     +NAN,
129     +INFINITY,
130     +FLT_MAX,
131     MAKE_HEX_FLOAT(+0x1.000002p64f, +0x1000002L, 40),
132     MAKE_HEX_FLOAT(+0x1.0p64f, +0x1L, 64),
133     MAKE_HEX_FLOAT(+0x1.fffffep63f, +0x1fffffeL, 39),
134     MAKE_HEX_FLOAT(+0x1.000002p63f, +0x1000002L, 39),
135     MAKE_HEX_FLOAT(+0x1.0p63f, +0x1L, 63),
136     MAKE_HEX_FLOAT(+0x1.fffffep62f, +0x1fffffeL, 38),
137     MAKE_HEX_FLOAT(+0x1.000002p32f, +0x1000002L, 8),
138     MAKE_HEX_FLOAT(+0x1.0p32f, +0x1L, 32),
139     MAKE_HEX_FLOAT(+0x1.fffffep31f, +0x1fffffeL, 7),
140     MAKE_HEX_FLOAT(+0x1.000002p31f, +0x1000002L, 7),
141     MAKE_HEX_FLOAT(+0x1.0p31f, +0x1L, 31),
142     MAKE_HEX_FLOAT(+0x1.fffffep30f, +0x1fffffeL, 6),
143     +1000.f,
144     +100.f,
145     +4.0f,
146     +3.5f,
147     +3.0f,
148     MAKE_HEX_FLOAT(+0x1.800002p1f, +0x1800002L, -23),
149     2.5f,
150     MAKE_HEX_FLOAT(+0x1.7ffffep1f, +0x17ffffeL, -23),
151     +2.0f,
152     MAKE_HEX_FLOAT(+0x1.800002p0f, +0x1800002L, -24),
153     1.5f,
154     MAKE_HEX_FLOAT(+0x1.7ffffep0f, +0x17ffffeL, -24),
155     MAKE_HEX_FLOAT(+0x1.000002p0f, +0x1000002L, -24),
156     +1.0f,
157     MAKE_HEX_FLOAT(+0x1.fffffep-1f, +0x1fffffeL, -25),
158     MAKE_HEX_FLOAT(+0x1.000002p-1f, +0x1000002L, -25),
159     +0.5f,
160     MAKE_HEX_FLOAT(+0x1.fffffep-2f, +0x1fffffeL, -26),
161     MAKE_HEX_FLOAT(+0x1.000002p-2f, +0x1000002L, -26),
162     +0.25f,
163     MAKE_HEX_FLOAT(+0x1.fffffep-3f, +0x1fffffeL, -27),
164     MAKE_HEX_FLOAT(0x1.000002p-126f, 0x1000002L, -150),
165     +FLT_MIN,
166     MAKE_HEX_FLOAT(+0x0.fffffep-126f, +0x0fffffeL, -150),
167     MAKE_HEX_FLOAT(+0x0.000ffep-126f, +0x0000ffeL, -150),
168     MAKE_HEX_FLOAT(+0x0.0000fep-126f, +0x00000feL, -150),
169     MAKE_HEX_FLOAT(+0x0.00000ep-126f, +0x000000eL, -150),
170     MAKE_HEX_FLOAT(+0x0.00000cp-126f, +0x000000cL, -150),
171     MAKE_HEX_FLOAT(+0x0.00000ap-126f, +0x000000aL, -150),
172     MAKE_HEX_FLOAT(+0x0.000008p-126f, +0x0000008L, -150),
173     MAKE_HEX_FLOAT(+0x0.000006p-126f, +0x0000006L, -150),
174     MAKE_HEX_FLOAT(+0x0.000004p-126f, +0x0000004L, -150),
175     MAKE_HEX_FLOAT(+0x0.000002p-126f, +0x0000002L, -150),
176     +0.0f,
177 };
178 
179 constexpr size_t specialValuesCount =
180     sizeof(specialValues) / sizeof(specialValues[0]);
181 
Test(cl_uint job_id,cl_uint thread_id,void * data)182 cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
183 {
184     TestInfo *job = (TestInfo *)data;
185     size_t buffer_elements = job->subBufferSize;
186     size_t buffer_size = buffer_elements * sizeof(cl_float);
187     cl_uint base = job_id * (cl_uint)job->step;
188     ThreadInfo *tinfo = &(job->tinfo[thread_id]);
189     fptr func = job->f->func;
190     int ftz = job->ftz;
191     bool relaxedMode = job->relaxedMode;
192     MTdata d = tinfo->d;
193     cl_int error;
194     const char *name = job->f->name;
195     cl_int *t = 0;
196     cl_int *r = 0;
197     cl_float *s = 0;
198     cl_float *s2 = 0;
199 
200     cl_event e[VECTOR_SIZE_COUNT];
201     cl_int *out[VECTOR_SIZE_COUNT];
202     if (gHostFill)
203     {
204         // start the map of the output arrays
205         for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
206         {
207             out[j] = (cl_int *)clEnqueueMapBuffer(
208                 tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
209                 buffer_size, 0, NULL, e + j, &error);
210             if (error || NULL == out[j])
211             {
212                 vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
213                            error);
214                 return error;
215             }
216         }
217 
218         // Get that moving
219         if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n");
220     }
221 
222     // Init input array
223     cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements;
224     cl_uint *p2 = (cl_uint *)gIn2 + thread_id * buffer_elements;
225     cl_uint idx = 0;
226 
227     int totalSpecialValueCount = specialValuesCount * specialValuesCount;
228     int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements;
229 
230     // Test edge cases
231     if (job_id <= (cl_uint)lastSpecialJobIndex)
232     {
233         float *fp = (float *)p;
234         float *fp2 = (float *)p2;
235         uint32_t x, y;
236 
237         x = (job_id * buffer_elements) % specialValuesCount;
238         y = (job_id * buffer_elements) / specialValuesCount;
239 
240         for (; idx < buffer_elements; idx++)
241         {
242             fp[idx] = specialValues[x];
243             fp2[idx] = specialValues[y];
244             ++x;
245             if (x >= specialValuesCount)
246             {
247                 x = 0;
248                 y++;
249                 if (y >= specialValuesCount) break;
250             }
251         }
252     }
253 
254     // Init any remaining values
255     for (; idx < buffer_elements; idx++)
256     {
257         p[idx] = genrand_int32(d);
258         p2[idx] = genrand_int32(d);
259     }
260 
261     if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
262                                       buffer_size, p, 0, NULL, NULL)))
263     {
264         vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
265         return error;
266     }
267 
268     if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0,
269                                       buffer_size, p2, 0, NULL, NULL)))
270     {
271         vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
272         return error;
273     }
274 
275     for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
276     {
277         if (gHostFill)
278         {
279             // Wait for the map to finish
280             if ((error = clWaitForEvents(1, e + j)))
281             {
282                 vlog_error("Error: clWaitForEvents failed! err: %d\n", error);
283                 return error;
284             }
285             if ((error = clReleaseEvent(e[j])))
286             {
287                 vlog_error("Error: clReleaseEvent failed! err: %d\n", error);
288                 return error;
289             }
290         }
291 
292         // Fill the result buffer with garbage, so that old results don't carry
293         // over
294         uint32_t pattern = 0xffffdead;
295         if (gHostFill)
296         {
297             memset_pattern4(out[j], &pattern, buffer_size);
298             if ((error = clEnqueueUnmapMemObject(
299                      tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL)))
300             {
301                 vlog_error("Error: clEnqueueUnmapMemObject failed! err: %d\n",
302                            error);
303                 return error;
304             }
305         }
306         else
307         {
308             if ((error = clEnqueueFillBuffer(tinfo->tQueue, tinfo->outBuf[j],
309                                              &pattern, sizeof(pattern), 0,
310                                              buffer_size, 0, NULL, NULL)))
311             {
312                 vlog_error("Error: clEnqueueFillBuffer failed! err: %d\n",
313                            error);
314                 return error;
315             }
316         }
317 
318         // Run the kernel
319         size_t vectorCount =
320             (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
321         cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
322                                                  // own copy of the cl_kernel
323         cl_program program = job->programs[j];
324 
325         if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
326                                     &tinfo->outBuf[j])))
327         {
328             LogBuildError(program);
329             return error;
330         }
331         if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
332                                     &tinfo->inBuf)))
333         {
334             LogBuildError(program);
335             return error;
336         }
337         if ((error = clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2),
338                                     &tinfo->inBuf2)))
339         {
340             LogBuildError(program);
341             return error;
342         }
343 
344         if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
345                                             &vectorCount, NULL, 0, NULL, NULL)))
346         {
347             vlog_error("FAILED -- could not execute kernel\n");
348             return error;
349         }
350     }
351 
352     // Get that moving
353     if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 2 failed\n");
354 
355     if (gSkipCorrectnessTesting) return CL_SUCCESS;
356 
357     // Calculate the correctly rounded reference result
358     r = (cl_int *)gOut_Ref + thread_id * buffer_elements;
359     s = (float *)gIn + thread_id * buffer_elements;
360     s2 = (float *)gIn2 + thread_id * buffer_elements;
361     for (size_t j = 0; j < buffer_elements; j++) r[j] = func.i_ff(s[j], s2[j]);
362 
363     // Read the data back -- no need to wait for the first N-1 buffers but wait
364     // for the last buffer. This is an in order queue.
365     for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
366     {
367         cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
368         out[j] = (cl_int *)clEnqueueMapBuffer(
369             tinfo->tQueue, tinfo->outBuf[j], blocking, CL_MAP_READ, 0,
370             buffer_size, 0, NULL, NULL, &error);
371         if (error || NULL == out[j])
372         {
373             vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
374                        error);
375             return error;
376         }
377     }
378 
379     // Verify data
380     t = (cl_int *)r;
381     for (size_t j = 0; j < buffer_elements; j++)
382     {
383         cl_int *q = out[0];
384 
385         if (gMinVectorSizeIndex == 0 && t[j] != q[j])
386         {
387             if (ftz || relaxedMode)
388             {
389                 if (IsFloatSubnormal(s[j]))
390                 {
391                     if (IsFloatSubnormal(s2[j]))
392                     {
393                         int correct = func.i_ff(0.0f, 0.0f);
394                         int correct2 = func.i_ff(0.0f, -0.0f);
395                         int correct3 = func.i_ff(-0.0f, 0.0f);
396                         int correct4 = func.i_ff(-0.0f, -0.0f);
397 
398                         if (correct == q[j] || correct2 == q[j]
399                             || correct3 == q[j] || correct4 == q[j])
400                             continue;
401                     }
402                     else
403                     {
404                         int correct = func.i_ff(0.0f, s2[j]);
405                         int correct2 = func.i_ff(-0.0f, s2[j]);
406                         if (correct == q[j] || correct2 == q[j]) continue;
407                     }
408                 }
409                 else if (IsFloatSubnormal(s2[j]))
410                 {
411                     int correct = func.i_ff(s[j], 0.0f);
412                     int correct2 = func.i_ff(s[j], -0.0f);
413                     if (correct == q[j] || correct2 == q[j]) continue;
414                 }
415             }
416 
417             uint32_t err = t[j] - q[j];
418             if (q[j] > t[j]) err = q[j] - t[j];
419             vlog_error("\nERROR: %s: %d ulp error at {%a, %a}: *0x%8.8x vs. "
420                        "0x%8.8x (index: %zu)\n",
421                        name, err, ((float *)s)[j], ((float *)s2)[j], t[j], q[j],
422                        j);
423             return -1;
424         }
425 
426         for (auto k = std::max(1U, gMinVectorSizeIndex);
427              k < gMaxVectorSizeIndex; k++)
428         {
429             q = out[k];
430             // If we aren't getting the correctly rounded result
431             if (-t[j] != q[j])
432             {
433                 if (ftz || relaxedMode)
434                 {
435                     if (IsFloatSubnormal(s[j]))
436                     {
437                         if (IsFloatSubnormal(s2[j]))
438                         {
439                             int correct = -func.i_ff(0.0f, 0.0f);
440                             int correct2 = -func.i_ff(0.0f, -0.0f);
441                             int correct3 = -func.i_ff(-0.0f, 0.0f);
442                             int correct4 = -func.i_ff(-0.0f, -0.0f);
443 
444                             if (correct == q[j] || correct2 == q[j]
445                                 || correct3 == q[j] || correct4 == q[j])
446                                 continue;
447                         }
448                         else
449                         {
450                             int correct = -func.i_ff(0.0f, s2[j]);
451                             int correct2 = -func.i_ff(-0.0f, s2[j]);
452                             if (correct == q[j] || correct2 == q[j]) continue;
453                         }
454                     }
455                     else if (IsFloatSubnormal(s2[j]))
456                     {
457                         int correct = -func.i_ff(s[j], 0.0f);
458                         int correct2 = -func.i_ff(s[j], -0.0f);
459                         if (correct == q[j] || correct2 == q[j]) continue;
460                     }
461                 }
462                 cl_uint err = -t[j] - q[j];
463                 if (q[j] > -t[j]) err = q[j] + t[j];
464                 vlog_error("\nERROR: %s%s: %d ulp error at {%a, %a}: *0x%8.8x "
465                            "vs. 0x%8.8x (index: %zu)\n",
466                            name, sizeNames[k], err, ((float *)s)[j],
467                            ((float *)s2)[j], -t[j], q[j], j);
468                 return -1;
469             }
470         }
471     }
472 
473     for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
474     {
475         if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
476                                              out[j], 0, NULL, NULL)))
477         {
478             vlog_error("Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n",
479                        j, error);
480             return error;
481         }
482     }
483 
484     if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 3 failed\n");
485 
486 
487     if (0 == (base & 0x0fffffff))
488     {
489         if (gVerboseBruteForce)
490         {
491             vlog("base:%14u step:%10u scale:%10u buf_elements:%10zd "
492                  "ThreadCount:%2u\n",
493                  base, job->step, job->scale, buffer_elements,
494                  job->threadCount);
495         }
496         else
497         {
498             vlog(".");
499         }
500         fflush(stdout);
501     }
502 
503     return CL_SUCCESS;
504 }
505 
506 } // anonymous namespace
507 
TestMacro_Int_Float_Float(const Func * f,MTdata d,bool relaxedMode)508 int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode)
509 {
510     TestInfo test_info{};
511     cl_int error;
512 
513     logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
514 
515     // Init test_info
516     test_info.threadCount = GetThreadCount();
517     test_info.subBufferSize = BUFFER_SIZE
518         / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
519     test_info.scale = getTestScale(sizeof(cl_float));
520 
521     test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
522     if (test_info.step / test_info.subBufferSize != test_info.scale)
523     {
524         // there was overflow
525         test_info.jobCount = 1;
526     }
527     else
528     {
529         test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
530     }
531 
532     test_info.f = f;
533     test_info.ftz =
534         f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
535     test_info.relaxedMode = relaxedMode;
536 
537     test_info.tinfo.resize(test_info.threadCount);
538     for (cl_uint i = 0; i < test_info.threadCount; i++)
539     {
540         cl_buffer_region region = {
541             i * test_info.subBufferSize * sizeof(cl_float),
542             test_info.subBufferSize * sizeof(cl_float)
543         };
544         test_info.tinfo[i].inBuf =
545             clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
546                               CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
547         if (error || NULL == test_info.tinfo[i].inBuf)
548         {
549             vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
550                        "region {%zd, %zd}\n",
551                        region.origin, region.size);
552             return error;
553         }
554         test_info.tinfo[i].inBuf2 =
555             clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY,
556                               CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
557         if (error || NULL == test_info.tinfo[i].inBuf2)
558         {
559             vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
560                        "region {%zd, %zd}\n",
561                        region.origin, region.size);
562             return error;
563         }
564 
565         for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
566         {
567             test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
568                 gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
569                 &region, &error);
570             if (error || NULL == test_info.tinfo[i].outBuf[j])
571             {
572                 vlog_error("Error: Unable to create sub-buffer of "
573                            "gOutBuffer[%d] for region {%zd, %zd}\n",
574                            (int)j, region.origin, region.size);
575                 return error;
576             }
577         }
578         test_info.tinfo[i].tQueue =
579             clCreateCommandQueue(gContext, gDevice, 0, &error);
580         if (NULL == test_info.tinfo[i].tQueue || error)
581         {
582             vlog_error("clCreateCommandQueue failed. (%d)\n", error);
583             return error;
584         }
585 
586         test_info.tinfo[i].d = MTdataHolder(genrand_int32(d));
587     }
588 
589     // Init the kernels
590     BuildKernelInfo build_info{ test_info.threadCount, test_info.k,
591                                 test_info.programs, f->nameInCode,
592                                 relaxedMode };
593     if ((error = ThreadPool_Do(BuildKernelFn,
594                                gMaxVectorSizeIndex - gMinVectorSizeIndex,
595                                &build_info)))
596         return error;
597 
598     // Run the kernels
599     if (!gSkipCorrectnessTesting)
600     {
601         error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
602         if (error) return error;
603 
604         if (gWimpyMode)
605             vlog("Wimp pass");
606         else
607             vlog("passed");
608     }
609 
610     vlog("\n");
611 
612     return CL_SUCCESS;
613 }
614