1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16
17 #include "common.h"
18 #include "function_list.h"
19 #include "test_functions.h"
20 #include "utility.h"
21
22 #include <cstring>
23
24 namespace {
25
BuildKernelFn(cl_uint job_id,cl_uint thread_id UNUSED,void * p)26 cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
27 {
28 BuildKernelInfo &info = *(BuildKernelInfo *)p;
29 auto generator = [](const std::string &kernel_name, const char *builtin,
30 cl_uint vector_size_index) {
31 return GetBinaryKernel(kernel_name, builtin, ParameterType::Int,
32 ParameterType::Float, ParameterType::Float,
33 vector_size_index);
34 };
35 return BuildKernels(info, job_id, generator);
36 }
37
38 // Thread specific data for a worker thread
39 struct ThreadInfo
40 {
41 // Input and output buffers for the thread
42 clMemWrapper inBuf;
43 clMemWrapper inBuf2;
44 Buffers outBuf;
45
46 MTdataHolder d;
47
48 // Per thread command queue to improve performance
49 clCommandQueueWrapper tQueue;
50 };
51
52 struct TestInfo
53 {
54 size_t subBufferSize; // Size of the sub-buffer in elements
55 const Func *f; // A pointer to the function info
56
57 // Programs for various vector sizes.
58 Programs programs;
59
60 // Thread-specific kernels for each vector size:
61 // k[vector_size][thread_id]
62 KernelMatrix k;
63
64 // Array of thread specific information
65 std::vector<ThreadInfo> tinfo;
66
67 cl_uint threadCount; // Number of worker threads
68 cl_uint jobCount; // Number of jobs
69 cl_uint step; // step between each chunk and the next.
70 cl_uint scale; // stride between individual test values
71 int ftz; // non-zero if running in flush to zero mode
72 bool relaxedMode; // True if test is running in relaxed mode, false
73 // otherwise.
74 };
75
76 // A table of more difficult cases to get right
77 const float specialValues[] = {
78 -NAN,
79 -INFINITY,
80 -FLT_MAX,
81 MAKE_HEX_FLOAT(-0x1.000002p64f, -0x1000002L, 40),
82 MAKE_HEX_FLOAT(-0x1.0p64f, -0x1L, 64),
83 MAKE_HEX_FLOAT(-0x1.fffffep63f, -0x1fffffeL, 39),
84 MAKE_HEX_FLOAT(-0x1.000002p63f, -0x1000002L, 39),
85 MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63),
86 MAKE_HEX_FLOAT(-0x1.fffffep62f, -0x1fffffeL, 38),
87 MAKE_HEX_FLOAT(-0x1.000002p32f, -0x1000002L, 8),
88 MAKE_HEX_FLOAT(-0x1.0p32f, -0x1L, 32),
89 MAKE_HEX_FLOAT(-0x1.fffffep31f, -0x1fffffeL, 7),
90 MAKE_HEX_FLOAT(-0x1.000002p31f, -0x1000002L, 7),
91 MAKE_HEX_FLOAT(-0x1.0p31f, -0x1L, 31),
92 MAKE_HEX_FLOAT(-0x1.fffffep30f, -0x1fffffeL, 6),
93 -1000.f,
94 -100.f,
95 -4.0f,
96 -3.5f,
97 -3.0f,
98 MAKE_HEX_FLOAT(-0x1.800002p1f, -0x1800002L, -23),
99 -2.5f,
100 MAKE_HEX_FLOAT(-0x1.7ffffep1f, -0x17ffffeL, -23),
101 -2.0f,
102 MAKE_HEX_FLOAT(-0x1.800002p0f, -0x1800002L, -24),
103 -1.5f,
104 MAKE_HEX_FLOAT(-0x1.7ffffep0f, -0x17ffffeL, -24),
105 MAKE_HEX_FLOAT(-0x1.000002p0f, -0x1000002L, -24),
106 -1.0f,
107 MAKE_HEX_FLOAT(-0x1.fffffep-1f, -0x1fffffeL, -25),
108 MAKE_HEX_FLOAT(-0x1.000002p-1f, -0x1000002L, -25),
109 -0.5f,
110 MAKE_HEX_FLOAT(-0x1.fffffep-2f, -0x1fffffeL, -26),
111 MAKE_HEX_FLOAT(-0x1.000002p-2f, -0x1000002L, -26),
112 -0.25f,
113 MAKE_HEX_FLOAT(-0x1.fffffep-3f, -0x1fffffeL, -27),
114 MAKE_HEX_FLOAT(-0x1.000002p-126f, -0x1000002L, -150),
115 -FLT_MIN,
116 MAKE_HEX_FLOAT(-0x0.fffffep-126f, -0x0fffffeL, -150),
117 MAKE_HEX_FLOAT(-0x0.000ffep-126f, -0x0000ffeL, -150),
118 MAKE_HEX_FLOAT(-0x0.0000fep-126f, -0x00000feL, -150),
119 MAKE_HEX_FLOAT(-0x0.00000ep-126f, -0x000000eL, -150),
120 MAKE_HEX_FLOAT(-0x0.00000cp-126f, -0x000000cL, -150),
121 MAKE_HEX_FLOAT(-0x0.00000ap-126f, -0x000000aL, -150),
122 MAKE_HEX_FLOAT(-0x0.000008p-126f, -0x0000008L, -150),
123 MAKE_HEX_FLOAT(-0x0.000006p-126f, -0x0000006L, -150),
124 MAKE_HEX_FLOAT(-0x0.000004p-126f, -0x0000004L, -150),
125 MAKE_HEX_FLOAT(-0x0.000002p-126f, -0x0000002L, -150),
126 -0.0f,
127
128 +NAN,
129 +INFINITY,
130 +FLT_MAX,
131 MAKE_HEX_FLOAT(+0x1.000002p64f, +0x1000002L, 40),
132 MAKE_HEX_FLOAT(+0x1.0p64f, +0x1L, 64),
133 MAKE_HEX_FLOAT(+0x1.fffffep63f, +0x1fffffeL, 39),
134 MAKE_HEX_FLOAT(+0x1.000002p63f, +0x1000002L, 39),
135 MAKE_HEX_FLOAT(+0x1.0p63f, +0x1L, 63),
136 MAKE_HEX_FLOAT(+0x1.fffffep62f, +0x1fffffeL, 38),
137 MAKE_HEX_FLOAT(+0x1.000002p32f, +0x1000002L, 8),
138 MAKE_HEX_FLOAT(+0x1.0p32f, +0x1L, 32),
139 MAKE_HEX_FLOAT(+0x1.fffffep31f, +0x1fffffeL, 7),
140 MAKE_HEX_FLOAT(+0x1.000002p31f, +0x1000002L, 7),
141 MAKE_HEX_FLOAT(+0x1.0p31f, +0x1L, 31),
142 MAKE_HEX_FLOAT(+0x1.fffffep30f, +0x1fffffeL, 6),
143 +1000.f,
144 +100.f,
145 +4.0f,
146 +3.5f,
147 +3.0f,
148 MAKE_HEX_FLOAT(+0x1.800002p1f, +0x1800002L, -23),
149 2.5f,
150 MAKE_HEX_FLOAT(+0x1.7ffffep1f, +0x17ffffeL, -23),
151 +2.0f,
152 MAKE_HEX_FLOAT(+0x1.800002p0f, +0x1800002L, -24),
153 1.5f,
154 MAKE_HEX_FLOAT(+0x1.7ffffep0f, +0x17ffffeL, -24),
155 MAKE_HEX_FLOAT(+0x1.000002p0f, +0x1000002L, -24),
156 +1.0f,
157 MAKE_HEX_FLOAT(+0x1.fffffep-1f, +0x1fffffeL, -25),
158 MAKE_HEX_FLOAT(+0x1.000002p-1f, +0x1000002L, -25),
159 +0.5f,
160 MAKE_HEX_FLOAT(+0x1.fffffep-2f, +0x1fffffeL, -26),
161 MAKE_HEX_FLOAT(+0x1.000002p-2f, +0x1000002L, -26),
162 +0.25f,
163 MAKE_HEX_FLOAT(+0x1.fffffep-3f, +0x1fffffeL, -27),
164 MAKE_HEX_FLOAT(0x1.000002p-126f, 0x1000002L, -150),
165 +FLT_MIN,
166 MAKE_HEX_FLOAT(+0x0.fffffep-126f, +0x0fffffeL, -150),
167 MAKE_HEX_FLOAT(+0x0.000ffep-126f, +0x0000ffeL, -150),
168 MAKE_HEX_FLOAT(+0x0.0000fep-126f, +0x00000feL, -150),
169 MAKE_HEX_FLOAT(+0x0.00000ep-126f, +0x000000eL, -150),
170 MAKE_HEX_FLOAT(+0x0.00000cp-126f, +0x000000cL, -150),
171 MAKE_HEX_FLOAT(+0x0.00000ap-126f, +0x000000aL, -150),
172 MAKE_HEX_FLOAT(+0x0.000008p-126f, +0x0000008L, -150),
173 MAKE_HEX_FLOAT(+0x0.000006p-126f, +0x0000006L, -150),
174 MAKE_HEX_FLOAT(+0x0.000004p-126f, +0x0000004L, -150),
175 MAKE_HEX_FLOAT(+0x0.000002p-126f, +0x0000002L, -150),
176 +0.0f,
177 };
178
179 constexpr size_t specialValuesCount =
180 sizeof(specialValues) / sizeof(specialValues[0]);
181
Test(cl_uint job_id,cl_uint thread_id,void * data)182 cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
183 {
184 TestInfo *job = (TestInfo *)data;
185 size_t buffer_elements = job->subBufferSize;
186 size_t buffer_size = buffer_elements * sizeof(cl_float);
187 cl_uint base = job_id * (cl_uint)job->step;
188 ThreadInfo *tinfo = &(job->tinfo[thread_id]);
189 fptr func = job->f->func;
190 int ftz = job->ftz;
191 bool relaxedMode = job->relaxedMode;
192 MTdata d = tinfo->d;
193 cl_int error;
194 const char *name = job->f->name;
195 cl_int *t = 0;
196 cl_int *r = 0;
197 cl_float *s = 0;
198 cl_float *s2 = 0;
199
200 cl_event e[VECTOR_SIZE_COUNT];
201 cl_int *out[VECTOR_SIZE_COUNT];
202 if (gHostFill)
203 {
204 // start the map of the output arrays
205 for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
206 {
207 out[j] = (cl_int *)clEnqueueMapBuffer(
208 tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
209 buffer_size, 0, NULL, e + j, &error);
210 if (error || NULL == out[j])
211 {
212 vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
213 error);
214 return error;
215 }
216 }
217
218 // Get that moving
219 if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n");
220 }
221
222 // Init input array
223 cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements;
224 cl_uint *p2 = (cl_uint *)gIn2 + thread_id * buffer_elements;
225 cl_uint idx = 0;
226
227 int totalSpecialValueCount = specialValuesCount * specialValuesCount;
228 int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements;
229
230 // Test edge cases
231 if (job_id <= (cl_uint)lastSpecialJobIndex)
232 {
233 float *fp = (float *)p;
234 float *fp2 = (float *)p2;
235 uint32_t x, y;
236
237 x = (job_id * buffer_elements) % specialValuesCount;
238 y = (job_id * buffer_elements) / specialValuesCount;
239
240 for (; idx < buffer_elements; idx++)
241 {
242 fp[idx] = specialValues[x];
243 fp2[idx] = specialValues[y];
244 ++x;
245 if (x >= specialValuesCount)
246 {
247 x = 0;
248 y++;
249 if (y >= specialValuesCount) break;
250 }
251 }
252 }
253
254 // Init any remaining values
255 for (; idx < buffer_elements; idx++)
256 {
257 p[idx] = genrand_int32(d);
258 p2[idx] = genrand_int32(d);
259 }
260
261 if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
262 buffer_size, p, 0, NULL, NULL)))
263 {
264 vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
265 return error;
266 }
267
268 if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0,
269 buffer_size, p2, 0, NULL, NULL)))
270 {
271 vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
272 return error;
273 }
274
275 for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
276 {
277 if (gHostFill)
278 {
279 // Wait for the map to finish
280 if ((error = clWaitForEvents(1, e + j)))
281 {
282 vlog_error("Error: clWaitForEvents failed! err: %d\n", error);
283 return error;
284 }
285 if ((error = clReleaseEvent(e[j])))
286 {
287 vlog_error("Error: clReleaseEvent failed! err: %d\n", error);
288 return error;
289 }
290 }
291
292 // Fill the result buffer with garbage, so that old results don't carry
293 // over
294 uint32_t pattern = 0xffffdead;
295 if (gHostFill)
296 {
297 memset_pattern4(out[j], &pattern, buffer_size);
298 if ((error = clEnqueueUnmapMemObject(
299 tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL)))
300 {
301 vlog_error("Error: clEnqueueUnmapMemObject failed! err: %d\n",
302 error);
303 return error;
304 }
305 }
306 else
307 {
308 if ((error = clEnqueueFillBuffer(tinfo->tQueue, tinfo->outBuf[j],
309 &pattern, sizeof(pattern), 0,
310 buffer_size, 0, NULL, NULL)))
311 {
312 vlog_error("Error: clEnqueueFillBuffer failed! err: %d\n",
313 error);
314 return error;
315 }
316 }
317
318 // Run the kernel
319 size_t vectorCount =
320 (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
321 cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
322 // own copy of the cl_kernel
323 cl_program program = job->programs[j];
324
325 if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
326 &tinfo->outBuf[j])))
327 {
328 LogBuildError(program);
329 return error;
330 }
331 if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
332 &tinfo->inBuf)))
333 {
334 LogBuildError(program);
335 return error;
336 }
337 if ((error = clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2),
338 &tinfo->inBuf2)))
339 {
340 LogBuildError(program);
341 return error;
342 }
343
344 if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
345 &vectorCount, NULL, 0, NULL, NULL)))
346 {
347 vlog_error("FAILED -- could not execute kernel\n");
348 return error;
349 }
350 }
351
352 // Get that moving
353 if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 2 failed\n");
354
355 if (gSkipCorrectnessTesting) return CL_SUCCESS;
356
357 // Calculate the correctly rounded reference result
358 r = (cl_int *)gOut_Ref + thread_id * buffer_elements;
359 s = (float *)gIn + thread_id * buffer_elements;
360 s2 = (float *)gIn2 + thread_id * buffer_elements;
361 for (size_t j = 0; j < buffer_elements; j++) r[j] = func.i_ff(s[j], s2[j]);
362
363 // Read the data back -- no need to wait for the first N-1 buffers but wait
364 // for the last buffer. This is an in order queue.
365 for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
366 {
367 cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
368 out[j] = (cl_int *)clEnqueueMapBuffer(
369 tinfo->tQueue, tinfo->outBuf[j], blocking, CL_MAP_READ, 0,
370 buffer_size, 0, NULL, NULL, &error);
371 if (error || NULL == out[j])
372 {
373 vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
374 error);
375 return error;
376 }
377 }
378
379 // Verify data
380 t = (cl_int *)r;
381 for (size_t j = 0; j < buffer_elements; j++)
382 {
383 cl_int *q = out[0];
384
385 if (gMinVectorSizeIndex == 0 && t[j] != q[j])
386 {
387 if (ftz || relaxedMode)
388 {
389 if (IsFloatSubnormal(s[j]))
390 {
391 if (IsFloatSubnormal(s2[j]))
392 {
393 int correct = func.i_ff(0.0f, 0.0f);
394 int correct2 = func.i_ff(0.0f, -0.0f);
395 int correct3 = func.i_ff(-0.0f, 0.0f);
396 int correct4 = func.i_ff(-0.0f, -0.0f);
397
398 if (correct == q[j] || correct2 == q[j]
399 || correct3 == q[j] || correct4 == q[j])
400 continue;
401 }
402 else
403 {
404 int correct = func.i_ff(0.0f, s2[j]);
405 int correct2 = func.i_ff(-0.0f, s2[j]);
406 if (correct == q[j] || correct2 == q[j]) continue;
407 }
408 }
409 else if (IsFloatSubnormal(s2[j]))
410 {
411 int correct = func.i_ff(s[j], 0.0f);
412 int correct2 = func.i_ff(s[j], -0.0f);
413 if (correct == q[j] || correct2 == q[j]) continue;
414 }
415 }
416
417 uint32_t err = t[j] - q[j];
418 if (q[j] > t[j]) err = q[j] - t[j];
419 vlog_error("\nERROR: %s: %d ulp error at {%a, %a}: *0x%8.8x vs. "
420 "0x%8.8x (index: %zu)\n",
421 name, err, ((float *)s)[j], ((float *)s2)[j], t[j], q[j],
422 j);
423 return -1;
424 }
425
426 for (auto k = std::max(1U, gMinVectorSizeIndex);
427 k < gMaxVectorSizeIndex; k++)
428 {
429 q = out[k];
430 // If we aren't getting the correctly rounded result
431 if (-t[j] != q[j])
432 {
433 if (ftz || relaxedMode)
434 {
435 if (IsFloatSubnormal(s[j]))
436 {
437 if (IsFloatSubnormal(s2[j]))
438 {
439 int correct = -func.i_ff(0.0f, 0.0f);
440 int correct2 = -func.i_ff(0.0f, -0.0f);
441 int correct3 = -func.i_ff(-0.0f, 0.0f);
442 int correct4 = -func.i_ff(-0.0f, -0.0f);
443
444 if (correct == q[j] || correct2 == q[j]
445 || correct3 == q[j] || correct4 == q[j])
446 continue;
447 }
448 else
449 {
450 int correct = -func.i_ff(0.0f, s2[j]);
451 int correct2 = -func.i_ff(-0.0f, s2[j]);
452 if (correct == q[j] || correct2 == q[j]) continue;
453 }
454 }
455 else if (IsFloatSubnormal(s2[j]))
456 {
457 int correct = -func.i_ff(s[j], 0.0f);
458 int correct2 = -func.i_ff(s[j], -0.0f);
459 if (correct == q[j] || correct2 == q[j]) continue;
460 }
461 }
462 cl_uint err = -t[j] - q[j];
463 if (q[j] > -t[j]) err = q[j] + t[j];
464 vlog_error("\nERROR: %s%s: %d ulp error at {%a, %a}: *0x%8.8x "
465 "vs. 0x%8.8x (index: %zu)\n",
466 name, sizeNames[k], err, ((float *)s)[j],
467 ((float *)s2)[j], -t[j], q[j], j);
468 return -1;
469 }
470 }
471 }
472
473 for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
474 {
475 if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
476 out[j], 0, NULL, NULL)))
477 {
478 vlog_error("Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n",
479 j, error);
480 return error;
481 }
482 }
483
484 if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 3 failed\n");
485
486
487 if (0 == (base & 0x0fffffff))
488 {
489 if (gVerboseBruteForce)
490 {
491 vlog("base:%14u step:%10u scale:%10u buf_elements:%10zd "
492 "ThreadCount:%2u\n",
493 base, job->step, job->scale, buffer_elements,
494 job->threadCount);
495 }
496 else
497 {
498 vlog(".");
499 }
500 fflush(stdout);
501 }
502
503 return CL_SUCCESS;
504 }
505
506 } // anonymous namespace
507
TestMacro_Int_Float_Float(const Func * f,MTdata d,bool relaxedMode)508 int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode)
509 {
510 TestInfo test_info{};
511 cl_int error;
512
513 logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
514
515 // Init test_info
516 test_info.threadCount = GetThreadCount();
517 test_info.subBufferSize = BUFFER_SIZE
518 / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
519 test_info.scale = getTestScale(sizeof(cl_float));
520
521 test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
522 if (test_info.step / test_info.subBufferSize != test_info.scale)
523 {
524 // there was overflow
525 test_info.jobCount = 1;
526 }
527 else
528 {
529 test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
530 }
531
532 test_info.f = f;
533 test_info.ftz =
534 f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
535 test_info.relaxedMode = relaxedMode;
536
537 test_info.tinfo.resize(test_info.threadCount);
538 for (cl_uint i = 0; i < test_info.threadCount; i++)
539 {
540 cl_buffer_region region = {
541 i * test_info.subBufferSize * sizeof(cl_float),
542 test_info.subBufferSize * sizeof(cl_float)
543 };
544 test_info.tinfo[i].inBuf =
545 clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
546 CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error);
547 if (error || NULL == test_info.tinfo[i].inBuf)
548 {
549 vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
550 "region {%zd, %zd}\n",
551 region.origin, region.size);
552 return error;
553 }
554 test_info.tinfo[i].inBuf2 =
555 clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY,
556 CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error);
557 if (error || NULL == test_info.tinfo[i].inBuf2)
558 {
559 vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
560 "region {%zd, %zd}\n",
561 region.origin, region.size);
562 return error;
563 }
564
565 for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
566 {
567 test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
568 gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
569 ®ion, &error);
570 if (error || NULL == test_info.tinfo[i].outBuf[j])
571 {
572 vlog_error("Error: Unable to create sub-buffer of "
573 "gOutBuffer[%d] for region {%zd, %zd}\n",
574 (int)j, region.origin, region.size);
575 return error;
576 }
577 }
578 test_info.tinfo[i].tQueue =
579 clCreateCommandQueue(gContext, gDevice, 0, &error);
580 if (NULL == test_info.tinfo[i].tQueue || error)
581 {
582 vlog_error("clCreateCommandQueue failed. (%d)\n", error);
583 return error;
584 }
585
586 test_info.tinfo[i].d = MTdataHolder(genrand_int32(d));
587 }
588
589 // Init the kernels
590 BuildKernelInfo build_info{ test_info.threadCount, test_info.k,
591 test_info.programs, f->nameInCode,
592 relaxedMode };
593 if ((error = ThreadPool_Do(BuildKernelFn,
594 gMaxVectorSizeIndex - gMinVectorSizeIndex,
595 &build_info)))
596 return error;
597
598 // Run the kernels
599 if (!gSkipCorrectnessTesting)
600 {
601 error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
602 if (error) return error;
603
604 if (gWimpyMode)
605 vlog("Wimp pass");
606 else
607 vlog("passed");
608 }
609
610 vlog("\n");
611
612 return CL_SUCCESS;
613 }
614