1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16
17 #include "common.h"
18 #include "function_list.h"
19 #include "test_functions.h"
20 #include "utility.h"
21
22 #include <climits>
23 #include <cstring>
24
25 namespace {
26
BuildKernelFn(cl_uint job_id,cl_uint thread_id UNUSED,void * p)27 cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
28 {
29 BuildKernelInfo &info = *(BuildKernelInfo *)p;
30 auto generator = [](const std::string &kernel_name, const char *builtin,
31 cl_uint vector_size_index) {
32 return GetBinaryKernel(kernel_name, builtin, ParameterType::Float,
33 ParameterType::Float, ParameterType::Int,
34 vector_size_index);
35 };
36 return BuildKernels(info, job_id, generator);
37 }
38
39 // Thread specific data for a worker thread
40 struct ThreadInfo
41 {
42 // Input and output buffers for the thread
43 clMemWrapper inBuf;
44 clMemWrapper inBuf2;
45 Buffers outBuf;
46
47 float maxError; // max error value. Init to 0.
48 double
49 maxErrorValue; // position of the max error value (param 1). Init to 0.
50 cl_int maxErrorValue2; // position of the max error value (param 2). Init
51 // to 0.
52 MTdataHolder d;
53
54 // Per thread command queue to improve performance
55 clCommandQueueWrapper tQueue;
56 };
57
58 struct TestInfo
59 {
60 size_t subBufferSize; // Size of the sub-buffer in elements
61 const Func *f; // A pointer to the function info
62
63 // Programs for various vector sizes.
64 Programs programs;
65
66 // Thread-specific kernels for each vector size:
67 // k[vector_size][thread_id]
68 KernelMatrix k;
69
70 // Array of thread specific information
71 std::vector<ThreadInfo> tinfo;
72
73 cl_uint threadCount; // Number of worker threads
74 cl_uint jobCount; // Number of jobs
75 cl_uint step; // step between each chunk and the next.
76 cl_uint scale; // stride between individual test values
77 float ulps; // max_allowed ulps
78 int ftz; // non-zero if running in flush to zero mode
79 bool relaxedMode; // True if test is running in relaxed mode, false
80 // otherwise.
81 // no special values
82 };
83
84 // A table of more difficult cases to get right
85 const float specialValues[] = {
86 -NAN,
87 -INFINITY,
88 -FLT_MAX,
89 MAKE_HEX_FLOAT(-0x1.000002p64f, -0x1000002L, 40),
90 MAKE_HEX_FLOAT(-0x1.0p64f, -0x1L, 64),
91 MAKE_HEX_FLOAT(-0x1.fffffep63f, -0x1fffffeL, 39),
92 MAKE_HEX_FLOAT(-0x1.000002p63f, -0x1000002L, 39),
93 MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63),
94 MAKE_HEX_FLOAT(-0x1.fffffep62f, -0x1fffffeL, 38),
95 MAKE_HEX_FLOAT(-0x1.000002p32f, -0x1000002L, 8),
96 MAKE_HEX_FLOAT(-0x1.0p32f, -0x1L, 32),
97 MAKE_HEX_FLOAT(-0x1.fffffep31f, -0x1fffffeL, 7),
98 MAKE_HEX_FLOAT(-0x1.000002p31f, -0x1000002L, 7),
99 MAKE_HEX_FLOAT(-0x1.0p31f, -0x1L, 31),
100 MAKE_HEX_FLOAT(-0x1.fffffep30f, -0x1fffffeL, 6),
101 -1000.f,
102 -100.f,
103 -4.0f,
104 -3.5f,
105 -3.0f,
106 MAKE_HEX_FLOAT(-0x1.800002p1f, -0x1800002L, -23),
107 -2.5f,
108 MAKE_HEX_FLOAT(-0x1.7ffffep1f, -0x17ffffeL, -23),
109 -2.0f,
110 MAKE_HEX_FLOAT(-0x1.800002p0f, -0x1800002L, -24),
111 -1.5f,
112 MAKE_HEX_FLOAT(-0x1.7ffffep0f, -0x17ffffeL, -24),
113 MAKE_HEX_FLOAT(-0x1.000002p0f, -0x1000002L, -24),
114 -1.0f,
115 MAKE_HEX_FLOAT(-0x1.fffffep-1f, -0x1fffffeL, -25),
116 MAKE_HEX_FLOAT(-0x1.000002p-1f, -0x1000002L, -25),
117 -0.5f,
118 MAKE_HEX_FLOAT(-0x1.fffffep-2f, -0x1fffffeL, -26),
119 MAKE_HEX_FLOAT(-0x1.000002p-2f, -0x1000002L, -26),
120 -0.25f,
121 MAKE_HEX_FLOAT(-0x1.fffffep-3f, -0x1fffffeL, -27),
122 MAKE_HEX_FLOAT(-0x1.000002p-126f, -0x1000002L, -150),
123 -FLT_MIN,
124 MAKE_HEX_FLOAT(-0x0.fffffep-126f, -0x0fffffeL, -150),
125 MAKE_HEX_FLOAT(-0x0.000ffep-126f, -0x0000ffeL, -150),
126 MAKE_HEX_FLOAT(-0x0.0000fep-126f, -0x00000feL, -150),
127 MAKE_HEX_FLOAT(-0x0.00000ep-126f, -0x000000eL, -150),
128 MAKE_HEX_FLOAT(-0x0.00000cp-126f, -0x000000cL, -150),
129 MAKE_HEX_FLOAT(-0x0.00000ap-126f, -0x000000aL, -150),
130 MAKE_HEX_FLOAT(-0x0.000008p-126f, -0x0000008L, -150),
131 MAKE_HEX_FLOAT(-0x0.000006p-126f, -0x0000006L, -150),
132 MAKE_HEX_FLOAT(-0x0.000004p-126f, -0x0000004L, -150),
133 MAKE_HEX_FLOAT(-0x0.000002p-126f, -0x0000002L, -150),
134 -0.0f,
135
136 +NAN,
137 +INFINITY,
138 +FLT_MAX,
139 MAKE_HEX_FLOAT(+0x1.000002p64f, +0x1000002L, 40),
140 MAKE_HEX_FLOAT(+0x1.0p64f, +0x1L, 64),
141 MAKE_HEX_FLOAT(+0x1.fffffep63f, +0x1fffffeL, 39),
142 MAKE_HEX_FLOAT(+0x1.000002p63f, +0x1000002L, 39),
143 MAKE_HEX_FLOAT(+0x1.0p63f, +0x1L, 63),
144 MAKE_HEX_FLOAT(+0x1.fffffep62f, +0x1fffffeL, 38),
145 MAKE_HEX_FLOAT(+0x1.000002p32f, +0x1000002L, 8),
146 MAKE_HEX_FLOAT(+0x1.0p32f, +0x1L, 32),
147 MAKE_HEX_FLOAT(+0x1.fffffep31f, +0x1fffffeL, 7),
148 MAKE_HEX_FLOAT(+0x1.000002p31f, +0x1000002L, 7),
149 MAKE_HEX_FLOAT(+0x1.0p31f, +0x1L, 31),
150 MAKE_HEX_FLOAT(+0x1.fffffep30f, +0x1fffffeL, 6),
151 +1000.f,
152 +100.f,
153 +4.0f,
154 +3.5f,
155 +3.0f,
156 MAKE_HEX_FLOAT(+0x1.800002p1f, +0x1800002L, -23),
157 2.5f,
158 MAKE_HEX_FLOAT(+0x1.7ffffep1f, +0x17ffffeL, -23),
159 +2.0f,
160 MAKE_HEX_FLOAT(+0x1.800002p0f, +0x1800002L, -24),
161 1.5f,
162 MAKE_HEX_FLOAT(+0x1.7ffffep0f, +0x17ffffeL, -24),
163 MAKE_HEX_FLOAT(+0x1.000002p0f, +0x1000002L, -24),
164 +1.0f,
165 MAKE_HEX_FLOAT(+0x1.fffffep-1f, +0x1fffffeL, -25),
166 MAKE_HEX_FLOAT(+0x1.000002p-1f, +0x1000002L, -25),
167 +0.5f,
168 MAKE_HEX_FLOAT(+0x1.fffffep-2f, +0x1fffffeL, -26),
169 MAKE_HEX_FLOAT(+0x1.000002p-2f, +0x1000002L, -26),
170 +0.25f,
171 MAKE_HEX_FLOAT(+0x1.fffffep-3f, +0x1fffffeL, -27),
172 MAKE_HEX_FLOAT(0x1.000002p-126f, 0x1000002L, -150),
173 +FLT_MIN,
174 MAKE_HEX_FLOAT(+0x0.fffffep-126f, +0x0fffffeL, -150),
175 MAKE_HEX_FLOAT(+0x0.000ffep-126f, +0x0000ffeL, -150),
176 MAKE_HEX_FLOAT(+0x0.0000fep-126f, +0x00000feL, -150),
177 MAKE_HEX_FLOAT(+0x0.00000ep-126f, +0x000000eL, -150),
178 MAKE_HEX_FLOAT(+0x0.00000cp-126f, +0x000000cL, -150),
179 MAKE_HEX_FLOAT(+0x0.00000ap-126f, +0x000000aL, -150),
180 MAKE_HEX_FLOAT(+0x0.000008p-126f, +0x0000008L, -150),
181 MAKE_HEX_FLOAT(+0x0.000006p-126f, +0x0000006L, -150),
182 MAKE_HEX_FLOAT(+0x0.000004p-126f, +0x0000004L, -150),
183 MAKE_HEX_FLOAT(+0x0.000002p-126f, +0x0000002L, -150),
184 +0.0f,
185 };
186
187 constexpr size_t specialValuesCount =
188 sizeof(specialValues) / sizeof(specialValues[0]);
189
190 const int specialValuesInt[] = {
191 0, 1, 2, 3, 126, 127,
192 128, 0x02000001, 0x04000001, 1465264071, 1488522147, -1,
193 -2, -3, -126, -127, -128, -0x02000001,
194 -0x04000001, -1465264071, -1488522147,
195 };
196
197 constexpr size_t specialValuesIntCount =
198 sizeof(specialValuesInt) / sizeof(specialValuesInt[0]);
199
Test(cl_uint job_id,cl_uint thread_id,void * data)200 cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
201 {
202 TestInfo *job = (TestInfo *)data;
203 size_t buffer_elements = job->subBufferSize;
204 size_t buffer_size = buffer_elements * sizeof(cl_float);
205 cl_uint base = job_id * (cl_uint)job->step;
206 ThreadInfo *tinfo = &(job->tinfo[thread_id]);
207 fptr func = job->f->func;
208 int ftz = job->ftz;
209 bool relaxedMode = job->relaxedMode;
210 float ulps = job->ulps;
211 MTdata d = tinfo->d;
212 cl_int error;
213 const char *name = job->f->name;
214 cl_uint *t = 0;
215 cl_float *r = 0;
216 cl_float *s = 0;
217 cl_int *s2 = 0;
218
219 cl_event e[VECTOR_SIZE_COUNT];
220 cl_uint *out[VECTOR_SIZE_COUNT];
221 if (gHostFill)
222 {
223 // start the map of the output arrays
224 for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
225 {
226 out[j] = (cl_uint *)clEnqueueMapBuffer(
227 tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
228 buffer_size, 0, NULL, e + j, &error);
229 if (error || NULL == out[j])
230 {
231 vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
232 error);
233 return error;
234 }
235 }
236
237 // Get that moving
238 if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n");
239 }
240
241 // Init input array
242 cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements;
243 cl_uint *p2 = (cl_uint *)gIn2 + thread_id * buffer_elements;
244 size_t idx = 0;
245 int totalSpecialValueCount = specialValuesCount * specialValuesIntCount;
246 int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements;
247
248 if (job_id <= (cl_uint)lastSpecialJobIndex)
249 { // test edge cases
250 float *fp = (float *)p;
251 cl_int *ip2 = (cl_int *)p2;
252 uint32_t x, y;
253
254 x = (job_id * buffer_elements) % specialValuesCount;
255 y = (job_id * buffer_elements) / specialValuesCount;
256
257 for (; idx < buffer_elements; idx++)
258 {
259 fp[idx] = specialValues[x];
260 ip2[idx] = specialValuesInt[y];
261 ++x;
262 if (x >= specialValuesCount)
263 {
264 x = 0;
265 y++;
266 if (y >= specialValuesIntCount) break;
267 }
268 }
269 }
270
271 // Init any remaining values.
272 for (; idx < buffer_elements; idx++)
273 {
274 p[idx] = genrand_int32(d);
275 p2[idx] = genrand_int32(d);
276 }
277
278 if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
279 buffer_size, p, 0, NULL, NULL)))
280 {
281 vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
282 return error;
283 }
284
285 if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0,
286 buffer_size, p2, 0, NULL, NULL)))
287 {
288 vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
289 return error;
290 }
291
292 for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
293 {
294 if (gHostFill)
295 {
296 // Wait for the map to finish
297 if ((error = clWaitForEvents(1, e + j)))
298 {
299 vlog_error("Error: clWaitForEvents failed! err: %d\n", error);
300 return error;
301 }
302 if ((error = clReleaseEvent(e[j])))
303 {
304 vlog_error("Error: clReleaseEvent failed! err: %d\n", error);
305 return error;
306 }
307 }
308
309 // Fill the result buffer with garbage, so that old results don't carry
310 // over
311 uint32_t pattern = 0xffffdead;
312 if (gHostFill)
313 {
314 memset_pattern4(out[j], &pattern, buffer_size);
315 if ((error = clEnqueueUnmapMemObject(
316 tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL)))
317 {
318 vlog_error("Error: clEnqueueUnmapMemObject failed! err: %d\n",
319 error);
320 return error;
321 }
322 }
323 else
324 {
325 if ((error = clEnqueueFillBuffer(tinfo->tQueue, tinfo->outBuf[j],
326 &pattern, sizeof(pattern), 0,
327 buffer_size, 0, NULL, NULL)))
328 {
329 vlog_error("Error: clEnqueueFillBuffer failed! err: %d\n",
330 error);
331 return error;
332 }
333 }
334
335 // Run the kernel
336 size_t vectorCount =
337 (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
338 cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
339 // own copy of the cl_kernel
340 cl_program program = job->programs[j];
341
342 if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
343 &tinfo->outBuf[j])))
344 {
345 LogBuildError(program);
346 return error;
347 }
348 if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
349 &tinfo->inBuf)))
350 {
351 LogBuildError(program);
352 return error;
353 }
354 if ((error = clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2),
355 &tinfo->inBuf2)))
356 {
357 LogBuildError(program);
358 return error;
359 }
360
361 if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
362 &vectorCount, NULL, 0, NULL, NULL)))
363 {
364 vlog_error("FAILED -- could not execute kernel\n");
365 return error;
366 }
367 }
368
369 // Get that moving
370 if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 2 failed\n");
371
372 if (gSkipCorrectnessTesting) return CL_SUCCESS;
373
374 // Calculate the correctly rounded reference result
375 r = (float *)gOut_Ref + thread_id * buffer_elements;
376 s = (float *)gIn + thread_id * buffer_elements;
377 s2 = (cl_int *)gIn2 + thread_id * buffer_elements;
378 for (size_t j = 0; j < buffer_elements; j++)
379 r[j] = (float)func.f_fi(s[j], s2[j]);
380
381 // Read the data back -- no need to wait for the first N-1 buffers but wait
382 // for the last buffer. This is an in order queue.
383 for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
384 {
385 cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
386 out[j] = (cl_uint *)clEnqueueMapBuffer(
387 tinfo->tQueue, tinfo->outBuf[j], blocking, CL_MAP_READ, 0,
388 buffer_size, 0, NULL, NULL, &error);
389 if (error || NULL == out[j])
390 {
391 vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
392 error);
393 return error;
394 }
395 }
396
397 // Verify data
398 t = (cl_uint *)r;
399 for (size_t j = 0; j < buffer_elements; j++)
400 {
401 for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
402 {
403 cl_uint *q = out[k];
404
405 // If we aren't getting the correctly rounded result
406 if (t[j] != q[j])
407 {
408 float test = ((float *)q)[j];
409 double correct = func.f_fi(s[j], s2[j]);
410 float err = Ulp_Error(test, correct);
411 int fail = !(fabsf(err) <= ulps);
412
413 if (fail && (ftz || relaxedMode))
414 {
415 // retry per section 6.5.3.2
416 if (IsFloatResultSubnormal(correct, ulps))
417 {
418 fail = fail && (test != 0.0f);
419 if (!fail) err = 0.0f;
420 }
421
422 // retry per section 6.5.3.3
423 if (IsFloatSubnormal(s[j]))
424 {
425 double correct2, correct3;
426 float err2, err3;
427 correct2 = func.f_fi(0.0, s2[j]);
428 correct3 = func.f_fi(-0.0, s2[j]);
429 err2 = Ulp_Error(test, correct2);
430 err3 = Ulp_Error(test, correct3);
431 fail = fail
432 && ((!(fabsf(err2) <= ulps))
433 && (!(fabsf(err3) <= ulps)));
434 if (fabsf(err2) < fabsf(err)) err = err2;
435 if (fabsf(err3) < fabsf(err)) err = err3;
436
437 // retry per section 6.5.3.4
438 if (IsFloatResultSubnormal(correct2, ulps)
439 || IsFloatResultSubnormal(correct3, ulps))
440 {
441 fail = fail && (test != 0.0f);
442 if (!fail) err = 0.0f;
443 }
444 }
445 }
446
447 if (fabsf(err) > tinfo->maxError)
448 {
449 tinfo->maxError = fabsf(err);
450 tinfo->maxErrorValue = s[j];
451 tinfo->maxErrorValue2 = s2[j];
452 }
453 if (fail)
454 {
455 vlog_error(
456 "\nERROR: %s%s: %f ulp error at {%a (0x%8.8x), %d}: "
457 "*%a (0x%8.8x) vs. %a (0x%8.8x) at index: %zu\n",
458 name, sizeNames[k], err, s[j], ((uint32_t *)s)[j],
459 s2[j], r[j], ((uint32_t *)r)[j], test,
460 ((cl_uint *)&test)[0], j);
461 return -1;
462 }
463 }
464 }
465 }
466
467 for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
468 {
469 if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
470 out[j], 0, NULL, NULL)))
471 {
472 vlog_error("Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n",
473 j, error);
474 return error;
475 }
476 }
477
478 if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 3 failed\n");
479
480
481 if (0 == (base & 0x0fffffff))
482 {
483 if (gVerboseBruteForce)
484 {
485 vlog("base:%14u step:%10u scale:%10u buf_elements:%10zu ulps:%5.3f "
486 "ThreadCount:%2u\n",
487 base, job->step, job->scale, buffer_elements, job->ulps,
488 job->threadCount);
489 }
490 else
491 {
492 vlog(".");
493 }
494 fflush(stdout);
495 }
496
497 return CL_SUCCESS;
498 }
499
500 } // anonymous namespace
501
TestFunc_Float_Float_Int(const Func * f,MTdata d,bool relaxedMode)502 int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode)
503 {
504 TestInfo test_info{};
505 cl_int error;
506 float maxError = 0.0f;
507 double maxErrorVal = 0.0;
508 cl_int maxErrorVal2 = 0;
509
510 logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
511
512 // Init test_info
513 test_info.threadCount = GetThreadCount();
514 test_info.subBufferSize = BUFFER_SIZE
515 / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
516 test_info.scale = getTestScale(sizeof(cl_float));
517
518 test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
519 if (test_info.step / test_info.subBufferSize != test_info.scale)
520 {
521 // there was overflow
522 test_info.jobCount = 1;
523 }
524 else
525 {
526 test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
527 }
528
529 test_info.f = f;
530 test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps;
531 test_info.ftz =
532 f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
533 test_info.relaxedMode = relaxedMode;
534
535 test_info.tinfo.resize(test_info.threadCount);
536 for (cl_uint i = 0; i < test_info.threadCount; i++)
537 {
538 cl_buffer_region region = {
539 i * test_info.subBufferSize * sizeof(cl_float),
540 test_info.subBufferSize * sizeof(cl_float)
541 };
542 test_info.tinfo[i].inBuf =
543 clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
544 CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error);
545 if (error || NULL == test_info.tinfo[i].inBuf)
546 {
547 vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
548 "region {%zd, %zd}\n",
549 region.origin, region.size);
550 return error;
551 }
552 cl_buffer_region region2 = { i * test_info.subBufferSize
553 * sizeof(cl_int),
554 test_info.subBufferSize * sizeof(cl_int) };
555 test_info.tinfo[i].inBuf2 =
556 clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY,
557 CL_BUFFER_CREATE_TYPE_REGION, ®ion2, &error);
558 if (error || NULL == test_info.tinfo[i].inBuf2)
559 {
560 vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
561 "region {%zd, %zd}\n",
562 region.origin, region.size);
563 return error;
564 }
565
566 for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
567 {
568 test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
569 gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
570 ®ion, &error);
571 if (error || NULL == test_info.tinfo[i].outBuf[j])
572 {
573 vlog_error("Error: Unable to create sub-buffer of "
574 "gOutBuffer[%d] for region {%zd, %zd}\n",
575 (int)j, region.origin, region.size);
576 return error;
577 }
578 }
579 test_info.tinfo[i].tQueue =
580 clCreateCommandQueue(gContext, gDevice, 0, &error);
581 if (NULL == test_info.tinfo[i].tQueue || error)
582 {
583 vlog_error("clCreateCommandQueue failed. (%d)\n", error);
584 return error;
585 }
586
587 test_info.tinfo[i].d = MTdataHolder(genrand_int32(d));
588 }
589
590 // Init the kernels
591 BuildKernelInfo build_info{ test_info.threadCount, test_info.k,
592 test_info.programs, f->nameInCode,
593 relaxedMode };
594 if ((error = ThreadPool_Do(BuildKernelFn,
595 gMaxVectorSizeIndex - gMinVectorSizeIndex,
596 &build_info)))
597 return error;
598
599 // Run the kernels
600 if (!gSkipCorrectnessTesting)
601 {
602 error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
603 if (error) return error;
604
605 // Accumulate the arithmetic errors
606 for (cl_uint i = 0; i < test_info.threadCount; i++)
607 {
608 if (test_info.tinfo[i].maxError > maxError)
609 {
610 maxError = test_info.tinfo[i].maxError;
611 maxErrorVal = test_info.tinfo[i].maxErrorValue;
612 maxErrorVal2 = test_info.tinfo[i].maxErrorValue2;
613 }
614 }
615
616 if (gWimpyMode)
617 vlog("Wimp pass");
618 else
619 vlog("passed");
620
621 vlog("\t%8.2f @ {%a, %d}", maxError, maxErrorVal, maxErrorVal2);
622 }
623
624 vlog("\n");
625
626 return CL_SUCCESS;
627 }
628