1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16
17 #include "common.h"
18 #include "function_list.h"
19 #include "test_functions.h"
20 #include "utility.h"
21
22 #include <climits>
23 #include <cstring>
24
25 namespace {
26
BuildKernelFn(cl_uint job_id,cl_uint thread_id UNUSED,void * p)27 cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
28 {
29 BuildKernelInfo &info = *(BuildKernelInfo *)p;
30 auto generator = [](const std::string &kernel_name, const char *builtin,
31 cl_uint vector_size_index) {
32 return GetBinaryKernel(kernel_name, builtin, ParameterType::Double,
33 ParameterType::Double, ParameterType::Int,
34 vector_size_index);
35 };
36 return BuildKernels(info, job_id, generator);
37 }
38
39 // Thread specific data for a worker thread
40 struct ThreadInfo
41 {
42 // Input and output buffers for the thread
43 clMemWrapper inBuf;
44 clMemWrapper inBuf2;
45 Buffers outBuf;
46
47 float maxError; // max error value. Init to 0.
48 double
49 maxErrorValue; // position of the max error value (param 1). Init to 0.
50 cl_int maxErrorValue2; // position of the max error value (param 2). Init
51 // to 0.
52 MTdataHolder d;
53
54 // Per thread command queue to improve performance
55 clCommandQueueWrapper tQueue;
56 };
57
58 struct TestInfo
59 {
60 size_t subBufferSize; // Size of the sub-buffer in elements
61 const Func *f; // A pointer to the function info
62
63 // Programs for various vector sizes.
64 Programs programs;
65
66 // Thread-specific kernels for each vector size:
67 // k[vector_size][thread_id]
68 KernelMatrix k;
69
70 // Array of thread specific information
71 std::vector<ThreadInfo> tinfo;
72
73 cl_uint threadCount; // Number of worker threads
74 cl_uint jobCount; // Number of jobs
75 cl_uint step; // step between each chunk and the next.
76 cl_uint scale; // stride between individual test values
77 float ulps; // max_allowed ulps
78 int ftz; // non-zero if running in flush to zero mode
79 bool relaxedMode; // True if test is running in relaxed mode, false
80 // otherwise.
81
82 // no special values
83 };
84
85 // A table of more difficult cases to get right
86 const double specialValues[] = {
87 -NAN,
88 -INFINITY,
89 -DBL_MAX,
90 MAKE_HEX_DOUBLE(-0x1.0000000000001p64, -0x10000000000001LL, 12),
91 MAKE_HEX_DOUBLE(-0x1.0p64, -0x1LL, 64),
92 MAKE_HEX_DOUBLE(-0x1.fffffffffffffp63, -0x1fffffffffffffLL, 11),
93 MAKE_HEX_DOUBLE(-0x1.0000000000001p63, -0x10000000000001LL, 11),
94 MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63),
95 MAKE_HEX_DOUBLE(-0x1.fffffffffffffp62, -0x1fffffffffffffLL, 10),
96 MAKE_HEX_DOUBLE(-0x1.000002p32, -0x1000002LL, 8),
97 MAKE_HEX_DOUBLE(-0x1.0p32, -0x1LL, 32),
98 MAKE_HEX_DOUBLE(-0x1.fffffffffffffp31, -0x1fffffffffffffLL, -21),
99 MAKE_HEX_DOUBLE(-0x1.0000000000001p31, -0x10000000000001LL, -21),
100 MAKE_HEX_DOUBLE(-0x1.0p31, -0x1LL, 31),
101 MAKE_HEX_DOUBLE(-0x1.fffffffffffffp30, -0x1fffffffffffffLL, -22),
102 -1000.0,
103 -100.0,
104 -4.0,
105 -3.5,
106 -3.0,
107 MAKE_HEX_DOUBLE(-0x1.8000000000001p1, -0x18000000000001LL, -51),
108 -2.5,
109 MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp1, -0x17ffffffffffffLL, -51),
110 -2.0,
111 MAKE_HEX_DOUBLE(-0x1.8000000000001p0, -0x18000000000001LL, -52),
112 -1.5,
113 MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp0, -0x17ffffffffffffLL, -52),
114 MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52),
115 -1.0,
116 MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-1, -0x1fffffffffffffLL, -53),
117 MAKE_HEX_DOUBLE(-0x1.0000000000001p-1, -0x10000000000001LL, -53),
118 -0.5,
119 MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-2, -0x1fffffffffffffLL, -54),
120 MAKE_HEX_DOUBLE(-0x1.0000000000001p-2, -0x10000000000001LL, -54),
121 -0.25,
122 MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-3, -0x1fffffffffffffLL, -55),
123 MAKE_HEX_DOUBLE(-0x1.0000000000001p-1022, -0x10000000000001LL, -1074),
124 -DBL_MIN,
125 MAKE_HEX_DOUBLE(-0x0.fffffffffffffp-1022, -0x0fffffffffffffLL, -1074),
126 MAKE_HEX_DOUBLE(-0x0.0000000000fffp-1022, -0x00000000000fffLL, -1074),
127 MAKE_HEX_DOUBLE(-0x0.00000000000fep-1022, -0x000000000000feLL, -1074),
128 MAKE_HEX_DOUBLE(-0x0.000000000000ep-1022, -0x0000000000000eLL, -1074),
129 MAKE_HEX_DOUBLE(-0x0.000000000000cp-1022, -0x0000000000000cLL, -1074),
130 MAKE_HEX_DOUBLE(-0x0.000000000000ap-1022, -0x0000000000000aLL, -1074),
131 MAKE_HEX_DOUBLE(-0x0.0000000000008p-1022, -0x00000000000008LL, -1074),
132 MAKE_HEX_DOUBLE(-0x0.0000000000007p-1022, -0x00000000000007LL, -1074),
133 MAKE_HEX_DOUBLE(-0x0.0000000000006p-1022, -0x00000000000006LL, -1074),
134 MAKE_HEX_DOUBLE(-0x0.0000000000005p-1022, -0x00000000000005LL, -1074),
135 MAKE_HEX_DOUBLE(-0x0.0000000000004p-1022, -0x00000000000004LL, -1074),
136 MAKE_HEX_DOUBLE(-0x0.0000000000003p-1022, -0x00000000000003LL, -1074),
137 MAKE_HEX_DOUBLE(-0x0.0000000000002p-1022, -0x00000000000002LL, -1074),
138 MAKE_HEX_DOUBLE(-0x0.0000000000001p-1022, -0x00000000000001LL, -1074),
139 -0.0,
140
141 +NAN,
142 +INFINITY,
143 +DBL_MAX,
144 MAKE_HEX_DOUBLE(+0x1.0000000000001p64, +0x10000000000001LL, 12),
145 MAKE_HEX_DOUBLE(+0x1.0p64, +0x1LL, 64),
146 MAKE_HEX_DOUBLE(+0x1.fffffffffffffp63, +0x1fffffffffffffLL, 11),
147 MAKE_HEX_DOUBLE(+0x1.0000000000001p63, +0x10000000000001LL, 11),
148 MAKE_HEX_DOUBLE(+0x1.0p63, +0x1LL, 63),
149 MAKE_HEX_DOUBLE(+0x1.fffffffffffffp62, +0x1fffffffffffffLL, 10),
150 MAKE_HEX_DOUBLE(+0x1.000002p32, +0x1000002LL, 8),
151 MAKE_HEX_DOUBLE(+0x1.0p32, +0x1LL, 32),
152 MAKE_HEX_DOUBLE(+0x1.fffffffffffffp31, +0x1fffffffffffffLL, -21),
153 MAKE_HEX_DOUBLE(+0x1.0000000000001p31, +0x10000000000001LL, -21),
154 MAKE_HEX_DOUBLE(+0x1.0p31, +0x1LL, 31),
155 MAKE_HEX_DOUBLE(+0x1.fffffffffffffp30, +0x1fffffffffffffLL, -22),
156 +1000.0,
157 +100.0,
158 +4.0,
159 +3.5,
160 +3.0,
161 MAKE_HEX_DOUBLE(+0x1.8000000000001p1, +0x18000000000001LL, -51),
162 +2.5,
163 MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp1, +0x17ffffffffffffLL, -51),
164 +2.0,
165 MAKE_HEX_DOUBLE(+0x1.8000000000001p0, +0x18000000000001LL, -52),
166 +1.5,
167 MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp0, +0x17ffffffffffffLL, -52),
168 MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52),
169 +1.0,
170 MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-1, +0x1fffffffffffffLL, -53),
171 MAKE_HEX_DOUBLE(+0x1.0000000000001p-1, +0x10000000000001LL, -53),
172 +0.5,
173 MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-2, +0x1fffffffffffffLL, -54),
174 MAKE_HEX_DOUBLE(+0x1.0000000000001p-2, +0x10000000000001LL, -54),
175 +0.25,
176 MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-3, +0x1fffffffffffffLL, -55),
177 MAKE_HEX_DOUBLE(+0x1.0000000000001p-1022, +0x10000000000001LL, -1074),
178 +DBL_MIN,
179 MAKE_HEX_DOUBLE(+0x0.fffffffffffffp-1022, +0x0fffffffffffffLL, -1074),
180 MAKE_HEX_DOUBLE(+0x0.0000000000fffp-1022, +0x00000000000fffLL, -1074),
181 MAKE_HEX_DOUBLE(+0x0.00000000000fep-1022, +0x000000000000feLL, -1074),
182 MAKE_HEX_DOUBLE(+0x0.000000000000ep-1022, +0x0000000000000eLL, -1074),
183 MAKE_HEX_DOUBLE(+0x0.000000000000cp-1022, +0x0000000000000cLL, -1074),
184 MAKE_HEX_DOUBLE(+0x0.000000000000ap-1022, +0x0000000000000aLL, -1074),
185 MAKE_HEX_DOUBLE(+0x0.0000000000008p-1022, +0x00000000000008LL, -1074),
186 MAKE_HEX_DOUBLE(+0x0.0000000000007p-1022, +0x00000000000007LL, -1074),
187 MAKE_HEX_DOUBLE(+0x0.0000000000006p-1022, +0x00000000000006LL, -1074),
188 MAKE_HEX_DOUBLE(+0x0.0000000000005p-1022, +0x00000000000005LL, -1074),
189 MAKE_HEX_DOUBLE(+0x0.0000000000004p-1022, +0x00000000000004LL, -1074),
190 MAKE_HEX_DOUBLE(+0x0.0000000000003p-1022, +0x00000000000003LL, -1074),
191 MAKE_HEX_DOUBLE(+0x0.0000000000002p-1022, +0x00000000000002LL, -1074),
192 MAKE_HEX_DOUBLE(+0x0.0000000000001p-1022, +0x00000000000001LL, -1074),
193 +0.0,
194 };
195
196 constexpr size_t specialValuesCount =
197 sizeof(specialValues) / sizeof(specialValues[0]);
198
199 const int specialValuesInt[] = {
200 0, 1, 2, 3, 1022, 1023, 1024, INT_MIN,
201 INT_MAX, -1, -2, -3, -1022, -1023, -11024, -INT_MAX,
202 };
203
204 constexpr size_t specialValuesIntCount =
205 sizeof(specialValuesInt) / sizeof(specialValuesInt[0]);
206
Test(cl_uint job_id,cl_uint thread_id,void * data)207 cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
208 {
209 TestInfo *job = (TestInfo *)data;
210 size_t buffer_elements = job->subBufferSize;
211 size_t buffer_size = buffer_elements * sizeof(cl_double);
212 cl_uint base = job_id * (cl_uint)job->step;
213 ThreadInfo *tinfo = &(job->tinfo[thread_id]);
214 float ulps = job->ulps;
215 dptr func = job->f->dfunc;
216 int ftz = job->ftz;
217 bool relaxedMode = job->relaxedMode;
218 MTdata d = tinfo->d;
219 cl_int error;
220 const char *name = job->f->name;
221 cl_ulong *t;
222 cl_double *r;
223 cl_double *s;
224 cl_int *s2;
225
226 Force64BitFPUPrecision();
227
228 cl_event e[VECTOR_SIZE_COUNT];
229 cl_ulong *out[VECTOR_SIZE_COUNT];
230 if (gHostFill)
231 {
232 // Start the map of the output arrays
233 for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
234 {
235 out[j] = (cl_ulong *)clEnqueueMapBuffer(
236 tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
237 buffer_size, 0, NULL, e + j, &error);
238 if (error || NULL == out[j])
239 {
240 vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
241 error);
242 return error;
243 }
244 }
245
246 // Get that moving
247 if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n");
248 }
249
250 // Init input array
251 cl_ulong *p = (cl_ulong *)gIn + thread_id * buffer_elements;
252 cl_int *p2 = (cl_int *)gIn2 + thread_id * buffer_elements;
253 size_t idx = 0;
254 int totalSpecialValueCount = specialValuesCount * specialValuesIntCount;
255 int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements;
256
257 // Test edge cases
258 if (job_id <= (cl_uint)lastSpecialJobIndex)
259 {
260 cl_double *fp = (cl_double *)p;
261 cl_int *ip2 = (cl_int *)p2;
262 uint32_t x, y;
263
264 x = (job_id * buffer_elements) % specialValuesCount;
265 y = (job_id * buffer_elements) / specialValuesCount;
266
267 for (; idx < buffer_elements; idx++)
268 {
269 fp[idx] = specialValues[x];
270 ip2[idx] = specialValuesInt[y];
271 if (++x >= specialValuesCount)
272 {
273 x = 0;
274 y++;
275 if (y >= specialValuesIntCount) break;
276 }
277 }
278 }
279
280 // Init any remaining values.
281 for (; idx < buffer_elements; idx++)
282 {
283 p[idx] = DoubleFromUInt32(genrand_int32(d));
284 p2[idx] = genrand_int32(d);
285 }
286
287 if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
288 buffer_size, p, 0, NULL, NULL)))
289 {
290 vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
291 return error;
292 }
293
294 if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0,
295 buffer_size / 2, p2, 0, NULL, NULL)))
296 {
297 vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
298 return error;
299 }
300
301 for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
302 {
303 if (gHostFill)
304 {
305 // Wait for the map to finish
306 if ((error = clWaitForEvents(1, e + j)))
307 {
308 vlog_error("Error: clWaitForEvents failed! err: %d\n", error);
309 return error;
310 }
311 if ((error = clReleaseEvent(e[j])))
312 {
313 vlog_error("Error: clReleaseEvent failed! err: %d\n", error);
314 return error;
315 }
316 }
317
318 // Fill the result buffer with garbage, so that old results don't carry
319 // over
320 uint32_t pattern = 0xffffdead;
321 if (gHostFill)
322 {
323 memset_pattern4(out[j], &pattern, buffer_size);
324 if ((error = clEnqueueUnmapMemObject(
325 tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL)))
326 {
327 vlog_error("Error: clEnqueueUnmapMemObject failed! err: %d\n",
328 error);
329 return error;
330 }
331 }
332 else
333 {
334 if ((error = clEnqueueFillBuffer(tinfo->tQueue, tinfo->outBuf[j],
335 &pattern, sizeof(pattern), 0,
336 buffer_size, 0, NULL, NULL)))
337 {
338 vlog_error("Error: clEnqueueFillBuffer failed! err: %d\n",
339 error);
340 return error;
341 }
342 }
343
344 // Run the kernel
345 size_t vectorCount =
346 (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
347 cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
348 // own copy of the cl_kernel
349 cl_program program = job->programs[j];
350
351 if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
352 &tinfo->outBuf[j])))
353 {
354 LogBuildError(program);
355 return error;
356 }
357 if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
358 &tinfo->inBuf)))
359 {
360 LogBuildError(program);
361 return error;
362 }
363 if ((error = clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2),
364 &tinfo->inBuf2)))
365 {
366 LogBuildError(program);
367 return error;
368 }
369
370 if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
371 &vectorCount, NULL, 0, NULL, NULL)))
372 {
373 vlog_error("FAILED -- could not execute kernel\n");
374 return error;
375 }
376 }
377
378 // Get that moving
379 if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 2 failed\n");
380
381 if (gSkipCorrectnessTesting) return CL_SUCCESS;
382
383 // Calculate the correctly rounded reference result
384 r = (cl_double *)gOut_Ref + thread_id * buffer_elements;
385 s = (cl_double *)gIn + thread_id * buffer_elements;
386 s2 = (cl_int *)gIn2 + thread_id * buffer_elements;
387 for (size_t j = 0; j < buffer_elements; j++)
388 r[j] = (cl_double)func.f_fi(s[j], s2[j]);
389
390 // Read the data back -- no need to wait for the first N-1 buffers but wait
391 // for the last buffer. This is an in order queue.
392 for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
393 {
394 cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
395 out[j] = (cl_ulong *)clEnqueueMapBuffer(
396 tinfo->tQueue, tinfo->outBuf[j], blocking, CL_MAP_READ, 0,
397 buffer_size, 0, NULL, NULL, &error);
398 if (error || NULL == out[j])
399 {
400 vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
401 error);
402 return error;
403 }
404 }
405
406 // Verify data
407 t = (cl_ulong *)r;
408 for (size_t j = 0; j < buffer_elements; j++)
409 {
410 for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
411 {
412 cl_ulong *q = out[k];
413
414 // If we aren't getting the correctly rounded result
415 if (t[j] != q[j])
416 {
417 cl_double test = ((cl_double *)q)[j];
418 long double correct = func.f_fi(s[j], s2[j]);
419 float err = Bruteforce_Ulp_Error_Double(test, correct);
420 int fail = !(fabsf(err) <= ulps);
421
422 if (fail && (ftz || relaxedMode))
423 {
424 // retry per section 6.5.3.2
425 if (IsDoubleResultSubnormal(correct, ulps))
426 {
427 fail = fail && (test != 0.0f);
428 if (!fail) err = 0.0f;
429 }
430
431 // retry per section 6.5.3.3
432 if (IsDoubleSubnormal(s[j]))
433 {
434 long double correct2 = func.f_fi(0.0, s2[j]);
435 long double correct3 = func.f_fi(-0.0, s2[j]);
436 float err2 =
437 Bruteforce_Ulp_Error_Double(test, correct2);
438 float err3 =
439 Bruteforce_Ulp_Error_Double(test, correct3);
440 fail = fail
441 && ((!(fabsf(err2) <= ulps))
442 && (!(fabsf(err3) <= ulps)));
443 if (fabsf(err2) < fabsf(err)) err = err2;
444 if (fabsf(err3) < fabsf(err)) err = err3;
445
446 // retry per section 6.5.3.4
447 if (IsDoubleResultSubnormal(correct2, ulps)
448 || IsDoubleResultSubnormal(correct3, ulps))
449 {
450 fail = fail && (test != 0.0f);
451 if (!fail) err = 0.0f;
452 }
453 }
454 }
455
456 if (fabsf(err) > tinfo->maxError)
457 {
458 tinfo->maxError = fabsf(err);
459 tinfo->maxErrorValue = s[j];
460 tinfo->maxErrorValue2 = s2[j];
461 }
462 if (fail)
463 {
464 vlog_error("\nERROR: %s%s: %f ulp error at {%.13la, %d}: "
465 "*%.13la vs. %.13la\n",
466 name, sizeNames[k], err, s[j], s2[j], r[j],
467 test);
468 return -1;
469 }
470 }
471 }
472 }
473
474 for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
475 {
476 if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
477 out[j], 0, NULL, NULL)))
478 {
479 vlog_error("Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n",
480 j, error);
481 return error;
482 }
483 }
484
485 if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 3 failed\n");
486
487 if (0 == (base & 0x0fffffff))
488 {
489 if (gVerboseBruteForce)
490 {
491 vlog("base:%14u step:%10u scale:%10u buf_elements:%10zd ulps:%5.3f "
492 "ThreadCount:%2u\n",
493 base, job->step, job->scale, buffer_elements, job->ulps,
494 job->threadCount);
495 }
496 else
497 {
498 vlog(".");
499 }
500 fflush(stdout);
501 }
502
503 return CL_SUCCESS;
504 }
505
506 } // anonymous namespace
507
TestFunc_Double_Double_Int(const Func * f,MTdata d,bool relaxedMode)508 int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode)
509 {
510 TestInfo test_info{};
511 cl_int error;
512 float maxError = 0.0f;
513 double maxErrorVal = 0.0;
514 cl_int maxErrorVal2 = 0;
515
516 logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
517
518 // Init test_info
519 test_info.threadCount = GetThreadCount();
520 test_info.subBufferSize = BUFFER_SIZE
521 / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
522 test_info.scale = getTestScale(sizeof(cl_double));
523
524 test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
525 if (test_info.step / test_info.subBufferSize != test_info.scale)
526 {
527 // there was overflow
528 test_info.jobCount = 1;
529 }
530 else
531 {
532 test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
533 }
534
535 test_info.f = f;
536 test_info.ulps = f->double_ulps;
537 test_info.ftz = f->ftz || gForceFTZ;
538 test_info.relaxedMode = relaxedMode;
539
540 test_info.tinfo.resize(test_info.threadCount);
541 for (cl_uint i = 0; i < test_info.threadCount; i++)
542 {
543 cl_buffer_region region = {
544 i * test_info.subBufferSize * sizeof(cl_double),
545 test_info.subBufferSize * sizeof(cl_double)
546 };
547 test_info.tinfo[i].inBuf =
548 clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
549 CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error);
550 if (error || NULL == test_info.tinfo[i].inBuf)
551 {
552 vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
553 "region {%zd, %zd}\n",
554 region.origin, region.size);
555 return error;
556 }
557 cl_buffer_region region2 = { i * test_info.subBufferSize
558 * sizeof(cl_int),
559 test_info.subBufferSize * sizeof(cl_int) };
560 test_info.tinfo[i].inBuf2 =
561 clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY,
562 CL_BUFFER_CREATE_TYPE_REGION, ®ion2, &error);
563 if (error || NULL == test_info.tinfo[i].inBuf2)
564 {
565 vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
566 "region {%zd, %zd}\n",
567 region.origin, region.size);
568 return error;
569 }
570
571 for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
572 {
573 test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
574 gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
575 ®ion, &error);
576 if (error || NULL == test_info.tinfo[i].outBuf[j])
577 {
578 vlog_error("Error: Unable to create sub-buffer of "
579 "gOutBuffer[%d] for region {%zd, %zd}\n",
580 (int)j, region.origin, region.size);
581 return error;
582 }
583 }
584 test_info.tinfo[i].tQueue =
585 clCreateCommandQueue(gContext, gDevice, 0, &error);
586 if (NULL == test_info.tinfo[i].tQueue || error)
587 {
588 vlog_error("clCreateCommandQueue failed. (%d)\n", error);
589 return error;
590 }
591
592 test_info.tinfo[i].d = MTdataHolder(genrand_int32(d));
593 }
594
595 // Init the kernels
596 BuildKernelInfo build_info{ test_info.threadCount, test_info.k,
597 test_info.programs, f->nameInCode,
598 relaxedMode };
599 if ((error = ThreadPool_Do(BuildKernelFn,
600 gMaxVectorSizeIndex - gMinVectorSizeIndex,
601 &build_info)))
602 return error;
603
604 // Run the kernels
605 if (!gSkipCorrectnessTesting)
606 {
607 error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
608 if (error) return error;
609
610 // Accumulate the arithmetic errors
611 for (cl_uint i = 0; i < test_info.threadCount; i++)
612 {
613 if (test_info.tinfo[i].maxError > maxError)
614 {
615 maxError = test_info.tinfo[i].maxError;
616 maxErrorVal = test_info.tinfo[i].maxErrorValue;
617 maxErrorVal2 = test_info.tinfo[i].maxErrorValue2;
618 }
619 }
620
621 if (gWimpyMode)
622 vlog("Wimp pass");
623 else
624 vlog("passed");
625
626 vlog("\t%8.2f @ {%a, %d}", maxError, maxErrorVal, maxErrorVal2);
627 }
628
629 vlog("\n");
630
631 return CL_SUCCESS;
632 }
633