1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16
17 #include "function_list.h"
18 #include "sleep.h"
19 #include "utility.h"
20
21 #include <algorithm>
22 #include <cstdio>
23 #include <cstdlib>
24 #include <ctime>
25 #include <string>
26 #include <vector>
27
28 #include "harness/errorHelpers.h"
29 #include "harness/kernelHelpers.h"
30 #include "harness/parseParameters.h"
31 #include "harness/typeWrappers.h"
32
33 #if defined(__APPLE__)
34 #include <sys/sysctl.h>
35 #include <sys/mman.h>
36 #include <libgen.h>
37 #include <sys/time.h>
38 #elif defined(__linux__)
39 #include <unistd.h>
40 #include <sys/syscall.h>
41 #include <linux/sysctl.h>
42 #include <sys/param.h>
43 #endif
44
45 #if defined(__linux__) || (defined WIN32 && defined __MINGW32__)
46 #include <sys/param.h>
47 #endif
48
49 #include "harness/testHarness.h"
50
51 #define kPageSize 4096
52 #define DOUBLE_REQUIRED_FEATURES \
53 (CL_FP_FMA | CL_FP_ROUND_TO_NEAREST | CL_FP_ROUND_TO_ZERO \
54 | CL_FP_ROUND_TO_INF | CL_FP_INF_NAN | CL_FP_DENORM)
55
56 static std::vector<const char *> gTestNames;
57 static char appName[MAXPATHLEN] = "";
58 cl_device_id gDevice = NULL;
59 cl_context gContext = NULL;
60 cl_command_queue gQueue = NULL;
61 static size_t gStartTestNumber = ~0u;
62 static size_t gEndTestNumber = ~0u;
63 int gSkipCorrectnessTesting = 0;
64 static int gStopOnError = 0;
65 static bool gSkipRestOfTests;
66 int gForceFTZ = 0;
67 int gWimpyMode = 0;
68 int gHostFill = 0;
69 static int gHasDouble = 0;
70 static int gTestFloat = 1;
71 // This flag should be 'ON' by default and it can be changed through the command
72 // line arguments.
73 static int gTestFastRelaxed = 1;
74 /*This flag corresponds to defining if the implementation has Derived Fast
75 Relaxed functions. The spec does not specify ULP for derived function. The
76 derived functions are composed of base functions which are tested for ULP,
77 thus when this flag is enabled, Derived functions will not be tested for ULP,
78 as per table 7.1 of OpenCL 2.0 spec. Since there is no way of quering the
79 device whether it is a derived or non-derived implementation according to
80 OpenCL 2.0 spec then it has to be changed through a command line argument.
81 */
82 int gFastRelaxedDerived = 1;
83 static int gToggleCorrectlyRoundedDivideSqrt = 0;
84 int gDeviceILogb0 = 1;
85 int gDeviceILogbNaN = 1;
86 int gCheckTininessBeforeRounding = 1;
87 int gIsInRTZMode = 0;
88 uint32_t gMaxVectorSizeIndex = VECTOR_SIZE_COUNT;
89 uint32_t gMinVectorSizeIndex = 0;
90 void *gIn = NULL;
91 void *gIn2 = NULL;
92 void *gIn3 = NULL;
93 void *gOut_Ref = NULL;
94 void *gOut[VECTOR_SIZE_COUNT] = { NULL, NULL, NULL, NULL, NULL, NULL };
95 void *gOut_Ref2 = NULL;
96 void *gOut2[VECTOR_SIZE_COUNT] = { NULL, NULL, NULL, NULL, NULL, NULL };
97 cl_mem gInBuffer = NULL;
98 cl_mem gInBuffer2 = NULL;
99 cl_mem gInBuffer3 = NULL;
100 cl_mem gOutBuffer[VECTOR_SIZE_COUNT] = { NULL, NULL, NULL, NULL, NULL, NULL };
101 cl_mem gOutBuffer2[VECTOR_SIZE_COUNT] = { NULL, NULL, NULL, NULL, NULL, NULL };
102 static MTdataHolder gMTdata;
103 cl_device_fp_config gFloatCapabilities = 0;
104 int gWimpyReductionFactor = 32;
105 int gVerboseBruteForce = 0;
106
107 static int ParseArgs(int argc, const char **argv);
108 static void PrintUsage(void);
109 static void PrintFunctions(void);
110 static test_status InitCL(cl_device_id device);
111 static void ReleaseCL(void);
112 static int InitILogbConstants(void);
113 static int IsTininessDetectedBeforeRounding(void);
114 static int
115 IsInRTZMode(void); // expensive. Please check gIsInRTZMode global instead.
116
doTest(const char * name)117 static int doTest(const char *name)
118 {
119 if (gSkipRestOfTests)
120 {
121 vlog("Skipping function because of an earlier error.\n");
122 return 1;
123 }
124
125 int error = 0;
126 const Func *func_data = NULL;
127
128 for (size_t i = 0; i < functionListCount; i++)
129 {
130 const Func *const temp_func = functionList + i;
131 if (strcmp(temp_func->name, name) == 0)
132 {
133 if ((gStartTestNumber != ~0u && i < gStartTestNumber)
134 || i > gEndTestNumber)
135 {
136 vlog("Skipping function #%zu\n", i);
137 return 0;
138 }
139
140 func_data = temp_func;
141 break;
142 }
143 }
144
145 if (func_data == NULL)
146 {
147 vlog("Function '%s' doesn't exist!\n", name);
148 exit(EXIT_FAILURE);
149 }
150
151 if (func_data->func.p == NULL)
152 {
153 vlog("'%s' is missing implementation, skipping function.\n",
154 func_data->name);
155 return 0;
156 }
157
158 // if correctly rounded divide & sqrt are supported by the implementation
159 // then test it; otherwise skip the test
160 if (strcmp(func_data->name, "sqrt_cr") == 0
161 || strcmp(func_data->name, "divide_cr") == 0)
162 {
163 if ((gFloatCapabilities & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT) == 0)
164 {
165 vlog("Correctly rounded divide and sqrt are not supported, "
166 "skipping function.\n");
167 return 0;
168 }
169 }
170
171 {
172 if (0 == strcmp("ilogb", func_data->name))
173 {
174 InitILogbConstants();
175 }
176
177 if (gTestFastRelaxed && func_data->relaxed)
178 {
179 if (get_device_cl_version(gDevice) > Version(1, 2))
180 {
181 gTestCount++;
182 vlog("%3d: ", gTestCount);
183 // Test with relaxed requirements here.
184 if (func_data->vtbl_ptr->TestFunc(func_data, gMTdata,
185 true /* relaxed mode */))
186 {
187 gFailCount++;
188 error++;
189 if (gStopOnError)
190 {
191 gSkipRestOfTests = true;
192 return error;
193 }
194 }
195 }
196 else
197 {
198 vlog("Skipping reduced precision testing for device with "
199 "version 1.2 or less\n");
200 }
201 }
202
203 if (gTestFloat)
204 {
205 gTestCount++;
206 vlog("%3d: ", gTestCount);
207 // Don't test with relaxed requirements.
208 if (func_data->vtbl_ptr->TestFunc(func_data, gMTdata,
209 false /* relaxed mode */))
210 {
211 gFailCount++;
212 error++;
213 if (gStopOnError)
214 {
215 gSkipRestOfTests = true;
216 return error;
217 }
218 }
219 }
220
221 if (gHasDouble && NULL != func_data->vtbl_ptr->DoubleTestFunc
222 && NULL != func_data->dfunc.p)
223 {
224 gTestCount++;
225 vlog("%3d: ", gTestCount);
226 // Don't test with relaxed requirements.
227 if (func_data->vtbl_ptr->DoubleTestFunc(func_data, gMTdata,
228 false /* relaxed mode*/))
229 {
230 gFailCount++;
231 error++;
232 if (gStopOnError)
233 {
234 gSkipRestOfTests = true;
235 return error;
236 }
237 }
238 }
239 }
240
241 return error;
242 }
243
244
245 #define TEST_LAMBDA(name) \
246 [](cl_device_id, cl_context, cl_command_queue, int) { \
247 return doTest(#name); \
248 }
249
250 // Redefine ADD_TEST to use TEST_LAMBDA.
251 #undef ADD_TEST
252 #define ADD_TEST(name) \
253 { \
254 TEST_LAMBDA(name), #name, Version(1, 0) \
255 }
256
257 static test_definition test_list[] = {
258 ADD_TEST(acos), ADD_TEST(acosh), ADD_TEST(acospi),
259 ADD_TEST(asin), ADD_TEST(asinh), ADD_TEST(asinpi),
260 ADD_TEST(atan), ADD_TEST(atanh), ADD_TEST(atanpi),
261 ADD_TEST(atan2), ADD_TEST(atan2pi), ADD_TEST(cbrt),
262 ADD_TEST(ceil), ADD_TEST(copysign), ADD_TEST(cos),
263 ADD_TEST(cosh), ADD_TEST(cospi), ADD_TEST(exp),
264 ADD_TEST(exp2), ADD_TEST(exp10), ADD_TEST(expm1),
265 ADD_TEST(fabs), ADD_TEST(fdim), ADD_TEST(floor),
266 ADD_TEST(fma), ADD_TEST(fmax), ADD_TEST(fmin),
267 ADD_TEST(fmod), ADD_TEST(fract), ADD_TEST(frexp),
268 ADD_TEST(hypot), ADD_TEST(ilogb), ADD_TEST(isequal),
269 ADD_TEST(isfinite), ADD_TEST(isgreater), ADD_TEST(isgreaterequal),
270 ADD_TEST(isinf), ADD_TEST(isless), ADD_TEST(islessequal),
271 ADD_TEST(islessgreater), ADD_TEST(isnan), ADD_TEST(isnormal),
272 ADD_TEST(isnotequal), ADD_TEST(isordered), ADD_TEST(isunordered),
273 ADD_TEST(ldexp), ADD_TEST(lgamma), ADD_TEST(lgamma_r),
274 ADD_TEST(log), ADD_TEST(log2), ADD_TEST(log10),
275 ADD_TEST(log1p), ADD_TEST(logb), ADD_TEST(mad),
276 ADD_TEST(maxmag), ADD_TEST(minmag), ADD_TEST(modf),
277 ADD_TEST(nan), ADD_TEST(nextafter), ADD_TEST(pow),
278 ADD_TEST(pown), ADD_TEST(powr), ADD_TEST(remainder),
279 ADD_TEST(remquo), ADD_TEST(rint), ADD_TEST(rootn),
280 ADD_TEST(round), ADD_TEST(rsqrt), ADD_TEST(signbit),
281 ADD_TEST(sin), ADD_TEST(sincos), ADD_TEST(sinh),
282 ADD_TEST(sinpi), ADD_TEST(sqrt), ADD_TEST(sqrt_cr),
283 ADD_TEST(tan), ADD_TEST(tanh), ADD_TEST(tanpi),
284 ADD_TEST(trunc), ADD_TEST(half_cos), ADD_TEST(half_divide),
285 ADD_TEST(half_exp), ADD_TEST(half_exp2), ADD_TEST(half_exp10),
286 ADD_TEST(half_log), ADD_TEST(half_log2), ADD_TEST(half_log10),
287 ADD_TEST(half_powr), ADD_TEST(half_recip), ADD_TEST(half_rsqrt),
288 ADD_TEST(half_sin), ADD_TEST(half_sqrt), ADD_TEST(half_tan),
289 ADD_TEST(add), ADD_TEST(subtract), ADD_TEST(divide),
290 ADD_TEST(divide_cr), ADD_TEST(multiply), ADD_TEST(assignment),
291 ADD_TEST(not),
292 };
293
294 #undef ADD_TEST
295 #undef TEST_LAMBDA
296
297 static const int test_num = ARRAY_SIZE(test_list);
298
299 #pragma mark -
300
main(int argc,const char * argv[])301 int main(int argc, const char *argv[])
302 {
303 int error;
304
305 argc = parseCustomParam(argc, argv);
306 if (argc == -1)
307 {
308 return -1;
309 }
310
311 error = ParseArgs(argc, argv);
312 if (error) return error;
313
314 // This takes a while, so prevent the machine from going to sleep.
315 PreventSleep();
316 atexit(ResumeSleep);
317
318 if (gSkipCorrectnessTesting)
319 vlog("*** Skipping correctness testing! ***\n\n");
320 else if (gStopOnError)
321 vlog("Stopping at first error.\n");
322
323 vlog(" \t ");
324 if (gWimpyMode) vlog(" ");
325 if (!gSkipCorrectnessTesting) vlog("\t max_ulps");
326
327 vlog("\n-------------------------------------------------------------------"
328 "----------------------------------------\n");
329
330 gMTdata = MTdataHolder(gRandomSeed);
331
332 FPU_mode_type oldMode;
333 DisableFTZ(&oldMode);
334
335 int ret = runTestHarnessWithCheck(gTestNames.size(), gTestNames.data(),
336 test_num, test_list, true, 0, InitCL);
337
338 RestoreFPState(&oldMode);
339
340 if (gQueue)
341 {
342 int error_code = clFinish(gQueue);
343 if (error_code) vlog_error("clFinish failed:%d\n", error_code);
344 }
345
346 ReleaseCL();
347
348 return ret;
349 }
350
ParseArgs(int argc,const char ** argv)351 static int ParseArgs(int argc, const char **argv)
352 {
353 // We only pass test names to runTestHarnessWithCheck, hence global command
354 // line options defined by the harness cannot be used by the user.
355 // To respect the implementation details of runTestHarnessWithCheck,
356 // gTestNames[0] has to exist although its value is not important.
357 gTestNames.push_back("");
358
359 int singleThreaded = 0;
360
361 { // Extract the app name
362 strncpy(appName, argv[0], MAXPATHLEN - 1);
363 appName[MAXPATHLEN - 1] = '\0';
364
365 #if defined(__APPLE__)
366 char baseName[MAXPATHLEN];
367 char *base = NULL;
368 strncpy(baseName, argv[0], MAXPATHLEN - 1);
369 baseName[MAXPATHLEN - 1] = '\0';
370 base = basename(baseName);
371 if (NULL != base)
372 {
373 strncpy(appName, base, sizeof(appName) - 1);
374 appName[sizeof(appName) - 1] = '\0';
375 }
376 #endif
377 }
378
379 vlog("\n%s\t", appName);
380 for (int i = 1; i < argc; i++)
381 {
382 const char *arg = argv[i];
383 if (NULL == arg) break;
384
385 vlog("\t%s", arg);
386 int optionFound = 0;
387 if (arg[0] == '-')
388 {
389 while (arg[1] != '\0')
390 {
391 arg++;
392 optionFound = 1;
393 switch (*arg)
394 {
395 case 'c': gToggleCorrectlyRoundedDivideSqrt ^= 1; break;
396
397 case 'd': gHasDouble ^= 1; break;
398
399 case 'e': gFastRelaxedDerived ^= 1; break;
400
401 case 'f': gTestFloat ^= 1; break;
402
403 case 'h': PrintUsage(); return -1;
404
405 case 'p': PrintFunctions(); return -1;
406
407 case 'l': gSkipCorrectnessTesting ^= 1; break;
408
409 case 'm': singleThreaded ^= 1; break;
410
411 case 'r': gTestFastRelaxed ^= 1; break;
412
413 case 's': gStopOnError ^= 1; break;
414
415 case 'v': gVerboseBruteForce ^= 1; break;
416
417 case 'w': // wimpy mode
418 gWimpyMode ^= 1;
419 break;
420
421 case '[':
422 parseWimpyReductionFactor(arg, gWimpyReductionFactor);
423 break;
424
425 case 'b': gHostFill ^= 1; break;
426
427 case 'z': gForceFTZ ^= 1; break;
428
429 case '1':
430 if (arg[1] == '6')
431 {
432 gMinVectorSizeIndex = 5;
433 gMaxVectorSizeIndex = gMinVectorSizeIndex + 1;
434 arg++;
435 }
436 else
437 {
438 gMinVectorSizeIndex = 0;
439 gMaxVectorSizeIndex = gMinVectorSizeIndex + 1;
440 }
441 break;
442 case '2':
443 gMinVectorSizeIndex = 1;
444 gMaxVectorSizeIndex = gMinVectorSizeIndex + 1;
445 break;
446 case '3':
447 gMinVectorSizeIndex = 2;
448 gMaxVectorSizeIndex = gMinVectorSizeIndex + 1;
449 break;
450 case '4':
451 gMinVectorSizeIndex = 3;
452 gMaxVectorSizeIndex = gMinVectorSizeIndex + 1;
453 break;
454 case '8':
455 gMinVectorSizeIndex = 4;
456 gMaxVectorSizeIndex = gMinVectorSizeIndex + 1;
457 break;
458
459 default:
460 vlog(" <-- unknown flag: %c (0x%2.2x)\n)", *arg, *arg);
461 PrintUsage();
462 return -1;
463 }
464 }
465 }
466
467 if (!optionFound)
468 {
469 char *t = NULL;
470 long number = strtol(arg, &t, 0);
471 if (t != arg)
472 {
473 if (~0u == gStartTestNumber)
474 gStartTestNumber = (int32_t)number;
475 else
476 gEndTestNumber = gStartTestNumber + (int32_t)number;
477 }
478 else
479 {
480 // Make sure this is a valid name
481 unsigned int k;
482 for (k = 0; k < functionListCount; k++)
483 {
484 const Func *f = functionList + k;
485 if (strcmp(arg, f->name) == 0)
486 {
487 gTestNames.push_back(arg);
488 break;
489 }
490 }
491 // If we didn't find it in the list of test names
492 if (k >= functionListCount)
493 {
494 gTestNames.push_back(arg);
495 }
496 }
497 }
498 }
499
500 // Check for the wimpy mode environment variable
501 if (getenv("CL_WIMPY_MODE"))
502 {
503 vlog("\n");
504 vlog("*** Detected CL_WIMPY_MODE env ***\n");
505 gWimpyMode = 1;
506 }
507
508 PrintArch();
509
510 if (gWimpyMode)
511 {
512 vlog("\n");
513 vlog("*** WARNING: Testing in Wimpy mode! ***\n");
514 vlog("*** Wimpy mode is not sufficient to verify correctness. ***\n");
515 vlog("*** Wimpy Reduction Factor: %-27u ***\n\n",
516 gWimpyReductionFactor);
517 }
518
519 if (singleThreaded) SetThreadCount(1);
520
521 return 0;
522 }
523
524
PrintFunctions(void)525 static void PrintFunctions(void)
526 {
527 vlog("\nMath function names:\n");
528 for (size_t i = 0; i < functionListCount; i++)
529 {
530 vlog("\t%s\n", functionList[i].name);
531 }
532 }
533
PrintUsage(void)534 static void PrintUsage(void)
535 {
536 vlog("%s [-cglsz]: <optional: math function names>\n", appName);
537 vlog("\toptions:\n");
538 vlog("\t\t-c\tToggle test fp correctly rounded divide and sqrt (Default: "
539 "off)\n");
540 vlog("\t\t-d\tToggle double precision testing. (Default: on iff khr_fp_64 "
541 "on)\n");
542 vlog("\t\t-f\tToggle float precision testing. (Default: on)\n");
543 vlog("\t\t-r\tToggle fast relaxed math precision testing. (Default: on)\n");
544 vlog("\t\t-e\tToggle test as derived implementations for fast relaxed math "
545 "precision. (Default: on)\n");
546 vlog("\t\t-h\tPrint this message and quit\n");
547 vlog("\t\t-p\tPrint all math function names and quit\n");
548 vlog("\t\t-l\tlink check only (make sure functions are present, skip "
549 "accuracy checks.)\n");
550 vlog("\t\t-m\tToggle run multi-threaded. (Default: on) )\n");
551 vlog("\t\t-s\tStop on error\n");
552 vlog("\t\t-w\tToggle Wimpy Mode, * Not a valid test * \n");
553 vlog("\t\t-[2^n]\tSet wimpy reduction factor, recommended range of n is "
554 "1-10, default factor(%u)\n",
555 gWimpyReductionFactor);
556 vlog("\t\t-b\tFill buffers on host instead of device. (Default: off)\n");
557 vlog("\t\t-z\tToggle FTZ mode (Section 6.5.3) for all functions. (Set by "
558 "device capabilities by default.)\n");
559 vlog("\t\t-v\tToggle Verbosity (Default: off)\n ");
560 vlog("\t\t-#\tTest only vector sizes #, e.g. \"-1\" tests scalar only, "
561 "\"-16\" tests 16-wide vectors only.\n");
562 vlog("\n\tYou may also pass a number instead of a function name.\n");
563 vlog("\tThis causes the first N tests to be skipped. The tests are "
564 "numbered.\n");
565 vlog("\tIf you pass a second number, that is the number tests to run after "
566 "the first one.\n");
567 vlog("\tA name list may be used in conjunction with a number range. In "
568 "that case,\n");
569 vlog("\tonly the named cases in the number range will run.\n");
570 vlog("\tYou may also choose to pass no arguments, in which case all tests "
571 "will be run.\n");
572 vlog("\tYou may pass CL_DEVICE_TYPE_CPU/GPU/ACCELERATOR to select the "
573 "device.\n");
574 vlog("\n");
575 }
576
bruteforce_notify_callback(const char * errinfo,const void * private_info,size_t cb,void * user_data)577 static void CL_CALLBACK bruteforce_notify_callback(const char *errinfo,
578 const void *private_info,
579 size_t cb, void *user_data)
580 {
581 vlog("%s (%p, %zd, %p)\n", errinfo, private_info, cb, user_data);
582 }
583
InitCL(cl_device_id device)584 test_status InitCL(cl_device_id device)
585 {
586 int error;
587 uint32_t i;
588 cl_device_type device_type;
589
590 error = clGetDeviceInfo(device, CL_DEVICE_TYPE, sizeof(device_type),
591 &device_type, NULL);
592 if (error)
593 {
594 print_error(error, "Unable to get device type");
595 return TEST_FAIL;
596 }
597
598 gDevice = device;
599
600 // Check extensions
601 if (is_extension_available(gDevice, "cl_khr_fp64"))
602 {
603 gHasDouble ^= 1;
604 #if defined(CL_DEVICE_DOUBLE_FP_CONFIG)
605 cl_device_fp_config doubleCapabilities = 0;
606 if ((error = clGetDeviceInfo(gDevice, CL_DEVICE_DOUBLE_FP_CONFIG,
607 sizeof(doubleCapabilities),
608 &doubleCapabilities, NULL)))
609 {
610 vlog_error("ERROR: Unable to get device "
611 "CL_DEVICE_DOUBLE_FP_CONFIG. (%d)\n",
612 error);
613 return TEST_FAIL;
614 }
615
616 if (DOUBLE_REQUIRED_FEATURES
617 != (doubleCapabilities & DOUBLE_REQUIRED_FEATURES))
618 {
619 std::string list;
620 if (0 == (doubleCapabilities & CL_FP_FMA)) list += "CL_FP_FMA, ";
621 if (0 == (doubleCapabilities & CL_FP_ROUND_TO_NEAREST))
622 list += "CL_FP_ROUND_TO_NEAREST, ";
623 if (0 == (doubleCapabilities & CL_FP_ROUND_TO_ZERO))
624 list += "CL_FP_ROUND_TO_ZERO, ";
625 if (0 == (doubleCapabilities & CL_FP_ROUND_TO_INF))
626 list += "CL_FP_ROUND_TO_INF, ";
627 if (0 == (doubleCapabilities & CL_FP_INF_NAN))
628 list += "CL_FP_INF_NAN, ";
629 if (0 == (doubleCapabilities & CL_FP_DENORM))
630 list += "CL_FP_DENORM, ";
631 vlog_error("ERROR: required double features are missing: %s\n",
632 list.c_str());
633
634 return TEST_FAIL;
635 }
636 #else
637 vlog_error("FAIL: device says it supports cl_khr_fp64 but "
638 "CL_DEVICE_DOUBLE_FP_CONFIG is not in the headers!\n");
639 return TEST_FAIL;
640 #endif
641 }
642
643 uint32_t deviceFrequency = 0;
644 size_t configSize = sizeof(deviceFrequency);
645 if ((error = clGetDeviceInfo(gDevice, CL_DEVICE_MAX_CLOCK_FREQUENCY,
646 configSize, &deviceFrequency, NULL)))
647 deviceFrequency = 0;
648
649 if ((error = clGetDeviceInfo(gDevice, CL_DEVICE_SINGLE_FP_CONFIG,
650 sizeof(gFloatCapabilities),
651 &gFloatCapabilities, NULL)))
652 {
653 vlog_error(
654 "ERROR: Unable to get device CL_DEVICE_SINGLE_FP_CONFIG. (%d)\n",
655 error);
656 return TEST_FAIL;
657 }
658
659 gContext = clCreateContext(NULL, 1, &gDevice, bruteforce_notify_callback,
660 NULL, &error);
661 if (NULL == gContext || error)
662 {
663 vlog_error("clCreateContext failed. (%d) \n", error);
664 return TEST_FAIL;
665 }
666
667 gQueue = clCreateCommandQueue(gContext, gDevice, 0, &error);
668 if (NULL == gQueue || error)
669 {
670 vlog_error("clCreateCommandQueue failed. (%d)\n", error);
671 return TEST_FAIL;
672 }
673
674 // Allocate buffers
675 cl_uint min_alignment = 0;
676 error = clGetDeviceInfo(gDevice, CL_DEVICE_MEM_BASE_ADDR_ALIGN,
677 sizeof(cl_uint), (void *)&min_alignment, NULL);
678 if (CL_SUCCESS != error)
679 {
680 vlog_error("clGetDeviceInfo failed. (%d)\n", error);
681 return TEST_FAIL;
682 }
683 min_alignment >>= 3; // convert bits to bytes
684
685 gIn = align_malloc(BUFFER_SIZE, min_alignment);
686 if (NULL == gIn) return TEST_FAIL;
687 gIn2 = align_malloc(BUFFER_SIZE, min_alignment);
688 if (NULL == gIn2) return TEST_FAIL;
689 gIn3 = align_malloc(BUFFER_SIZE, min_alignment);
690 if (NULL == gIn3) return TEST_FAIL;
691 gOut_Ref = align_malloc(BUFFER_SIZE, min_alignment);
692 if (NULL == gOut_Ref) return TEST_FAIL;
693 gOut_Ref2 = align_malloc(BUFFER_SIZE, min_alignment);
694 if (NULL == gOut_Ref2) return TEST_FAIL;
695
696 for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
697 {
698 gOut[i] = align_malloc(BUFFER_SIZE, min_alignment);
699 if (NULL == gOut[i]) return TEST_FAIL;
700 gOut2[i] = align_malloc(BUFFER_SIZE, min_alignment);
701 if (NULL == gOut2[i]) return TEST_FAIL;
702 }
703
704 cl_mem_flags device_flags = CL_MEM_READ_ONLY;
705 // save a copy on the host device to make this go faster
706 if (CL_DEVICE_TYPE_CPU == device_type)
707 device_flags |= CL_MEM_USE_HOST_PTR;
708 else
709 device_flags |= CL_MEM_COPY_HOST_PTR;
710
711 // setup input buffers
712 gInBuffer =
713 clCreateBuffer(gContext, device_flags, BUFFER_SIZE, gIn, &error);
714 if (gInBuffer == NULL || error)
715 {
716 vlog_error("clCreateBuffer1 failed for input (%d)\n", error);
717 return TEST_FAIL;
718 }
719
720 gInBuffer2 =
721 clCreateBuffer(gContext, device_flags, BUFFER_SIZE, gIn2, &error);
722 if (gInBuffer2 == NULL || error)
723 {
724 vlog_error("clCreateBuffer2 failed for input (%d)\n", error);
725 return TEST_FAIL;
726 }
727
728 gInBuffer3 =
729 clCreateBuffer(gContext, device_flags, BUFFER_SIZE, gIn3, &error);
730 if (gInBuffer3 == NULL || error)
731 {
732 vlog_error("clCreateBuffer3 failed for input (%d)\n", error);
733 return TEST_FAIL;
734 }
735
736
737 // setup output buffers
738 device_flags = CL_MEM_READ_WRITE;
739 // save a copy on the host device to make this go faster
740 if (CL_DEVICE_TYPE_CPU == device_type)
741 device_flags |= CL_MEM_USE_HOST_PTR;
742 else
743 device_flags |= CL_MEM_COPY_HOST_PTR;
744 for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
745 {
746 gOutBuffer[i] = clCreateBuffer(gContext, device_flags, BUFFER_SIZE,
747 gOut[i], &error);
748 if (gOutBuffer[i] == NULL || error)
749 {
750 vlog_error("clCreateBuffer failed for output (%d)\n", error);
751 return TEST_FAIL;
752 }
753 gOutBuffer2[i] = clCreateBuffer(gContext, device_flags, BUFFER_SIZE,
754 gOut2[i], &error);
755 if (gOutBuffer2[i] == NULL || error)
756 {
757 vlog_error("clCreateBuffer2 failed for output (%d)\n", error);
758 return TEST_FAIL;
759 }
760 }
761
762 // we are embedded, check current rounding mode
763 if (gIsEmbedded)
764 {
765 gIsInRTZMode = IsInRTZMode();
766 }
767
768 // Check tininess detection
769 IsTininessDetectedBeforeRounding();
770
771 cl_platform_id platform;
772 int err = clGetPlatformIDs(1, &platform, NULL);
773 if (err)
774 {
775 print_error(err, "clGetPlatformIDs failed");
776 return TEST_FAIL;
777 }
778
779 char c[1024];
780 static const char *no_yes[] = { "NO", "YES" };
781 vlog("\nCompute Device info:\n");
782 clGetPlatformInfo(platform, CL_PLATFORM_VERSION, sizeof(c), &c, NULL);
783 vlog("\tPlatform Version: %s\n", c);
784 clGetDeviceInfo(gDevice, CL_DEVICE_NAME, sizeof(c), &c, NULL);
785 vlog("\tDevice Name: %s\n", c);
786 clGetDeviceInfo(gDevice, CL_DEVICE_VENDOR, sizeof(c), &c, NULL);
787 vlog("\tVendor: %s\n", c);
788 clGetDeviceInfo(gDevice, CL_DEVICE_VERSION, sizeof(c), &c, NULL);
789 vlog("\tDevice Version: %s\n", c);
790 clGetDeviceInfo(gDevice, CL_DEVICE_OPENCL_C_VERSION, sizeof(c), &c, NULL);
791 vlog("\tCL C Version: %s\n", c);
792 clGetDeviceInfo(gDevice, CL_DRIVER_VERSION, sizeof(c), &c, NULL);
793 vlog("\tDriver Version: %s\n", c);
794 vlog("\tDevice Frequency: %d MHz\n", deviceFrequency);
795 vlog("\tSubnormal values supported for floats? %s\n",
796 no_yes[0 != (CL_FP_DENORM & gFloatCapabilities)]);
797 vlog("\tCorrectly rounded divide and sqrt supported for floats? %s\n",
798 no_yes[0
799 != (CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT & gFloatCapabilities)]);
800 if (gToggleCorrectlyRoundedDivideSqrt)
801 {
802 gFloatCapabilities ^= CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT;
803 }
804 vlog("\tTesting with correctly rounded float divide and sqrt? %s\n",
805 no_yes[0
806 != (CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT & gFloatCapabilities)]);
807 vlog("\tTesting with FTZ mode ON for floats? %s\n",
808 no_yes[0 != gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities)]);
809 vlog("\tTesting single precision? %s\n", no_yes[0 != gTestFloat]);
810 vlog("\tTesting fast relaxed math? %s\n", no_yes[0 != gTestFastRelaxed]);
811 if (gTestFastRelaxed)
812 {
813 vlog("\tFast relaxed math has derived implementations? %s\n",
814 no_yes[0 != gFastRelaxedDerived]);
815 }
816 vlog("\tTesting double precision? %s\n", no_yes[0 != gHasDouble]);
817 if (sizeof(long double) == sizeof(double) && gHasDouble)
818 {
819 vlog("\n\t\tWARNING: Host system long double does not have better "
820 "precision than double!\n");
821 vlog("\t\t All double results that do not match the reference "
822 "result have their reported\n");
823 vlog("\t\t error inflated by 0.5 ulps to account for the fact "
824 "that this system\n");
825 vlog("\t\t can not accurately represent the right result to an "
826 "accuracy closer\n");
827 vlog("\t\t than half an ulp. See comments in "
828 "Bruteforce_Ulp_Error_Double() for more details.\n\n");
829 }
830
831 vlog("\tIs Embedded? %s\n", no_yes[0 != gIsEmbedded]);
832 if (gIsEmbedded)
833 vlog("\tRunning in RTZ mode? %s\n", no_yes[0 != gIsInRTZMode]);
834 vlog("\tTininess is detected before rounding? %s\n",
835 no_yes[0 != gCheckTininessBeforeRounding]);
836 vlog("\tWorker threads: %d\n", GetThreadCount());
837 vlog("\tTesting vector sizes:");
838 for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
839 vlog("\t%d", sizeValues[i]);
840
841 vlog("\n");
842 vlog("\tVerbose? %s\n", no_yes[0 != gVerboseBruteForce]);
843 vlog("\n\n");
844
845 // Check to see if we are using single threaded mode on other than a 1.0
846 // device
847 if (getenv("CL_TEST_SINGLE_THREADED"))
848 {
849
850 char device_version[1024] = { 0 };
851 clGetDeviceInfo(gDevice, CL_DEVICE_VERSION, sizeof(device_version),
852 device_version, NULL);
853
854 if (strcmp("OpenCL 1.0 ", device_version))
855 {
856 vlog("ERROR: CL_TEST_SINGLE_THREADED is set in the environment. "
857 "Running single threaded.\n");
858 }
859 }
860
861 return TEST_PASS;
862 }
863
ReleaseCL(void)864 static void ReleaseCL(void)
865 {
866 uint32_t i;
867 clReleaseMemObject(gInBuffer);
868 clReleaseMemObject(gInBuffer2);
869 clReleaseMemObject(gInBuffer3);
870 for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
871 {
872 clReleaseMemObject(gOutBuffer[i]);
873 clReleaseMemObject(gOutBuffer2[i]);
874 }
875 clReleaseCommandQueue(gQueue);
876 clReleaseContext(gContext);
877
878 align_free(gIn);
879 align_free(gIn2);
880 align_free(gIn3);
881 align_free(gOut_Ref);
882 align_free(gOut_Ref2);
883
884 for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
885 {
886 align_free(gOut[i]);
887 align_free(gOut2[i]);
888 }
889 }
890
_LogBuildError(cl_program p,int line,const char * file)891 void _LogBuildError(cl_program p, int line, const char *file)
892 {
893 char the_log[2048] = "";
894
895 vlog_error("%s:%d: Build Log:\n", file, line);
896 if (0
897 == clGetProgramBuildInfo(p, gDevice, CL_PROGRAM_BUILD_LOG,
898 sizeof(the_log), the_log, NULL))
899 vlog_error("%s", the_log);
900 else
901 vlog_error("*** Error getting build log for program %p\n", p);
902 }
903
InitILogbConstants(void)904 int InitILogbConstants(void)
905 {
906 int error;
907 const char *kernelSource =
908 R"(__kernel void GetILogBConstants( __global int *out )
909 {
910 out[0] = FP_ILOGB0;
911 out[1] = FP_ILOGBNAN;
912 })";
913
914 clProgramWrapper query;
915 clKernelWrapper kernel;
916 error = create_single_kernel_helper(gContext, &query, &kernel, 1,
917 &kernelSource, "GetILogBConstants");
918 if (error != CL_SUCCESS)
919 {
920 vlog_error("Error: Unable to create kernel to get FP_ILOGB0 and "
921 "FP_ILOGBNAN for the device. (%d)",
922 error);
923 return error;
924 }
925
926 if ((error =
927 clSetKernelArg(kernel, 0, sizeof(gOutBuffer[gMinVectorSizeIndex]),
928 &gOutBuffer[gMinVectorSizeIndex])))
929 {
930 vlog_error("Error: Unable to set kernel arg to get FP_ILOGB0 and "
931 "FP_ILOGBNAN for the device. Err = %d",
932 error);
933 return error;
934 }
935
936 size_t dim = 1;
937 if ((error = clEnqueueNDRangeKernel(gQueue, kernel, 1, NULL, &dim, NULL, 0,
938 NULL, NULL)))
939 {
940 vlog_error("Error: Unable to execute kernel to get FP_ILOGB0 and "
941 "FP_ILOGBNAN for the device. Err = %d",
942 error);
943 return error;
944 }
945
946 struct
947 {
948 cl_int ilogb0, ilogbnan;
949 } data;
950 if ((error = clEnqueueReadBuffer(gQueue, gOutBuffer[gMinVectorSizeIndex],
951 CL_TRUE, 0, sizeof(data), &data, 0, NULL,
952 NULL)))
953 {
954 vlog_error("Error: unable to read FP_ILOGB0 and FP_ILOGBNAN from the "
955 "device. Err = %d",
956 error);
957 return error;
958 }
959
960 gDeviceILogb0 = data.ilogb0;
961 gDeviceILogbNaN = data.ilogbnan;
962
963 return 0;
964 }
965
IsTininessDetectedBeforeRounding(void)966 int IsTininessDetectedBeforeRounding(void)
967 {
968 int error;
969 const char *kernelSource =
970 R"(__kernel void IsTininessDetectedBeforeRounding( __global float *out )
971 {
972 volatile float a = 0x1.000002p-126f;
973 volatile float b = 0x1.fffffcp-1f;
974 out[0] = a * b; // product is 0x1.fffffffffff8p-127
975 })";
976
977 clProgramWrapper query;
978 clKernelWrapper kernel;
979 error =
980 create_single_kernel_helper(gContext, &query, &kernel, 1, &kernelSource,
981 "IsTininessDetectedBeforeRounding");
982 if (error != CL_SUCCESS)
983 {
984 vlog_error("Error: Unable to create kernel to detect how tininess is "
985 "detected for the device. (%d)",
986 error);
987 return error;
988 }
989
990 if ((error =
991 clSetKernelArg(kernel, 0, sizeof(gOutBuffer[gMinVectorSizeIndex]),
992 &gOutBuffer[gMinVectorSizeIndex])))
993 {
994 vlog_error("Error: Unable to set kernel arg to detect how tininess is "
995 "detected for the device. Err = %d",
996 error);
997 return error;
998 }
999
1000 size_t dim = 1;
1001 if ((error = clEnqueueNDRangeKernel(gQueue, kernel, 1, NULL, &dim, NULL, 0,
1002 NULL, NULL)))
1003 {
1004 vlog_error("Error: Unable to execute kernel to detect how tininess is "
1005 "detected for the device. Err = %d",
1006 error);
1007 return error;
1008 }
1009
1010 struct
1011 {
1012 cl_uint f;
1013 } data;
1014 if ((error = clEnqueueReadBuffer(gQueue, gOutBuffer[gMinVectorSizeIndex],
1015 CL_TRUE, 0, sizeof(data), &data, 0, NULL,
1016 NULL)))
1017 {
1018 vlog_error("Error: unable to read result from tininess test from the "
1019 "device. Err = %d",
1020 error);
1021 return error;
1022 }
1023
1024 gCheckTininessBeforeRounding = 0 == (data.f & 0x7fffffff);
1025
1026 return 0;
1027 }
1028
IsInRTZMode(void)1029 static int IsInRTZMode(void)
1030 {
1031 int error;
1032 const char *kernelSource =
1033 R"(__kernel void GetRoundingMode( __global int *out )
1034 {
1035 volatile float a = 0x1.0p23f;
1036 volatile float b = -0x1.0p23f;
1037 out[0] = (a + 0x1.fffffep-1f == a) && (b - 0x1.fffffep-1f == b);
1038 })";
1039
1040 clProgramWrapper query;
1041 clKernelWrapper kernel;
1042 error = create_single_kernel_helper(gContext, &query, &kernel, 1,
1043 &kernelSource, "GetRoundingMode");
1044 if (error != CL_SUCCESS)
1045 {
1046 vlog_error("Error: Unable to create kernel to detect RTZ mode for the "
1047 "device. (%d)",
1048 error);
1049 return error;
1050 }
1051
1052 if ((error =
1053 clSetKernelArg(kernel, 0, sizeof(gOutBuffer[gMinVectorSizeIndex]),
1054 &gOutBuffer[gMinVectorSizeIndex])))
1055 {
1056 vlog_error("Error: Unable to set kernel arg to detect RTZ mode for the "
1057 "device. Err = %d",
1058 error);
1059 return error;
1060 }
1061
1062 size_t dim = 1;
1063 if ((error = clEnqueueNDRangeKernel(gQueue, kernel, 1, NULL, &dim, NULL, 0,
1064 NULL, NULL)))
1065 {
1066 vlog_error("Error: Unable to execute kernel to detect RTZ mode for the "
1067 "device. Err = %d",
1068 error);
1069 return error;
1070 }
1071
1072 struct
1073 {
1074 cl_int isRTZ;
1075 } data;
1076 if ((error = clEnqueueReadBuffer(gQueue, gOutBuffer[gMinVectorSizeIndex],
1077 CL_TRUE, 0, sizeof(data), &data, 0, NULL,
1078 NULL)))
1079 {
1080 vlog_error(
1081 "Error: unable to read RTZ mode data from the device. Err = %d",
1082 error);
1083 return error;
1084 }
1085
1086 return data.isRTZ;
1087 }
1088
1089 #pragma mark -
1090
1091 const char *sizeNames[VECTOR_SIZE_COUNT] = { "", "2", "3", "4", "8", "16" };
1092 const int sizeValues[VECTOR_SIZE_COUNT] = { 1, 2, 3, 4, 8, 16 };
1093
1094 // TODO: There is another version of Ulp_Error_Double defined in
1095 // test_common/harness/errorHelpers.c
Bruteforce_Ulp_Error_Double(double test,long double reference)1096 float Bruteforce_Ulp_Error_Double(double test, long double reference)
1097 {
1098 // Check for Non-power-of-two and NaN
1099
1100 // Note: This function presumes that someone has already tested whether the
1101 // result is correctly, rounded before calling this function. That test:
1102 //
1103 // if( (float) reference == test )
1104 // return 0.0f;
1105 //
1106 // would ensure that cases like fabs(reference) > FLT_MAX are weeded out
1107 // before we get here. Otherwise, we'll return inf ulp error here, for what
1108 // are otherwise correctly rounded results.
1109
1110 // Deal with long double = double
1111 // On most systems long double is a higher precision type than double. They
1112 // provide either a 80-bit or greater floating point type, or they provide a
1113 // head-tail double double format. That is sufficient to represent the
1114 // accuracy of a floating point result to many more bits than double and we
1115 // can calculate sub-ulp errors. This is the standard system for which this
1116 // test suite is designed.
1117 //
1118 // On some systems double and long double are the same thing. Then we run
1119 // into a problem, because our representation of the infinitely precise
1120 // result (passed in as reference above) can be off by as much as a half
1121 // double precision ulp itself. In this case, we inflate the reported error
1122 // by half an ulp to take this into account. A more correct and permanent
1123 // fix would be to undertake refactoring the reference code to return
1124 // results in this format:
1125 //
1126 // typedef struct DoubleReference
1127 // { // true value = correctlyRoundedResult + ulps *
1128 // ulp(correctlyRoundedResult) (infinitely precise)
1129 // double correctlyRoundedResult; // as best we can
1130 // double ulps; // plus a fractional amount to
1131 // account for the difference
1132 // }DoubleReference; // between infinitely
1133 // precise result and correctlyRoundedResult, in units of ulps.
1134 //
1135 // This would provide a useful higher-than-double precision format for
1136 // everyone that we can use, and would solve a few problems with
1137 // representing absolute errors below DBL_MIN and over DBL_MAX for systems
1138 // that use a head to tail double double for long double.
1139
1140 int x;
1141 long double testVal = test;
1142
1143 // First, handle special reference values
1144 if (isinf(reference))
1145 {
1146 if (reference == testVal) return 0.0f;
1147
1148 return INFINITY;
1149 }
1150
1151 if (isnan(reference))
1152 {
1153 if (isnan(testVal)) return 0.0f;
1154
1155 return INFINITY;
1156 }
1157
1158 if (0.0L != reference && 0.5L != frexpl(reference, &x))
1159 { // Non-zero and Non-power of two
1160
1161 // allow correctly rounded results to pass through unmolested. (We might
1162 // add error to it below.) There is something of a performance
1163 // optimization here.
1164 if (testVal == reference) return 0.0f;
1165
1166 // The unbiased exponent of the ulp unit place
1167 int ulp_exp =
1168 DBL_MANT_DIG - 1 - std::max(ilogbl(reference), DBL_MIN_EXP - 1);
1169
1170 // Scale the exponent of the error
1171 float result = (float)scalbnl(testVal - reference, ulp_exp);
1172
1173 // account for rounding error in reference result on systems that do not
1174 // have a higher precision floating point type (see above)
1175 if (sizeof(long double) == sizeof(double))
1176 result += copysignf(0.5f, result);
1177
1178 return result;
1179 }
1180
1181 // reference is a normal power of two or a zero
1182 // The unbiased exponent of the ulp unit place
1183 int ulp_exp =
1184 DBL_MANT_DIG - 1 - std::max(ilogbl(reference) - 1, DBL_MIN_EXP - 1);
1185
1186 // allow correctly rounded results to pass through unmolested. (We might add
1187 // error to it below.) There is something of a performance optimization here
1188 // too.
1189 if (testVal == reference) return 0.0f;
1190
1191 // Scale the exponent of the error
1192 float result = (float)scalbnl(testVal - reference, ulp_exp);
1193
1194 // account for rounding error in reference result on systems that do not
1195 // have a higher precision floating point type (see above)
1196 if (sizeof(long double) == sizeof(double))
1197 result += copysignf(0.5f, result);
1198
1199 return result;
1200 }
1201
Abs_Error(float test,double reference)1202 float Abs_Error(float test, double reference)
1203 {
1204 if (isnan(test) && isnan(reference)) return 0.0f;
1205 return fabs((float)(reference - (double)test));
1206 }
1207
RoundUpToNextPowerOfTwo(cl_uint x)1208 cl_uint RoundUpToNextPowerOfTwo(cl_uint x)
1209 {
1210 if (0 == (x & (x - 1))) return x;
1211
1212 while (x & (x - 1)) x &= x - 1;
1213
1214 return x + x;
1215 }
1216