xref: /aosp_15_r20/external/OpenCL-CTS/test_conformance/math_brute_force/main.cpp (revision 6467f958c7de8070b317fc65bcb0f6472e388d82)
1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 
17 #include "function_list.h"
18 #include "sleep.h"
19 #include "utility.h"
20 
21 #include <algorithm>
22 #include <cstdio>
23 #include <cstdlib>
24 #include <ctime>
25 #include <string>
26 #include <vector>
27 
28 #include "harness/errorHelpers.h"
29 #include "harness/kernelHelpers.h"
30 #include "harness/parseParameters.h"
31 #include "harness/typeWrappers.h"
32 
33 #if defined(__APPLE__)
34 #include <sys/sysctl.h>
35 #include <sys/mman.h>
36 #include <libgen.h>
37 #include <sys/time.h>
38 #elif defined(__linux__)
39 #include <unistd.h>
40 #include <sys/syscall.h>
41 #include <linux/sysctl.h>
42 #include <sys/param.h>
43 #endif
44 
45 #if defined(__linux__) || (defined WIN32 && defined __MINGW32__)
46 #include <sys/param.h>
47 #endif
48 
49 #include "harness/testHarness.h"
50 
51 #define kPageSize 4096
52 #define DOUBLE_REQUIRED_FEATURES                                               \
53     (CL_FP_FMA | CL_FP_ROUND_TO_NEAREST | CL_FP_ROUND_TO_ZERO                  \
54      | CL_FP_ROUND_TO_INF | CL_FP_INF_NAN | CL_FP_DENORM)
55 
56 static std::vector<const char *> gTestNames;
57 static char appName[MAXPATHLEN] = "";
58 cl_device_id gDevice = NULL;
59 cl_context gContext = NULL;
60 cl_command_queue gQueue = NULL;
61 static size_t gStartTestNumber = ~0u;
62 static size_t gEndTestNumber = ~0u;
63 int gSkipCorrectnessTesting = 0;
64 static int gStopOnError = 0;
65 static bool gSkipRestOfTests;
66 int gForceFTZ = 0;
67 int gWimpyMode = 0;
68 int gHostFill = 0;
69 static int gHasDouble = 0;
70 static int gTestFloat = 1;
71 // This flag should be 'ON' by default and it can be changed through the command
72 // line arguments.
73 static int gTestFastRelaxed = 1;
74 /*This flag corresponds to defining if the implementation has Derived Fast
75   Relaxed functions. The spec does not specify ULP for derived function.  The
76   derived functions are composed of base functions which are tested for ULP,
77   thus when this flag is enabled, Derived functions will not be tested for ULP,
78   as per table 7.1 of OpenCL 2.0 spec. Since there is no way of quering the
79   device whether it is a derived or non-derived implementation according to
80   OpenCL 2.0 spec then it has to be changed through a command line argument.
81 */
82 int gFastRelaxedDerived = 1;
83 static int gToggleCorrectlyRoundedDivideSqrt = 0;
84 int gDeviceILogb0 = 1;
85 int gDeviceILogbNaN = 1;
86 int gCheckTininessBeforeRounding = 1;
87 int gIsInRTZMode = 0;
88 uint32_t gMaxVectorSizeIndex = VECTOR_SIZE_COUNT;
89 uint32_t gMinVectorSizeIndex = 0;
90 void *gIn = NULL;
91 void *gIn2 = NULL;
92 void *gIn3 = NULL;
93 void *gOut_Ref = NULL;
94 void *gOut[VECTOR_SIZE_COUNT] = { NULL, NULL, NULL, NULL, NULL, NULL };
95 void *gOut_Ref2 = NULL;
96 void *gOut2[VECTOR_SIZE_COUNT] = { NULL, NULL, NULL, NULL, NULL, NULL };
97 cl_mem gInBuffer = NULL;
98 cl_mem gInBuffer2 = NULL;
99 cl_mem gInBuffer3 = NULL;
100 cl_mem gOutBuffer[VECTOR_SIZE_COUNT] = { NULL, NULL, NULL, NULL, NULL, NULL };
101 cl_mem gOutBuffer2[VECTOR_SIZE_COUNT] = { NULL, NULL, NULL, NULL, NULL, NULL };
102 static MTdataHolder gMTdata;
103 cl_device_fp_config gFloatCapabilities = 0;
104 int gWimpyReductionFactor = 32;
105 int gVerboseBruteForce = 0;
106 
107 static int ParseArgs(int argc, const char **argv);
108 static void PrintUsage(void);
109 static void PrintFunctions(void);
110 static test_status InitCL(cl_device_id device);
111 static void ReleaseCL(void);
112 static int InitILogbConstants(void);
113 static int IsTininessDetectedBeforeRounding(void);
114 static int
115 IsInRTZMode(void); // expensive. Please check gIsInRTZMode global instead.
116 
doTest(const char * name)117 static int doTest(const char *name)
118 {
119     if (gSkipRestOfTests)
120     {
121         vlog("Skipping function because of an earlier error.\n");
122         return 1;
123     }
124 
125     int error = 0;
126     const Func *func_data = NULL;
127 
128     for (size_t i = 0; i < functionListCount; i++)
129     {
130         const Func *const temp_func = functionList + i;
131         if (strcmp(temp_func->name, name) == 0)
132         {
133             if ((gStartTestNumber != ~0u && i < gStartTestNumber)
134                 || i > gEndTestNumber)
135             {
136                 vlog("Skipping function #%zu\n", i);
137                 return 0;
138             }
139 
140             func_data = temp_func;
141             break;
142         }
143     }
144 
145     if (func_data == NULL)
146     {
147         vlog("Function '%s' doesn't exist!\n", name);
148         exit(EXIT_FAILURE);
149     }
150 
151     if (func_data->func.p == NULL)
152     {
153         vlog("'%s' is missing implementation, skipping function.\n",
154              func_data->name);
155         return 0;
156     }
157 
158     // if correctly rounded divide & sqrt are supported by the implementation
159     // then test it; otherwise skip the test
160     if (strcmp(func_data->name, "sqrt_cr") == 0
161         || strcmp(func_data->name, "divide_cr") == 0)
162     {
163         if ((gFloatCapabilities & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT) == 0)
164         {
165             vlog("Correctly rounded divide and sqrt are not supported, "
166                  "skipping function.\n");
167             return 0;
168         }
169     }
170 
171     {
172         if (0 == strcmp("ilogb", func_data->name))
173         {
174             InitILogbConstants();
175         }
176 
177         if (gTestFastRelaxed && func_data->relaxed)
178         {
179             if (get_device_cl_version(gDevice) > Version(1, 2))
180             {
181                 gTestCount++;
182                 vlog("%3d: ", gTestCount);
183                 // Test with relaxed requirements here.
184                 if (func_data->vtbl_ptr->TestFunc(func_data, gMTdata,
185                                                   true /* relaxed mode */))
186                 {
187                     gFailCount++;
188                     error++;
189                     if (gStopOnError)
190                     {
191                         gSkipRestOfTests = true;
192                         return error;
193                     }
194                 }
195             }
196             else
197             {
198                 vlog("Skipping reduced precision testing for device with "
199                      "version 1.2 or less\n");
200             }
201         }
202 
203         if (gTestFloat)
204         {
205             gTestCount++;
206             vlog("%3d: ", gTestCount);
207             // Don't test with relaxed requirements.
208             if (func_data->vtbl_ptr->TestFunc(func_data, gMTdata,
209                                               false /* relaxed mode */))
210             {
211                 gFailCount++;
212                 error++;
213                 if (gStopOnError)
214                 {
215                     gSkipRestOfTests = true;
216                     return error;
217                 }
218             }
219         }
220 
221         if (gHasDouble && NULL != func_data->vtbl_ptr->DoubleTestFunc
222             && NULL != func_data->dfunc.p)
223         {
224             gTestCount++;
225             vlog("%3d: ", gTestCount);
226             // Don't test with relaxed requirements.
227             if (func_data->vtbl_ptr->DoubleTestFunc(func_data, gMTdata,
228                                                     false /* relaxed mode*/))
229             {
230                 gFailCount++;
231                 error++;
232                 if (gStopOnError)
233                 {
234                     gSkipRestOfTests = true;
235                     return error;
236                 }
237             }
238         }
239     }
240 
241     return error;
242 }
243 
244 
245 #define TEST_LAMBDA(name)                                                      \
246     [](cl_device_id, cl_context, cl_command_queue, int) {                      \
247         return doTest(#name);                                                  \
248     }
249 
250 // Redefine ADD_TEST to use TEST_LAMBDA.
251 #undef ADD_TEST
252 #define ADD_TEST(name)                                                         \
253     {                                                                          \
254         TEST_LAMBDA(name), #name, Version(1, 0)                                \
255     }
256 
257 static test_definition test_list[] = {
258     ADD_TEST(acos),          ADD_TEST(acosh),      ADD_TEST(acospi),
259     ADD_TEST(asin),          ADD_TEST(asinh),      ADD_TEST(asinpi),
260     ADD_TEST(atan),          ADD_TEST(atanh),      ADD_TEST(atanpi),
261     ADD_TEST(atan2),         ADD_TEST(atan2pi),    ADD_TEST(cbrt),
262     ADD_TEST(ceil),          ADD_TEST(copysign),   ADD_TEST(cos),
263     ADD_TEST(cosh),          ADD_TEST(cospi),      ADD_TEST(exp),
264     ADD_TEST(exp2),          ADD_TEST(exp10),      ADD_TEST(expm1),
265     ADD_TEST(fabs),          ADD_TEST(fdim),       ADD_TEST(floor),
266     ADD_TEST(fma),           ADD_TEST(fmax),       ADD_TEST(fmin),
267     ADD_TEST(fmod),          ADD_TEST(fract),      ADD_TEST(frexp),
268     ADD_TEST(hypot),         ADD_TEST(ilogb),      ADD_TEST(isequal),
269     ADD_TEST(isfinite),      ADD_TEST(isgreater),  ADD_TEST(isgreaterequal),
270     ADD_TEST(isinf),         ADD_TEST(isless),     ADD_TEST(islessequal),
271     ADD_TEST(islessgreater), ADD_TEST(isnan),      ADD_TEST(isnormal),
272     ADD_TEST(isnotequal),    ADD_TEST(isordered),  ADD_TEST(isunordered),
273     ADD_TEST(ldexp),         ADD_TEST(lgamma),     ADD_TEST(lgamma_r),
274     ADD_TEST(log),           ADD_TEST(log2),       ADD_TEST(log10),
275     ADD_TEST(log1p),         ADD_TEST(logb),       ADD_TEST(mad),
276     ADD_TEST(maxmag),        ADD_TEST(minmag),     ADD_TEST(modf),
277     ADD_TEST(nan),           ADD_TEST(nextafter),  ADD_TEST(pow),
278     ADD_TEST(pown),          ADD_TEST(powr),       ADD_TEST(remainder),
279     ADD_TEST(remquo),        ADD_TEST(rint),       ADD_TEST(rootn),
280     ADD_TEST(round),         ADD_TEST(rsqrt),      ADD_TEST(signbit),
281     ADD_TEST(sin),           ADD_TEST(sincos),     ADD_TEST(sinh),
282     ADD_TEST(sinpi),         ADD_TEST(sqrt),       ADD_TEST(sqrt_cr),
283     ADD_TEST(tan),           ADD_TEST(tanh),       ADD_TEST(tanpi),
284     ADD_TEST(trunc),         ADD_TEST(half_cos),   ADD_TEST(half_divide),
285     ADD_TEST(half_exp),      ADD_TEST(half_exp2),  ADD_TEST(half_exp10),
286     ADD_TEST(half_log),      ADD_TEST(half_log2),  ADD_TEST(half_log10),
287     ADD_TEST(half_powr),     ADD_TEST(half_recip), ADD_TEST(half_rsqrt),
288     ADD_TEST(half_sin),      ADD_TEST(half_sqrt),  ADD_TEST(half_tan),
289     ADD_TEST(add),           ADD_TEST(subtract),   ADD_TEST(divide),
290     ADD_TEST(divide_cr),     ADD_TEST(multiply),   ADD_TEST(assignment),
291     ADD_TEST(not),
292 };
293 
294 #undef ADD_TEST
295 #undef TEST_LAMBDA
296 
297 static const int test_num = ARRAY_SIZE(test_list);
298 
299 #pragma mark -
300 
main(int argc,const char * argv[])301 int main(int argc, const char *argv[])
302 {
303     int error;
304 
305     argc = parseCustomParam(argc, argv);
306     if (argc == -1)
307     {
308         return -1;
309     }
310 
311     error = ParseArgs(argc, argv);
312     if (error) return error;
313 
314     // This takes a while, so prevent the machine from going to sleep.
315     PreventSleep();
316     atexit(ResumeSleep);
317 
318     if (gSkipCorrectnessTesting)
319         vlog("*** Skipping correctness testing! ***\n\n");
320     else if (gStopOnError)
321         vlog("Stopping at first error.\n");
322 
323     vlog("   \t                                        ");
324     if (gWimpyMode) vlog("   ");
325     if (!gSkipCorrectnessTesting) vlog("\t  max_ulps");
326 
327     vlog("\n-------------------------------------------------------------------"
328          "----------------------------------------\n");
329 
330     gMTdata = MTdataHolder(gRandomSeed);
331 
332     FPU_mode_type oldMode;
333     DisableFTZ(&oldMode);
334 
335     int ret = runTestHarnessWithCheck(gTestNames.size(), gTestNames.data(),
336                                       test_num, test_list, true, 0, InitCL);
337 
338     RestoreFPState(&oldMode);
339 
340     if (gQueue)
341     {
342         int error_code = clFinish(gQueue);
343         if (error_code) vlog_error("clFinish failed:%d\n", error_code);
344     }
345 
346     ReleaseCL();
347 
348     return ret;
349 }
350 
ParseArgs(int argc,const char ** argv)351 static int ParseArgs(int argc, const char **argv)
352 {
353     // We only pass test names to runTestHarnessWithCheck, hence global command
354     // line options defined by the harness cannot be used by the user.
355     // To respect the implementation details of runTestHarnessWithCheck,
356     // gTestNames[0] has to exist although its value is not important.
357     gTestNames.push_back("");
358 
359     int singleThreaded = 0;
360 
361     { // Extract the app name
362         strncpy(appName, argv[0], MAXPATHLEN - 1);
363         appName[MAXPATHLEN - 1] = '\0';
364 
365 #if defined(__APPLE__)
366         char baseName[MAXPATHLEN];
367         char *base = NULL;
368         strncpy(baseName, argv[0], MAXPATHLEN - 1);
369         baseName[MAXPATHLEN - 1] = '\0';
370         base = basename(baseName);
371         if (NULL != base)
372         {
373             strncpy(appName, base, sizeof(appName) - 1);
374             appName[sizeof(appName) - 1] = '\0';
375         }
376 #endif
377     }
378 
379     vlog("\n%s\t", appName);
380     for (int i = 1; i < argc; i++)
381     {
382         const char *arg = argv[i];
383         if (NULL == arg) break;
384 
385         vlog("\t%s", arg);
386         int optionFound = 0;
387         if (arg[0] == '-')
388         {
389             while (arg[1] != '\0')
390             {
391                 arg++;
392                 optionFound = 1;
393                 switch (*arg)
394                 {
395                     case 'c': gToggleCorrectlyRoundedDivideSqrt ^= 1; break;
396 
397                     case 'd': gHasDouble ^= 1; break;
398 
399                     case 'e': gFastRelaxedDerived ^= 1; break;
400 
401                     case 'f': gTestFloat ^= 1; break;
402 
403                     case 'h': PrintUsage(); return -1;
404 
405                     case 'p': PrintFunctions(); return -1;
406 
407                     case 'l': gSkipCorrectnessTesting ^= 1; break;
408 
409                     case 'm': singleThreaded ^= 1; break;
410 
411                     case 'r': gTestFastRelaxed ^= 1; break;
412 
413                     case 's': gStopOnError ^= 1; break;
414 
415                     case 'v': gVerboseBruteForce ^= 1; break;
416 
417                     case 'w': // wimpy mode
418                         gWimpyMode ^= 1;
419                         break;
420 
421                     case '[':
422                         parseWimpyReductionFactor(arg, gWimpyReductionFactor);
423                         break;
424 
425                     case 'b': gHostFill ^= 1; break;
426 
427                     case 'z': gForceFTZ ^= 1; break;
428 
429                     case '1':
430                         if (arg[1] == '6')
431                         {
432                             gMinVectorSizeIndex = 5;
433                             gMaxVectorSizeIndex = gMinVectorSizeIndex + 1;
434                             arg++;
435                         }
436                         else
437                         {
438                             gMinVectorSizeIndex = 0;
439                             gMaxVectorSizeIndex = gMinVectorSizeIndex + 1;
440                         }
441                         break;
442                     case '2':
443                         gMinVectorSizeIndex = 1;
444                         gMaxVectorSizeIndex = gMinVectorSizeIndex + 1;
445                         break;
446                     case '3':
447                         gMinVectorSizeIndex = 2;
448                         gMaxVectorSizeIndex = gMinVectorSizeIndex + 1;
449                         break;
450                     case '4':
451                         gMinVectorSizeIndex = 3;
452                         gMaxVectorSizeIndex = gMinVectorSizeIndex + 1;
453                         break;
454                     case '8':
455                         gMinVectorSizeIndex = 4;
456                         gMaxVectorSizeIndex = gMinVectorSizeIndex + 1;
457                         break;
458 
459                     default:
460                         vlog(" <-- unknown flag: %c (0x%2.2x)\n)", *arg, *arg);
461                         PrintUsage();
462                         return -1;
463                 }
464             }
465         }
466 
467         if (!optionFound)
468         {
469             char *t = NULL;
470             long number = strtol(arg, &t, 0);
471             if (t != arg)
472             {
473                 if (~0u == gStartTestNumber)
474                     gStartTestNumber = (int32_t)number;
475                 else
476                     gEndTestNumber = gStartTestNumber + (int32_t)number;
477             }
478             else
479             {
480                 // Make sure this is a valid name
481                 unsigned int k;
482                 for (k = 0; k < functionListCount; k++)
483                 {
484                     const Func *f = functionList + k;
485                     if (strcmp(arg, f->name) == 0)
486                     {
487                         gTestNames.push_back(arg);
488                         break;
489                     }
490                 }
491                 // If we didn't find it in the list of test names
492                 if (k >= functionListCount)
493                 {
494                     gTestNames.push_back(arg);
495                 }
496             }
497         }
498     }
499 
500     // Check for the wimpy mode environment variable
501     if (getenv("CL_WIMPY_MODE"))
502     {
503         vlog("\n");
504         vlog("*** Detected CL_WIMPY_MODE env                          ***\n");
505         gWimpyMode = 1;
506     }
507 
508     PrintArch();
509 
510     if (gWimpyMode)
511     {
512         vlog("\n");
513         vlog("*** WARNING: Testing in Wimpy mode!                     ***\n");
514         vlog("*** Wimpy mode is not sufficient to verify correctness. ***\n");
515         vlog("*** Wimpy Reduction Factor: %-27u ***\n\n",
516              gWimpyReductionFactor);
517     }
518 
519     if (singleThreaded) SetThreadCount(1);
520 
521     return 0;
522 }
523 
524 
PrintFunctions(void)525 static void PrintFunctions(void)
526 {
527     vlog("\nMath function names:\n");
528     for (size_t i = 0; i < functionListCount; i++)
529     {
530         vlog("\t%s\n", functionList[i].name);
531     }
532 }
533 
PrintUsage(void)534 static void PrintUsage(void)
535 {
536     vlog("%s [-cglsz]: <optional: math function names>\n", appName);
537     vlog("\toptions:\n");
538     vlog("\t\t-c\tToggle test fp correctly rounded divide and sqrt (Default: "
539          "off)\n");
540     vlog("\t\t-d\tToggle double precision testing. (Default: on iff khr_fp_64 "
541          "on)\n");
542     vlog("\t\t-f\tToggle float precision testing. (Default: on)\n");
543     vlog("\t\t-r\tToggle fast relaxed math precision testing. (Default: on)\n");
544     vlog("\t\t-e\tToggle test as derived implementations for fast relaxed math "
545          "precision. (Default: on)\n");
546     vlog("\t\t-h\tPrint this message and quit\n");
547     vlog("\t\t-p\tPrint all math function names and quit\n");
548     vlog("\t\t-l\tlink check only (make sure functions are present, skip "
549          "accuracy checks.)\n");
550     vlog("\t\t-m\tToggle run multi-threaded. (Default: on) )\n");
551     vlog("\t\t-s\tStop on error\n");
552     vlog("\t\t-w\tToggle Wimpy Mode, * Not a valid test * \n");
553     vlog("\t\t-[2^n]\tSet wimpy reduction factor, recommended range of n is "
554          "1-10, default factor(%u)\n",
555          gWimpyReductionFactor);
556     vlog("\t\t-b\tFill buffers on host instead of device. (Default: off)\n");
557     vlog("\t\t-z\tToggle FTZ mode (Section 6.5.3) for all functions. (Set by "
558          "device capabilities by default.)\n");
559     vlog("\t\t-v\tToggle Verbosity (Default: off)\n ");
560     vlog("\t\t-#\tTest only vector sizes #, e.g. \"-1\" tests scalar only, "
561          "\"-16\" tests 16-wide vectors only.\n");
562     vlog("\n\tYou may also pass a number instead of a function name.\n");
563     vlog("\tThis causes the first N tests to be skipped. The tests are "
564          "numbered.\n");
565     vlog("\tIf you pass a second number, that is the number tests to run after "
566          "the first one.\n");
567     vlog("\tA name list may be used in conjunction with a number range. In "
568          "that case,\n");
569     vlog("\tonly the named cases in the number range will run.\n");
570     vlog("\tYou may also choose to pass no arguments, in which case all tests "
571          "will be run.\n");
572     vlog("\tYou may pass CL_DEVICE_TYPE_CPU/GPU/ACCELERATOR to select the "
573          "device.\n");
574     vlog("\n");
575 }
576 
bruteforce_notify_callback(const char * errinfo,const void * private_info,size_t cb,void * user_data)577 static void CL_CALLBACK bruteforce_notify_callback(const char *errinfo,
578                                                    const void *private_info,
579                                                    size_t cb, void *user_data)
580 {
581     vlog("%s  (%p, %zd, %p)\n", errinfo, private_info, cb, user_data);
582 }
583 
InitCL(cl_device_id device)584 test_status InitCL(cl_device_id device)
585 {
586     int error;
587     uint32_t i;
588     cl_device_type device_type;
589 
590     error = clGetDeviceInfo(device, CL_DEVICE_TYPE, sizeof(device_type),
591                             &device_type, NULL);
592     if (error)
593     {
594         print_error(error, "Unable to get device type");
595         return TEST_FAIL;
596     }
597 
598     gDevice = device;
599 
600     // Check extensions
601     if (is_extension_available(gDevice, "cl_khr_fp64"))
602     {
603         gHasDouble ^= 1;
604 #if defined(CL_DEVICE_DOUBLE_FP_CONFIG)
605         cl_device_fp_config doubleCapabilities = 0;
606         if ((error = clGetDeviceInfo(gDevice, CL_DEVICE_DOUBLE_FP_CONFIG,
607                                      sizeof(doubleCapabilities),
608                                      &doubleCapabilities, NULL)))
609         {
610             vlog_error("ERROR: Unable to get device "
611                        "CL_DEVICE_DOUBLE_FP_CONFIG. (%d)\n",
612                        error);
613             return TEST_FAIL;
614         }
615 
616         if (DOUBLE_REQUIRED_FEATURES
617             != (doubleCapabilities & DOUBLE_REQUIRED_FEATURES))
618         {
619             std::string list;
620             if (0 == (doubleCapabilities & CL_FP_FMA)) list += "CL_FP_FMA, ";
621             if (0 == (doubleCapabilities & CL_FP_ROUND_TO_NEAREST))
622                 list += "CL_FP_ROUND_TO_NEAREST, ";
623             if (0 == (doubleCapabilities & CL_FP_ROUND_TO_ZERO))
624                 list += "CL_FP_ROUND_TO_ZERO, ";
625             if (0 == (doubleCapabilities & CL_FP_ROUND_TO_INF))
626                 list += "CL_FP_ROUND_TO_INF, ";
627             if (0 == (doubleCapabilities & CL_FP_INF_NAN))
628                 list += "CL_FP_INF_NAN, ";
629             if (0 == (doubleCapabilities & CL_FP_DENORM))
630                 list += "CL_FP_DENORM, ";
631             vlog_error("ERROR: required double features are missing: %s\n",
632                        list.c_str());
633 
634             return TEST_FAIL;
635         }
636 #else
637         vlog_error("FAIL: device says it supports cl_khr_fp64 but "
638                    "CL_DEVICE_DOUBLE_FP_CONFIG is not in the headers!\n");
639         return TEST_FAIL;
640 #endif
641     }
642 
643     uint32_t deviceFrequency = 0;
644     size_t configSize = sizeof(deviceFrequency);
645     if ((error = clGetDeviceInfo(gDevice, CL_DEVICE_MAX_CLOCK_FREQUENCY,
646                                  configSize, &deviceFrequency, NULL)))
647         deviceFrequency = 0;
648 
649     if ((error = clGetDeviceInfo(gDevice, CL_DEVICE_SINGLE_FP_CONFIG,
650                                  sizeof(gFloatCapabilities),
651                                  &gFloatCapabilities, NULL)))
652     {
653         vlog_error(
654             "ERROR: Unable to get device CL_DEVICE_SINGLE_FP_CONFIG. (%d)\n",
655             error);
656         return TEST_FAIL;
657     }
658 
659     gContext = clCreateContext(NULL, 1, &gDevice, bruteforce_notify_callback,
660                                NULL, &error);
661     if (NULL == gContext || error)
662     {
663         vlog_error("clCreateContext failed. (%d) \n", error);
664         return TEST_FAIL;
665     }
666 
667     gQueue = clCreateCommandQueue(gContext, gDevice, 0, &error);
668     if (NULL == gQueue || error)
669     {
670         vlog_error("clCreateCommandQueue failed. (%d)\n", error);
671         return TEST_FAIL;
672     }
673 
674     // Allocate buffers
675     cl_uint min_alignment = 0;
676     error = clGetDeviceInfo(gDevice, CL_DEVICE_MEM_BASE_ADDR_ALIGN,
677                             sizeof(cl_uint), (void *)&min_alignment, NULL);
678     if (CL_SUCCESS != error)
679     {
680         vlog_error("clGetDeviceInfo failed. (%d)\n", error);
681         return TEST_FAIL;
682     }
683     min_alignment >>= 3; // convert bits to bytes
684 
685     gIn = align_malloc(BUFFER_SIZE, min_alignment);
686     if (NULL == gIn) return TEST_FAIL;
687     gIn2 = align_malloc(BUFFER_SIZE, min_alignment);
688     if (NULL == gIn2) return TEST_FAIL;
689     gIn3 = align_malloc(BUFFER_SIZE, min_alignment);
690     if (NULL == gIn3) return TEST_FAIL;
691     gOut_Ref = align_malloc(BUFFER_SIZE, min_alignment);
692     if (NULL == gOut_Ref) return TEST_FAIL;
693     gOut_Ref2 = align_malloc(BUFFER_SIZE, min_alignment);
694     if (NULL == gOut_Ref2) return TEST_FAIL;
695 
696     for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
697     {
698         gOut[i] = align_malloc(BUFFER_SIZE, min_alignment);
699         if (NULL == gOut[i]) return TEST_FAIL;
700         gOut2[i] = align_malloc(BUFFER_SIZE, min_alignment);
701         if (NULL == gOut2[i]) return TEST_FAIL;
702     }
703 
704     cl_mem_flags device_flags = CL_MEM_READ_ONLY;
705     // save a copy on the host device to make this go faster
706     if (CL_DEVICE_TYPE_CPU == device_type)
707         device_flags |= CL_MEM_USE_HOST_PTR;
708     else
709         device_flags |= CL_MEM_COPY_HOST_PTR;
710 
711     // setup input buffers
712     gInBuffer =
713         clCreateBuffer(gContext, device_flags, BUFFER_SIZE, gIn, &error);
714     if (gInBuffer == NULL || error)
715     {
716         vlog_error("clCreateBuffer1 failed for input (%d)\n", error);
717         return TEST_FAIL;
718     }
719 
720     gInBuffer2 =
721         clCreateBuffer(gContext, device_flags, BUFFER_SIZE, gIn2, &error);
722     if (gInBuffer2 == NULL || error)
723     {
724         vlog_error("clCreateBuffer2 failed for input (%d)\n", error);
725         return TEST_FAIL;
726     }
727 
728     gInBuffer3 =
729         clCreateBuffer(gContext, device_flags, BUFFER_SIZE, gIn3, &error);
730     if (gInBuffer3 == NULL || error)
731     {
732         vlog_error("clCreateBuffer3 failed for input (%d)\n", error);
733         return TEST_FAIL;
734     }
735 
736 
737     // setup output buffers
738     device_flags = CL_MEM_READ_WRITE;
739     // save a copy on the host device to make this go faster
740     if (CL_DEVICE_TYPE_CPU == device_type)
741         device_flags |= CL_MEM_USE_HOST_PTR;
742     else
743         device_flags |= CL_MEM_COPY_HOST_PTR;
744     for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
745     {
746         gOutBuffer[i] = clCreateBuffer(gContext, device_flags, BUFFER_SIZE,
747                                        gOut[i], &error);
748         if (gOutBuffer[i] == NULL || error)
749         {
750             vlog_error("clCreateBuffer failed for output (%d)\n", error);
751             return TEST_FAIL;
752         }
753         gOutBuffer2[i] = clCreateBuffer(gContext, device_flags, BUFFER_SIZE,
754                                         gOut2[i], &error);
755         if (gOutBuffer2[i] == NULL || error)
756         {
757             vlog_error("clCreateBuffer2 failed for output (%d)\n", error);
758             return TEST_FAIL;
759         }
760     }
761 
762     // we are embedded, check current rounding mode
763     if (gIsEmbedded)
764     {
765         gIsInRTZMode = IsInRTZMode();
766     }
767 
768     // Check tininess detection
769     IsTininessDetectedBeforeRounding();
770 
771     cl_platform_id platform;
772     int err = clGetPlatformIDs(1, &platform, NULL);
773     if (err)
774     {
775         print_error(err, "clGetPlatformIDs failed");
776         return TEST_FAIL;
777     }
778 
779     char c[1024];
780     static const char *no_yes[] = { "NO", "YES" };
781     vlog("\nCompute Device info:\n");
782     clGetPlatformInfo(platform, CL_PLATFORM_VERSION, sizeof(c), &c, NULL);
783     vlog("\tPlatform Version: %s\n", c);
784     clGetDeviceInfo(gDevice, CL_DEVICE_NAME, sizeof(c), &c, NULL);
785     vlog("\tDevice Name: %s\n", c);
786     clGetDeviceInfo(gDevice, CL_DEVICE_VENDOR, sizeof(c), &c, NULL);
787     vlog("\tVendor: %s\n", c);
788     clGetDeviceInfo(gDevice, CL_DEVICE_VERSION, sizeof(c), &c, NULL);
789     vlog("\tDevice Version: %s\n", c);
790     clGetDeviceInfo(gDevice, CL_DEVICE_OPENCL_C_VERSION, sizeof(c), &c, NULL);
791     vlog("\tCL C Version: %s\n", c);
792     clGetDeviceInfo(gDevice, CL_DRIVER_VERSION, sizeof(c), &c, NULL);
793     vlog("\tDriver Version: %s\n", c);
794     vlog("\tDevice Frequency: %d MHz\n", deviceFrequency);
795     vlog("\tSubnormal values supported for floats? %s\n",
796          no_yes[0 != (CL_FP_DENORM & gFloatCapabilities)]);
797     vlog("\tCorrectly rounded divide and sqrt supported for floats? %s\n",
798          no_yes[0
799                 != (CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT & gFloatCapabilities)]);
800     if (gToggleCorrectlyRoundedDivideSqrt)
801     {
802         gFloatCapabilities ^= CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT;
803     }
804     vlog("\tTesting with correctly rounded float divide and sqrt? %s\n",
805          no_yes[0
806                 != (CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT & gFloatCapabilities)]);
807     vlog("\tTesting with FTZ mode ON for floats? %s\n",
808          no_yes[0 != gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities)]);
809     vlog("\tTesting single precision? %s\n", no_yes[0 != gTestFloat]);
810     vlog("\tTesting fast relaxed math? %s\n", no_yes[0 != gTestFastRelaxed]);
811     if (gTestFastRelaxed)
812     {
813         vlog("\tFast relaxed math has derived implementations? %s\n",
814              no_yes[0 != gFastRelaxedDerived]);
815     }
816     vlog("\tTesting double precision? %s\n", no_yes[0 != gHasDouble]);
817     if (sizeof(long double) == sizeof(double) && gHasDouble)
818     {
819         vlog("\n\t\tWARNING: Host system long double does not have better "
820              "precision than double!\n");
821         vlog("\t\t         All double results that do not match the reference "
822              "result have their reported\n");
823         vlog("\t\t         error inflated by 0.5 ulps to account for the fact "
824              "that this system\n");
825         vlog("\t\t         can not accurately represent the right result to an "
826              "accuracy closer\n");
827         vlog("\t\t         than half an ulp. See comments in "
828              "Bruteforce_Ulp_Error_Double() for more details.\n\n");
829     }
830 
831     vlog("\tIs Embedded? %s\n", no_yes[0 != gIsEmbedded]);
832     if (gIsEmbedded)
833         vlog("\tRunning in RTZ mode? %s\n", no_yes[0 != gIsInRTZMode]);
834     vlog("\tTininess is detected before rounding? %s\n",
835          no_yes[0 != gCheckTininessBeforeRounding]);
836     vlog("\tWorker threads: %d\n", GetThreadCount());
837     vlog("\tTesting vector sizes:");
838     for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
839         vlog("\t%d", sizeValues[i]);
840 
841     vlog("\n");
842     vlog("\tVerbose? %s\n", no_yes[0 != gVerboseBruteForce]);
843     vlog("\n\n");
844 
845     // Check to see if we are using single threaded mode on other than a 1.0
846     // device
847     if (getenv("CL_TEST_SINGLE_THREADED"))
848     {
849 
850         char device_version[1024] = { 0 };
851         clGetDeviceInfo(gDevice, CL_DEVICE_VERSION, sizeof(device_version),
852                         device_version, NULL);
853 
854         if (strcmp("OpenCL 1.0 ", device_version))
855         {
856             vlog("ERROR: CL_TEST_SINGLE_THREADED is set in the environment. "
857                  "Running single threaded.\n");
858         }
859     }
860 
861     return TEST_PASS;
862 }
863 
ReleaseCL(void)864 static void ReleaseCL(void)
865 {
866     uint32_t i;
867     clReleaseMemObject(gInBuffer);
868     clReleaseMemObject(gInBuffer2);
869     clReleaseMemObject(gInBuffer3);
870     for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
871     {
872         clReleaseMemObject(gOutBuffer[i]);
873         clReleaseMemObject(gOutBuffer2[i]);
874     }
875     clReleaseCommandQueue(gQueue);
876     clReleaseContext(gContext);
877 
878     align_free(gIn);
879     align_free(gIn2);
880     align_free(gIn3);
881     align_free(gOut_Ref);
882     align_free(gOut_Ref2);
883 
884     for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
885     {
886         align_free(gOut[i]);
887         align_free(gOut2[i]);
888     }
889 }
890 
_LogBuildError(cl_program p,int line,const char * file)891 void _LogBuildError(cl_program p, int line, const char *file)
892 {
893     char the_log[2048] = "";
894 
895     vlog_error("%s:%d: Build Log:\n", file, line);
896     if (0
897         == clGetProgramBuildInfo(p, gDevice, CL_PROGRAM_BUILD_LOG,
898                                  sizeof(the_log), the_log, NULL))
899         vlog_error("%s", the_log);
900     else
901         vlog_error("*** Error getting build log for program %p\n", p);
902 }
903 
InitILogbConstants(void)904 int InitILogbConstants(void)
905 {
906     int error;
907     const char *kernelSource =
908         R"(__kernel void GetILogBConstants( __global int *out )
909         {
910             out[0] = FP_ILOGB0;
911             out[1] = FP_ILOGBNAN;
912         })";
913 
914     clProgramWrapper query;
915     clKernelWrapper kernel;
916     error = create_single_kernel_helper(gContext, &query, &kernel, 1,
917                                         &kernelSource, "GetILogBConstants");
918     if (error != CL_SUCCESS)
919     {
920         vlog_error("Error: Unable to create kernel to get FP_ILOGB0 and "
921                    "FP_ILOGBNAN for the device. (%d)",
922                    error);
923         return error;
924     }
925 
926     if ((error =
927              clSetKernelArg(kernel, 0, sizeof(gOutBuffer[gMinVectorSizeIndex]),
928                             &gOutBuffer[gMinVectorSizeIndex])))
929     {
930         vlog_error("Error: Unable to set kernel arg to get FP_ILOGB0 and "
931                    "FP_ILOGBNAN for the device. Err = %d",
932                    error);
933         return error;
934     }
935 
936     size_t dim = 1;
937     if ((error = clEnqueueNDRangeKernel(gQueue, kernel, 1, NULL, &dim, NULL, 0,
938                                         NULL, NULL)))
939     {
940         vlog_error("Error: Unable to execute kernel to get FP_ILOGB0 and "
941                    "FP_ILOGBNAN for the device. Err = %d",
942                    error);
943         return error;
944     }
945 
946     struct
947     {
948         cl_int ilogb0, ilogbnan;
949     } data;
950     if ((error = clEnqueueReadBuffer(gQueue, gOutBuffer[gMinVectorSizeIndex],
951                                      CL_TRUE, 0, sizeof(data), &data, 0, NULL,
952                                      NULL)))
953     {
954         vlog_error("Error: unable to read FP_ILOGB0 and FP_ILOGBNAN from the "
955                    "device. Err = %d",
956                    error);
957         return error;
958     }
959 
960     gDeviceILogb0 = data.ilogb0;
961     gDeviceILogbNaN = data.ilogbnan;
962 
963     return 0;
964 }
965 
IsTininessDetectedBeforeRounding(void)966 int IsTininessDetectedBeforeRounding(void)
967 {
968     int error;
969     const char *kernelSource =
970         R"(__kernel void IsTininessDetectedBeforeRounding( __global float *out )
971         {
972            volatile float a = 0x1.000002p-126f;
973            volatile float b = 0x1.fffffcp-1f;
974            out[0] = a * b; // product is 0x1.fffffffffff8p-127
975         })";
976 
977     clProgramWrapper query;
978     clKernelWrapper kernel;
979     error =
980         create_single_kernel_helper(gContext, &query, &kernel, 1, &kernelSource,
981                                     "IsTininessDetectedBeforeRounding");
982     if (error != CL_SUCCESS)
983     {
984         vlog_error("Error: Unable to create kernel to detect how tininess is "
985                    "detected for the device. (%d)",
986                    error);
987         return error;
988     }
989 
990     if ((error =
991              clSetKernelArg(kernel, 0, sizeof(gOutBuffer[gMinVectorSizeIndex]),
992                             &gOutBuffer[gMinVectorSizeIndex])))
993     {
994         vlog_error("Error: Unable to set kernel arg to detect how tininess is "
995                    "detected  for the device. Err = %d",
996                    error);
997         return error;
998     }
999 
1000     size_t dim = 1;
1001     if ((error = clEnqueueNDRangeKernel(gQueue, kernel, 1, NULL, &dim, NULL, 0,
1002                                         NULL, NULL)))
1003     {
1004         vlog_error("Error: Unable to execute kernel to detect how tininess is "
1005                    "detected  for the device. Err = %d",
1006                    error);
1007         return error;
1008     }
1009 
1010     struct
1011     {
1012         cl_uint f;
1013     } data;
1014     if ((error = clEnqueueReadBuffer(gQueue, gOutBuffer[gMinVectorSizeIndex],
1015                                      CL_TRUE, 0, sizeof(data), &data, 0, NULL,
1016                                      NULL)))
1017     {
1018         vlog_error("Error: unable to read result from tininess test from the "
1019                    "device. Err = %d",
1020                    error);
1021         return error;
1022     }
1023 
1024     gCheckTininessBeforeRounding = 0 == (data.f & 0x7fffffff);
1025 
1026     return 0;
1027 }
1028 
IsInRTZMode(void)1029 static int IsInRTZMode(void)
1030 {
1031     int error;
1032     const char *kernelSource =
1033         R"(__kernel void GetRoundingMode( __global int *out )
1034         {
1035             volatile float a = 0x1.0p23f;
1036             volatile float b = -0x1.0p23f;
1037             out[0] = (a + 0x1.fffffep-1f == a) && (b - 0x1.fffffep-1f == b);
1038         })";
1039 
1040     clProgramWrapper query;
1041     clKernelWrapper kernel;
1042     error = create_single_kernel_helper(gContext, &query, &kernel, 1,
1043                                         &kernelSource, "GetRoundingMode");
1044     if (error != CL_SUCCESS)
1045     {
1046         vlog_error("Error: Unable to create kernel to detect RTZ mode for the "
1047                    "device. (%d)",
1048                    error);
1049         return error;
1050     }
1051 
1052     if ((error =
1053              clSetKernelArg(kernel, 0, sizeof(gOutBuffer[gMinVectorSizeIndex]),
1054                             &gOutBuffer[gMinVectorSizeIndex])))
1055     {
1056         vlog_error("Error: Unable to set kernel arg to detect RTZ mode for the "
1057                    "device. Err = %d",
1058                    error);
1059         return error;
1060     }
1061 
1062     size_t dim = 1;
1063     if ((error = clEnqueueNDRangeKernel(gQueue, kernel, 1, NULL, &dim, NULL, 0,
1064                                         NULL, NULL)))
1065     {
1066         vlog_error("Error: Unable to execute kernel to detect RTZ mode for the "
1067                    "device. Err = %d",
1068                    error);
1069         return error;
1070     }
1071 
1072     struct
1073     {
1074         cl_int isRTZ;
1075     } data;
1076     if ((error = clEnqueueReadBuffer(gQueue, gOutBuffer[gMinVectorSizeIndex],
1077                                      CL_TRUE, 0, sizeof(data), &data, 0, NULL,
1078                                      NULL)))
1079     {
1080         vlog_error(
1081             "Error: unable to read RTZ mode data from the device. Err = %d",
1082             error);
1083         return error;
1084     }
1085 
1086     return data.isRTZ;
1087 }
1088 
1089 #pragma mark -
1090 
1091 const char *sizeNames[VECTOR_SIZE_COUNT] = { "", "2", "3", "4", "8", "16" };
1092 const int sizeValues[VECTOR_SIZE_COUNT] = { 1, 2, 3, 4, 8, 16 };
1093 
1094 // TODO: There is another version of Ulp_Error_Double defined in
1095 // test_common/harness/errorHelpers.c
Bruteforce_Ulp_Error_Double(double test,long double reference)1096 float Bruteforce_Ulp_Error_Double(double test, long double reference)
1097 {
1098     // Check for Non-power-of-two and NaN
1099 
1100     // Note: This function presumes that someone has already tested whether the
1101     // result is correctly, rounded before calling this function.  That test:
1102     //
1103     //    if( (float) reference == test )
1104     //        return 0.0f;
1105     //
1106     // would ensure that cases like fabs(reference) > FLT_MAX are weeded out
1107     // before we get here. Otherwise, we'll return inf ulp error here, for what
1108     // are otherwise correctly rounded results.
1109 
1110     // Deal with long double = double
1111     // On most systems long double is a higher precision type than double. They
1112     // provide either a 80-bit or greater floating point type, or they provide a
1113     // head-tail double double format. That is sufficient to represent the
1114     // accuracy of a floating point result to many more bits than double and we
1115     // can calculate sub-ulp errors. This is the standard system for which this
1116     // test suite is designed.
1117     //
1118     // On some systems double and long double are the same thing. Then we run
1119     // into a problem, because our representation of the infinitely precise
1120     // result (passed in as reference above) can be off by as much as a half
1121     // double precision ulp itself.  In this case, we inflate the reported error
1122     // by half an ulp to take this into account.  A more correct and permanent
1123     // fix would be to undertake refactoring the reference code to return
1124     // results in this format:
1125     //
1126     //    typedef struct DoubleReference
1127     //    { // true value = correctlyRoundedResult + ulps *
1128     //    ulp(correctlyRoundedResult)        (infinitely precise)
1129     //        double  correctlyRoundedResult;     // as best we can
1130     //        double  ulps;                       // plus a fractional amount to
1131     //        account for the difference
1132     //    }DoubleReference;                       //     between infinitely
1133     //    precise result and correctlyRoundedResult, in units of ulps.
1134     //
1135     // This would provide a useful higher-than-double precision format for
1136     // everyone that we can use, and would solve a few problems with
1137     // representing absolute errors below DBL_MIN and over DBL_MAX for systems
1138     // that use a head to tail double double for long double.
1139 
1140     int x;
1141     long double testVal = test;
1142 
1143     // First, handle special reference values
1144     if (isinf(reference))
1145     {
1146         if (reference == testVal) return 0.0f;
1147 
1148         return INFINITY;
1149     }
1150 
1151     if (isnan(reference))
1152     {
1153         if (isnan(testVal)) return 0.0f;
1154 
1155         return INFINITY;
1156     }
1157 
1158     if (0.0L != reference && 0.5L != frexpl(reference, &x))
1159     { // Non-zero and Non-power of two
1160 
1161         // allow correctly rounded results to pass through unmolested. (We might
1162         // add error to it below.) There is something of a performance
1163         // optimization here.
1164         if (testVal == reference) return 0.0f;
1165 
1166         // The unbiased exponent of the ulp unit place
1167         int ulp_exp =
1168             DBL_MANT_DIG - 1 - std::max(ilogbl(reference), DBL_MIN_EXP - 1);
1169 
1170         // Scale the exponent of the error
1171         float result = (float)scalbnl(testVal - reference, ulp_exp);
1172 
1173         // account for rounding error in reference result on systems that do not
1174         // have a higher precision floating point type (see above)
1175         if (sizeof(long double) == sizeof(double))
1176             result += copysignf(0.5f, result);
1177 
1178         return result;
1179     }
1180 
1181     // reference is a normal power of two or a zero
1182     // The unbiased exponent of the ulp unit place
1183     int ulp_exp =
1184         DBL_MANT_DIG - 1 - std::max(ilogbl(reference) - 1, DBL_MIN_EXP - 1);
1185 
1186     // allow correctly rounded results to pass through unmolested. (We might add
1187     // error to it below.) There is something of a performance optimization here
1188     // too.
1189     if (testVal == reference) return 0.0f;
1190 
1191     // Scale the exponent of the error
1192     float result = (float)scalbnl(testVal - reference, ulp_exp);
1193 
1194     // account for rounding error in reference result on systems that do not
1195     // have a higher precision floating point type (see above)
1196     if (sizeof(long double) == sizeof(double))
1197         result += copysignf(0.5f, result);
1198 
1199     return result;
1200 }
1201 
Abs_Error(float test,double reference)1202 float Abs_Error(float test, double reference)
1203 {
1204     if (isnan(test) && isnan(reference)) return 0.0f;
1205     return fabs((float)(reference - (double)test));
1206 }
1207 
RoundUpToNextPowerOfTwo(cl_uint x)1208 cl_uint RoundUpToNextPowerOfTwo(cl_uint x)
1209 {
1210     if (0 == (x & (x - 1))) return x;
1211 
1212     while (x & (x - 1)) x &= x - 1;
1213 
1214     return x + x;
1215 }
1216