xref: /aosp_15_r20/external/OpenCL-CTS/test_conformance/math_brute_force/ternary_float.cpp (revision 6467f958c7de8070b317fc65bcb0f6472e388d82)
1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 
17 #include "common.h"
18 #include "function_list.h"
19 #include "test_functions.h"
20 #include "utility.h"
21 
22 #include <cinttypes>
23 #include <cstring>
24 
25 #define CORRECTLY_ROUNDED 0
26 #define FLUSHED 1
27 
28 namespace {
29 
BuildKernelFn(cl_uint job_id,cl_uint thread_id UNUSED,void * p)30 cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
31 {
32     BuildKernelInfo &info = *(BuildKernelInfo *)p;
33     auto generator = [](const std::string &kernel_name, const char *builtin,
34                         cl_uint vector_size_index) {
35         return GetTernaryKernel(kernel_name, builtin, ParameterType::Float,
36                                 ParameterType::Float, ParameterType::Float,
37                                 ParameterType::Float, vector_size_index);
38     };
39     return BuildKernels(info, job_id, generator);
40 }
41 
42 // A table of more difficult cases to get right
43 const float specialValues[] = {
44     -NAN,
45     -INFINITY,
46     -FLT_MAX,
47     MAKE_HEX_FLOAT(-0x1.000002p64f, -0x1000002L, 40),
48     MAKE_HEX_FLOAT(-0x1.0p64f, -0x1L, 64),
49     MAKE_HEX_FLOAT(-0x1.fffffep63f, -0x1fffffeL, 39),
50     MAKE_HEX_FLOAT(-0x1.000002p63f, -0x1000002L, 39),
51     MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63),
52     MAKE_HEX_FLOAT(-0x1.fffffep62f, -0x1fffffeL, 38),
53     -3.0f,
54     MAKE_HEX_FLOAT(-0x1.800002p1f, -0x1800002L, -23),
55     -2.5f,
56     MAKE_HEX_FLOAT(-0x1.7ffffep1f, -0x17ffffeL, -23),
57     -2.0f,
58     MAKE_HEX_FLOAT(-0x1.800002p0f, -0x1800002L, -24),
59     -1.75f,
60     -1.5f,
61     -1.25f,
62     MAKE_HEX_FLOAT(-0x1.7ffffep0f, -0x17ffffeL, -24),
63     MAKE_HEX_FLOAT(-0x1.000002p0f, -0x1000002L, -24),
64     MAKE_HEX_FLOAT(-0x1.003p0f, -0x1003000L, -24),
65     -MAKE_HEX_FLOAT(0x1.001p0f, 0x1001000L, -24),
66     -1.0f,
67     MAKE_HEX_FLOAT(-0x1.fffffep-1f, -0x1fffffeL, -25),
68     MAKE_HEX_FLOAT(-0x1.000002p-126f, -0x1000002L, -150),
69     -FLT_MIN,
70     MAKE_HEX_FLOAT(-0x0.fffffep-126f, -0x0fffffeL, -150),
71     MAKE_HEX_FLOAT(-0x0.000ffep-126f, -0x0000ffeL, -150),
72     MAKE_HEX_FLOAT(-0x0.0000fep-126f, -0x00000feL, -150),
73     MAKE_HEX_FLOAT(-0x0.00000ep-126f, -0x000000eL, -150),
74     MAKE_HEX_FLOAT(-0x0.00000cp-126f, -0x000000cL, -150),
75     MAKE_HEX_FLOAT(-0x0.00000ap-126f, -0x000000aL, -150),
76     MAKE_HEX_FLOAT(-0x0.000008p-126f, -0x0000008L, -150),
77     MAKE_HEX_FLOAT(-0x0.000006p-126f, -0x0000006L, -150),
78     MAKE_HEX_FLOAT(-0x0.000004p-126f, -0x0000004L, -150),
79     MAKE_HEX_FLOAT(-0x0.000002p-126f, -0x0000002L, -150),
80     -0.0f,
81 
82     +NAN,
83     +INFINITY,
84     +FLT_MAX,
85     MAKE_HEX_FLOAT(+0x1.000002p64f, +0x1000002L, 40),
86     MAKE_HEX_FLOAT(+0x1.0p64f, +0x1L, 64),
87     MAKE_HEX_FLOAT(+0x1.fffffep63f, +0x1fffffeL, 39),
88     MAKE_HEX_FLOAT(+0x1.000002p63f, +0x1000002L, 39),
89     MAKE_HEX_FLOAT(+0x1.0p63f, +0x1L, 63),
90     MAKE_HEX_FLOAT(+0x1.fffffep62f, +0x1fffffeL, 38),
91     +3.0f,
92     MAKE_HEX_FLOAT(+0x1.800002p1f, +0x1800002L, -23),
93     2.5f,
94     MAKE_HEX_FLOAT(+0x1.7ffffep1f, +0x17ffffeL, -23),
95     +2.0f,
96     MAKE_HEX_FLOAT(+0x1.800002p0f, +0x1800002L, -24),
97     1.75f,
98     1.5f,
99     1.25f,
100     MAKE_HEX_FLOAT(+0x1.7ffffep0f, +0x17ffffeL, -24),
101     MAKE_HEX_FLOAT(+0x1.000002p0f, +0x1000002L, -24),
102     MAKE_HEX_FLOAT(0x1.003p0f, 0x1003000L, -24),
103     +MAKE_HEX_FLOAT(0x1.001p0f, 0x1001000L, -24),
104     +1.0f,
105     MAKE_HEX_FLOAT(+0x1.fffffep-1f, +0x1fffffeL, -25),
106     MAKE_HEX_FLOAT(0x1.000002p-126f, 0x1000002L, -150),
107     +FLT_MIN,
108     MAKE_HEX_FLOAT(+0x0.fffffep-126f, +0x0fffffeL, -150),
109     MAKE_HEX_FLOAT(+0x0.000ffep-126f, +0x0000ffeL, -150),
110     MAKE_HEX_FLOAT(+0x0.0000fep-126f, +0x00000feL, -150),
111     MAKE_HEX_FLOAT(+0x0.00000ep-126f, +0x000000eL, -150),
112     MAKE_HEX_FLOAT(+0x0.00000cp-126f, +0x000000cL, -150),
113     MAKE_HEX_FLOAT(+0x0.00000ap-126f, +0x000000aL, -150),
114     MAKE_HEX_FLOAT(+0x0.000008p-126f, +0x0000008L, -150),
115     MAKE_HEX_FLOAT(+0x0.000006p-126f, +0x0000006L, -150),
116     MAKE_HEX_FLOAT(+0x0.000004p-126f, +0x0000004L, -150),
117     MAKE_HEX_FLOAT(+0x0.000002p-126f, +0x0000002L, -150),
118     +0.0f,
119 };
120 
121 constexpr size_t specialValuesCount =
122     sizeof(specialValues) / sizeof(specialValues[0]);
123 
124 } // anonymous namespace
125 
TestFunc_Float_Float_Float_Float(const Func * f,MTdata d,bool relaxedMode)126 int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
127 {
128     int error;
129 
130     logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
131 
132     Programs programs;
133     const unsigned thread_id = 0; // Test is currently not multithreaded.
134     KernelMatrix kernels;
135     float maxError = 0.0f;
136     int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
137     float maxErrorVal = 0.0f;
138     float maxErrorVal2 = 0.0f;
139     float maxErrorVal3 = 0.0f;
140     uint64_t step = getTestStep(sizeof(float), BUFFER_SIZE);
141 
142     cl_uchar overflow[BUFFER_SIZE / sizeof(float)];
143 
144     float float_ulps;
145     if (gIsEmbedded)
146         float_ulps = f->float_embedded_ulps;
147     else
148         float_ulps = f->float_ulps;
149 
150     int skipNanInf = (0 == strcmp("fma", f->nameInCode)) && !gInfNanSupport;
151 
152     // Init the kernels
153     BuildKernelInfo build_info{ 1, kernels, programs, f->nameInCode,
154                                 relaxedMode };
155     if ((error = ThreadPool_Do(BuildKernelFn,
156                                gMaxVectorSizeIndex - gMinVectorSizeIndex,
157                                &build_info)))
158         return error;
159 
160     for (uint64_t i = 0; i < (1ULL << 32); i += step)
161     {
162         // Init input array
163         cl_uint *p = (cl_uint *)gIn;
164         cl_uint *p2 = (cl_uint *)gIn2;
165         cl_uint *p3 = (cl_uint *)gIn3;
166         size_t idx = 0;
167 
168         if (i == 0)
169         { // test edge cases
170             float *fp = (float *)gIn;
171             float *fp2 = (float *)gIn2;
172             float *fp3 = (float *)gIn3;
173             uint32_t x, y, z;
174             x = y = z = 0;
175             for (; idx < BUFFER_SIZE / sizeof(float); idx++)
176             {
177                 fp[idx] = specialValues[x];
178                 fp2[idx] = specialValues[y];
179                 fp3[idx] = specialValues[z];
180 
181                 if (++x >= specialValuesCount)
182                 {
183                     x = 0;
184                     if (++y >= specialValuesCount)
185                     {
186                         y = 0;
187                         if (++z >= specialValuesCount) break;
188                     }
189                 }
190             }
191             if (idx == BUFFER_SIZE / sizeof(float))
192                 vlog_error("Test Error: not all special cases tested!\n");
193         }
194 
195         for (; idx < BUFFER_SIZE / sizeof(float); idx++)
196         {
197             p[idx] = genrand_int32(d);
198             p2[idx] = genrand_int32(d);
199             p3[idx] = genrand_int32(d);
200         }
201 
202         if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
203                                           BUFFER_SIZE, gIn, 0, NULL, NULL)))
204         {
205             vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
206             return error;
207         }
208 
209         if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0,
210                                           BUFFER_SIZE, gIn2, 0, NULL, NULL)))
211         {
212             vlog_error("\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error);
213             return error;
214         }
215 
216         if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer3, CL_FALSE, 0,
217                                           BUFFER_SIZE, gIn3, 0, NULL, NULL)))
218         {
219             vlog_error("\n*** Error %d in clEnqueueWriteBuffer3 ***\n", error);
220             return error;
221         }
222 
223         // Write garbage into output arrays
224         for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
225         {
226             uint32_t pattern = 0xffffdead;
227             if (gHostFill)
228             {
229                 memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
230                 if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j],
231                                                   CL_FALSE, 0, BUFFER_SIZE,
232                                                   gOut[j], 0, NULL, NULL)))
233                 {
234                     vlog_error(
235                         "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
236                         error, j);
237                     return error;
238                 }
239             }
240             else
241             {
242                 if ((error = clEnqueueFillBuffer(gQueue, gOutBuffer[j],
243                                                  &pattern, sizeof(pattern), 0,
244                                                  BUFFER_SIZE, 0, NULL, NULL)))
245                 {
246                     vlog_error("Error: clEnqueueFillBuffer failed! err: %d\n",
247                                error);
248                     return error;
249                 }
250             }
251         }
252 
253         // Run the kernels
254         for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
255         {
256             size_t vectorSize = sizeof(cl_float) * sizeValues[j];
257             size_t localCount = (BUFFER_SIZE + vectorSize - 1)
258                 / vectorSize; // BUFFER_SIZE / vectorSize  rounded up
259             if ((error = clSetKernelArg(kernels[j][thread_id], 0,
260                                         sizeof(gOutBuffer[j]), &gOutBuffer[j])))
261             {
262                 LogBuildError(programs[j]);
263                 return error;
264             }
265             if ((error = clSetKernelArg(kernels[j][thread_id], 1,
266                                         sizeof(gInBuffer), &gInBuffer)))
267             {
268                 LogBuildError(programs[j]);
269                 return error;
270             }
271             if ((error = clSetKernelArg(kernels[j][thread_id], 2,
272                                         sizeof(gInBuffer2), &gInBuffer2)))
273             {
274                 LogBuildError(programs[j]);
275                 return error;
276             }
277             if ((error = clSetKernelArg(kernels[j][thread_id], 3,
278                                         sizeof(gInBuffer3), &gInBuffer3)))
279             {
280                 LogBuildError(programs[j]);
281                 return error;
282             }
283 
284             if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
285                                                 1, NULL, &localCount, NULL, 0,
286                                                 NULL, NULL)))
287             {
288                 vlog_error("FAILED -- could not execute kernel\n");
289                 return error;
290             }
291         }
292 
293         // Get that moving
294         if ((error = clFlush(gQueue))) vlog("clFlush failed\n");
295 
296         // Calculate the correctly rounded reference result
297         float *r = (float *)gOut_Ref;
298         float *s = (float *)gIn;
299         float *s2 = (float *)gIn2;
300         float *s3 = (float *)gIn3;
301         if (skipNanInf)
302         {
303             for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
304             {
305                 feclearexcept(FE_OVERFLOW);
306                 r[j] =
307                     (float)f->func.f_fma(s[j], s2[j], s3[j], CORRECTLY_ROUNDED);
308                 overflow[j] =
309                     FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW));
310             }
311         }
312         else
313         {
314             for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
315                 r[j] =
316                     (float)f->func.f_fma(s[j], s2[j], s3[j], CORRECTLY_ROUNDED);
317         }
318 
319         // Read the data back
320         for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
321         {
322             if ((error =
323                      clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
324                                          BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
325             {
326                 vlog_error("ReadArray failed %d\n", error);
327                 return error;
328             }
329         }
330 
331         if (gSkipCorrectnessTesting) break;
332 
333         // Verify data
334         uint32_t *t = (uint32_t *)gOut_Ref;
335         for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
336         {
337             for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
338             {
339                 uint32_t *q = (uint32_t *)(gOut[k]);
340 
341                 // If we aren't getting the correctly rounded result
342                 if (t[j] != q[j])
343                 {
344                     float err;
345                     int fail;
346                     float test = ((float *)q)[j];
347                     float correct =
348                         f->func.f_fma(s[j], s2[j], s3[j], CORRECTLY_ROUNDED);
349 
350                     // Per section 10 paragraph 6, accept any result if an input
351                     // or output is a infinity or NaN or overflow
352                     if (skipNanInf)
353                     {
354                         if (overflow[j] || IsFloatInfinity(correct)
355                             || IsFloatNaN(correct) || IsFloatInfinity(s[j])
356                             || IsFloatNaN(s[j]) || IsFloatInfinity(s2[j])
357                             || IsFloatNaN(s2[j]) || IsFloatInfinity(s3[j])
358                             || IsFloatNaN(s3[j]))
359                             continue;
360                     }
361 
362 
363                     err = Ulp_Error(test, correct);
364                     fail = !(fabsf(err) <= float_ulps);
365 
366                     if (fail && (ftz || relaxedMode))
367                     {
368                         float correct2, err2;
369 
370                         // retry per section 6.5.3.2  with flushing on
371                         if (0.0f == test
372                             && 0.0f
373                                 == f->func.f_fma(s[j], s2[j], s3[j], FLUSHED))
374                         {
375                             fail = 0;
376                             err = 0.0f;
377                         }
378 
379                         // retry per section 6.5.3.3
380                         if (fail && IsFloatSubnormal(s[j]))
381                         { // look at me,
382                             float err3, correct3;
383 
384                             if (skipNanInf) feclearexcept(FE_OVERFLOW);
385 
386                             correct2 = f->func.f_fma(0.0f, s2[j], s3[j],
387                                                      CORRECTLY_ROUNDED);
388                             correct3 = f->func.f_fma(-0.0f, s2[j], s3[j],
389                                                      CORRECTLY_ROUNDED);
390 
391                             if (skipNanInf)
392                             {
393                                 if (fetestexcept(FE_OVERFLOW)) continue;
394 
395                                 // Note: no double rounding here.  Reference
396                                 // functions calculate in single precision.
397                                 if (IsFloatInfinity(correct2)
398                                     || IsFloatNaN(correct2)
399                                     || IsFloatInfinity(correct3)
400                                     || IsFloatNaN(correct3))
401                                     continue;
402                             }
403 
404                             err2 = Ulp_Error(test, correct2);
405                             err3 = Ulp_Error(test, correct3);
406                             fail = fail
407                                 && ((!(fabsf(err2) <= float_ulps))
408                                     && (!(fabsf(err3) <= float_ulps)));
409                             if (fabsf(err2) < fabsf(err)) err = err2;
410                             if (fabsf(err3) < fabsf(err)) err = err3;
411 
412                             // retry per section 6.5.3.4
413                             if (0.0f == test
414                                 && (0.0f
415                                         == f->func.f_fma(0.0f, s2[j], s3[j],
416                                                          FLUSHED)
417                                     || 0.0f
418                                         == f->func.f_fma(-0.0f, s2[j], s3[j],
419                                                          FLUSHED)))
420                             {
421                                 fail = 0;
422                                 err = 0.0f;
423                             }
424 
425                             // try with first two args as zero
426                             if (IsFloatSubnormal(s2[j]))
427                             { // its fun to have fun,
428                                 double correct4, correct5;
429                                 float err4, err5;
430 
431                                 if (skipNanInf) feclearexcept(FE_OVERFLOW);
432 
433                                 correct2 = f->func.f_fma(0.0f, 0.0f, s3[j],
434                                                          CORRECTLY_ROUNDED);
435                                 correct3 = f->func.f_fma(-0.0f, 0.0f, s3[j],
436                                                          CORRECTLY_ROUNDED);
437                                 correct4 = f->func.f_fma(0.0f, -0.0f, s3[j],
438                                                          CORRECTLY_ROUNDED);
439                                 correct5 = f->func.f_fma(-0.0f, -0.0f, s3[j],
440                                                          CORRECTLY_ROUNDED);
441 
442                                 // Per section 10 paragraph 6, accept any result
443                                 // if an input or output is a infinity or NaN or
444                                 // overflow
445                                 if (!gInfNanSupport)
446                                 {
447                                     if (fetestexcept(FE_OVERFLOW)) continue;
448 
449                                     // Note: no double rounding here.  Reference
450                                     // functions calculate in single precision.
451                                     if (IsFloatInfinity(correct2)
452                                         || IsFloatNaN(correct2)
453                                         || IsFloatInfinity(correct3)
454                                         || IsFloatNaN(correct3)
455                                         || IsFloatInfinity(correct4)
456                                         || IsFloatNaN(correct4)
457                                         || IsFloatInfinity(correct5)
458                                         || IsFloatNaN(correct5))
459                                         continue;
460                                 }
461 
462                                 err2 = Ulp_Error(test, correct2);
463                                 err3 = Ulp_Error(test, correct3);
464                                 err4 = Ulp_Error(test, correct4);
465                                 err5 = Ulp_Error(test, correct5);
466                                 fail = fail
467                                     && ((!(fabsf(err2) <= float_ulps))
468                                         && (!(fabsf(err3) <= float_ulps))
469                                         && (!(fabsf(err4) <= float_ulps))
470                                         && (!(fabsf(err5) <= float_ulps)));
471                                 if (fabsf(err2) < fabsf(err)) err = err2;
472                                 if (fabsf(err3) < fabsf(err)) err = err3;
473                                 if (fabsf(err4) < fabsf(err)) err = err4;
474                                 if (fabsf(err5) < fabsf(err)) err = err5;
475 
476                                 // retry per section 6.5.3.4
477                                 if (0.0f == test
478                                     && (0.0f
479                                             == f->func.f_fma(0.0f, 0.0f, s3[j],
480                                                              FLUSHED)
481                                         || 0.0f
482                                             == f->func.f_fma(-0.0f, 0.0f, s3[j],
483                                                              FLUSHED)
484                                         || 0.0f
485                                             == f->func.f_fma(0.0f, -0.0f, s3[j],
486                                                              FLUSHED)
487                                         || 0.0f
488                                             == f->func.f_fma(-0.0f, -0.0f,
489                                                              s3[j], FLUSHED)))
490                                 {
491                                     fail = 0;
492                                     err = 0.0f;
493                                 }
494 
495                                 if (IsFloatSubnormal(s3[j]))
496                                 {
497                                     if (test == 0.0f) // 0*0+0 is 0
498                                     {
499                                         fail = 0;
500                                         err = 0.0f;
501                                     }
502                                 }
503                             }
504                             else if (IsFloatSubnormal(s3[j]))
505                             {
506                                 double correct4, correct5;
507                                 float err4, err5;
508 
509                                 if (skipNanInf) feclearexcept(FE_OVERFLOW);
510 
511                                 correct2 = f->func.f_fma(0.0f, s2[j], 0.0f,
512                                                          CORRECTLY_ROUNDED);
513                                 correct3 = f->func.f_fma(-0.0f, s2[j], 0.0f,
514                                                          CORRECTLY_ROUNDED);
515                                 correct4 = f->func.f_fma(0.0f, s2[j], -0.0f,
516                                                          CORRECTLY_ROUNDED);
517                                 correct5 = f->func.f_fma(-0.0f, s2[j], -0.0f,
518                                                          CORRECTLY_ROUNDED);
519 
520                                 // Per section 10 paragraph 6, accept any result
521                                 // if an input or output is a infinity or NaN or
522                                 // overflow
523                                 if (!gInfNanSupport)
524                                 {
525                                     if (fetestexcept(FE_OVERFLOW)) continue;
526 
527                                     // Note: no double rounding here.  Reference
528                                     // functions calculate in single precision.
529                                     if (IsFloatInfinity(correct2)
530                                         || IsFloatNaN(correct2)
531                                         || IsFloatInfinity(correct3)
532                                         || IsFloatNaN(correct3)
533                                         || IsFloatInfinity(correct4)
534                                         || IsFloatNaN(correct4)
535                                         || IsFloatInfinity(correct5)
536                                         || IsFloatNaN(correct5))
537                                         continue;
538                                 }
539 
540                                 err2 = Ulp_Error(test, correct2);
541                                 err3 = Ulp_Error(test, correct3);
542                                 err4 = Ulp_Error(test, correct4);
543                                 err5 = Ulp_Error(test, correct5);
544                                 fail = fail
545                                     && ((!(fabsf(err2) <= float_ulps))
546                                         && (!(fabsf(err3) <= float_ulps))
547                                         && (!(fabsf(err4) <= float_ulps))
548                                         && (!(fabsf(err5) <= float_ulps)));
549                                 if (fabsf(err2) < fabsf(err)) err = err2;
550                                 if (fabsf(err3) < fabsf(err)) err = err3;
551                                 if (fabsf(err4) < fabsf(err)) err = err4;
552                                 if (fabsf(err5) < fabsf(err)) err = err5;
553 
554                                 // retry per section 6.5.3.4
555                                 if (0.0f == test
556                                     && (0.0f
557                                             == f->func.f_fma(0.0f, s2[j], 0.0f,
558                                                              FLUSHED)
559                                         || 0.0f
560                                             == f->func.f_fma(-0.0f, s2[j], 0.0f,
561                                                              FLUSHED)
562                                         || 0.0f
563                                             == f->func.f_fma(0.0f, s2[j], -0.0f,
564                                                              FLUSHED)
565                                         || 0.0f
566                                             == f->func.f_fma(-0.0f, s2[j],
567                                                              -0.0f, FLUSHED)))
568                                 {
569                                     fail = 0;
570                                     err = 0.0f;
571                                 }
572                             }
573                         }
574                         else if (fail && IsFloatSubnormal(s2[j]))
575                         {
576                             double correct2, correct3;
577                             float err2, err3;
578 
579                             if (skipNanInf) feclearexcept(FE_OVERFLOW);
580 
581                             correct2 = f->func.f_fma(s[j], 0.0f, s3[j],
582                                                      CORRECTLY_ROUNDED);
583                             correct3 = f->func.f_fma(s[j], -0.0f, s3[j],
584                                                      CORRECTLY_ROUNDED);
585 
586                             if (skipNanInf)
587                             {
588                                 if (fetestexcept(FE_OVERFLOW)) continue;
589 
590                                 // Note: no double rounding here.  Reference
591                                 // functions calculate in single precision.
592                                 if (IsFloatInfinity(correct2)
593                                     || IsFloatNaN(correct2)
594                                     || IsFloatInfinity(correct3)
595                                     || IsFloatNaN(correct3))
596                                     continue;
597                             }
598 
599                             err2 = Ulp_Error(test, correct2);
600                             err3 = Ulp_Error(test, correct3);
601                             fail = fail
602                                 && ((!(fabsf(err2) <= float_ulps))
603                                     && (!(fabsf(err3) <= float_ulps)));
604                             if (fabsf(err2) < fabsf(err)) err = err2;
605                             if (fabsf(err3) < fabsf(err)) err = err3;
606 
607                             // retry per section 6.5.3.4
608                             if (0.0f == test
609                                 && (0.0f
610                                         == f->func.f_fma(s[j], 0.0f, s3[j],
611                                                          FLUSHED)
612                                     || 0.0f
613                                         == f->func.f_fma(s[j], -0.0f, s3[j],
614                                                          FLUSHED)))
615                             {
616                                 fail = 0;
617                                 err = 0.0f;
618                             }
619 
620                             // try with second two args as zero
621                             if (IsFloatSubnormal(s3[j]))
622                             {
623                                 double correct4, correct5;
624                                 float err4, err5;
625 
626                                 if (skipNanInf) feclearexcept(FE_OVERFLOW);
627 
628                                 correct2 = f->func.f_fma(s[j], 0.0f, 0.0f,
629                                                          CORRECTLY_ROUNDED);
630                                 correct3 = f->func.f_fma(s[j], -0.0f, 0.0f,
631                                                          CORRECTLY_ROUNDED);
632                                 correct4 = f->func.f_fma(s[j], 0.0f, -0.0f,
633                                                          CORRECTLY_ROUNDED);
634                                 correct5 = f->func.f_fma(s[j], -0.0f, -0.0f,
635                                                          CORRECTLY_ROUNDED);
636 
637                                 // Per section 10 paragraph 6, accept any result
638                                 // if an input or output is a infinity or NaN or
639                                 // overflow
640                                 if (!gInfNanSupport)
641                                 {
642                                     if (fetestexcept(FE_OVERFLOW)) continue;
643 
644                                     // Note: no double rounding here.  Reference
645                                     // functions calculate in single precision.
646                                     if (IsFloatInfinity(correct2)
647                                         || IsFloatNaN(correct2)
648                                         || IsFloatInfinity(correct3)
649                                         || IsFloatNaN(correct3)
650                                         || IsFloatInfinity(correct4)
651                                         || IsFloatNaN(correct4)
652                                         || IsFloatInfinity(correct5)
653                                         || IsFloatNaN(correct5))
654                                         continue;
655                                 }
656 
657                                 err2 = Ulp_Error(test, correct2);
658                                 err3 = Ulp_Error(test, correct3);
659                                 err4 = Ulp_Error(test, correct4);
660                                 err5 = Ulp_Error(test, correct5);
661                                 fail = fail
662                                     && ((!(fabsf(err2) <= float_ulps))
663                                         && (!(fabsf(err3) <= float_ulps))
664                                         && (!(fabsf(err4) <= float_ulps))
665                                         && (!(fabsf(err5) <= float_ulps)));
666                                 if (fabsf(err2) < fabsf(err)) err = err2;
667                                 if (fabsf(err3) < fabsf(err)) err = err3;
668                                 if (fabsf(err4) < fabsf(err)) err = err4;
669                                 if (fabsf(err5) < fabsf(err)) err = err5;
670 
671                                 // retry per section 6.5.3.4
672                                 if (0.0f == test
673                                     && (0.0f
674                                             == f->func.f_fma(s[j], 0.0f, 0.0f,
675                                                              FLUSHED)
676                                         || 0.0f
677                                             == f->func.f_fma(s[j], -0.0f, 0.0f,
678                                                              FLUSHED)
679                                         || 0.0f
680                                             == f->func.f_fma(s[j], 0.0f, -0.0f,
681                                                              FLUSHED)
682                                         || 0.0f
683                                             == f->func.f_fma(s[j], -0.0f, -0.0f,
684                                                              FLUSHED)))
685                                 {
686                                     fail = 0;
687                                     err = 0.0f;
688                                 }
689                             }
690                         }
691                         else if (fail && IsFloatSubnormal(s3[j]))
692                         {
693                             double correct2, correct3;
694                             float err2, err3;
695 
696                             if (skipNanInf) feclearexcept(FE_OVERFLOW);
697 
698                             correct2 = f->func.f_fma(s[j], s2[j], 0.0f,
699                                                      CORRECTLY_ROUNDED);
700                             correct3 = f->func.f_fma(s[j], s2[j], -0.0f,
701                                                      CORRECTLY_ROUNDED);
702 
703                             if (skipNanInf)
704                             {
705                                 if (fetestexcept(FE_OVERFLOW)) continue;
706 
707                                 // Note: no double rounding here.  Reference
708                                 // functions calculate in single precision.
709                                 if (IsFloatInfinity(correct2)
710                                     || IsFloatNaN(correct2)
711                                     || IsFloatInfinity(correct3)
712                                     || IsFloatNaN(correct3))
713                                     continue;
714                             }
715 
716                             err2 = Ulp_Error(test, correct2);
717                             err3 = Ulp_Error(test, correct3);
718                             fail = fail
719                                 && ((!(fabsf(err2) <= float_ulps))
720                                     && (!(fabsf(err3) <= float_ulps)));
721                             if (fabsf(err2) < fabsf(err)) err = err2;
722                             if (fabsf(err3) < fabsf(err)) err = err3;
723 
724                             // retry per section 6.5.3.4
725                             if (0.0f == test
726                                 && (0.0f
727                                         == f->func.f_fma(s[j], s2[j], 0.0f,
728                                                          FLUSHED)
729                                     || 0.0f
730                                         == f->func.f_fma(s[j], s2[j], -0.0f,
731                                                          FLUSHED)))
732                             {
733                                 fail = 0;
734                                 err = 0.0f;
735                             }
736                         }
737                     }
738 
739                     if (fabsf(err) > maxError)
740                     {
741                         maxError = fabsf(err);
742                         maxErrorVal = s[j];
743                         maxErrorVal2 = s2[j];
744                         maxErrorVal3 = s3[j];
745                     }
746 
747                     if (fail)
748                     {
749                         vlog_error(
750                             "\nERROR: %s%s: %f ulp error at {%a, %a, %a} "
751                             "({0x%8.8x, 0x%8.8x, 0x%8.8x}): *%a vs. %a\n",
752                             f->name, sizeNames[k], err, s[j], s2[j], s3[j],
753                             ((cl_uint *)s)[j], ((cl_uint *)s2)[j],
754                             ((cl_uint *)s3)[j], ((float *)gOut_Ref)[j], test);
755                         return -1;
756                     }
757                 }
758             }
759         }
760 
761         if (0 == (i & 0x0fffffff))
762         {
763             if (gVerboseBruteForce)
764             {
765                 vlog("base:%14" PRIu64 " step:%10" PRIu64 " bufferSize:%10d \n",
766                      i, step, BUFFER_SIZE);
767             }
768             else
769             {
770                 vlog(".");
771             }
772             fflush(stdout);
773         }
774     }
775 
776     if (!gSkipCorrectnessTesting)
777     {
778         if (gWimpyMode)
779             vlog("Wimp pass");
780         else
781             vlog("passed");
782 
783         vlog("\t%8.2f @ {%a, %a, %a}", maxError, maxErrorVal, maxErrorVal2,
784              maxErrorVal3);
785     }
786 
787     vlog("\n");
788 
789     return CL_SUCCESS;
790 }
791