//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

#include "common.h"
#include "function_list.h"
#include "test_functions.h"
#include "utility.h"

#include <cinttypes>
#include <cstring>

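// Flush control values passed as the last argument of the reference function
// (f->func.f_fma): CORRECTLY_ROUNDED computes the reference result as-is,
// FLUSHED is used by the flush-to-zero retries in the verification loop below.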
#define CORRECTLY_ROUNDED 0
#define FLUSHED 1

namespace {

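// ThreadPool callback: builds the kernels for one vector size index. The
// generated kernel evaluates the ternary builtin under test with float
// arguments and a float result.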
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
{
    BuildKernelInfo &info = *(BuildKernelInfo *)p;
    auto generator = [](const std::string &kernel_name, const char *builtin,
                        cl_uint vector_size_index) {
        return GetTernaryKernel(kernel_name, builtin, ParameterType::Float,
                                ParameterType::Float, ParameterType::Float,
                                ParameterType::Float, vector_size_index);
    };
    return BuildKernels(info, job_id, generator);
}

// A table of more difficult cases to get right
const float specialValues[] = {
    -NAN,
    -INFINITY,
    -FLT_MAX,
    MAKE_HEX_FLOAT(-0x1.000002p64f, -0x1000002L, 40),
    MAKE_HEX_FLOAT(-0x1.0p64f, -0x1L, 64),
    MAKE_HEX_FLOAT(-0x1.fffffep63f, -0x1fffffeL, 39),
    MAKE_HEX_FLOAT(-0x1.000002p63f, -0x1000002L, 39),
    MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63),
    MAKE_HEX_FLOAT(-0x1.fffffep62f, -0x1fffffeL, 38),
    -3.0f,
    MAKE_HEX_FLOAT(-0x1.800002p1f, -0x1800002L, -23),
    -2.5f,
    MAKE_HEX_FLOAT(-0x1.7ffffep1f, -0x17ffffeL, -23),
    -2.0f,
    MAKE_HEX_FLOAT(-0x1.800002p0f, -0x1800002L, -24),
    -1.75f,
    -1.5f,
    -1.25f,
    MAKE_HEX_FLOAT(-0x1.7ffffep0f, -0x17ffffeL, -24),
    MAKE_HEX_FLOAT(-0x1.000002p0f, -0x1000002L, -24),
    MAKE_HEX_FLOAT(-0x1.003p0f, -0x1003000L, -24),
    -MAKE_HEX_FLOAT(0x1.001p0f, 0x1001000L, -24),
    -1.0f,
    MAKE_HEX_FLOAT(-0x1.fffffep-1f, -0x1fffffeL, -25),
    MAKE_HEX_FLOAT(-0x1.000002p-126f, -0x1000002L, -150),
    -FLT_MIN,
    MAKE_HEX_FLOAT(-0x0.fffffep-126f, -0x0fffffeL, -150),
    MAKE_HEX_FLOAT(-0x0.000ffep-126f, -0x0000ffeL, -150),
    MAKE_HEX_FLOAT(-0x0.0000fep-126f, -0x00000feL, -150),
    MAKE_HEX_FLOAT(-0x0.00000ep-126f, -0x000000eL, -150),
    MAKE_HEX_FLOAT(-0x0.00000cp-126f, -0x000000cL, -150),
    MAKE_HEX_FLOAT(-0x0.00000ap-126f, -0x000000aL, -150),
    MAKE_HEX_FLOAT(-0x0.000008p-126f, -0x0000008L, -150),
    MAKE_HEX_FLOAT(-0x0.000006p-126f, -0x0000006L, -150),
    MAKE_HEX_FLOAT(-0x0.000004p-126f, -0x0000004L, -150),
    MAKE_HEX_FLOAT(-0x0.000002p-126f, -0x0000002L, -150),
    -0.0f,

    +NAN,
    +INFINITY,
    +FLT_MAX,
    MAKE_HEX_FLOAT(+0x1.000002p64f, +0x1000002L, 40),
    MAKE_HEX_FLOAT(+0x1.0p64f, +0x1L, 64),
    MAKE_HEX_FLOAT(+0x1.fffffep63f, +0x1fffffeL, 39),
    MAKE_HEX_FLOAT(+0x1.000002p63f, +0x1000002L, 39),
    MAKE_HEX_FLOAT(+0x1.0p63f, +0x1L, 63),
    MAKE_HEX_FLOAT(+0x1.fffffep62f, +0x1fffffeL, 38),
    +3.0f,
    MAKE_HEX_FLOAT(+0x1.800002p1f, +0x1800002L, -23),
    2.5f,
    MAKE_HEX_FLOAT(+0x1.7ffffep1f, +0x17ffffeL, -23),
    +2.0f,
    MAKE_HEX_FLOAT(+0x1.800002p0f, +0x1800002L, -24),
    1.75f,
    1.5f,
    1.25f,
    MAKE_HEX_FLOAT(+0x1.7ffffep0f, +0x17ffffeL, -24),
    MAKE_HEX_FLOAT(+0x1.000002p0f, +0x1000002L, -24),
    MAKE_HEX_FLOAT(0x1.003p0f, 0x1003000L, -24),
    +MAKE_HEX_FLOAT(0x1.001p0f, 0x1001000L, -24),
    +1.0f,
    MAKE_HEX_FLOAT(+0x1.fffffep-1f, +0x1fffffeL, -25),
    MAKE_HEX_FLOAT(0x1.000002p-126f, 0x1000002L, -150),
    +FLT_MIN,
    MAKE_HEX_FLOAT(+0x0.fffffep-126f, +0x0fffffeL, -150),
    MAKE_HEX_FLOAT(+0x0.000ffep-126f, +0x0000ffeL, -150),
    MAKE_HEX_FLOAT(+0x0.0000fep-126f, +0x00000feL, -150),
    MAKE_HEX_FLOAT(+0x0.00000ep-126f, +0x000000eL, -150),
    MAKE_HEX_FLOAT(+0x0.00000cp-126f, +0x000000cL, -150),
    MAKE_HEX_FLOAT(+0x0.00000ap-126f, +0x000000aL, -150),
    MAKE_HEX_FLOAT(+0x0.000008p-126f, +0x0000008L, -150),
    MAKE_HEX_FLOAT(+0x0.000006p-126f, +0x0000006L, -150),
    MAKE_HEX_FLOAT(+0x0.000004p-126f, +0x0000004L, -150),
    MAKE_HEX_FLOAT(+0x0.000002p-126f, +0x0000002L, -150),
    +0.0f,
};

constexpr size_t specialValuesCount =
    sizeof(specialValues) / sizeof(specialValues[0]);

} // anonymous namespace

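// Brute-force test for functions of the form float foo(float, float, float),
// such as fma. Steps through the 32-bit input space, comparing device results
// against a correctly rounded host reference, with the FTZ and relaxed-mode
// exceptions allowed by the OpenCL specification.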
int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
{
    int error;

    logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);

    Programs programs;
    const unsigned thread_id = 0; // Test is currently not multithreaded.
    KernelMatrix kernels;
    float maxError = 0.0f;
    int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
    float maxErrorVal = 0.0f;
    float maxErrorVal2 = 0.0f;
    float maxErrorVal3 = 0.0f;
    uint64_t step = getTestStep(sizeof(float), BUFFER_SIZE);

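    // Per-element record of whether the reference calculation overflowed;
    // only consulted when Inf/NaN results cannot be checked (see skipNanInf).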
    cl_uchar overflow[BUFFER_SIZE / sizeof(float)];

    float float_ulps;
    if (gIsEmbedded)
        float_ulps = f->float_embedded_ulps;
    else
        float_ulps = f->float_ulps;

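    // When testing fma on a device without Inf/NaN support, results involving
    // infinities, NaNs or overflow cannot be verified and are skipped below
    // (see OpenCL spec section 10).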
    int skipNanInf = (0 == strcmp("fma", f->nameInCode)) && !gInfNanSupport;

    // Init the kernels
    BuildKernelInfo build_info{ 1, kernels, programs, f->nameInCode,
                                relaxedMode };
    if ((error = ThreadPool_Do(BuildKernelFn,
                               gMaxVectorSizeIndex - gMinVectorSizeIndex,
                               &build_info)))
        return error;

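    // Step through the 32-bit input space in BUFFER_SIZE chunks; the first
    // iteration exercises combinations of the special values above, later
    // iterations use random inputs.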
    for (uint64_t i = 0; i < (1ULL << 32); i += step)
    {
        // Init input array
        cl_uint *p = (cl_uint *)gIn;
        cl_uint *p2 = (cl_uint *)gIn2;
        cl_uint *p3 = (cl_uint *)gIn3;
        size_t idx = 0;

        if (i == 0)
        { // test edge cases
            float *fp = (float *)gIn;
            float *fp2 = (float *)gIn2;
            float *fp3 = (float *)gIn3;
            uint32_t x, y, z;
            x = y = z = 0;
            for (; idx < BUFFER_SIZE / sizeof(float); idx++)
            {
                fp[idx] = specialValues[x];
                fp2[idx] = specialValues[y];
                fp3[idx] = specialValues[z];

                if (++x >= specialValuesCount)
                {
                    x = 0;
                    if (++y >= specialValuesCount)
                    {
                        y = 0;
                        if (++z >= specialValuesCount) break;
                    }
                }
            }
            if (idx == BUFFER_SIZE / sizeof(float))
                vlog_error("Test Error: not all special cases tested!\n");
        }

        for (; idx < BUFFER_SIZE / sizeof(float); idx++)
        {
            p[idx] = genrand_int32(d);
            p2[idx] = genrand_int32(d);
            p3[idx] = genrand_int32(d);
        }

        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
                                          BUFFER_SIZE, gIn, 0, NULL, NULL)))
        {
            vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
            return error;
        }

        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0,
                                          BUFFER_SIZE, gIn2, 0, NULL, NULL)))
        {
            vlog_error("\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error);
            return error;
        }

        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer3, CL_FALSE, 0,
                                          BUFFER_SIZE, gIn3, 0, NULL, NULL)))
        {
            vlog_error("\n*** Error %d in clEnqueueWriteBuffer3 ***\n", error);
            return error;
        }

        // Write garbage into the output arrays so that stale results from an
        // earlier run cannot pass verification
        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
            uint32_t pattern = 0xffffdead;
            if (gHostFill)
            {
                memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
                if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j],
                                                  CL_FALSE, 0, BUFFER_SIZE,
                                                  gOut[j], 0, NULL, NULL)))
                {
                    vlog_error(
                        "\n*** Error %d in clEnqueueWriteBuffer(gOutBuffer[%d]) ***\n",
                        error, j);
                    return error;
                }
            }
            else
            {
                if ((error = clEnqueueFillBuffer(gQueue, gOutBuffer[j],
                                                 &pattern, sizeof(pattern), 0,
                                                 BUFFER_SIZE, 0, NULL, NULL)))
                {
                    vlog_error("Error: clEnqueueFillBuffer failed! err: %d\n",
                               error);
                    return error;
                }
            }
        }

        // Run the kernels
        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
            size_t vectorSize = sizeof(cl_float) * sizeValues[j];
            size_t localCount = (BUFFER_SIZE + vectorSize - 1)
                / vectorSize; // BUFFER_SIZE / vectorSize rounded up
            if ((error = clSetKernelArg(kernels[j][thread_id], 0,
                                        sizeof(gOutBuffer[j]), &gOutBuffer[j])))
            {
                LogBuildError(programs[j]);
                return error;
            }
            if ((error = clSetKernelArg(kernels[j][thread_id], 1,
                                        sizeof(gInBuffer), &gInBuffer)))
            {
                LogBuildError(programs[j]);
                return error;
            }
            if ((error = clSetKernelArg(kernels[j][thread_id], 2,
                                        sizeof(gInBuffer2), &gInBuffer2)))
            {
                LogBuildError(programs[j]);
                return error;
            }
            if ((error = clSetKernelArg(kernels[j][thread_id], 3,
                                        sizeof(gInBuffer3), &gInBuffer3)))
            {
                LogBuildError(programs[j]);
                return error;
            }

            if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
                                                1, NULL, &localCount, NULL, 0,
                                                NULL, NULL)))
            {
                vlog_error("FAILED -- could not execute kernel\n");
                return error;
            }
        }

        // Flush the queue so the kernels start executing while the reference
        // results are computed on the host
        if ((error = clFlush(gQueue))) vlog("clFlush failed\n");

        // Calculate the correctly rounded reference result
        float *r = (float *)gOut_Ref;
        float *s = (float *)gIn;
        float *s2 = (float *)gIn2;
        float *s3 = (float *)gIn3;
        if (skipNanInf)
        {
            for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
            {
                feclearexcept(FE_OVERFLOW);
                r[j] =
                    (float)f->func.f_fma(s[j], s2[j], s3[j], CORRECTLY_ROUNDED);
                overflow[j] =
                    FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW));
            }
        }
        else
        {
            for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
                r[j] =
                    (float)f->func.f_fma(s[j], s2[j], s3[j], CORRECTLY_ROUNDED);
        }

        // Read the data back
        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
            if ((error =
                     clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
                                         BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
            {
                vlog_error("ReadArray failed %d\n", error);
                return error;
            }
        }

        if (gSkipCorrectnessTesting) break;

        // Verify data
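        // On a bitwise mismatch with the reference, compute the ULP error; if
        // FTZ or relaxed mode applies, retry with subnormal inputs and outputs
        // flushed to zero (OpenCL spec sections 6.5.3.2 - 6.5.3.4) before
        // counting the result as a failure.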
        uint32_t *t = (uint32_t *)gOut_Ref;
        for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
        {
            for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
            {
                uint32_t *q = (uint32_t *)(gOut[k]);

                // If we aren't getting the correctly rounded result
                if (t[j] != q[j])
                {
                    float err;
                    int fail;
                    float test = ((float *)q)[j];
                    float correct =
                        f->func.f_fma(s[j], s2[j], s3[j], CORRECTLY_ROUNDED);

                    // Per section 10 paragraph 6, accept any result if an
                    // input or output is an infinity or NaN, or if the
                    // computation overflowed
                    if (skipNanInf)
                    {
                        if (overflow[j] || IsFloatInfinity(correct)
                            || IsFloatNaN(correct) || IsFloatInfinity(s[j])
                            || IsFloatNaN(s[j]) || IsFloatInfinity(s2[j])
                            || IsFloatNaN(s2[j]) || IsFloatInfinity(s3[j])
                            || IsFloatNaN(s3[j]))
                            continue;
                    }

                    err = Ulp_Error(test, correct);
                    fail = !(fabsf(err) <= float_ulps);

                    if (fail && (ftz || relaxedMode))
                    {
                        float correct2, err2;

                        // retry per section 6.5.3.2 with flushing on
                        if (0.0f == test
                            && 0.0f
                                == f->func.f_fma(s[j], s2[j], s3[j], FLUSHED))
                        {
                            fail = 0;
                            err = 0.0f;
                        }

                        // retry per section 6.5.3.3: the first arg is
                        // subnormal, so try it as +0 and -0
                        if (fail && IsFloatSubnormal(s[j]))
                        {
                            float err3, correct3;

                            if (skipNanInf) feclearexcept(FE_OVERFLOW);

                            correct2 = f->func.f_fma(0.0f, s2[j], s3[j],
                                                     CORRECTLY_ROUNDED);
                            correct3 = f->func.f_fma(-0.0f, s2[j], s3[j],
                                                     CORRECTLY_ROUNDED);

                            if (skipNanInf)
                            {
                                if (fetestexcept(FE_OVERFLOW)) continue;

                                // Note: no double rounding here. Reference
                                // functions calculate in single precision.
                                if (IsFloatInfinity(correct2)
                                    || IsFloatNaN(correct2)
                                    || IsFloatInfinity(correct3)
                                    || IsFloatNaN(correct3))
                                    continue;
                            }

                            err2 = Ulp_Error(test, correct2);
                            err3 = Ulp_Error(test, correct3);
                            fail = fail
                                && ((!(fabsf(err2) <= float_ulps))
                                    && (!(fabsf(err3) <= float_ulps)));
                            if (fabsf(err2) < fabsf(err)) err = err2;
                            if (fabsf(err3) < fabsf(err)) err = err3;

                            // retry per section 6.5.3.4
                            if (0.0f == test
                                && (0.0f
                                        == f->func.f_fma(0.0f, s2[j], s3[j],
                                                         FLUSHED)
                                    || 0.0f
                                        == f->func.f_fma(-0.0f, s2[j], s3[j],
                                                         FLUSHED)))
                            {
                                fail = 0;
                                err = 0.0f;
                            }

                            // try with the first two args as zero
                            if (IsFloatSubnormal(s2[j]))
                            {
                                double correct4, correct5;
                                float err4, err5;

                                if (skipNanInf) feclearexcept(FE_OVERFLOW);

                                correct2 = f->func.f_fma(0.0f, 0.0f, s3[j],
                                                         CORRECTLY_ROUNDED);
                                correct3 = f->func.f_fma(-0.0f, 0.0f, s3[j],
                                                         CORRECTLY_ROUNDED);
                                correct4 = f->func.f_fma(0.0f, -0.0f, s3[j],
                                                         CORRECTLY_ROUNDED);
                                correct5 = f->func.f_fma(-0.0f, -0.0f, s3[j],
                                                         CORRECTLY_ROUNDED);

                                // Per section 10 paragraph 6, accept any
                                // result if an input or output is an infinity
                                // or NaN, or if the computation overflowed
                                if (!gInfNanSupport)
                                {
                                    if (fetestexcept(FE_OVERFLOW)) continue;

                                    // Note: no double rounding here. Reference
                                    // functions calculate in single precision.
                                    if (IsFloatInfinity(correct2)
                                        || IsFloatNaN(correct2)
                                        || IsFloatInfinity(correct3)
                                        || IsFloatNaN(correct3)
                                        || IsFloatInfinity(correct4)
                                        || IsFloatNaN(correct4)
                                        || IsFloatInfinity(correct5)
                                        || IsFloatNaN(correct5))
                                        continue;
                                }

                                err2 = Ulp_Error(test, correct2);
                                err3 = Ulp_Error(test, correct3);
                                err4 = Ulp_Error(test, correct4);
                                err5 = Ulp_Error(test, correct5);
                                fail = fail
                                    && ((!(fabsf(err2) <= float_ulps))
                                        && (!(fabsf(err3) <= float_ulps))
                                        && (!(fabsf(err4) <= float_ulps))
                                        && (!(fabsf(err5) <= float_ulps)));
                                if (fabsf(err2) < fabsf(err)) err = err2;
                                if (fabsf(err3) < fabsf(err)) err = err3;
                                if (fabsf(err4) < fabsf(err)) err = err4;
                                if (fabsf(err5) < fabsf(err)) err = err5;

                                // retry per section 6.5.3.4
                                if (0.0f == test
                                    && (0.0f
                                            == f->func.f_fma(0.0f, 0.0f, s3[j],
                                                             FLUSHED)
                                        || 0.0f
                                            == f->func.f_fma(-0.0f, 0.0f,
                                                             s3[j], FLUSHED)
                                        || 0.0f
                                            == f->func.f_fma(0.0f, -0.0f,
                                                             s3[j], FLUSHED)
                                        || 0.0f
                                            == f->func.f_fma(-0.0f, -0.0f,
                                                             s3[j], FLUSHED)))
                                {
                                    fail = 0;
                                    err = 0.0f;
                                }

                                if (IsFloatSubnormal(s3[j]))
                                {
                                    if (test == 0.0f) // 0*0+0 is 0
                                    {
                                        fail = 0;
                                        err = 0.0f;
                                    }
                                }
                            }
                            else if (IsFloatSubnormal(s3[j]))
                            {
                                double correct4, correct5;
                                float err4, err5;

                                if (skipNanInf) feclearexcept(FE_OVERFLOW);

                                correct2 = f->func.f_fma(0.0f, s2[j], 0.0f,
                                                         CORRECTLY_ROUNDED);
                                correct3 = f->func.f_fma(-0.0f, s2[j], 0.0f,
                                                         CORRECTLY_ROUNDED);
                                correct4 = f->func.f_fma(0.0f, s2[j], -0.0f,
                                                         CORRECTLY_ROUNDED);
                                correct5 = f->func.f_fma(-0.0f, s2[j], -0.0f,
                                                         CORRECTLY_ROUNDED);

                                // Per section 10 paragraph 6, accept any
                                // result if an input or output is an infinity
                                // or NaN, or if the computation overflowed
                                if (!gInfNanSupport)
                                {
                                    if (fetestexcept(FE_OVERFLOW)) continue;

                                    // Note: no double rounding here. Reference
                                    // functions calculate in single precision.
                                    if (IsFloatInfinity(correct2)
                                        || IsFloatNaN(correct2)
                                        || IsFloatInfinity(correct3)
                                        || IsFloatNaN(correct3)
                                        || IsFloatInfinity(correct4)
                                        || IsFloatNaN(correct4)
                                        || IsFloatInfinity(correct5)
                                        || IsFloatNaN(correct5))
                                        continue;
                                }

                                err2 = Ulp_Error(test, correct2);
                                err3 = Ulp_Error(test, correct3);
                                err4 = Ulp_Error(test, correct4);
                                err5 = Ulp_Error(test, correct5);
                                fail = fail
                                    && ((!(fabsf(err2) <= float_ulps))
                                        && (!(fabsf(err3) <= float_ulps))
                                        && (!(fabsf(err4) <= float_ulps))
                                        && (!(fabsf(err5) <= float_ulps)));
                                if (fabsf(err2) < fabsf(err)) err = err2;
                                if (fabsf(err3) < fabsf(err)) err = err3;
                                if (fabsf(err4) < fabsf(err)) err = err4;
                                if (fabsf(err5) < fabsf(err)) err = err5;

                                // retry per section 6.5.3.4
                                if (0.0f == test
                                    && (0.0f
                                            == f->func.f_fma(0.0f, s2[j], 0.0f,
                                                             FLUSHED)
                                        || 0.0f
                                            == f->func.f_fma(-0.0f, s2[j],
                                                             0.0f, FLUSHED)
                                        || 0.0f
                                            == f->func.f_fma(0.0f, s2[j],
                                                             -0.0f, FLUSHED)
                                        || 0.0f
                                            == f->func.f_fma(-0.0f, s2[j],
                                                             -0.0f, FLUSHED)))
                                {
                                    fail = 0;
                                    err = 0.0f;
                                }
                            }
                        }
                        else if (fail && IsFloatSubnormal(s2[j]))
                        {
                            double correct2, correct3;
                            float err2, err3;

                            if (skipNanInf) feclearexcept(FE_OVERFLOW);

                            correct2 = f->func.f_fma(s[j], 0.0f, s3[j],
                                                     CORRECTLY_ROUNDED);
                            correct3 = f->func.f_fma(s[j], -0.0f, s3[j],
                                                     CORRECTLY_ROUNDED);

                            if (skipNanInf)
                            {
                                if (fetestexcept(FE_OVERFLOW)) continue;

                                // Note: no double rounding here. Reference
                                // functions calculate in single precision.
                                if (IsFloatInfinity(correct2)
                                    || IsFloatNaN(correct2)
                                    || IsFloatInfinity(correct3)
                                    || IsFloatNaN(correct3))
                                    continue;
                            }

                            err2 = Ulp_Error(test, correct2);
                            err3 = Ulp_Error(test, correct3);
                            fail = fail
                                && ((!(fabsf(err2) <= float_ulps))
                                    && (!(fabsf(err3) <= float_ulps)));
                            if (fabsf(err2) < fabsf(err)) err = err2;
                            if (fabsf(err3) < fabsf(err)) err = err3;

                            // retry per section 6.5.3.4
                            if (0.0f == test
                                && (0.0f
                                        == f->func.f_fma(s[j], 0.0f, s3[j],
                                                         FLUSHED)
                                    || 0.0f
                                        == f->func.f_fma(s[j], -0.0f, s3[j],
                                                         FLUSHED)))
                            {
                                fail = 0;
                                err = 0.0f;
                            }

                            // try with the second and third args as zero
                            if (IsFloatSubnormal(s3[j]))
                            {
                                double correct4, correct5;
                                float err4, err5;

                                if (skipNanInf) feclearexcept(FE_OVERFLOW);

                                correct2 = f->func.f_fma(s[j], 0.0f, 0.0f,
                                                         CORRECTLY_ROUNDED);
                                correct3 = f->func.f_fma(s[j], -0.0f, 0.0f,
                                                         CORRECTLY_ROUNDED);
                                correct4 = f->func.f_fma(s[j], 0.0f, -0.0f,
                                                         CORRECTLY_ROUNDED);
                                correct5 = f->func.f_fma(s[j], -0.0f, -0.0f,
                                                         CORRECTLY_ROUNDED);

                                // Per section 10 paragraph 6, accept any
                                // result if an input or output is an infinity
                                // or NaN, or if the computation overflowed
                                if (!gInfNanSupport)
                                {
                                    if (fetestexcept(FE_OVERFLOW)) continue;

                                    // Note: no double rounding here. Reference
                                    // functions calculate in single precision.
                                    if (IsFloatInfinity(correct2)
                                        || IsFloatNaN(correct2)
                                        || IsFloatInfinity(correct3)
                                        || IsFloatNaN(correct3)
                                        || IsFloatInfinity(correct4)
                                        || IsFloatNaN(correct4)
                                        || IsFloatInfinity(correct5)
                                        || IsFloatNaN(correct5))
                                        continue;
                                }

                                err2 = Ulp_Error(test, correct2);
                                err3 = Ulp_Error(test, correct3);
                                err4 = Ulp_Error(test, correct4);
                                err5 = Ulp_Error(test, correct5);
                                fail = fail
                                    && ((!(fabsf(err2) <= float_ulps))
                                        && (!(fabsf(err3) <= float_ulps))
                                        && (!(fabsf(err4) <= float_ulps))
                                        && (!(fabsf(err5) <= float_ulps)));
                                if (fabsf(err2) < fabsf(err)) err = err2;
                                if (fabsf(err3) < fabsf(err)) err = err3;
                                if (fabsf(err4) < fabsf(err)) err = err4;
                                if (fabsf(err5) < fabsf(err)) err = err5;

                                // retry per section 6.5.3.4
                                if (0.0f == test
                                    && (0.0f
                                            == f->func.f_fma(s[j], 0.0f, 0.0f,
                                                             FLUSHED)
                                        || 0.0f
                                            == f->func.f_fma(s[j], -0.0f, 0.0f,
                                                             FLUSHED)
                                        || 0.0f
                                            == f->func.f_fma(s[j], 0.0f, -0.0f,
                                                             FLUSHED)
                                        || 0.0f
                                            == f->func.f_fma(s[j], -0.0f,
                                                             -0.0f, FLUSHED)))
                                {
                                    fail = 0;
                                    err = 0.0f;
                                }
                            }
                        }
                        else if (fail && IsFloatSubnormal(s3[j]))
                        {
                            double correct2, correct3;
                            float err2, err3;

                            if (skipNanInf) feclearexcept(FE_OVERFLOW);

                            correct2 = f->func.f_fma(s[j], s2[j], 0.0f,
                                                     CORRECTLY_ROUNDED);
                            correct3 = f->func.f_fma(s[j], s2[j], -0.0f,
                                                     CORRECTLY_ROUNDED);

                            if (skipNanInf)
                            {
                                if (fetestexcept(FE_OVERFLOW)) continue;

                                // Note: no double rounding here. Reference
                                // functions calculate in single precision.
                                if (IsFloatInfinity(correct2)
                                    || IsFloatNaN(correct2)
                                    || IsFloatInfinity(correct3)
                                    || IsFloatNaN(correct3))
                                    continue;
                            }

                            err2 = Ulp_Error(test, correct2);
                            err3 = Ulp_Error(test, correct3);
                            fail = fail
                                && ((!(fabsf(err2) <= float_ulps))
                                    && (!(fabsf(err3) <= float_ulps)));
                            if (fabsf(err2) < fabsf(err)) err = err2;
                            if (fabsf(err3) < fabsf(err)) err = err3;

                            // retry per section 6.5.3.4
                            if (0.0f == test
                                && (0.0f
                                        == f->func.f_fma(s[j], s2[j], 0.0f,
                                                         FLUSHED)
                                    || 0.0f
                                        == f->func.f_fma(s[j], s2[j], -0.0f,
                                                         FLUSHED)))
                            {
                                fail = 0;
                                err = 0.0f;
                            }
                        }
                    }

                    if (fabsf(err) > maxError)
                    {
                        maxError = fabsf(err);
                        maxErrorVal = s[j];
                        maxErrorVal2 = s2[j];
                        maxErrorVal3 = s3[j];
                    }

                    if (fail)
                    {
                        vlog_error(
                            "\nERROR: %s%s: %f ulp error at {%a, %a, %a} "
                            "({0x%8.8x, 0x%8.8x, 0x%8.8x}): *%a vs. %a\n",
                            f->name, sizeNames[k], err, s[j], s2[j], s3[j],
                            ((cl_uint *)s)[j], ((cl_uint *)s2)[j],
                            ((cl_uint *)s3)[j], ((float *)gOut_Ref)[j], test);
                        return -1;
                    }
                }
            }
        }

        if (0 == (i & 0x0fffffff))
        {
            if (gVerboseBruteForce)
            {
                vlog("base:%14" PRIu64 " step:%10" PRIu64 " bufferSize:%10d \n",
                     i, step, BUFFER_SIZE);
            }
            else
            {
                vlog(".");
            }
            fflush(stdout);
        }
    }

    if (!gSkipCorrectnessTesting)
    {
        if (gWimpyMode)
            vlog("Wimp pass");
        else
            vlog("passed");

        vlog("\t%8.2f @ {%a, %a, %a}", maxError, maxErrorVal, maxErrorVal2,
             maxErrorVal3);
    }

    vlog("\n");

    return CL_SUCCESS;
}