xref: /aosp_15_r20/external/OpenCL-CTS/test_conformance/math_brute_force/unary_u_float.cpp (revision 6467f958c7de8070b317fc65bcb0f6472e388d82)
1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 
17 #include "common.h"
18 #include "function_list.h"
19 #include "test_functions.h"
20 #include "utility.h"
21 
22 #include <cinttypes>
23 #include <cstring>
24 
25 namespace {
26 
BuildKernelFn(cl_uint job_id,cl_uint thread_id UNUSED,void * p)27 cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
28 {
29     BuildKernelInfo &info = *(BuildKernelInfo *)p;
30     auto generator = [](const std::string &kernel_name, const char *builtin,
31                         cl_uint vector_size_index) {
32         return GetUnaryKernel(kernel_name, builtin, ParameterType::Float,
33                               ParameterType::UInt, vector_size_index);
34     };
35     return BuildKernels(info, job_id, generator);
36 }
37 
38 } // anonymous namespace
39 
TestFunc_Float_UInt(const Func * f,MTdata d,bool relaxedMode)40 int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode)
41 {
42     int error;
43     Programs programs;
44     KernelMatrix kernels;
45     const unsigned thread_id = 0; // Test is currently not multithreaded.
46     float maxError = 0.0f;
47     int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
48     float maxErrorVal = 0.0f;
49     uint64_t step = getTestStep(sizeof(float), BUFFER_SIZE);
50     int scale = (int)((1ULL << 32) / (16 * BUFFER_SIZE / sizeof(double)) + 1);
51 
52     logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
53 
54     float float_ulps;
55     if (gIsEmbedded)
56         float_ulps = f->float_embedded_ulps;
57     else
58         float_ulps = f->float_ulps;
59 
60     // Init the kernels
61     BuildKernelInfo build_info{ 1, kernels, programs, f->nameInCode,
62                                 relaxedMode };
63     if ((error = ThreadPool_Do(BuildKernelFn,
64                                gMaxVectorSizeIndex - gMinVectorSizeIndex,
65                                &build_info)))
66         return error;
67 
68     for (uint64_t i = 0; i < (1ULL << 32); i += step)
69     {
70         // Init input array
71         uint32_t *p = (uint32_t *)gIn;
72         if (gWimpyMode)
73         {
74             for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
75                 p[j] = (uint32_t)i + j * scale;
76         }
77         else
78         {
79             for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
80                 p[j] = (uint32_t)i + j;
81         }
82         if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
83                                           BUFFER_SIZE, gIn, 0, NULL, NULL)))
84         {
85             vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
86             return error;
87         }
88 
89         // Write garbage into output arrays
90         for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
91         {
92             uint32_t pattern = 0xffffdead;
93             if (gHostFill)
94             {
95                 memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
96                 if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j],
97                                                   CL_FALSE, 0, BUFFER_SIZE,
98                                                   gOut[j], 0, NULL, NULL)))
99                 {
100                     vlog_error(
101                         "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
102                         error, j);
103                     return error;
104                 }
105             }
106             else
107             {
108                 if ((error = clEnqueueFillBuffer(gQueue, gOutBuffer[j],
109                                                  &pattern, sizeof(pattern), 0,
110                                                  BUFFER_SIZE, 0, NULL, NULL)))
111                 {
112                     vlog_error("Error: clEnqueueFillBuffer failed! err: %d\n",
113                                error);
114                     return error;
115                 }
116             }
117         }
118 
119         // Run the kernels
120         for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
121         {
122             size_t vectorSize = sizeValues[j] * sizeof(cl_float);
123             size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
124             if ((error = clSetKernelArg(kernels[j][thread_id], 0,
125                                         sizeof(gOutBuffer[j]), &gOutBuffer[j])))
126             {
127                 LogBuildError(programs[j]);
128                 return error;
129             }
130             if ((error = clSetKernelArg(kernels[j][thread_id], 1,
131                                         sizeof(gInBuffer), &gInBuffer)))
132             {
133                 LogBuildError(programs[j]);
134                 return error;
135             }
136 
137             if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
138                                                 1, NULL, &localCount, NULL, 0,
139                                                 NULL, NULL)))
140             {
141                 vlog_error("FAILED -- could not execute kernel\n");
142                 return error;
143             }
144         }
145 
146         // Get that moving
147         if ((error = clFlush(gQueue))) vlog("clFlush failed\n");
148 
149         // Calculate the correctly rounded reference result
150         float *r = (float *)gOut_Ref;
151         cl_uint *s = (cl_uint *)gIn;
152         for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
153             r[j] = (float)f->func.f_u(s[j]);
154 
155         // Read the data back
156         for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
157         {
158             if ((error =
159                      clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
160                                          BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
161             {
162                 vlog_error("ReadArray failed %d\n", error);
163                 return error;
164             }
165         }
166 
167         if (gSkipCorrectnessTesting) break;
168 
169         // Verify data
170         uint32_t *t = (uint32_t *)gOut_Ref;
171         for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
172         {
173             for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
174             {
175                 uint32_t *q = (uint32_t *)(gOut[k]);
176 
177                 // If we aren't getting the correctly rounded result
178                 if (t[j] != q[j])
179                 {
180                     float test = ((float *)q)[j];
181                     double correct = f->func.f_u(s[j]);
182                     float err = Ulp_Error(test, correct);
183                     int fail = !(fabsf(err) <= float_ulps);
184 
185                     if (fail)
186                     {
187                         if (ftz || relaxedMode)
188                         {
189                             // retry per section 6.5.3.2
190                             if (IsFloatResultSubnormal(correct, float_ulps))
191                             {
192                                 fail = fail && (test != 0.0f);
193                                 if (!fail) err = 0.0f;
194                             }
195                         }
196                     }
197                     if (fabsf(err) > maxError)
198                     {
199                         maxError = fabsf(err);
200                         maxErrorVal = s[j];
201                     }
202                     if (fail)
203                     {
204                         vlog_error(
205                             "\n%s%s: %f ulp error at 0x%8.8x: *%a vs. %a\n",
206                             f->name, sizeNames[k], err, ((uint32_t *)gIn)[j],
207                             ((float *)gOut_Ref)[j], test);
208                         return -1;
209                     }
210                 }
211             }
212         }
213 
214         if (0 == (i & 0x0fffffff))
215         {
216             if (gVerboseBruteForce)
217             {
218                 vlog("base:%14" PRIu64 " step:%10" PRIu64
219                      "  bufferSize:%10d \n",
220                      i, step, BUFFER_SIZE);
221             }
222             else
223             {
224                 vlog(".");
225             }
226             fflush(stdout);
227         }
228     }
229 
230     if (!gSkipCorrectnessTesting)
231     {
232         if (gWimpyMode)
233             vlog("Wimp pass");
234         else
235             vlog("passed");
236 
237         vlog("\t%8.2f @ %a", maxError, maxErrorVal);
238     }
239 
240     vlog("\n");
241 
242     return CL_SUCCESS;
243 }
244