1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16
17 #include "common.h"
18 #include "function_list.h"
19 #include "test_functions.h"
20 #include "utility.h"
21
22 #include <cinttypes>
23 #include <cstring>
24
25 namespace {
26
BuildKernelFn(cl_uint job_id,cl_uint thread_id UNUSED,void * p)27 cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
28 {
29 BuildKernelInfo &info = *(BuildKernelInfo *)p;
30 auto generator = [](const std::string &kernel_name, const char *builtin,
31 cl_uint vector_size_index) {
32 return GetUnaryKernel(kernel_name, builtin, ParameterType::Int,
33 ParameterType::Float, vector_size_index);
34 };
35 return BuildKernels(info, job_id, generator);
36 }
37
38 } // anonymous namespace
39
TestFunc_Int_Float(const Func * f,MTdata d,bool relaxedMode)40 int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
41 {
42 int error;
43 Programs programs;
44 const unsigned thread_id = 0; // Test is currently not multithreaded.
45 KernelMatrix kernels;
46 int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
47 uint64_t step = getTestStep(sizeof(float), BUFFER_SIZE);
48 int scale = (int)((1ULL << 32) / (16 * BUFFER_SIZE / sizeof(float)) + 1);
49
50 logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
51
52 Force64BitFPUPrecision();
53
54 // Init the kernels
55 {
56 BuildKernelInfo build_info{ 1, kernels, programs, f->nameInCode,
57 relaxedMode };
58 if ((error = ThreadPool_Do(BuildKernelFn,
59 gMaxVectorSizeIndex - gMinVectorSizeIndex,
60 &build_info)))
61 return error;
62 }
63
64 for (uint64_t i = 0; i < (1ULL << 32); i += step)
65 {
66 // Init input array
67 cl_uint *p = (cl_uint *)gIn;
68 if (gWimpyMode)
69 {
70 for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
71 p[j] = (cl_uint)i + j * scale;
72 }
73 else
74 {
75 for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
76 p[j] = (uint32_t)i + j;
77 }
78
79 if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
80 BUFFER_SIZE, gIn, 0, NULL, NULL)))
81 {
82 vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
83 return error;
84 }
85
86 // Write garbage into output arrays
87 for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
88 {
89 uint32_t pattern = 0xffffdead;
90 if (gHostFill)
91 {
92 memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
93 if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j],
94 CL_FALSE, 0, BUFFER_SIZE,
95 gOut[j], 0, NULL, NULL)))
96 {
97 vlog_error(
98 "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
99 error, j);
100 goto exit;
101 }
102 }
103 else
104 {
105 if ((error = clEnqueueFillBuffer(gQueue, gOutBuffer[j],
106 &pattern, sizeof(pattern), 0,
107 BUFFER_SIZE, 0, NULL, NULL)))
108 {
109 vlog_error("Error: clEnqueueFillBuffer failed! err: %d\n",
110 error);
111 return error;
112 }
113 }
114 }
115
116 // Run the kernels
117 for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
118 {
119 size_t vectorSize = sizeValues[j] * sizeof(cl_float);
120 size_t localCount = (BUFFER_SIZE + vectorSize - 1)
121 / vectorSize; // BUFFER_SIZE / vectorSize rounded up
122 if ((error = clSetKernelArg(kernels[j][thread_id], 0,
123 sizeof(gOutBuffer[j]), &gOutBuffer[j])))
124 {
125 LogBuildError(programs[j]);
126 goto exit;
127 }
128 if ((error = clSetKernelArg(kernels[j][thread_id], 1,
129 sizeof(gInBuffer), &gInBuffer)))
130 {
131 LogBuildError(programs[j]);
132 goto exit;
133 }
134
135 if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
136 1, NULL, &localCount, NULL, 0,
137 NULL, NULL)))
138 {
139 vlog_error("FAILED -- could not execute kernel\n");
140 goto exit;
141 }
142 }
143
144 // Get that moving
145 if ((error = clFlush(gQueue))) vlog("clFlush failed\n");
146
147 // Calculate the correctly rounded reference result
148 int *r = (int *)gOut_Ref;
149 float *s = (float *)gIn;
150 for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
151 r[j] = f->func.i_f(s[j]);
152
153 // Read the data back
154 for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
155 {
156 if ((error =
157 clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
158 BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
159 {
160 vlog_error("ReadArray failed %d\n", error);
161 goto exit;
162 }
163 }
164
165 if (gSkipCorrectnessTesting) break;
166
167 // Verify data
168 uint32_t *t = (uint32_t *)gOut_Ref;
169 for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
170 {
171 for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
172 {
173 uint32_t *q = (uint32_t *)(gOut[k]);
174 // If we aren't getting the correctly rounded result
175 if (t[j] != q[j])
176 {
177 if ((ftz || relaxedMode) && IsFloatSubnormal(s[j]))
178 {
179 unsigned int correct0 = f->func.i_f(0.0);
180 unsigned int correct1 = f->func.i_f(-0.0);
181 if (q[j] == correct0 || q[j] == correct1) continue;
182 }
183
184 uint32_t err = t[j] - q[j];
185 if (q[j] > t[j]) err = q[j] - t[j];
186 vlog_error("\nERROR: %s%s: %d ulp error at %a (0x%8.8x): "
187 "*%d vs. %d\n",
188 f->name, sizeNames[k], err, ((float *)gIn)[j],
189 ((cl_uint *)gIn)[j], t[j], q[j]);
190 error = -1;
191 goto exit;
192 }
193 }
194 }
195
196 if (0 == (i & 0x0fffffff))
197 {
198 if (gVerboseBruteForce)
199 {
200 vlog("base:%14" PRIu64 " step:%10" PRIu64
201 " bufferSize:%10d \n",
202 i, step, BUFFER_SIZE);
203 }
204 else
205 {
206 vlog(".");
207 }
208 fflush(stdout);
209 }
210 }
211
212 if (!gSkipCorrectnessTesting)
213 {
214 if (gWimpyMode)
215 vlog("Wimp pass");
216 else
217 vlog("passed");
218 }
219
220 vlog("\n");
221
222 exit:
223 return error;
224 }
225