1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16
17 #include "common.h"
18 #include "function_list.h"
19 #include "test_functions.h"
20 #include "utility.h"
21
22 #include <cinttypes>
23 #include <cstring>
24
25 namespace {
26
BuildKernelFn(cl_uint job_id,cl_uint thread_id UNUSED,void * p)27 cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
28 {
29 BuildKernelInfo &info = *(BuildKernelInfo *)p;
30 auto generator = [](const std::string &kernel_name, const char *builtin,
31 cl_uint vector_size_index) {
32 return GetUnaryKernel(kernel_name, builtin, ParameterType::Int,
33 ParameterType::Double, vector_size_index);
34 };
35 return BuildKernels(info, job_id, generator);
36 }
37
38 } // anonymous namespace
39
TestFunc_Int_Double(const Func * f,MTdata d,bool relaxedMode)40 int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
41 {
42 int error;
43 Programs programs;
44 const unsigned thread_id = 0; // Test is currently not multithreaded.
45 KernelMatrix kernels;
46 int ftz = f->ftz || gForceFTZ;
47 uint64_t step = getTestStep(sizeof(cl_double), BUFFER_SIZE);
48 int scale =
49 (int)((1ULL << 32) / (16 * BUFFER_SIZE / sizeof(cl_double)) + 1);
50
51 logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
52
53 Force64BitFPUPrecision();
54
55 // Init the kernels
56 {
57 BuildKernelInfo build_info{ 1, kernels, programs, f->nameInCode,
58 relaxedMode };
59 if ((error = ThreadPool_Do(BuildKernelFn,
60 gMaxVectorSizeIndex - gMinVectorSizeIndex,
61 &build_info)))
62 return error;
63 }
64
65 for (uint64_t i = 0; i < (1ULL << 32); i += step)
66 {
67 // Init input array
68 double *p = (double *)gIn;
69 if (gWimpyMode)
70 {
71 for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
72 p[j] = DoubleFromUInt32((uint32_t)i + j * scale);
73 }
74 else
75 {
76 for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
77 p[j] = DoubleFromUInt32((uint32_t)i + j);
78 }
79
80 if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
81 BUFFER_SIZE, gIn, 0, NULL, NULL)))
82 {
83 vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
84 return error;
85 }
86
87 // Write garbage into output arrays
88 for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
89 {
90 uint32_t pattern = 0xffffdead;
91 if (gHostFill)
92 {
93 memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
94 if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j],
95 CL_FALSE, 0, BUFFER_SIZE,
96 gOut[j], 0, NULL, NULL)))
97 {
98 vlog_error(
99 "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
100 error, j);
101 goto exit;
102 }
103 }
104 else
105 {
106 if ((error = clEnqueueFillBuffer(gQueue, gOutBuffer[j],
107 &pattern, sizeof(pattern), 0,
108 BUFFER_SIZE, 0, NULL, NULL)))
109 {
110 vlog_error("Error: clEnqueueFillBuffer failed! err: %d\n",
111 error);
112 return error;
113 }
114 }
115 }
116
117 // Run the kernels
118 for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
119 {
120 size_t vectorSize = sizeValues[j] * sizeof(cl_double);
121 size_t localCount = (BUFFER_SIZE + vectorSize - 1)
122 / vectorSize; // BUFFER_SIZE / vectorSize rounded up
123 if ((error = clSetKernelArg(kernels[j][thread_id], 0,
124 sizeof(gOutBuffer[j]), &gOutBuffer[j])))
125 {
126 LogBuildError(programs[j]);
127 goto exit;
128 }
129 if ((error = clSetKernelArg(kernels[j][thread_id], 1,
130 sizeof(gInBuffer), &gInBuffer)))
131 {
132 LogBuildError(programs[j]);
133 goto exit;
134 }
135
136 if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
137 1, NULL, &localCount, NULL, 0,
138 NULL, NULL)))
139 {
140 vlog_error("FAILED -- could not execute kernel\n");
141 goto exit;
142 }
143 }
144
145 // Get that moving
146 if ((error = clFlush(gQueue))) vlog("clFlush failed\n");
147
148 // Calculate the correctly rounded reference result
149 int *r = (int *)gOut_Ref;
150 double *s = (double *)gIn;
151 for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
152 r[j] = f->dfunc.i_f(s[j]);
153
154 // Read the data back
155 for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
156 {
157 if ((error =
158 clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
159 BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
160 {
161 vlog_error("ReadArray failed %d\n", error);
162 goto exit;
163 }
164 }
165
166 if (gSkipCorrectnessTesting) break;
167
168 // Verify data
169 uint32_t *t = (uint32_t *)gOut_Ref;
170 for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
171 {
172 for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
173 {
174 uint32_t *q = (uint32_t *)(gOut[k]);
175 // If we aren't getting the correctly rounded result
176 if (t[j] != q[j])
177 {
178 if ((ftz || relaxedMode) && IsDoubleSubnormal(s[j]))
179 {
180 unsigned int correct0 = f->dfunc.i_f(0.0);
181 unsigned int correct1 = f->dfunc.i_f(-0.0);
182 if (q[j] == correct0 || q[j] == correct1) continue;
183 }
184
185 uint32_t err = t[j] - q[j];
186 if (q[j] > t[j]) err = q[j] - t[j];
187 vlog_error(
188 "\nERROR: %sD%s: %d ulp error at %.13la: *%d vs. %d\n",
189 f->name, sizeNames[k], err, ((double *)gIn)[j], t[j],
190 q[j]);
191 error = -1;
192 goto exit;
193 }
194 }
195 }
196
197 if (0 == (i & 0x0fffffff))
198 {
199 if (gVerboseBruteForce)
200 {
201 vlog("base:%14" PRIu64 " step:%10" PRIu64
202 " bufferSize:%10d \n",
203 i, step, BUFFER_SIZE);
204 }
205 else
206 {
207 vlog(".");
208 }
209
210 fflush(stdout);
211 }
212 }
213
214 if (!gSkipCorrectnessTesting)
215 {
216 if (gWimpyMode)
217 vlog("Wimp pass");
218 else
219 vlog("passed");
220 }
221
222 vlog("\n");
223
224 exit:
225 return error;
226 }
227