1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16
17 #include <stdio.h>
18 #include <string.h>
19 #include <sys/types.h>
20 #include <sys/stat.h>
21 #include <vector>
22
23 #include "harness/deviceInfo.h"
24 #include "harness/typeWrappers.h"
25
26 #include "procs.h"
27 #include "test_base.h"
28
29 #ifndef M_PI
30 #define M_PI 3.14159265358979323846264338327950288
31 #endif
32
33 #define CLAMP_KERNEL(type) \
34 const char *clamp_##type##_kernel_code = EMIT_PRAGMA_DIRECTIVE \
35 "__kernel void test_clamp(__global " #type " *x, __global " #type \
36 " *minval, __global " #type " *maxval, __global " #type " *dst)\n" \
37 "{\n" \
38 " int tid = get_global_id(0);\n" \
39 "\n" \
40 " dst[tid] = clamp(x[tid], minval[tid], maxval[tid]);\n" \
41 "}\n";
42
43 #define CLAMP_KERNEL_V(type, size) \
44 const char *clamp_##type##size##_kernel_code = EMIT_PRAGMA_DIRECTIVE \
45 "__kernel void test_clamp(__global " #type #size \
46 " *x, __global " #type #size " *minval, __global " #type #size \
47 " *maxval, __global " #type #size " *dst)\n" \
48 "{\n" \
49 " int tid = get_global_id(0);\n" \
50 "\n" \
51 " dst[tid] = clamp(x[tid], minval[tid], maxval[tid]);\n" \
52 "}\n";
53
54 #define CLAMP_KERNEL_V3(type, size) \
55 const char *clamp_##type##size##_kernel_code = EMIT_PRAGMA_DIRECTIVE \
56 "__kernel void test_clamp(__global " #type " *x, __global " #type \
57 " *minval, __global " #type " *maxval, __global " #type " *dst)\n" \
58 "{\n" \
59 " int tid = get_global_id(0);\n" \
60 "\n" \
61 " vstore3(clamp(vload3(tid, x), vload3(tid,minval), " \
62 "vload3(tid,maxval)), tid, dst);\n" \
63 "}\n";
64
65 #define EMIT_PRAGMA_DIRECTIVE "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
66 CLAMP_KERNEL(half)
67 CLAMP_KERNEL_V(half, 2)
68 CLAMP_KERNEL_V(half, 4)
69 CLAMP_KERNEL_V(half, 8)
70 CLAMP_KERNEL_V(half, 16)
71 CLAMP_KERNEL_V3(half, 3)
72 #undef EMIT_PRAGMA_DIRECTIVE
73
74 #define EMIT_PRAGMA_DIRECTIVE " "
75 CLAMP_KERNEL(float)
76 CLAMP_KERNEL_V(float, 2)
77 CLAMP_KERNEL_V(float, 4)
78 CLAMP_KERNEL_V(float, 8)
79 CLAMP_KERNEL_V(float, 16)
80 CLAMP_KERNEL_V3(float, 3)
81 #undef EMIT_PRAGMA_DIRECTIVE
82
83 #define EMIT_PRAGMA_DIRECTIVE "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
84 CLAMP_KERNEL(double)
85 CLAMP_KERNEL_V(double, 2)
86 CLAMP_KERNEL_V(double, 4)
87 CLAMP_KERNEL_V(double, 8)
88 CLAMP_KERNEL_V(double, 16)
89 CLAMP_KERNEL_V3(double, 3)
90 #undef EMIT_PRAGMA_DIRECTIVE
91
92 const char *clamp_half_codes[] = {
93 clamp_half_kernel_code, clamp_half2_kernel_code, clamp_half4_kernel_code,
94 clamp_half8_kernel_code, clamp_half16_kernel_code, clamp_half3_kernel_code
95 };
96 const char *clamp_float_codes[] = {
97 clamp_float_kernel_code, clamp_float2_kernel_code,
98 clamp_float4_kernel_code, clamp_float8_kernel_code,
99 clamp_float16_kernel_code, clamp_float3_kernel_code
100 };
101 const char *clamp_double_codes[] = {
102 clamp_double_kernel_code, clamp_double2_kernel_code,
103 clamp_double4_kernel_code, clamp_double8_kernel_code,
104 clamp_double16_kernel_code, clamp_double3_kernel_code
105 };
106
107 namespace {
108
109 template <typename T>
verify_clamp(const T * const x,const T * const minval,const T * const maxval,const T * const outptr,int n)110 int verify_clamp(const T *const x, const T *const minval, const T *const maxval,
111 const T *const outptr, int n)
112 {
113 if (std::is_same<T, half>::value)
114 {
115 float t;
116 for (int i = 0; i < n; i++)
117 {
118 t = std::min(
119 std::max(cl_half_to_float(x[i]), cl_half_to_float(minval[i])),
120 cl_half_to_float(maxval[i]));
121 if (t != cl_half_to_float(outptr[i]))
122 {
123 log_error(
124 "%d) verification error: clamp( %a, %a, %a) = *%a vs. %a\n",
125 i, cl_half_to_float(x[i]), cl_half_to_float(minval[i]),
126 cl_half_to_float(maxval[i]), t,
127 cl_half_to_float(outptr[i]));
128 return -1;
129 }
130 }
131 }
132 else
133 {
134 T t;
135 for (int i = 0; i < n; i++)
136 {
137 t = std::min(std::max(x[i], minval[i]), maxval[i]);
138 if (t != outptr[i])
139 {
140 log_error(
141 "%d) verification error: clamp( %a, %a, %a) = *%a vs. %a\n",
142 i, x[i], minval[i], maxval[i], t, outptr[i]);
143 return -1;
144 }
145 }
146 }
147
148 return 0;
149 }
150 }
151
152 template <typename T>
test_clamp_fn(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)153 int test_clamp_fn(cl_device_id device, cl_context context,
154 cl_command_queue queue, int n_elems)
155 {
156 clMemWrapper streams[4];
157 std::vector<T> input_ptr[3], output_ptr;
158
159 std::vector<clProgramWrapper> programs;
160 std::vector<clKernelWrapper> kernels;
161
162 int err, i, j;
163 MTdataHolder d = MTdataHolder(gRandomSeed);
164
165 assert(BaseFunctionTest::type2name.find(sizeof(T))
166 != BaseFunctionTest::type2name.end());
167 auto tname = BaseFunctionTest::type2name[sizeof(T)];
168
169 programs.resize(kTotalVecCount);
170 kernels.resize(kTotalVecCount);
171
172 int num_elements = n_elems * (1 << (kVectorSizeCount - 1));
173
174 for (i = 0; i < 3; i++) input_ptr[i].resize(num_elements);
175 output_ptr.resize(num_elements);
176
177 for (i = 0; i < 4; i++)
178 {
179 streams[i] = clCreateBuffer(context, CL_MEM_READ_WRITE,
180 sizeof(T) * num_elements, NULL, &err);
181 test_error(err, "clCreateBuffer failed");
182 }
183
184 if (std::is_same<T, float>::value)
185 {
186 for (j = 0; j < num_elements; j++)
187 {
188 input_ptr[0][j] = get_random_float(-0x200000, 0x200000, d);
189 input_ptr[1][j] = get_random_float(-0x200000, 0x200000, d);
190 input_ptr[2][j] = get_random_float(input_ptr[1][j], 0x200000, d);
191 }
192 }
193 else if (std::is_same<T, double>::value)
194 {
195 for (j = 0; j < num_elements; j++)
196 {
197 input_ptr[0][j] = get_random_double(-0x20000000, 0x20000000, d);
198 input_ptr[1][j] = get_random_double(-0x20000000, 0x20000000, d);
199 input_ptr[2][j] = get_random_double(input_ptr[1][j], 0x20000000, d);
200 }
201 }
202 else if (std::is_same<T, half>::value)
203 {
204 const float fval = CL_HALF_MAX;
205 for (j = 0; j < num_elements; j++)
206 {
207 input_ptr[0][j] = conv_to_half(get_random_float(-fval, fval, d));
208 input_ptr[1][j] = conv_to_half(get_random_float(-fval, fval, d));
209 input_ptr[2][j] = conv_to_half(
210 get_random_float(conv_to_flt(input_ptr[1][j]), fval, d));
211 }
212 }
213
214 for (i = 0; i < 3; i++)
215 {
216 err = clEnqueueWriteBuffer(queue, streams[i], CL_TRUE, 0,
217 sizeof(T) * num_elements,
218 &input_ptr[i].front(), 0, NULL, NULL);
219 test_error(err, "Unable to write input buffer");
220 }
221
222 for (i = 0; i < kTotalVecCount; i++)
223 {
224 if (std::is_same<T, float>::value)
225 {
226 err = create_single_kernel_helper(
227 context, &programs[i], &kernels[i], 1, &clamp_float_codes[i],
228 "test_clamp");
229 test_error(err, "Unable to create kernel");
230 }
231 else if (std::is_same<T, double>::value)
232 {
233 err = create_single_kernel_helper(
234 context, &programs[i], &kernels[i], 1, &clamp_double_codes[i],
235 "test_clamp");
236 test_error(err, "Unable to create kernel");
237 }
238 else if (std::is_same<T, half>::value)
239 {
240 err = create_single_kernel_helper(
241 context, &programs[i], &kernels[i], 1, &clamp_half_codes[i],
242 "test_clamp");
243 test_error(err, "Unable to create kernel");
244 }
245
246 log_info("Just made a program for %s, i=%d, size=%d, in slot %d\n",
247 tname.c_str(), i, g_arrVecSizes[i], i);
248 fflush(stdout);
249
250 for (j = 0; j < 4; j++)
251 {
252 err =
253 clSetKernelArg(kernels[i], j, sizeof(streams[j]), &streams[j]);
254 test_error(err, "Unable to set kernel argument");
255 }
256
257 size_t threads = (size_t)n_elems;
258
259 err = clEnqueueNDRangeKernel(queue, kernels[i], 1, NULL, &threads, NULL,
260 0, NULL, NULL);
261 test_error(err, "Unable to execute kernel");
262
263 err = clEnqueueReadBuffer(queue, streams[3], true, 0,
264 sizeof(T) * num_elements, &output_ptr[0], 0,
265 NULL, NULL);
266 test_error(err, "Unable to read results");
267
268 if (verify_clamp<T>((T *)&input_ptr[0].front(),
269 (T *)&input_ptr[1].front(),
270 (T *)&input_ptr[2].front(), (T *)&output_ptr[0],
271 n_elems * ((g_arrVecSizes[i]))))
272 {
273 log_error("CLAMP %s%d test failed\n", tname.c_str(),
274 ((g_arrVecSizes[i])));
275 err = -1;
276 }
277 else
278 {
279 log_info("CLAMP %s%d test passed\n", tname.c_str(),
280 ((g_arrVecSizes[i])));
281 err = 0;
282 }
283
284 if (err) break;
285 }
286
287 return err;
288 }
289
Run()290 cl_int ClampTest::Run()
291 {
292 cl_int error = CL_SUCCESS;
293 if (is_extension_available(device, "cl_khr_fp16"))
294 {
295 error = test_clamp_fn<cl_half>(device, context, queue, num_elems);
296 test_error(error, "ClampTest::Run<cl_half> failed");
297 }
298
299 error = test_clamp_fn<float>(device, context, queue, num_elems);
300 test_error(error, "ClampTest::Run<float> failed");
301
302 if (is_extension_available(device, "cl_khr_fp64"))
303 {
304 error = test_clamp_fn<double>(device, context, queue, num_elems);
305 test_error(error, "ClampTest::Run<double> failed");
306 }
307
308 return error;
309 }
310
test_clamp(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)311 int test_clamp(cl_device_id device, cl_context context, cl_command_queue queue,
312 int n_elems)
313 {
314 return MakeAndRunTest<ClampTest>(device, context, queue, n_elems);
315 }
316