xref: /aosp_15_r20/external/OpenCL-CTS/test_conformance/commonfns/test_clamp.cpp (revision 6467f958c7de8070b317fc65bcb0f6472e388d82)
1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 
17 #include <stdio.h>
18 #include <string.h>
19 #include <sys/types.h>
20 #include <sys/stat.h>
21 #include <vector>
22 
23 #include "harness/deviceInfo.h"
24 #include "harness/typeWrappers.h"
25 
26 #include "procs.h"
27 #include "test_base.h"
28 
29 #ifndef M_PI
30 #define M_PI 3.14159265358979323846264338327950288
31 #endif
32 
33 #define CLAMP_KERNEL(type)                                                     \
34     const char *clamp_##type##_kernel_code = EMIT_PRAGMA_DIRECTIVE             \
35         "__kernel void test_clamp(__global " #type " *x, __global " #type      \
36         " *minval, __global " #type " *maxval, __global " #type " *dst)\n"     \
37         "{\n"                                                                  \
38         "    int  tid = get_global_id(0);\n"                                   \
39         "\n"                                                                   \
40         "    dst[tid] = clamp(x[tid], minval[tid], maxval[tid]);\n"            \
41         "}\n";
42 
43 #define CLAMP_KERNEL_V(type, size)                                             \
44     const char *clamp_##type##size##_kernel_code = EMIT_PRAGMA_DIRECTIVE       \
45         "__kernel void test_clamp(__global " #type #size                       \
46         " *x, __global " #type #size " *minval, __global " #type #size         \
47         " *maxval, __global " #type #size " *dst)\n"                           \
48         "{\n"                                                                  \
49         "    int  tid = get_global_id(0);\n"                                   \
50         "\n"                                                                   \
51         "    dst[tid] = clamp(x[tid], minval[tid], maxval[tid]);\n"            \
52         "}\n";
53 
54 #define CLAMP_KERNEL_V3(type, size)                                            \
55     const char *clamp_##type##size##_kernel_code = EMIT_PRAGMA_DIRECTIVE       \
56         "__kernel void test_clamp(__global " #type " *x, __global " #type      \
57         " *minval, __global " #type " *maxval, __global " #type " *dst)\n"     \
58         "{\n"                                                                  \
59         "    int  tid = get_global_id(0);\n"                                   \
60         "\n"                                                                   \
61         "    vstore3(clamp(vload3(tid, x), vload3(tid,minval), "               \
62         "vload3(tid,maxval)), tid, dst);\n"                                    \
63         "}\n";
64 
65 #define EMIT_PRAGMA_DIRECTIVE "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
66 CLAMP_KERNEL(half)
67 CLAMP_KERNEL_V(half, 2)
68 CLAMP_KERNEL_V(half, 4)
69 CLAMP_KERNEL_V(half, 8)
70 CLAMP_KERNEL_V(half, 16)
71 CLAMP_KERNEL_V3(half, 3)
72 #undef EMIT_PRAGMA_DIRECTIVE
73 
74 #define EMIT_PRAGMA_DIRECTIVE " "
75 CLAMP_KERNEL(float)
76 CLAMP_KERNEL_V(float, 2)
77 CLAMP_KERNEL_V(float, 4)
78 CLAMP_KERNEL_V(float, 8)
79 CLAMP_KERNEL_V(float, 16)
80 CLAMP_KERNEL_V3(float, 3)
81 #undef EMIT_PRAGMA_DIRECTIVE
82 
83 #define EMIT_PRAGMA_DIRECTIVE "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
84 CLAMP_KERNEL(double)
85 CLAMP_KERNEL_V(double, 2)
86 CLAMP_KERNEL_V(double, 4)
87 CLAMP_KERNEL_V(double, 8)
88 CLAMP_KERNEL_V(double, 16)
89 CLAMP_KERNEL_V3(double, 3)
90 #undef EMIT_PRAGMA_DIRECTIVE
91 
92 const char *clamp_half_codes[] = {
93     clamp_half_kernel_code,  clamp_half2_kernel_code,  clamp_half4_kernel_code,
94     clamp_half8_kernel_code, clamp_half16_kernel_code, clamp_half3_kernel_code
95 };
96 const char *clamp_float_codes[] = {
97     clamp_float_kernel_code,   clamp_float2_kernel_code,
98     clamp_float4_kernel_code,  clamp_float8_kernel_code,
99     clamp_float16_kernel_code, clamp_float3_kernel_code
100 };
101 const char *clamp_double_codes[] = {
102     clamp_double_kernel_code,   clamp_double2_kernel_code,
103     clamp_double4_kernel_code,  clamp_double8_kernel_code,
104     clamp_double16_kernel_code, clamp_double3_kernel_code
105 };
106 
107 namespace {
108 
109 template <typename T>
verify_clamp(const T * const x,const T * const minval,const T * const maxval,const T * const outptr,int n)110 int verify_clamp(const T *const x, const T *const minval, const T *const maxval,
111                  const T *const outptr, int n)
112 {
113     if (std::is_same<T, half>::value)
114     {
115         float t;
116         for (int i = 0; i < n; i++)
117         {
118             t = std::min(
119                 std::max(cl_half_to_float(x[i]), cl_half_to_float(minval[i])),
120                 cl_half_to_float(maxval[i]));
121             if (t != cl_half_to_float(outptr[i]))
122             {
123                 log_error(
124                     "%d) verification error: clamp( %a, %a, %a) = *%a vs. %a\n",
125                     i, cl_half_to_float(x[i]), cl_half_to_float(minval[i]),
126                     cl_half_to_float(maxval[i]), t,
127                     cl_half_to_float(outptr[i]));
128                 return -1;
129             }
130         }
131     }
132     else
133     {
134         T t;
135         for (int i = 0; i < n; i++)
136         {
137             t = std::min(std::max(x[i], minval[i]), maxval[i]);
138             if (t != outptr[i])
139             {
140                 log_error(
141                     "%d) verification error: clamp( %a, %a, %a) = *%a vs. %a\n",
142                     i, x[i], minval[i], maxval[i], t, outptr[i]);
143                 return -1;
144             }
145         }
146     }
147 
148     return 0;
149 }
150 }
151 
152 template <typename T>
test_clamp_fn(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)153 int test_clamp_fn(cl_device_id device, cl_context context,
154                   cl_command_queue queue, int n_elems)
155 {
156     clMemWrapper streams[4];
157     std::vector<T> input_ptr[3], output_ptr;
158 
159     std::vector<clProgramWrapper> programs;
160     std::vector<clKernelWrapper> kernels;
161 
162     int err, i, j;
163     MTdataHolder d = MTdataHolder(gRandomSeed);
164 
165     assert(BaseFunctionTest::type2name.find(sizeof(T))
166            != BaseFunctionTest::type2name.end());
167     auto tname = BaseFunctionTest::type2name[sizeof(T)];
168 
169     programs.resize(kTotalVecCount);
170     kernels.resize(kTotalVecCount);
171 
172     int num_elements = n_elems * (1 << (kVectorSizeCount - 1));
173 
174     for (i = 0; i < 3; i++) input_ptr[i].resize(num_elements);
175     output_ptr.resize(num_elements);
176 
177     for (i = 0; i < 4; i++)
178     {
179         streams[i] = clCreateBuffer(context, CL_MEM_READ_WRITE,
180                                     sizeof(T) * num_elements, NULL, &err);
181         test_error(err, "clCreateBuffer failed");
182     }
183 
184     if (std::is_same<T, float>::value)
185     {
186         for (j = 0; j < num_elements; j++)
187         {
188             input_ptr[0][j] = get_random_float(-0x200000, 0x200000, d);
189             input_ptr[1][j] = get_random_float(-0x200000, 0x200000, d);
190             input_ptr[2][j] = get_random_float(input_ptr[1][j], 0x200000, d);
191         }
192     }
193     else if (std::is_same<T, double>::value)
194     {
195         for (j = 0; j < num_elements; j++)
196         {
197             input_ptr[0][j] = get_random_double(-0x20000000, 0x20000000, d);
198             input_ptr[1][j] = get_random_double(-0x20000000, 0x20000000, d);
199             input_ptr[2][j] = get_random_double(input_ptr[1][j], 0x20000000, d);
200         }
201     }
202     else if (std::is_same<T, half>::value)
203     {
204         const float fval = CL_HALF_MAX;
205         for (j = 0; j < num_elements; j++)
206         {
207             input_ptr[0][j] = conv_to_half(get_random_float(-fval, fval, d));
208             input_ptr[1][j] = conv_to_half(get_random_float(-fval, fval, d));
209             input_ptr[2][j] = conv_to_half(
210                 get_random_float(conv_to_flt(input_ptr[1][j]), fval, d));
211         }
212     }
213 
214     for (i = 0; i < 3; i++)
215     {
216         err = clEnqueueWriteBuffer(queue, streams[i], CL_TRUE, 0,
217                                    sizeof(T) * num_elements,
218                                    &input_ptr[i].front(), 0, NULL, NULL);
219         test_error(err, "Unable to write input buffer");
220     }
221 
222     for (i = 0; i < kTotalVecCount; i++)
223     {
224         if (std::is_same<T, float>::value)
225         {
226             err = create_single_kernel_helper(
227                 context, &programs[i], &kernels[i], 1, &clamp_float_codes[i],
228                 "test_clamp");
229             test_error(err, "Unable to create kernel");
230         }
231         else if (std::is_same<T, double>::value)
232         {
233             err = create_single_kernel_helper(
234                 context, &programs[i], &kernels[i], 1, &clamp_double_codes[i],
235                 "test_clamp");
236             test_error(err, "Unable to create kernel");
237         }
238         else if (std::is_same<T, half>::value)
239         {
240             err = create_single_kernel_helper(
241                 context, &programs[i], &kernels[i], 1, &clamp_half_codes[i],
242                 "test_clamp");
243             test_error(err, "Unable to create kernel");
244         }
245 
246         log_info("Just made a program for %s, i=%d, size=%d, in slot %d\n",
247                  tname.c_str(), i, g_arrVecSizes[i], i);
248         fflush(stdout);
249 
250         for (j = 0; j < 4; j++)
251         {
252             err =
253                 clSetKernelArg(kernels[i], j, sizeof(streams[j]), &streams[j]);
254             test_error(err, "Unable to set kernel argument");
255         }
256 
257         size_t threads = (size_t)n_elems;
258 
259         err = clEnqueueNDRangeKernel(queue, kernels[i], 1, NULL, &threads, NULL,
260                                      0, NULL, NULL);
261         test_error(err, "Unable to execute kernel");
262 
263         err = clEnqueueReadBuffer(queue, streams[3], true, 0,
264                                   sizeof(T) * num_elements, &output_ptr[0], 0,
265                                   NULL, NULL);
266         test_error(err, "Unable to read results");
267 
268         if (verify_clamp<T>((T *)&input_ptr[0].front(),
269                             (T *)&input_ptr[1].front(),
270                             (T *)&input_ptr[2].front(), (T *)&output_ptr[0],
271                             n_elems * ((g_arrVecSizes[i]))))
272         {
273             log_error("CLAMP %s%d test failed\n", tname.c_str(),
274                       ((g_arrVecSizes[i])));
275             err = -1;
276         }
277         else
278         {
279             log_info("CLAMP %s%d test passed\n", tname.c_str(),
280                      ((g_arrVecSizes[i])));
281             err = 0;
282         }
283 
284         if (err) break;
285     }
286 
287     return err;
288 }
289 
Run()290 cl_int ClampTest::Run()
291 {
292     cl_int error = CL_SUCCESS;
293     if (is_extension_available(device, "cl_khr_fp16"))
294     {
295         error = test_clamp_fn<cl_half>(device, context, queue, num_elems);
296         test_error(error, "ClampTest::Run<cl_half> failed");
297     }
298 
299     error = test_clamp_fn<float>(device, context, queue, num_elems);
300     test_error(error, "ClampTest::Run<float> failed");
301 
302     if (is_extension_available(device, "cl_khr_fp64"))
303     {
304         error = test_clamp_fn<double>(device, context, queue, num_elems);
305         test_error(error, "ClampTest::Run<double> failed");
306     }
307 
308     return error;
309 }
310 
test_clamp(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)311 int test_clamp(cl_device_id device, cl_context context, cl_command_queue queue,
312                int n_elems)
313 {
314     return MakeAndRunTest<ClampTest>(device, context, queue, n_elems);
315 }
316