xref: /aosp_15_r20/external/OpenCL-CTS/test_conformance/commonfns/test_unary_fn.cpp (revision 6467f958c7de8070b317fc65bcb0f6472e388d82)
1 //
2 // Copyright (c) 2023 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include <stdio.h>
17 #include <string.h>
18 #include <sys/types.h>
19 #include <sys/stat.h>
20 
21 #include <vector>
22 
23 #include "harness/deviceInfo.h"
24 #include "harness/stringHelpers.h"
25 #include "harness/typeWrappers.h"
26 
27 #include "procs.h"
28 #include "test_base.h"
29 
30 #ifndef M_PI
31 #define M_PI 3.14159265358979323846264338327950288
32 #endif
33 
34 // clang-format off
35 const char *unary_fn_code_pattern =
36 "%s\n" /* optional pragma */
37 "__kernel void test_fn(__global %s%s *src, __global %s%s *dst)\n"
38 "{\n"
39 "    int  tid = get_global_id(0);\n"
40 "\n"
41 "    dst[tid] = %s(src[tid]);\n"
42 "}\n";
43 
44 const char *unary_fn_code_pattern_v3 =
45 "%s\n" /* optional pragma */
46 "__kernel void test_fn(__global %s *src, __global %s *dst)\n"
47 "{\n"
48 "    int  tid = get_global_id(0);\n"
49 "\n"
50 "    vstore3(%s(vload3(tid,src)), tid, dst);\n"
51 "}\n";
52 // clang-format on
53 
54 #define MAX_ERR 2.0f
55 
56 namespace {
57 
58 template <typename T>
verify_degrees(const T * const inptr,const T * const outptr,int n)59 int verify_degrees(const T *const inptr, const T *const outptr, int n)
60 {
61     float error, max_error = 0.0f;
62     double r, max_val = NAN;
63     int max_index = 0;
64 
65     for (int i = 0, j = 0; i < n; i++, j++)
66     {
67         r = (180.0 / M_PI) * conv_to_dbl(inptr[i]);
68 
69         if (std::is_same<T, half>::value)
70             if (!isfinite_fp(conv_to_half(r)) && !isfinite_fp(outptr[i]))
71                 continue;
72 
73         error = UlpFn(outptr[i], r);
74 
75         if (fabsf(error) > max_error)
76         {
77             max_error = error;
78             max_index = i;
79             max_val = r;
80             if (fabsf(error) > MAX_ERR)
81             {
82                 if (std::is_same<T, half>::value)
83                     log_error(
84                         "%d) Error @ %a: *%a vs %a  (*%g vs %g) ulps: %f\n", i,
85                         conv_to_flt(inptr[i]), r, conv_to_flt(outptr[i]), r,
86                         conv_to_flt(outptr[i]), error);
87                 else
88                     log_error(
89                         "%d) Error @ %a: *%a vs %a  (*%g vs %g) ulps: %f\n", i,
90                         inptr[i], r, outptr[i], r, outptr[i], error);
91                 return 1;
92             }
93         }
94     }
95 
96     if (std::is_same<T, half>::value)
97         log_info("degrees: Max error %f ulps at %d: *%a vs %a  (*%g vs %g)\n",
98                  max_error, max_index, max_val, conv_to_flt(outptr[max_index]),
99                  max_val, conv_to_flt(outptr[max_index]));
100     else
101         log_info("degrees: Max error %f ulps at %d: *%a vs %a  (*%g vs %g)\n",
102                  max_error, max_index, max_val, outptr[max_index], max_val,
103                  outptr[max_index]);
104 
105     return 0;
106 }
107 
108 template <typename T>
verify_radians(const T * const inptr,const T * const outptr,int n)109 int verify_radians(const T *const inptr, const T *const outptr, int n)
110 {
111     float error, max_error = 0.0f;
112     double r, max_val = NAN;
113     int max_index = 0;
114 
115     for (int i = 0, j = 0; i < n; i++, j++)
116     {
117         r = (M_PI / 180.0) * conv_to_dbl(inptr[i]);
118 
119         if (std::is_same<T, half>::value)
120             if (!isfinite_fp(conv_to_half(r)) && !isfinite_fp(outptr[i]))
121                 continue;
122 
123         error = UlpFn(outptr[i], r);
124 
125         if (fabsf(error) > max_error)
126         {
127             max_error = error;
128             max_index = i;
129             max_val = r;
130             if (fabsf(error) > MAX_ERR)
131             {
132                 if (std::is_same<T, half>::value)
133                     log_error(
134                         "%d) Error @ %a: *%a vs %a  (*%g vs %g) ulps: %f\n", i,
135                         conv_to_flt(inptr[i]), r, conv_to_flt(outptr[i]), r,
136                         conv_to_flt(outptr[i]), error);
137                 else
138                     log_error(
139                         "%d) Error @ %a: *%a vs %a  (*%g vs %g) ulps: %f\n", i,
140                         inptr[i], r, outptr[i], r, outptr[i], error);
141                 return 1;
142             }
143         }
144     }
145 
146     if (std::is_same<T, half>::value)
147         log_info("radians: Max error %f ulps at %d: *%a vs %a  (*%g vs %g)\n",
148                  max_error, max_index, max_val, conv_to_flt(outptr[max_index]),
149                  max_val, conv_to_flt(outptr[max_index]));
150     else
151         log_info("radians: Max error %f ulps at %d: *%a vs %a  (*%g vs %g)\n",
152                  max_error, max_index, max_val, outptr[max_index], max_val,
153                  outptr[max_index]);
154 
155     return 0;
156 }
157 
158 template <typename T>
verify_sign(const T * const inptr,const T * const outptr,int n)159 int verify_sign(const T *const inptr, const T *const outptr, int n)
160 {
161     double r = 0;
162     for (int i = 0; i < n; i++)
163     {
164         if (conv_to_dbl(inptr[i]) > 0.0f)
165             r = 1.0;
166         else if (conv_to_dbl(inptr[i]) < 0.0f)
167             r = -1.0;
168         else
169             r = 0.0;
170         if (r != conv_to_dbl(outptr[i])) return -1;
171     }
172     return 0;
173 }
174 
175 }
176 
177 template <typename T>
test_unary_fn(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems,const std::string & fnName,VerifyFuncUnary<T> verifyFn)178 int test_unary_fn(cl_device_id device, cl_context context,
179                   cl_command_queue queue, int n_elems,
180                   const std::string &fnName, VerifyFuncUnary<T> verifyFn)
181 {
182     clMemWrapper streams[2];
183     std::vector<T> input_ptr, output_ptr;
184 
185     std::vector<clProgramWrapper> programs;
186     std::vector<clKernelWrapper> kernels;
187 
188     int err, i;
189     MTdataHolder d = MTdataHolder(gRandomSeed);
190 
191     assert(BaseFunctionTest::type2name.find(sizeof(T))
192            != BaseFunctionTest::type2name.end());
193     auto tname = BaseFunctionTest::type2name[sizeof(T)];
194 
195     programs.resize(kTotalVecCount);
196     kernels.resize(kTotalVecCount);
197 
198     int num_elements = n_elems * (1 << (kTotalVecCount - 1));
199 
200     input_ptr.resize(num_elements);
201     output_ptr.resize(num_elements);
202 
203     for (i = 0; i < 2; i++)
204     {
205         streams[i] = clCreateBuffer(context, CL_MEM_READ_WRITE,
206                                     sizeof(T) * num_elements, NULL, &err);
207         test_error(err, "clCreateBuffer failed");
208     }
209 
210     std::string pragma_str;
211     if (std::is_same<T, float>::value)
212     {
213         for (int j = 0; j < num_elements; j++)
214         {
215             input_ptr[j] = get_random_float((float)(-100000.f * M_PI),
216                                             (float)(100000.f * M_PI), d);
217         }
218     }
219     else if (std::is_same<T, double>::value)
220     {
221         pragma_str = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
222         for (int j = 0; j < num_elements; j++)
223         {
224             input_ptr[j] =
225                 get_random_double(-100000.0 * M_PI, 100000.0 * M_PI, d);
226         }
227     }
228     else if (std::is_same<T, half>::value)
229     {
230         pragma_str = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n";
231         for (int j = 0; j < num_elements; j++)
232         {
233             input_ptr[j] = conv_to_half(get_random_float(
234                 (float)(-10000.f * M_PI), (float)(10000.f * M_PI), d));
235         }
236     }
237 
238     err = clEnqueueWriteBuffer(queue, streams[0], true, 0,
239                                sizeof(T) * num_elements, &input_ptr.front(), 0,
240                                NULL, NULL);
241     test_error(err, "clEnqueueWriteBuffer failed\n");
242 
243     for (i = 0; i < kTotalVecCount; i++)
244     {
245         std::string kernelSource;
246         const char vecSizeNames[][3] = { "", "2", "4", "8", "16", "3" };
247 
248         if (i >= kVectorSizeCount)
249         {
250             std::string str = unary_fn_code_pattern_v3;
251             kernelSource = str_sprintf(str, pragma_str.c_str(), tname.c_str(),
252                                        tname.c_str(), fnName.c_str());
253         }
254         else
255         {
256             std::string str = unary_fn_code_pattern;
257             kernelSource = str_sprintf(str, pragma_str.c_str(), tname.c_str(),
258                                        vecSizeNames[i], tname.c_str(),
259                                        vecSizeNames[i], fnName.c_str());
260         }
261 
262         /* Create kernels */
263         const char *programPtr = kernelSource.c_str();
264         err =
265             create_single_kernel_helper(context, &programs[i], &kernels[i], 1,
266                                         (const char **)&programPtr, "test_fn");
267 
268         err = clSetKernelArg(kernels[i], 0, sizeof streams[0], &streams[0]);
269         err |= clSetKernelArg(kernels[i], 1, sizeof streams[1], &streams[1]);
270         if (err != CL_SUCCESS)
271         {
272             log_error("clSetKernelArgs failed\n");
273             return -1;
274         }
275 
276         // Line below is troublesome...
277         size_t threads = (size_t)num_elements / ((g_arrVecSizes[i]));
278         err = clEnqueueNDRangeKernel(queue, kernels[i], 1, NULL, &threads, NULL,
279                                      0, NULL, NULL);
280         if (err != CL_SUCCESS)
281         {
282             log_error("clEnqueueNDRangeKernel failed\n");
283             return -1;
284         }
285 
286         cl_uint dead = 42;
287         memset_pattern4(&output_ptr[0], &dead, sizeof(T) * num_elements);
288         err = clEnqueueReadBuffer(queue, streams[1], true, 0,
289                                   sizeof(T) * num_elements, &output_ptr[0], 0,
290                                   NULL, NULL);
291         if (err != CL_SUCCESS)
292         {
293             log_error("clEnqueueReadBuffer failed\n");
294             return -1;
295         }
296 
297         if (verifyFn((T *)&input_ptr.front(), (T *)&output_ptr.front(),
298                      n_elems * (i + 1)))
299         {
300             log_error("%s %s%d test failed\n", fnName.c_str(), tname.c_str(),
301                       ((g_arrVecSizes[i])));
302             err = -1;
303         }
304         else
305         {
306             log_info("%s %s%d test passed\n", fnName.c_str(), tname.c_str(),
307                      ((g_arrVecSizes[i])));
308         }
309 
310         if (err) break;
311     }
312 
313     return err;
314 }
315 
Run()316 cl_int DegreesTest::Run()
317 {
318     cl_int error = CL_SUCCESS;
319     if (is_extension_available(device, "cl_khr_fp16"))
320     {
321         error = test_unary_fn<half>(device, context, queue, num_elems,
322                                     fnName.c_str(), verify_degrees<half>);
323         test_error(error, "DegreesTest::Run<cl_half> failed");
324     }
325 
326     error = test_unary_fn<float>(device, context, queue, num_elems,
327                                  fnName.c_str(), verify_degrees<float>);
328     test_error(error, "DegreesTest::Run<float> failed");
329 
330     if (is_extension_available(device, "cl_khr_fp64"))
331     {
332         error = test_unary_fn<double>(device, context, queue, num_elems,
333                                       fnName.c_str(), verify_degrees<double>);
334         test_error(error, "DegreesTest::Run<double> failed");
335     }
336 
337     return error;
338 }
339 
Run()340 cl_int RadiansTest::Run()
341 {
342     cl_int error = CL_SUCCESS;
343     if (is_extension_available(device, "cl_khr_fp16"))
344     {
345         error = test_unary_fn<half>(device, context, queue, num_elems,
346                                     fnName.c_str(), verify_radians<half>);
347         test_error(error, "RadiansTest::Run<cl_half> failed");
348     }
349 
350     error = test_unary_fn<float>(device, context, queue, num_elems,
351                                  fnName.c_str(), verify_radians<float>);
352     test_error(error, "RadiansTest::Run<float> failed");
353 
354     if (is_extension_available(device, "cl_khr_fp64"))
355     {
356         error = test_unary_fn<double>(device, context, queue, num_elems,
357                                       fnName.c_str(), verify_radians<double>);
358         test_error(error, "RadiansTest::Run<double> failed");
359     }
360 
361     return error;
362 }
363 
Run()364 cl_int SignTest::Run()
365 {
366     cl_int error = CL_SUCCESS;
367     if (is_extension_available(device, "cl_khr_fp16"))
368     {
369         error = test_unary_fn<half>(device, context, queue, num_elems,
370                                     fnName.c_str(), verify_sign<half>);
371         test_error(error, "SignTest::Run<cl_half> failed");
372     }
373 
374     error = test_unary_fn<float>(device, context, queue, num_elems,
375                                  fnName.c_str(), verify_sign<float>);
376     test_error(error, "SignTest::Run<float> failed");
377 
378     if (is_extension_available(device, "cl_khr_fp64"))
379     {
380         error = test_unary_fn<double>(device, context, queue, num_elems,
381                                       fnName.c_str(), verify_sign<double>);
382         test_error(error, "SignTest::Run<double> failed");
383     }
384 
385     return error;
386 }
387 
test_degrees(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)388 int test_degrees(cl_device_id device, cl_context context,
389                  cl_command_queue queue, int n_elems)
390 {
391     return MakeAndRunTest<DegreesTest>(device, context, queue, n_elems,
392                                        "degrees");
393 }
394 
test_radians(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)395 int test_radians(cl_device_id device, cl_context context,
396                  cl_command_queue queue, int n_elems)
397 {
398     return MakeAndRunTest<RadiansTest>(device, context, queue, n_elems,
399                                        "radians");
400 }
401 
test_sign(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)402 int test_sign(cl_device_id device, cl_context context, cl_command_queue queue,
403               int n_elems)
404 {
405     return MakeAndRunTest<SignTest>(device, context, queue, n_elems, "sign");
406 }
407