xref: /aosp_15_r20/external/OpenCL-CTS/test_conformance/math_brute_force/common.cpp (revision 6467f958c7de8070b317fc65bcb0f6472e388d82)
1 //
2 // Copyright (c) 2022 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 
17 #include "common.h"
18 
19 #include "utility.h" // for sizeNames and sizeValues.
20 
21 #include <sstream>
22 #include <string>
23 
24 namespace {
25 
GetTypeName(ParameterType type)26 const char *GetTypeName(ParameterType type)
27 {
28     switch (type)
29     {
30         case ParameterType::Float: return "float";
31         case ParameterType::Double: return "double";
32         case ParameterType::Int: return "int";
33         case ParameterType::UInt: return "uint";
34         case ParameterType::Long: return "long";
35         case ParameterType::ULong: return "ulong";
36     }
37     return nullptr;
38 }
39 
GetUndefValue(ParameterType type)40 const char *GetUndefValue(ParameterType type)
41 {
42     switch (type)
43     {
44         case ParameterType::Float:
45         case ParameterType::Double: return "NAN";
46 
47         case ParameterType::Int:
48         case ParameterType::UInt: return "0x12345678";
49 
50         case ParameterType::Long:
51         case ParameterType::ULong: return "0x0ddf00dbadc0ffee";
52     }
53     return nullptr;
54 }
55 
EmitDefineType(std::ostringstream & kernel,const char * name,ParameterType type,int vector_size_index)56 void EmitDefineType(std::ostringstream &kernel, const char *name,
57                     ParameterType type, int vector_size_index)
58 {
59     kernel << "#define " << name << " " << GetTypeName(type)
60            << sizeNames[vector_size_index] << '\n';
61     kernel << "#define " << name << "_SCALAR " << GetTypeName(type) << '\n';
62 }
63 
EmitDefineUndef(std::ostringstream & kernel,const char * name,ParameterType type)64 void EmitDefineUndef(std::ostringstream &kernel, const char *name,
65                      ParameterType type)
66 {
67     kernel << "#define " << name << " " << GetUndefValue(type) << '\n';
68 }
69 
EmitEnableExtension(std::ostringstream & kernel,const std::initializer_list<ParameterType> & types)70 void EmitEnableExtension(std::ostringstream &kernel,
71                          const std::initializer_list<ParameterType> &types)
72 {
73     bool needsFp64 = false;
74 
75     for (const auto &type : types)
76     {
77         switch (type)
78         {
79             case ParameterType::Double: needsFp64 = true; break;
80 
81             case ParameterType::Float:
82             case ParameterType::Int:
83             case ParameterType::UInt:
84             case ParameterType::Long:
85             case ParameterType::ULong:
86                 // No extension required.
87                 break;
88         }
89     }
90 
91     if (needsFp64) kernel << "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
92 }
93 
GetBuildOptions(bool relaxed_mode)94 std::string GetBuildOptions(bool relaxed_mode)
95 {
96     std::ostringstream options;
97 
98     if (gForceFTZ)
99     {
100         options << " -cl-denorms-are-zero";
101     }
102 
103     if (gFloatCapabilities & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT)
104     {
105         options << " -cl-fp32-correctly-rounded-divide-sqrt";
106     }
107 
108     if (relaxed_mode)
109     {
110         options << " -cl-fast-relaxed-math";
111     }
112 
113     return options.str();
114 }
115 
116 } // anonymous namespace
117 
GetKernelName(int vector_size_index)118 std::string GetKernelName(int vector_size_index)
119 {
120     return std::string("math_kernel") + sizeNames[vector_size_index];
121 }
122 
GetUnaryKernel(const std::string & kernel_name,const char * builtin,ParameterType retType,ParameterType type1,int vector_size_index)123 std::string GetUnaryKernel(const std::string &kernel_name, const char *builtin,
124                            ParameterType retType, ParameterType type1,
125                            int vector_size_index)
126 {
127     // To keep the kernel code readable, use macros for types and undef values.
128     std::ostringstream kernel;
129     EmitDefineType(kernel, "RETTYPE", retType, vector_size_index);
130     EmitDefineType(kernel, "TYPE1", type1, vector_size_index);
131     EmitDefineUndef(kernel, "UNDEF1", type1);
132     EmitEnableExtension(kernel, { retType, type1 });
133 
134     // clang-format off
135     const char *kernel_nonvec3[] = { R"(
136 __kernel void )", kernel_name.c_str(), R"((__global RETTYPE* out,
137                           __global TYPE1* in1)
138 {
139     size_t i = get_global_id(0);
140     out[i] = )", builtin, R"((in1[i]);
141 }
142 )" };
143 
144     const char *kernel_vec3[] = { R"(
145 __kernel void )", kernel_name.c_str(), R"((__global RETTYPE_SCALAR* out,
146                           __global TYPE1_SCALAR* in1)
147 {
148     size_t i = get_global_id(0);
149 
150     if (i + 1 < get_global_size(0))
151     {
152         TYPE1 a = vload3(0, in1 + 3 * i);
153         RETTYPE res = )", builtin, R"((a);
154         vstore3(res, 0, out + 3 * i);
155     }
156     else
157     {
158         // Figure out how many elements are left over after
159         // BUFFER_SIZE % (3 * sizeof(type)).
160         // Assume power of two buffer size.
161         size_t parity = i & 1;
162         TYPE1 a = (TYPE1)(UNDEF1, UNDEF1, UNDEF1);
163         switch (parity)
164         {
165             case 0:
166                 a.y = in1[3 * i + 1];
167                 // fall through
168             case 1:
169                 a.x = in1[3 * i];
170                 break;
171         }
172 
173         RETTYPE res = )", builtin, R"((a);
174 
175         switch (parity)
176         {
177             case 0:
178                 out[3 * i + 1] = res.y;
179                 // fall through
180             case 1:
181                 out[3 * i] = res.x;
182                 break;
183         }
184     }
185 }
186 )" };
187     // clang-format on
188 
189     if (sizeValues[vector_size_index] != 3)
190         for (const auto &chunk : kernel_nonvec3) kernel << chunk;
191     else
192         for (const auto &chunk : kernel_vec3) kernel << chunk;
193 
194     return kernel.str();
195 }
196 
GetUnaryKernel(const std::string & kernel_name,const char * builtin,ParameterType retType1,ParameterType retType2,ParameterType type1,int vector_size_index)197 std::string GetUnaryKernel(const std::string &kernel_name, const char *builtin,
198                            ParameterType retType1, ParameterType retType2,
199                            ParameterType type1, int vector_size_index)
200 {
201     // To keep the kernel code readable, use macros for types and undef values.
202     std::ostringstream kernel;
203     EmitDefineType(kernel, "RETTYPE1", retType1, vector_size_index);
204     EmitDefineType(kernel, "RETTYPE2", retType2, vector_size_index);
205     EmitDefineType(kernel, "TYPE1", type1, vector_size_index);
206     EmitDefineUndef(kernel, "UNDEF1", type1);
207     EmitDefineUndef(kernel, "UNDEFR2", retType2);
208     EmitEnableExtension(kernel, { retType1, retType2, type1 });
209 
210     // clang-format off
211     const char *kernel_nonvec3[] = { R"(
212 __kernel void )", kernel_name.c_str(), R"((__global RETTYPE1* out1,
213                           __global RETTYPE2* out2,
214                           __global TYPE1* in1)
215 {
216     size_t i = get_global_id(0);
217     out1[i] = )", builtin, R"((in1[i], out2 + i);
218 }
219 )" };
220 
221     const char *kernel_vec3[] = { R"(
222 __kernel void )", kernel_name.c_str(), R"((__global RETTYPE1_SCALAR* out1,
223                           __global RETTYPE2_SCALAR* out2,
224                           __global TYPE1_SCALAR* in1)
225 {
226     size_t i = get_global_id(0);
227 
228     if (i + 1 < get_global_size(0))
229     {
230         TYPE1 a = vload3(0, in1 + 3 * i);
231         RETTYPE2 res2 = UNDEFR2;
232         RETTYPE1 res1 = )", builtin, R"((a, &res2);
233         vstore3(res1, 0, out1 + 3 * i);
234         vstore3(res2, 0, out2 + 3 * i);
235     }
236     else
237     {
238         // Figure out how many elements are left over after
239         // BUFFER_SIZE % (3 * sizeof(type)).
240         // Assume power of two buffer size.
241         size_t parity = i & 1;
242         TYPE1 a = (TYPE1)(UNDEF1, UNDEF1, UNDEF1);
243         switch (parity)
244         {
245             case 0:
246                 a.y = in1[3 * i + 1];
247                 // fall through
248             case 1:
249                 a.x = in1[3 * i];
250                 break;
251         }
252 
253         RETTYPE2 res2 = UNDEFR2;
254         RETTYPE1 res1 = )", builtin, R"((a, &res2);
255 
256         switch (parity)
257         {
258             case 0:
259                 out1[3 * i + 1] = res1.y;
260                 out2[3 * i + 1] = res2.y;
261                 // fall through
262             case 1:
263                 out1[3 * i] = res1.x;
264                 out2[3 * i] = res2.x;
265                 break;
266         }
267     }
268 }
269 )" };
270     // clang-format on
271 
272     if (sizeValues[vector_size_index] != 3)
273         for (const auto &chunk : kernel_nonvec3) kernel << chunk;
274     else
275         for (const auto &chunk : kernel_vec3) kernel << chunk;
276 
277     return kernel.str();
278 }
279 
GetBinaryKernel(const std::string & kernel_name,const char * builtin,ParameterType retType,ParameterType type1,ParameterType type2,int vector_size_index)280 std::string GetBinaryKernel(const std::string &kernel_name, const char *builtin,
281                             ParameterType retType, ParameterType type1,
282                             ParameterType type2, int vector_size_index)
283 {
284     // To keep the kernel code readable, use macros for types and undef values.
285     std::ostringstream kernel;
286     EmitDefineType(kernel, "RETTYPE", retType, vector_size_index);
287     EmitDefineType(kernel, "TYPE1", type1, vector_size_index);
288     EmitDefineType(kernel, "TYPE2", type2, vector_size_index);
289     EmitDefineUndef(kernel, "UNDEF1", type1);
290     EmitDefineUndef(kernel, "UNDEF2", type2);
291     EmitEnableExtension(kernel, { retType, type1, type2 });
292 
293     const bool is_vec3 = sizeValues[vector_size_index] == 3;
294 
295     std::string invocation;
296     if (strlen(builtin) == 1)
297     {
298         // Assume a single-character builtin is an operator (e.g., +, *, ...).
299         invocation = is_vec3 ? "a" : "in1[i] ";
300         invocation += builtin;
301         invocation += is_vec3 ? "b" : " in2[i]";
302     }
303     else
304     {
305         // Otherwise call the builtin as a function with two arguments.
306         invocation = builtin;
307         invocation += is_vec3 ? "(a, b)" : "(in1[i], in2[i])";
308     }
309 
310     // clang-format off
311     const char *kernel_nonvec3[] = { R"(
312 __kernel void )", kernel_name.c_str(), R"((__global RETTYPE* out,
313                           __global TYPE1* in1,
314                           __global TYPE2* in2)
315 {
316     size_t i = get_global_id(0);
317     out[i] = )", invocation.c_str(), R"(;
318 }
319 )" };
320 
321     const char *kernel_vec3[] = { R"(
322 __kernel void )", kernel_name.c_str(), R"((__global RETTYPE_SCALAR* out,
323                           __global TYPE1_SCALAR* in1,
324                           __global TYPE2_SCALAR* in2)
325 {
326     size_t i = get_global_id(0);
327 
328     if (i + 1 < get_global_size(0))
329     {
330         TYPE1 a = vload3(0, in1 + 3 * i);
331         TYPE2 b = vload3(0, in2 + 3 * i);
332         RETTYPE res = )", invocation.c_str(), R"(;
333         vstore3(res, 0, out + 3 * i);
334     }
335     else
336     {
337         // Figure out how many elements are left over after
338         // BUFFER_SIZE % (3 * sizeof(type)).
339         // Assume power of two buffer size.
340         size_t parity = i & 1;
341         TYPE1 a = (TYPE1)(UNDEF1, UNDEF1, UNDEF1);
342         TYPE2 b = (TYPE2)(UNDEF2, UNDEF2, UNDEF2);
343         switch (parity)
344         {
345             case 0:
346                 a.y = in1[3 * i + 1];
347                 b.y = in2[3 * i + 1];
348                 // fall through
349             case 1:
350                 a.x = in1[3 * i];
351                 b.x = in2[3 * i];
352                 break;
353         }
354 
355         RETTYPE res = )", invocation.c_str(), R"(;
356 
357         switch (parity)
358         {
359             case 0:
360                 out[3 * i + 1] = res.y;
361                 // fall through
362             case 1:
363                 out[3 * i] = res.x;
364                 break;
365         }
366     }
367 }
368 )" };
369     // clang-format on
370 
371     if (!is_vec3)
372         for (const auto &chunk : kernel_nonvec3) kernel << chunk;
373     else
374         for (const auto &chunk : kernel_vec3) kernel << chunk;
375 
376     return kernel.str();
377 }
378 
GetBinaryKernel(const std::string & kernel_name,const char * builtin,ParameterType retType1,ParameterType retType2,ParameterType type1,ParameterType type2,int vector_size_index)379 std::string GetBinaryKernel(const std::string &kernel_name, const char *builtin,
380                             ParameterType retType1, ParameterType retType2,
381                             ParameterType type1, ParameterType type2,
382                             int vector_size_index)
383 {
384     // To keep the kernel code readable, use macros for types and undef values.
385     std::ostringstream kernel;
386     EmitDefineType(kernel, "RETTYPE1", retType1, vector_size_index);
387     EmitDefineType(kernel, "RETTYPE2", retType2, vector_size_index);
388     EmitDefineType(kernel, "TYPE1", type1, vector_size_index);
389     EmitDefineType(kernel, "TYPE2", type2, vector_size_index);
390     EmitDefineUndef(kernel, "UNDEF1", type1);
391     EmitDefineUndef(kernel, "UNDEF2", type2);
392     EmitDefineUndef(kernel, "UNDEFR2", retType2);
393     EmitEnableExtension(kernel, { retType1, retType2, type1, type2 });
394 
395     // clang-format off
396     const char *kernel_nonvec3[] = { R"(
397 __kernel void )", kernel_name.c_str(), R"((__global RETTYPE1* out1,
398                           __global RETTYPE2* out2,
399                           __global TYPE1* in1,
400                           __global TYPE2* in2)
401 {
402     size_t i = get_global_id(0);
403     out1[i] = )", builtin, R"((in1[i], in2[i], out2 + i);
404 }
405 )" };
406 
407     const char *kernel_vec3[] = { R"(
408 __kernel void )", kernel_name.c_str(), R"((__global RETTYPE1_SCALAR* out1,
409                           __global RETTYPE2_SCALAR* out2,
410                           __global TYPE1_SCALAR* in1,
411                           __global TYPE2_SCALAR* in2)
412 {
413     size_t i = get_global_id(0);
414 
415     if (i + 1 < get_global_size(0))
416     {
417         TYPE1 a = vload3(0, in1 + 3 * i);
418         TYPE2 b = vload3(0, in2 + 3 * i);
419         RETTYPE2 res2 = UNDEFR2;
420         RETTYPE1 res1 = )", builtin, R"((a, b, &res2);
421         vstore3(res1, 0, out1 + 3 * i);
422         vstore3(res2, 0, out2 + 3 * i);
423     }
424     else
425     {
426         // Figure out how many elements are left over after
427         // BUFFER_SIZE % (3 * sizeof(type)).
428         // Assume power of two buffer size.
429         size_t parity = i & 1;
430         TYPE1 a = (TYPE1)(UNDEF1, UNDEF1, UNDEF1);
431         TYPE2 b = (TYPE2)(UNDEF2, UNDEF2, UNDEF2);
432         switch (parity)
433         {
434             case 0:
435                 a.y = in1[3 * i + 1];
436                 b.y = in2[3 * i + 1];
437                 // fall through
438             case 1:
439                 a.x = in1[3 * i];
440                 b.x = in2[3 * i];
441                 break;
442         }
443 
444         RETTYPE2 res2 = UNDEFR2;
445         RETTYPE1 res1 = )", builtin, R"((a, b, &res2);
446 
447         switch (parity)
448         {
449             case 0:
450                 out1[3 * i + 1] = res1.y;
451                 out2[3 * i + 1] = res2.y;
452                 // fall through
453             case 1:
454                 out1[3 * i] = res1.x;
455                 out2[3 * i] = res2.x;
456                 break;
457         }
458     }
459 }
460 )" };
461     // clang-format on
462 
463     if (sizeValues[vector_size_index] != 3)
464         for (const auto &chunk : kernel_nonvec3) kernel << chunk;
465     else
466         for (const auto &chunk : kernel_vec3) kernel << chunk;
467 
468     return kernel.str();
469 }
470 
GetTernaryKernel(const std::string & kernel_name,const char * builtin,ParameterType retType,ParameterType type1,ParameterType type2,ParameterType type3,int vector_size_index)471 std::string GetTernaryKernel(const std::string &kernel_name,
472                              const char *builtin, ParameterType retType,
473                              ParameterType type1, ParameterType type2,
474                              ParameterType type3, int vector_size_index)
475 {
476     // To keep the kernel code readable, use macros for types and undef values.
477     std::ostringstream kernel;
478     EmitDefineType(kernel, "RETTYPE", retType, vector_size_index);
479     EmitDefineType(kernel, "TYPE1", type1, vector_size_index);
480     EmitDefineType(kernel, "TYPE2", type2, vector_size_index);
481     EmitDefineType(kernel, "TYPE3", type3, vector_size_index);
482     EmitDefineUndef(kernel, "UNDEF1", type1);
483     EmitDefineUndef(kernel, "UNDEF2", type2);
484     EmitDefineUndef(kernel, "UNDEF3", type3);
485     EmitEnableExtension(kernel, { retType, type1, type2, type3 });
486 
487     // clang-format off
488     const char *kernel_nonvec3[] = { R"(
489 __kernel void )", kernel_name.c_str(), R"((__global RETTYPE* out,
490                           __global TYPE1* in1,
491                           __global TYPE2* in2,
492                           __global TYPE3* in3)
493 {
494     size_t i = get_global_id(0);
495     out[i] = )", builtin, R"((in1[i], in2[i], in3[i]);
496 }
497 )" };
498 
499     const char *kernel_vec3[] = { R"(
500 __kernel void )", kernel_name.c_str(), R"((__global RETTYPE_SCALAR* out,
501                           __global TYPE1_SCALAR* in1,
502                           __global TYPE2_SCALAR* in2,
503                           __global TYPE3_SCALAR* in3)
504 {
505     size_t i = get_global_id(0);
506 
507     if (i + 1 < get_global_size(0))
508     {
509         TYPE1 a = vload3(0, in1 + 3 * i);
510         TYPE2 b = vload3(0, in2 + 3 * i);
511         TYPE3 c = vload3(0, in3 + 3 * i);
512         RETTYPE res = )", builtin, R"((a, b, c);
513         vstore3(res, 0, out + 3 * i);
514     }
515     else
516     {
517         // Figure out how many elements are left over after
518         // BUFFER_SIZE % (3 * sizeof(type)).
519         // Assume power of two buffer size.
520         size_t parity = i & 1;
521         TYPE1 a = (TYPE1)(UNDEF1, UNDEF1, UNDEF1);
522         TYPE2 b = (TYPE2)(UNDEF2, UNDEF2, UNDEF2);
523         TYPE3 c = (TYPE3)(UNDEF3, UNDEF3, UNDEF3);
524         switch (parity)
525         {
526             case 0:
527                 a.y = in1[3 * i + 1];
528                 b.y = in2[3 * i + 1];
529                 c.y = in3[3 * i + 1];
530                 // fall through
531             case 1:
532                 a.x = in1[3 * i];
533                 b.x = in2[3 * i];
534                 c.x = in3[3 * i];
535                 break;
536         }
537 
538         RETTYPE res = )", builtin, R"((a, b, c);
539 
540         switch (parity)
541         {
542             case 0:
543                 out[3 * i + 1] = res.y;
544                 // fall through
545             case 1:
546                 out[3 * i] = res.x;
547                 break;
548         }
549     }
550 }
551 )" };
552     // clang-format on
553 
554     if (sizeValues[vector_size_index] != 3)
555         for (const auto &chunk : kernel_nonvec3) kernel << chunk;
556     else
557         for (const auto &chunk : kernel_vec3) kernel << chunk;
558 
559     return kernel.str();
560 }
561 
BuildKernels(BuildKernelInfo & info,cl_uint job_id,SourceGenerator generator)562 cl_int BuildKernels(BuildKernelInfo &info, cl_uint job_id,
563                     SourceGenerator generator)
564 {
565     // Generate the kernel code.
566     cl_uint vector_size_index = gMinVectorSizeIndex + job_id;
567     auto kernel_name = GetKernelName(vector_size_index);
568     auto source = generator(kernel_name, info.nameInCode, vector_size_index);
569     std::array<const char *, 1> sources{ source.c_str() };
570 
571     // Create the program.
572     clProgramWrapper &program = info.programs[vector_size_index];
573     auto options = GetBuildOptions(info.relaxedMode);
574     int error =
575         create_single_kernel_helper(gContext, &program, nullptr, sources.size(),
576                                     sources.data(), nullptr, options.c_str());
577     if (error != CL_SUCCESS)
578     {
579         vlog_error("\t\tFAILED -- Failed to create program. (%d)\n", error);
580         return error;
581     }
582 
583     // Create a kernel for each thread. cl_kernels aren't thread safe, so make
584     // one for every thread
585     auto &kernels = info.kernels[vector_size_index];
586     assert(kernels.empty() && "Dirty BuildKernelInfo");
587     kernels.resize(info.threadCount);
588     for (auto &kernel : kernels)
589     {
590         kernel = clCreateKernel(program, kernel_name.c_str(), &error);
591         if (!kernel || error != CL_SUCCESS)
592         {
593             vlog_error("\t\tFAILED -- clCreateKernel() failed: (%d)\n", error);
594             return error;
595         }
596     }
597 
598     return CL_SUCCESS;
599 }
600