xref: /aosp_15_r20/external/OpenCL-CTS/test_conformance/integer_ops/test_intmad24.cpp (revision 6467f958c7de8070b317fc65bcb0f6472e388d82)
1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17 
18 #include <stdio.h>
19 #include <string.h>
20 #include <sys/types.h>
21 #include <sys/stat.h>
22 
23 #include "procs.h"
24 
25 #define NUM_PROGRAMS 6
26 
27 static const int vector_sizes[] = {1, 2, 3, 4, 8, 16};
28 
29 
30 const char *int_mad24_kernel_code =
31 "__kernel void test_int_mad24(__global int *srcA, __global int *srcB, __global int *srcC, __global int *dst)\n"
32 "{\n"
33 "    int  tid = get_global_id(0);\n"
34 "\n"
35 "    dst[tid] = mad24(srcA[tid], srcB[tid], srcC[tid]);\n"
36 "}\n";
37 
38 const char *int2_mad24_kernel_code =
39 "__kernel void test_int2_mad24(__global int2 *srcA, __global int2 *srcB, __global int2 *srcC, __global int2 *dst)\n"
40 "{\n"
41 "    int  tid = get_global_id(0);\n"
42 "\n"
43 "    dst[tid] = mad24(srcA[tid], srcB[tid], srcC[tid]);\n"
44 "}\n";
45 
46 const char *int3_mad24_kernel_code =
47 "__kernel void test_int3_mad24(__global int *srcA, __global int *srcB, __global int *srcC, __global int *dst)\n"
48 "{\n"
49 "    int  tid = get_global_id(0);\n"
50 "    int3 tmp = mad24(vload3(tid, srcA), vload3(tid, srcB), vload3(tid, srcC));\n"
51 "    vstore3(tmp, tid, dst);\n"
52 "}\n";
53 
54 const char *int4_mad24_kernel_code =
55 "__kernel void test_int4_mad24(__global int4 *srcA, __global int4 *srcB, __global int4 *srcC, __global int4 *dst)\n"
56 "{\n"
57 "    int  tid = get_global_id(0);\n"
58 "\n"
59 "    dst[tid] = mad24(srcA[tid], srcB[tid], srcC[tid]);\n"
60 "}\n";
61 
62 const char *int8_mad24_kernel_code =
63 "__kernel void test_int8_mad24(__global int8 *srcA, __global int8 *srcB, __global int8 *srcC, __global int8 *dst)\n"
64 "{\n"
65 "    int  tid = get_global_id(0);\n"
66 "\n"
67 "    dst[tid] = mad24(srcA[tid], srcB[tid], srcC[tid]);\n"
68 "}\n";
69 
70 const char *int16_mad24_kernel_code =
71 "__kernel void test_int16_mad24(__global int16 *srcA, __global int16 *srcB, __global int16 *srcC, __global int16 *dst)\n"
72 "{\n"
73 "    int  tid = get_global_id(0);\n"
74 "\n"
75 "    dst[tid] = mad24(srcA[tid], srcB[tid], srcC[tid]);\n"
76 "}\n";
77 
78 
79 const char *uint_mad24_kernel_code =
80 "__kernel void test_uint_mad24(__global uint *srcA, __global uint *srcB, __global uint *srcC, __global uint *dst)\n"
81 "{\n"
82 "    uint  tid = get_global_id(0);\n"
83 "\n"
84 "    dst[tid] = mad24(srcA[tid], srcB[tid], srcC[tid]);\n"
85 "}\n";
86 
87 const char *uint2_mad24_kernel_code =
88 "__kernel void test_uint2_mad24(__global uint2 *srcA, __global uint2 *srcB, __global uint2 *srcC, __global uint2 *dst)\n"
89 "{\n"
90 "    uint  tid = get_global_id(0);\n"
91 "\n"
92 "    dst[tid] = mad24(srcA[tid], srcB[tid], srcC[tid]);\n"
93 "}\n";
94 
95 const char *uint3_mad24_kernel_code =
96 "__kernel void test_uint3_mad24(__global uint *srcA, __global uint *srcB, __global uint *srcC, __global uint *dst)\n"
97 "{\n"
98 "    int  tid = get_global_id(0);\n"
99 "    uint3 tmp = mad24(vload3(tid, srcA), vload3(tid, srcB), vload3(tid, srcC));\n"
100 "    vstore3(tmp, tid, dst);\n"
101 "}\n";
102 
103 
104 const char *uint4_mad24_kernel_code =
105 "__kernel void test_uint4_mad24(__global uint4 *srcA, __global uint4 *srcB, __global uint4 *srcC, __global uint4 *dst)\n"
106 "{\n"
107 "    uint  tid = get_global_id(0);\n"
108 "\n"
109 "    dst[tid] = mad24(srcA[tid], srcB[tid], srcC[tid]);\n"
110 "}\n";
111 
112 const char *uint8_mad24_kernel_code =
113 "__kernel void test_uint8_mad24(__global uint8 *srcA, __global uint8 *srcB, __global uint8 *srcC, __global uint8 *dst)\n"
114 "{\n"
115 "    uint  tid = get_global_id(0);\n"
116 "\n"
117 "    dst[tid] = mad24(srcA[tid], srcB[tid], srcC[tid]);\n"
118 "}\n";
119 
120 const char *uint16_mad24_kernel_code =
121 "__kernel void test_uint16_mad24(__global uint16 *srcA, __global uint16 *srcB, __global uint16 *srcC, __global uint16 *dst)\n"
122 "{\n"
123 "    uint  tid = get_global_id(0);\n"
124 "\n"
125 "    dst[tid] = mad24(srcA[tid], srcB[tid], srcC[tid]);\n"
126 "}\n";
127 
128 
129 int
verify_int_mad24(int * inptrA,int * inptrB,int * inptrC,int * outptr,size_t n,size_t vecSize)130 verify_int_mad24(int *inptrA, int *inptrB, int *inptrC, int *outptr, size_t n, size_t vecSize)
131 {
132     int            r;
133     size_t         i;
134 
135     for (i=0; i<n; i++)
136     {
137         int a = inptrA[i];
138         int b = inptrB[i];
139         r = a * b + inptrC[i];
140         if (r != outptr[i])
141         {
142             log_error( "Failed at %ld)  0x%8.8x * 0x%8.8x + 0x%8.8x = *0x%8.8x vs 0x%8.8x\n", i, a, b, inptrC[i], r, outptr[i] );
143              return -1;
144         }
145     }
146 
147     return 0;
148 }
149 
150 int
verify_uint_mad24(cl_uint * inptrA,cl_uint * inptrB,cl_uint * inptrC,cl_uint * outptr,size_t n,size_t vecSize)151 verify_uint_mad24(cl_uint *inptrA, cl_uint *inptrB, cl_uint *inptrC, cl_uint *outptr, size_t n, size_t vecSize)
152 {
153     cl_uint         r;
154     size_t         i;
155 
156     for (i=0; i<n; i++)
157     {
158         cl_uint a = inptrA[i] & 0xFFFFFFU;
159         cl_uint b = inptrB[i] & 0xFFFFFFU;
160         r = a * b + inptrC[i];
161         if (r != outptr[i])
162         {
163             log_error( "Failed at %ld)  0x%8.8x * 0x%8.8x + 0x%8.8x = *0x%8.8x vs 0x%8.8x\n", i, a, b, inptrC[i], r, outptr[i] );
164              return -1;
165         }
166     }
167 
168     return 0;
169 }
170 
171 static const char *test_str_names[] = { "int", "int2", "int3", "int4", "int8", "int16", "uint", "uint2", "uint3", "uint4", "uint8", "uint16" };
172 
random_int24(MTdata d)173 static inline int random_int24( MTdata d )
174 {
175     int result = genrand_int32(d);
176 
177     return (result << 8) >> 8;
178 }
179 
random_int32(MTdata d)180 static inline int random_int32( MTdata d )
181 {
182     return genrand_int32(d);
183 }
184 
185 
test_integer_mad24(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)186 int test_integer_mad24(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
187 {
188     cl_mem streams[4];
189     cl_int *input_ptr[3], *output_ptr, *p;
190 
191     cl_program program[2*NUM_PROGRAMS];
192     cl_kernel kernel[2*NUM_PROGRAMS];
193     size_t threads[1];
194 
195     int                num_elements;
196     int                err;
197     int                i;
198     MTdata              d;
199 
200     size_t length = sizeof(cl_int) * 16 * n_elems;
201     num_elements = n_elems * 16;
202 
203     input_ptr[0] = (cl_int*)malloc(length);
204     input_ptr[1] = (cl_int*)malloc(length);
205     input_ptr[2] = (cl_int*)malloc(length);
206     output_ptr   = (cl_int*)malloc(length);
207 
208     streams[0] = clCreateBuffer(context, 0, length, NULL, &err);
209       test_error(err, "clCreateBuffer failed");
210     streams[1] = clCreateBuffer(context, 0, length, NULL, &err);
211       test_error(err, "clCreateBuffer failed");
212     streams[2] = clCreateBuffer(context, 0, length, NULL, &err);
213       test_error(err, "clCreateBuffer failed");
214     streams[3] = clCreateBuffer(context, 0, length, NULL, &err);
215       test_error(err, "clCreateBuffer failed");
216 
217     d = init_genrand( gRandomSeed );
218     p = input_ptr[0];
219     for (i=0; i<num_elements; i++)
220         p[i] = random_int24(d);
221     p = input_ptr[1];
222     for (i=0; i<num_elements; i++)
223         p[i] = random_int24(d);
224     p = input_ptr[2];
225     for (i=0; i<num_elements; i++)
226         p[i] = random_int32(d);
227     free_mtdata(d); d = NULL;
228 
229     err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr[0], 0, NULL, NULL);
230          test_error(err, "clEnqueueWriteBuffer failed");
231     err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, input_ptr[1], 0, NULL, NULL);
232       test_error(err, "clEnqueueWriteBuffer failed");
233     err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, length, input_ptr[2], 0, NULL, NULL);
234       test_error(err, "clEnqueueWriteBuffer failed");
235 
236     err = create_single_kernel_helper(context, &program[0], &kernel[0], 1, &int_mad24_kernel_code, "test_int_mad24");
237     if (err)
238         return -1;
239     err = create_single_kernel_helper(context, &program[1], &kernel[1], 1, &int2_mad24_kernel_code, "test_int2_mad24");
240     if (err)
241         return -1;
242     err = create_single_kernel_helper(context, &program[2], &kernel[2], 1, &int3_mad24_kernel_code, "test_int3_mad24");
243     if (err)
244         return -1;
245     err = create_single_kernel_helper(context, &program[3], &kernel[3], 1, &int4_mad24_kernel_code, "test_int4_mad24");
246     if (err)
247         return -1;
248     err = create_single_kernel_helper(context, &program[4], &kernel[4], 1, &int8_mad24_kernel_code, "test_int8_mad24");
249     if (err)
250         return -1;
251     err = create_single_kernel_helper(context, &program[5], &kernel[5], 1, &int16_mad24_kernel_code, "test_int16_mad24");
252     if (err)
253         return -1;
254 
255     err = create_single_kernel_helper(context, &program[NUM_PROGRAMS], &kernel[NUM_PROGRAMS], 1, &uint_mad24_kernel_code, "test_uint_mad24");
256     if (err)
257         return -1;
258     err = create_single_kernel_helper(context, &program[NUM_PROGRAMS+1], &kernel[NUM_PROGRAMS+1], 1, &uint2_mad24_kernel_code, "test_uint2_mad24");
259     if (err)
260         return -1;
261     err = create_single_kernel_helper(context, &program[NUM_PROGRAMS+2], &kernel[NUM_PROGRAMS+2], 1, &uint3_mad24_kernel_code, "test_uint3_mad24");
262     if (err)
263         return -1;
264     err = create_single_kernel_helper(context, &program[NUM_PROGRAMS+3], &kernel[NUM_PROGRAMS+3], 1, &uint4_mad24_kernel_code, "test_uint4_mad24");
265     if (err)
266         return -1;
267     err = create_single_kernel_helper(context, &program[NUM_PROGRAMS+4], &kernel[NUM_PROGRAMS+4], 1, &uint8_mad24_kernel_code, "test_uint8_mad24");
268     if (err)
269         return -1;
270     err = create_single_kernel_helper(context, &program[NUM_PROGRAMS+5], &kernel[NUM_PROGRAMS+5], 1, &uint16_mad24_kernel_code, "test_uint16_mad24");
271     if (err)
272         return -1;
273 
274     for (i=0; i< 2*NUM_PROGRAMS; i++)
275     {
276         err  = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0]);
277         err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1]);
278         err |= clSetKernelArg(kernel[i], 2, sizeof streams[2], &streams[2]);
279         err |= clSetKernelArg(kernel[i], 3, sizeof streams[3], &streams[3]);
280           test_error(err, "clSetKernelArg failed");
281     }
282 
283 
284     threads[0] = (unsigned int)n_elems;
285     // test signed
286     for (i=0; i<NUM_PROGRAMS; i++)
287     {
288         err = clEnqueueNDRangeKernel(queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL);
289           test_error(err, "clEnqueueNDRangeKernel failed");
290 
291         err = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
292           test_error(err, "clEnqueueNDRangeKernel failed");
293 
294         if (verify_int_mad24(input_ptr[0], input_ptr[1], input_ptr[2], output_ptr, n_elems * vector_sizes[i], vector_sizes[i]))
295         {
296             log_error("INT_MAD24 %s test failed\n", test_str_names[i]);
297             err = -1;
298         }
299         else
300         {
301             log_info("INT_MAD24 %s test passed\n", test_str_names[i]);
302             err = 0;
303         }
304 
305         if (err)
306             break;
307     }
308 
309     p = input_ptr[0];
310     for (i=0; i<num_elements; i++)
311         p[i] &= 0xffffffU;
312     p = input_ptr[1];
313     for (i=0; i<num_elements; i++)
314         p[i] &= 0xffffffU;
315 
316     err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr[0], 0, NULL, NULL);
317       test_error(err, "clEnqueueWriteBuffer failed");
318     err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, input_ptr[1], 0, NULL, NULL);
319       test_error(err, "clEnqueueWriteBuffer failed");
320 
321 
322     // test unsigned
323     for (i=NUM_PROGRAMS; i<2*NUM_PROGRAMS; i++)
324     {
325         err = clEnqueueNDRangeKernel(queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL);
326           test_error(err, "clEnqueueNDRangeKernel failed");
327 
328         err = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
329           test_error(err, "clEnqueueNDRangeKernel failed");
330 
331         if (verify_uint_mad24( (cl_uint*) input_ptr[0], (cl_uint*) input_ptr[1], (cl_uint*) input_ptr[2], (cl_uint*)output_ptr, n_elems * vector_sizes[i-NUM_PROGRAMS], vector_sizes[i-NUM_PROGRAMS]))
332         {
333             log_error("UINT_MAD24 %s test failed\n", test_str_names[i]);
334             err = -1;
335         }
336         else
337         {
338             log_info("UINT_MAD24 %s test passed\n", test_str_names[i]);
339             err = 0;
340         }
341 
342         if (err)
343         break;
344     }
345 
346     // cleanup
347     clReleaseMemObject(streams[0]);
348     clReleaseMemObject(streams[1]);
349     clReleaseMemObject(streams[2]);
350     clReleaseMemObject(streams[3]);
351     for (i=0; i<2*NUM_PROGRAMS; i++)
352     {
353         clReleaseKernel(kernel[i]);
354         clReleaseProgram(program[i]);
355     }
356     free(input_ptr[0]);
357     free(input_ptr[1]);
358     free(input_ptr[2]);
359     free(output_ptr);
360 
361     return err;
362 }
363 
364 
365