1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17
18 #include <stdio.h>
19 #include <string.h>
20 #include <sys/types.h>
21 #include <sys/stat.h>
22
23 #include "procs.h"
24
25 #define NUM_PROGRAMS 6
26
27 static const int vector_sizes[] = {1, 2, 3, 4, 8, 16};
28
29
30 const char *int_mad24_kernel_code =
31 "__kernel void test_int_mad24(__global int *srcA, __global int *srcB, __global int *srcC, __global int *dst)\n"
32 "{\n"
33 " int tid = get_global_id(0);\n"
34 "\n"
35 " dst[tid] = mad24(srcA[tid], srcB[tid], srcC[tid]);\n"
36 "}\n";
37
38 const char *int2_mad24_kernel_code =
39 "__kernel void test_int2_mad24(__global int2 *srcA, __global int2 *srcB, __global int2 *srcC, __global int2 *dst)\n"
40 "{\n"
41 " int tid = get_global_id(0);\n"
42 "\n"
43 " dst[tid] = mad24(srcA[tid], srcB[tid], srcC[tid]);\n"
44 "}\n";
45
46 const char *int3_mad24_kernel_code =
47 "__kernel void test_int3_mad24(__global int *srcA, __global int *srcB, __global int *srcC, __global int *dst)\n"
48 "{\n"
49 " int tid = get_global_id(0);\n"
50 " int3 tmp = mad24(vload3(tid, srcA), vload3(tid, srcB), vload3(tid, srcC));\n"
51 " vstore3(tmp, tid, dst);\n"
52 "}\n";
53
54 const char *int4_mad24_kernel_code =
55 "__kernel void test_int4_mad24(__global int4 *srcA, __global int4 *srcB, __global int4 *srcC, __global int4 *dst)\n"
56 "{\n"
57 " int tid = get_global_id(0);\n"
58 "\n"
59 " dst[tid] = mad24(srcA[tid], srcB[tid], srcC[tid]);\n"
60 "}\n";
61
62 const char *int8_mad24_kernel_code =
63 "__kernel void test_int8_mad24(__global int8 *srcA, __global int8 *srcB, __global int8 *srcC, __global int8 *dst)\n"
64 "{\n"
65 " int tid = get_global_id(0);\n"
66 "\n"
67 " dst[tid] = mad24(srcA[tid], srcB[tid], srcC[tid]);\n"
68 "}\n";
69
70 const char *int16_mad24_kernel_code =
71 "__kernel void test_int16_mad24(__global int16 *srcA, __global int16 *srcB, __global int16 *srcC, __global int16 *dst)\n"
72 "{\n"
73 " int tid = get_global_id(0);\n"
74 "\n"
75 " dst[tid] = mad24(srcA[tid], srcB[tid], srcC[tid]);\n"
76 "}\n";
77
78
79 const char *uint_mad24_kernel_code =
80 "__kernel void test_uint_mad24(__global uint *srcA, __global uint *srcB, __global uint *srcC, __global uint *dst)\n"
81 "{\n"
82 " uint tid = get_global_id(0);\n"
83 "\n"
84 " dst[tid] = mad24(srcA[tid], srcB[tid], srcC[tid]);\n"
85 "}\n";
86
87 const char *uint2_mad24_kernel_code =
88 "__kernel void test_uint2_mad24(__global uint2 *srcA, __global uint2 *srcB, __global uint2 *srcC, __global uint2 *dst)\n"
89 "{\n"
90 " uint tid = get_global_id(0);\n"
91 "\n"
92 " dst[tid] = mad24(srcA[tid], srcB[tid], srcC[tid]);\n"
93 "}\n";
94
95 const char *uint3_mad24_kernel_code =
96 "__kernel void test_uint3_mad24(__global uint *srcA, __global uint *srcB, __global uint *srcC, __global uint *dst)\n"
97 "{\n"
98 " int tid = get_global_id(0);\n"
99 " uint3 tmp = mad24(vload3(tid, srcA), vload3(tid, srcB), vload3(tid, srcC));\n"
100 " vstore3(tmp, tid, dst);\n"
101 "}\n";
102
103
104 const char *uint4_mad24_kernel_code =
105 "__kernel void test_uint4_mad24(__global uint4 *srcA, __global uint4 *srcB, __global uint4 *srcC, __global uint4 *dst)\n"
106 "{\n"
107 " uint tid = get_global_id(0);\n"
108 "\n"
109 " dst[tid] = mad24(srcA[tid], srcB[tid], srcC[tid]);\n"
110 "}\n";
111
112 const char *uint8_mad24_kernel_code =
113 "__kernel void test_uint8_mad24(__global uint8 *srcA, __global uint8 *srcB, __global uint8 *srcC, __global uint8 *dst)\n"
114 "{\n"
115 " uint tid = get_global_id(0);\n"
116 "\n"
117 " dst[tid] = mad24(srcA[tid], srcB[tid], srcC[tid]);\n"
118 "}\n";
119
120 const char *uint16_mad24_kernel_code =
121 "__kernel void test_uint16_mad24(__global uint16 *srcA, __global uint16 *srcB, __global uint16 *srcC, __global uint16 *dst)\n"
122 "{\n"
123 " uint tid = get_global_id(0);\n"
124 "\n"
125 " dst[tid] = mad24(srcA[tid], srcB[tid], srcC[tid]);\n"
126 "}\n";
127
128
129 int
verify_int_mad24(int * inptrA,int * inptrB,int * inptrC,int * outptr,size_t n,size_t vecSize)130 verify_int_mad24(int *inptrA, int *inptrB, int *inptrC, int *outptr, size_t n, size_t vecSize)
131 {
132 int r;
133 size_t i;
134
135 for (i=0; i<n; i++)
136 {
137 int a = inptrA[i];
138 int b = inptrB[i];
139 r = a * b + inptrC[i];
140 if (r != outptr[i])
141 {
142 log_error( "Failed at %ld) 0x%8.8x * 0x%8.8x + 0x%8.8x = *0x%8.8x vs 0x%8.8x\n", i, a, b, inptrC[i], r, outptr[i] );
143 return -1;
144 }
145 }
146
147 return 0;
148 }
149
150 int
verify_uint_mad24(cl_uint * inptrA,cl_uint * inptrB,cl_uint * inptrC,cl_uint * outptr,size_t n,size_t vecSize)151 verify_uint_mad24(cl_uint *inptrA, cl_uint *inptrB, cl_uint *inptrC, cl_uint *outptr, size_t n, size_t vecSize)
152 {
153 cl_uint r;
154 size_t i;
155
156 for (i=0; i<n; i++)
157 {
158 cl_uint a = inptrA[i] & 0xFFFFFFU;
159 cl_uint b = inptrB[i] & 0xFFFFFFU;
160 r = a * b + inptrC[i];
161 if (r != outptr[i])
162 {
163 log_error( "Failed at %ld) 0x%8.8x * 0x%8.8x + 0x%8.8x = *0x%8.8x vs 0x%8.8x\n", i, a, b, inptrC[i], r, outptr[i] );
164 return -1;
165 }
166 }
167
168 return 0;
169 }
170
171 static const char *test_str_names[] = { "int", "int2", "int3", "int4", "int8", "int16", "uint", "uint2", "uint3", "uint4", "uint8", "uint16" };
172
random_int24(MTdata d)173 static inline int random_int24( MTdata d )
174 {
175 int result = genrand_int32(d);
176
177 return (result << 8) >> 8;
178 }
179
random_int32(MTdata d)180 static inline int random_int32( MTdata d )
181 {
182 return genrand_int32(d);
183 }
184
185
test_integer_mad24(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)186 int test_integer_mad24(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
187 {
188 cl_mem streams[4];
189 cl_int *input_ptr[3], *output_ptr, *p;
190
191 cl_program program[2*NUM_PROGRAMS];
192 cl_kernel kernel[2*NUM_PROGRAMS];
193 size_t threads[1];
194
195 int num_elements;
196 int err;
197 int i;
198 MTdata d;
199
200 size_t length = sizeof(cl_int) * 16 * n_elems;
201 num_elements = n_elems * 16;
202
203 input_ptr[0] = (cl_int*)malloc(length);
204 input_ptr[1] = (cl_int*)malloc(length);
205 input_ptr[2] = (cl_int*)malloc(length);
206 output_ptr = (cl_int*)malloc(length);
207
208 streams[0] = clCreateBuffer(context, 0, length, NULL, &err);
209 test_error(err, "clCreateBuffer failed");
210 streams[1] = clCreateBuffer(context, 0, length, NULL, &err);
211 test_error(err, "clCreateBuffer failed");
212 streams[2] = clCreateBuffer(context, 0, length, NULL, &err);
213 test_error(err, "clCreateBuffer failed");
214 streams[3] = clCreateBuffer(context, 0, length, NULL, &err);
215 test_error(err, "clCreateBuffer failed");
216
217 d = init_genrand( gRandomSeed );
218 p = input_ptr[0];
219 for (i=0; i<num_elements; i++)
220 p[i] = random_int24(d);
221 p = input_ptr[1];
222 for (i=0; i<num_elements; i++)
223 p[i] = random_int24(d);
224 p = input_ptr[2];
225 for (i=0; i<num_elements; i++)
226 p[i] = random_int32(d);
227 free_mtdata(d); d = NULL;
228
229 err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr[0], 0, NULL, NULL);
230 test_error(err, "clEnqueueWriteBuffer failed");
231 err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, input_ptr[1], 0, NULL, NULL);
232 test_error(err, "clEnqueueWriteBuffer failed");
233 err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, length, input_ptr[2], 0, NULL, NULL);
234 test_error(err, "clEnqueueWriteBuffer failed");
235
236 err = create_single_kernel_helper(context, &program[0], &kernel[0], 1, &int_mad24_kernel_code, "test_int_mad24");
237 if (err)
238 return -1;
239 err = create_single_kernel_helper(context, &program[1], &kernel[1], 1, &int2_mad24_kernel_code, "test_int2_mad24");
240 if (err)
241 return -1;
242 err = create_single_kernel_helper(context, &program[2], &kernel[2], 1, &int3_mad24_kernel_code, "test_int3_mad24");
243 if (err)
244 return -1;
245 err = create_single_kernel_helper(context, &program[3], &kernel[3], 1, &int4_mad24_kernel_code, "test_int4_mad24");
246 if (err)
247 return -1;
248 err = create_single_kernel_helper(context, &program[4], &kernel[4], 1, &int8_mad24_kernel_code, "test_int8_mad24");
249 if (err)
250 return -1;
251 err = create_single_kernel_helper(context, &program[5], &kernel[5], 1, &int16_mad24_kernel_code, "test_int16_mad24");
252 if (err)
253 return -1;
254
255 err = create_single_kernel_helper(context, &program[NUM_PROGRAMS], &kernel[NUM_PROGRAMS], 1, &uint_mad24_kernel_code, "test_uint_mad24");
256 if (err)
257 return -1;
258 err = create_single_kernel_helper(context, &program[NUM_PROGRAMS+1], &kernel[NUM_PROGRAMS+1], 1, &uint2_mad24_kernel_code, "test_uint2_mad24");
259 if (err)
260 return -1;
261 err = create_single_kernel_helper(context, &program[NUM_PROGRAMS+2], &kernel[NUM_PROGRAMS+2], 1, &uint3_mad24_kernel_code, "test_uint3_mad24");
262 if (err)
263 return -1;
264 err = create_single_kernel_helper(context, &program[NUM_PROGRAMS+3], &kernel[NUM_PROGRAMS+3], 1, &uint4_mad24_kernel_code, "test_uint4_mad24");
265 if (err)
266 return -1;
267 err = create_single_kernel_helper(context, &program[NUM_PROGRAMS+4], &kernel[NUM_PROGRAMS+4], 1, &uint8_mad24_kernel_code, "test_uint8_mad24");
268 if (err)
269 return -1;
270 err = create_single_kernel_helper(context, &program[NUM_PROGRAMS+5], &kernel[NUM_PROGRAMS+5], 1, &uint16_mad24_kernel_code, "test_uint16_mad24");
271 if (err)
272 return -1;
273
274 for (i=0; i< 2*NUM_PROGRAMS; i++)
275 {
276 err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0]);
277 err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1]);
278 err |= clSetKernelArg(kernel[i], 2, sizeof streams[2], &streams[2]);
279 err |= clSetKernelArg(kernel[i], 3, sizeof streams[3], &streams[3]);
280 test_error(err, "clSetKernelArg failed");
281 }
282
283
284 threads[0] = (unsigned int)n_elems;
285 // test signed
286 for (i=0; i<NUM_PROGRAMS; i++)
287 {
288 err = clEnqueueNDRangeKernel(queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL);
289 test_error(err, "clEnqueueNDRangeKernel failed");
290
291 err = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
292 test_error(err, "clEnqueueNDRangeKernel failed");
293
294 if (verify_int_mad24(input_ptr[0], input_ptr[1], input_ptr[2], output_ptr, n_elems * vector_sizes[i], vector_sizes[i]))
295 {
296 log_error("INT_MAD24 %s test failed\n", test_str_names[i]);
297 err = -1;
298 }
299 else
300 {
301 log_info("INT_MAD24 %s test passed\n", test_str_names[i]);
302 err = 0;
303 }
304
305 if (err)
306 break;
307 }
308
309 p = input_ptr[0];
310 for (i=0; i<num_elements; i++)
311 p[i] &= 0xffffffU;
312 p = input_ptr[1];
313 for (i=0; i<num_elements; i++)
314 p[i] &= 0xffffffU;
315
316 err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr[0], 0, NULL, NULL);
317 test_error(err, "clEnqueueWriteBuffer failed");
318 err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, input_ptr[1], 0, NULL, NULL);
319 test_error(err, "clEnqueueWriteBuffer failed");
320
321
322 // test unsigned
323 for (i=NUM_PROGRAMS; i<2*NUM_PROGRAMS; i++)
324 {
325 err = clEnqueueNDRangeKernel(queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL);
326 test_error(err, "clEnqueueNDRangeKernel failed");
327
328 err = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
329 test_error(err, "clEnqueueNDRangeKernel failed");
330
331 if (verify_uint_mad24( (cl_uint*) input_ptr[0], (cl_uint*) input_ptr[1], (cl_uint*) input_ptr[2], (cl_uint*)output_ptr, n_elems * vector_sizes[i-NUM_PROGRAMS], vector_sizes[i-NUM_PROGRAMS]))
332 {
333 log_error("UINT_MAD24 %s test failed\n", test_str_names[i]);
334 err = -1;
335 }
336 else
337 {
338 log_info("UINT_MAD24 %s test passed\n", test_str_names[i]);
339 err = 0;
340 }
341
342 if (err)
343 break;
344 }
345
346 // cleanup
347 clReleaseMemObject(streams[0]);
348 clReleaseMemObject(streams[1]);
349 clReleaseMemObject(streams[2]);
350 clReleaseMemObject(streams[3]);
351 for (i=0; i<2*NUM_PROGRAMS; i++)
352 {
353 clReleaseKernel(kernel[i]);
354 clReleaseProgram(program[i]);
355 }
356 free(input_ptr[0]);
357 free(input_ptr[1]);
358 free(input_ptr[2]);
359 free(output_ptr);
360
361 return err;
362 }
363
364
365