xref: /aosp_15_r20/external/OpenCL-CTS/test_conformance/buffers/test_buffer_read.cpp (revision 6467f958c7de8070b317fc65bcb0f6472e388d82)
1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17 
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <string.h>
21 #include <time.h>
22 #include <sys/types.h>
23 #include <sys/stat.h>
24 #include <CL/cl_half.h>
25 
26 #include "procs.h"
27 
28 //#define HK_DO_NOT_RUN_SHORT_ASYNC    1
29 //#define HK_DO_NOT_RUN_USHORT_ASYNC    1
30 //#define HK_DO_NOT_RUN_CHAR_ASYNC    1
31 //#define HK_DO_NOT_RUN_UCHAR_ASYNC    1
32 
33 #define TEST_PRIME_INT        ((1<<16)+1)
34 #define TEST_PRIME_UINT        ((1U<<16)+1U)
35 #define TEST_PRIME_LONG        ((1LL<<32)+1LL)
36 #define TEST_PRIME_ULONG    ((1ULL<<32)+1ULL)
37 #define TEST_PRIME_SHORT    ((1S<<8)+1S)
38 #define TEST_PRIME_FLOAT    (float)3.40282346638528860e+38
39 #define TEST_PRIME_HALF        119.f
40 #define TEST_BOOL            true
41 #define TEST_PRIME_CHAR        0x77
42 
43 #ifndef ulong
44 typedef unsigned long ulong;
45 #endif
46 
47 #ifndef uchar
48 typedef unsigned char uchar;
49 #endif
50 
51 #ifndef TestStruct
52 typedef struct{
53     int     a;
54     float   b;
55 } TestStruct;
56 #endif
57 
58 //--- the code for the kernel executables
59 static const char *buffer_read_int_kernel_code[] = {
60     "__kernel void test_buffer_read_int(__global int *dst)\n"
61     "{\n"
62     "    int  tid = get_global_id(0);\n"
63     "\n"
64     "    dst[tid] = ((1<<16)+1);\n"
65     "}\n",
66 
67     "__kernel void test_buffer_read_int2(__global int2 *dst)\n"
68     "{\n"
69     "    int  tid = get_global_id(0);\n"
70     "\n"
71     "    dst[tid] = ((1<<16)+1);\n"
72     "}\n",
73 
74     "__kernel void test_buffer_read_int4(__global int4 *dst)\n"
75     "{\n"
76     "    int  tid = get_global_id(0);\n"
77     "\n"
78     "    dst[tid] = ((1<<16)+1);\n"
79     "}\n",
80 
81     "__kernel void test_buffer_read_int8(__global int8 *dst)\n"
82     "{\n"
83     "    int  tid = get_global_id(0);\n"
84     "\n"
85     "    dst[tid] = ((1<<16)+1);\n"
86     "}\n",
87 
88     "__kernel void test_buffer_read_int16(__global int16 *dst)\n"
89     "{\n"
90     "    int  tid = get_global_id(0);\n"
91     "\n"
92     "    dst[tid] = ((1<<16)+1);\n"
93     "}\n" };
94 
95 static const char *int_kernel_name[] = { "test_buffer_read_int", "test_buffer_read_int2", "test_buffer_read_int4", "test_buffer_read_int8", "test_buffer_read_int16" };
96 
97 static const char *buffer_read_uint_kernel_code[] = {
98     "__kernel void test_buffer_read_uint(__global uint *dst)\n"
99     "{\n"
100     "    int  tid = get_global_id(0);\n"
101     "\n"
102     "    dst[tid] = ((1U<<16)+1U);\n"
103     "}\n",
104 
105     "__kernel void test_buffer_read_uint2(__global uint2 *dst)\n"
106     "{\n"
107     "    int  tid = get_global_id(0);\n"
108     "\n"
109     "    dst[tid] = ((1U<<16)+1U);\n"
110     "}\n",
111 
112     "__kernel void test_buffer_read_uint4(__global uint4 *dst)\n"
113     "{\n"
114     "    int  tid = get_global_id(0);\n"
115     "\n"
116     "    dst[tid] = ((1U<<16)+1U);\n"
117     "}\n",
118 
119     "__kernel void test_buffer_read_uint8(__global uint8 *dst)\n"
120     "{\n"
121     "    int  tid = get_global_id(0);\n"
122     "\n"
123     "    dst[tid] = ((1U<<16)+1U);\n"
124     "}\n",
125 
126     "__kernel void test_buffer_read_uint16(__global uint16 *dst)\n"
127     "{\n"
128     "    int  tid = get_global_id(0);\n"
129     "\n"
130     "    dst[tid] = ((1U<<16)+1U);\n"
131     "}\n" };
132 
133 static const char *uint_kernel_name[] = { "test_buffer_read_uint", "test_buffer_read_uint2", "test_buffer_read_uint4", "test_buffer_read_uint8", "test_buffer_read_uint16" };
134 
135 static const char *buffer_read_long_kernel_code[] = {
136     "__kernel void test_buffer_read_long(__global long *dst)\n"
137     "{\n"
138     "    int  tid = get_global_id(0);\n"
139     "\n"
140     "    dst[tid] = ((1L<<32)+1L);\n"
141     "}\n",
142 
143     "__kernel void test_buffer_read_long2(__global long2 *dst)\n"
144     "{\n"
145     "    int  tid = get_global_id(0);\n"
146     "\n"
147     "    dst[tid] = ((1L<<32)+1L);\n"
148     "}\n",
149 
150     "__kernel void test_buffer_read_long4(__global long4 *dst)\n"
151     "{\n"
152     "    int  tid = get_global_id(0);\n"
153     "\n"
154     "    dst[tid] = ((1L<<32)+1L);\n"
155     "}\n",
156 
157     "__kernel void test_buffer_read_long8(__global long8 *dst)\n"
158     "{\n"
159     "    int  tid = get_global_id(0);\n"
160     "\n"
161     "    dst[tid] = ((1L<<32)+1L);\n"
162     "}\n",
163 
164     "__kernel void test_buffer_read_long16(__global long16 *dst)\n"
165     "{\n"
166     "    int  tid = get_global_id(0);\n"
167     "\n"
168     "    dst[tid] = ((1L<<32)+1L);\n"
169     "}\n" };
170 
171 static const char *long_kernel_name[] = { "test_buffer_read_long", "test_buffer_read_long2", "test_buffer_read_long4", "test_buffer_read_long8", "test_buffer_read_long16" };
172 
173 static const char *buffer_read_ulong_kernel_code[] = {
174     "__kernel void test_buffer_read_ulong(__global ulong *dst)\n"
175     "{\n"
176     "    int  tid = get_global_id(0);\n"
177     "\n"
178     "    dst[tid] = ((1UL<<32)+1UL);\n"
179     "}\n",
180 
181     "__kernel void test_buffer_read_ulong2(__global ulong2 *dst)\n"
182     "{\n"
183     "    int  tid = get_global_id(0);\n"
184     "\n"
185     "    dst[tid] = ((1UL<<32)+1UL);\n"
186     "}\n",
187 
188     "__kernel void test_buffer_read_ulong4(__global ulong4 *dst)\n"
189     "{\n"
190     "    int  tid = get_global_id(0);\n"
191     "\n"
192     "    dst[tid] = ((1UL<<32)+1UL);\n"
193     "}\n",
194 
195     "__kernel void test_buffer_read_ulong8(__global ulong8 *dst)\n"
196     "{\n"
197     "    int  tid = get_global_id(0);\n"
198     "\n"
199     "    dst[tid] = ((1UL<<32)+1UL);\n"
200     "}\n",
201 
202     "__kernel void test_buffer_read_ulong16(__global ulong16 *dst)\n"
203     "{\n"
204     "    int  tid = get_global_id(0);\n"
205     "\n"
206     "    dst[tid] = ((1UL<<32)+1UL);\n"
207     "}\n" };
208 
209 static const char *ulong_kernel_name[] = { "test_buffer_read_ulong", "test_buffer_read_ulong2", "test_buffer_read_ulong4", "test_buffer_read_ulong8", "test_buffer_read_ulong16" };
210 
211 static const char *buffer_read_short_kernel_code[] = {
212     "__kernel void test_buffer_read_short(__global short *dst)\n"
213     "{\n"
214     "    int  tid = get_global_id(0);\n"
215     "\n"
216     "    dst[tid] = (short)((1<<8)+1);\n"
217     "}\n",
218 
219     "__kernel void test_buffer_read_short2(__global short2 *dst)\n"
220     "{\n"
221     "    int  tid = get_global_id(0);\n"
222     "\n"
223     "    dst[tid] = (short)((1<<8)+1);\n"
224     "}\n",
225 
226     "__kernel void test_buffer_read_short4(__global short4 *dst)\n"
227     "{\n"
228     "    int  tid = get_global_id(0);\n"
229     "\n"
230     "    dst[tid] = (short)((1<<8)+1);\n"
231     "}\n",
232 
233     "__kernel void test_buffer_read_short8(__global short8 *dst)\n"
234     "{\n"
235     "    int  tid = get_global_id(0);\n"
236     "\n"
237     "    dst[tid] = (short)((1<<8)+1);\n"
238     "}\n",
239 
240     "__kernel void test_buffer_read_short16(__global short16 *dst)\n"
241     "{\n"
242     "    int  tid = get_global_id(0);\n"
243     "\n"
244     "    dst[tid] = (short)((1<<8)+1);\n"
245     "}\n" };
246 
247 static const char *short_kernel_name[] = { "test_buffer_read_short", "test_buffer_read_short2", "test_buffer_read_short4", "test_buffer_read_short8", "test_buffer_read_short16" };
248 
249 
250 static const char *buffer_read_ushort_kernel_code[] = {
251     "__kernel void test_buffer_read_ushort(__global ushort *dst)\n"
252     "{\n"
253     "    int  tid = get_global_id(0);\n"
254     "\n"
255     "    dst[tid] = (ushort)((1<<8)+1);\n"
256     "}\n",
257 
258     "__kernel void test_buffer_read_ushort2(__global ushort2 *dst)\n"
259     "{\n"
260     "    int  tid = get_global_id(0);\n"
261     "\n"
262     "    dst[tid] = (ushort)((1<<8)+1);\n"
263     "}\n",
264 
265     "__kernel void test_buffer_read_ushort4(__global ushort4 *dst)\n"
266     "{\n"
267     "    int  tid = get_global_id(0);\n"
268     "\n"
269     "    dst[tid] = (ushort)((1<<8)+1);\n"
270     "}\n",
271 
272     "__kernel void test_buffer_read_ushort8(__global ushort8 *dst)\n"
273     "{\n"
274     "    int  tid = get_global_id(0);\n"
275     "\n"
276     "    dst[tid] = (ushort)((1<<8)+1);\n"
277     "}\n",
278 
279     "__kernel void test_buffer_read_ushort16(__global ushort16 *dst)\n"
280     "{\n"
281     "    int  tid = get_global_id(0);\n"
282     "\n"
283     "    dst[tid] = (ushort)((1<<8)+1);\n"
284     "}\n" };
285 
286 static const char *ushort_kernel_name[] = { "test_buffer_read_ushort", "test_buffer_read_ushort2", "test_buffer_read_ushort4", "test_buffer_read_ushort8", "test_buffer_read_ushort16" };
287 
288 
289 static const char *buffer_read_float_kernel_code[] = {
290     "__kernel void test_buffer_read_float(__global float *dst)\n"
291     "{\n"
292     "    int  tid = get_global_id(0);\n"
293     "\n"
294     "    dst[tid] = (float)3.40282346638528860e+38;\n"
295     "}\n",
296 
297     "__kernel void test_buffer_read_float2(__global float2 *dst)\n"
298     "{\n"
299     "    int  tid = get_global_id(0);\n"
300     "\n"
301     "    dst[tid] = (float)3.40282346638528860e+38;\n"
302     "}\n",
303 
304     "__kernel void test_buffer_read_float4(__global float4 *dst)\n"
305     "{\n"
306     "    int  tid = get_global_id(0);\n"
307     "\n"
308     "    dst[tid] = (float)3.40282346638528860e+38;\n"
309     "}\n",
310 
311     "__kernel void test_buffer_read_float8(__global float8 *dst)\n"
312     "{\n"
313     "    int  tid = get_global_id(0);\n"
314     "\n"
315     "    dst[tid] = (float)3.40282346638528860e+38;\n"
316     "}\n",
317 
318     "__kernel void test_buffer_read_float16(__global float16 *dst)\n"
319     "{\n"
320     "    int  tid = get_global_id(0);\n"
321     "\n"
322     "    dst[tid] = (float)3.40282346638528860e+38;\n"
323     "}\n" };
324 
325 static const char *float_kernel_name[] = { "test_buffer_read_float", "test_buffer_read_float2", "test_buffer_read_float4", "test_buffer_read_float8", "test_buffer_read_float16" };
326 
327 
328 static const char *buffer_read_half_kernel_code[] = {
329     "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
330     "__kernel void test_buffer_read_half(__global half *dst)\n"
331     "{\n"
332     "    int  tid = get_global_id(0);\n"
333     "\n"
334     "    dst[tid] = (half)119;\n"
335     "}\n",
336 
337     "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
338     "__kernel void test_buffer_read_half2(__global half2 *dst)\n"
339     "{\n"
340     "    int  tid = get_global_id(0);\n"
341     "\n"
342     "    dst[tid] = (half)119;\n"
343     "}\n",
344 
345     "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
346     "__kernel void test_buffer_read_half4(__global half4 *dst)\n"
347     "{\n"
348     "    int  tid = get_global_id(0);\n"
349     "\n"
350     "    dst[tid] = (half)119;\n"
351     "}\n",
352 
353     "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
354     "__kernel void test_buffer_read_half8(__global half8 *dst)\n"
355     "{\n"
356     "    int  tid = get_global_id(0);\n"
357     "\n"
358     "    dst[tid] = (half)119;\n"
359     "}\n",
360 
361     "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
362     "__kernel void test_buffer_read_half16(__global half16 *dst)\n"
363     "{\n"
364     "    int  tid = get_global_id(0);\n"
365     "\n"
366     "    dst[tid] = (half)119;\n"
367     "}\n"
368 };
369 
370 static const char *half_kernel_name[] = { "test_buffer_read_half", "test_buffer_read_half2", "test_buffer_read_half4", "test_buffer_read_half8", "test_buffer_read_half16" };
371 
372 
373 static const char *buffer_read_char_kernel_code[] = {
374     "__kernel void test_buffer_read_char(__global char *dst)\n"
375     "{\n"
376     "    int  tid = get_global_id(0);\n"
377     "\n"
378     "    dst[tid] = (char)'w';\n"
379     "}\n",
380 
381     "__kernel void test_buffer_read_char2(__global char2 *dst)\n"
382     "{\n"
383     "    int  tid = get_global_id(0);\n"
384     "\n"
385     "    dst[tid] = (char)'w';\n"
386     "}\n",
387 
388     "__kernel void test_buffer_read_char4(__global char4 *dst)\n"
389     "{\n"
390     "    int  tid = get_global_id(0);\n"
391     "\n"
392     "    dst[tid] = (char)'w';\n"
393     "}\n",
394 
395     "__kernel void test_buffer_read_char8(__global char8 *dst)\n"
396     "{\n"
397     "    int  tid = get_global_id(0);\n"
398     "\n"
399     "    dst[tid] = (char)'w';\n"
400     "}\n",
401 
402     "__kernel void test_buffer_read_char16(__global char16 *dst)\n"
403     "{\n"
404     "    int  tid = get_global_id(0);\n"
405     "\n"
406     "    dst[tid] = (char)'w';\n"
407     "}\n" };
408 
409 static const char *char_kernel_name[] = { "test_buffer_read_char", "test_buffer_read_char2", "test_buffer_read_char4", "test_buffer_read_char8", "test_buffer_read_char16" };
410 
411 
412 static const char *buffer_read_uchar_kernel_code[] = {
413     "__kernel void test_buffer_read_uchar(__global uchar *dst)\n"
414     "{\n"
415     "    int  tid = get_global_id(0);\n"
416     "\n"
417     "    dst[tid] = 'w';\n"
418     "}\n",
419 
420     "__kernel void test_buffer_read_uchar2(__global uchar2 *dst)\n"
421     "{\n"
422     "    int  tid = get_global_id(0);\n"
423     "\n"
424     "    dst[tid] = (uchar)'w';\n"
425     "}\n",
426 
427     "__kernel void test_buffer_read_uchar4(__global uchar4 *dst)\n"
428     "{\n"
429     "    int  tid = get_global_id(0);\n"
430     "\n"
431     "    dst[tid] = (uchar)'w';\n"
432     "}\n",
433 
434     "__kernel void test_buffer_read_uchar8(__global uchar8 *dst)\n"
435     "{\n"
436     "    int  tid = get_global_id(0);\n"
437     "\n"
438     "    dst[tid] = (uchar)'w';\n"
439     "}\n",
440 
441     "__kernel void test_buffer_read_uchar16(__global uchar16 *dst)\n"
442     "{\n"
443     "    int  tid = get_global_id(0);\n"
444     "\n"
445     "    dst[tid] = (uchar)'w';\n"
446     "}\n" };
447 
448 static const char *uchar_kernel_name[] = { "test_buffer_read_uchar", "test_buffer_read_uchar2", "test_buffer_read_uchar4", "test_buffer_read_uchar8", "test_buffer_read_uchar16" };
449 
450 
451 static const char *buffer_read_struct_kernel_code =
452 "typedef struct{\n"
453 "int    a;\n"
454 "float    b;\n"
455 "} TestStruct;\n"
456 "__kernel void test_buffer_read_struct(__global TestStruct *dst)\n"
457 "{\n"
458 "    int  tid = get_global_id(0);\n"
459 "\n"
460 "    dst[tid].a = ((1<<16)+1);\n"
461 "     dst[tid].b = (float)3.40282346638528860e+38;\n"
462 "}\n";
463 
464 
465 //--- the verify functions
verify_read_int(void * ptr,int n)466 static int verify_read_int(void *ptr, int n)
467 {
468     int     i;
469     cl_int  *outptr = (cl_int *)ptr;
470 
471     for (i=0; i<n; i++){
472         if ( outptr[i] != TEST_PRIME_INT )
473             return -1;
474     }
475 
476     return 0;
477 }
478 
479 
verify_read_uint(void * ptr,int n)480 static int verify_read_uint(void *ptr, int n)
481 {
482     int     i;
483     cl_uint *outptr = (cl_uint *)ptr;
484 
485     for (i=0; i<n; i++){
486         if ( outptr[i] != TEST_PRIME_UINT )
487             return -1;
488     }
489 
490     return 0;
491 }
492 
493 
verify_read_long(void * ptr,int n)494 static int verify_read_long(void *ptr, int n)
495 {
496     int     i;
497     cl_long *outptr = (cl_long *)ptr;
498 
499     for (i=0; i<n; i++){
500         if ( outptr[i] != TEST_PRIME_LONG )
501             return -1;
502     }
503 
504     return 0;
505 }
506 
507 
verify_read_ulong(void * ptr,int n)508 static int verify_read_ulong(void *ptr, int n)
509 {
510     int      i;
511     cl_ulong *outptr = (cl_ulong *)ptr;
512 
513     for (i=0; i<n; i++){
514         if ( outptr[i] != TEST_PRIME_ULONG )
515             return -1;
516     }
517 
518     return 0;
519 }
520 
521 
verify_read_short(void * ptr,int n)522 static int verify_read_short(void *ptr, int n)
523 {
524     int      i;
525     cl_short *outptr = (cl_short *)ptr;
526 
527     for (i=0; i<n; i++){
528         if ( outptr[i] != (cl_short)((1<<8)+1) )
529             return -1;
530     }
531 
532     return 0;
533 }
534 
535 
verify_read_ushort(void * ptr,int n)536 static int verify_read_ushort(void *ptr, int n)
537 {
538     int       i;
539     cl_ushort *outptr = (cl_ushort *)ptr;
540 
541     for (i=0; i<n; i++){
542         if ( outptr[i] != (cl_ushort)((1<<8)+1) )
543             return -1;
544     }
545 
546     return 0;
547 }
548 
549 
verify_read_float(void * ptr,int n)550 static int verify_read_float( void *ptr, int n )
551 {
552     int      i;
553     cl_float *outptr = (cl_float *)ptr;
554 
555     for (i=0; i<n; i++){
556         if ( outptr[i] != TEST_PRIME_FLOAT )
557             return -1;
558     }
559 
560     return 0;
561 }
562 
563 
verify_read_half(void * ptr,int n)564 static int verify_read_half( void *ptr, int n )
565 {
566     int     i;
567     cl_half *outptr = (cl_half *)ptr;
568 
569     for (i = 0; i < n; i++)
570     {
571         if (cl_half_to_float(outptr[i]) != TEST_PRIME_HALF) return -1;
572     }
573 
574     return 0;
575 }
576 
577 
verify_read_char(void * ptr,int n)578 static int verify_read_char(void *ptr, int n)
579 {
580     int     i;
581     cl_char *outptr = (cl_char *)ptr;
582 
583     for (i=0; i<n; i++){
584         if ( outptr[i] != TEST_PRIME_CHAR )
585             return -1;
586     }
587 
588     return 0;
589 }
590 
591 
verify_read_uchar(void * ptr,int n)592 static int verify_read_uchar(void *ptr, int n)
593 {
594     int      i;
595     cl_uchar *outptr = (cl_uchar *)ptr;
596 
597     for (i=0; i<n; i++){
598         if ( outptr[i] != TEST_PRIME_CHAR )
599             return -1;
600     }
601 
602     return 0;
603 }
604 
605 
verify_read_struct(TestStruct * outptr,int n)606 static int verify_read_struct(TestStruct *outptr, int n)
607 {
608     int     i;
609 
610     for (i=0; i<n; i++)
611     {
612         if ( ( outptr[i].a != TEST_PRIME_INT ) ||
613              ( outptr[i].b != TEST_PRIME_FLOAT ) )
614             return -1;
615     }
616 
617     return 0;
618 }
619 
620 //----- the test functions
test_buffer_read(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements,size_t size,char * type,int loops,const char * kernelCode[],const char * kernelName[],int (* fn)(void *,int))621 int test_buffer_read( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, size_t size, char *type, int loops,
622                       const char *kernelCode[], const char *kernelName[], int (*fn)(void *,int) )
623 {
624     void        *outptr[5];
625     void        *inptr[5];
626     clProgramWrapper program[5];
627     clKernelWrapper kernel[5];
628     size_t      global_work_size[3];
629     cl_int      err;
630     int         i;
631     size_t      ptrSizes[5];
632     int         src_flag_id;
633     int         total_errors = 0;
634 
635     size_t      min_alignment = get_min_alignment(context);
636 
637     global_work_size[0] = (cl_uint)num_elements;
638 
639     ptrSizes[0] = size;
640     ptrSizes[1] = ptrSizes[0] << 1;
641     ptrSizes[2] = ptrSizes[1] << 1;
642     ptrSizes[3] = ptrSizes[2] << 1;
643     ptrSizes[4] = ptrSizes[3] << 1;
644 
645     //skip devices that don't support long
646     if (! gHasLong && strstr(type,"long") )
647     {
648         log_info( "Device does not support 64-bit integers. Skipping test.\n" );
649         return CL_SUCCESS;
650     }
651 
652     for (i = 0; i < loops; i++)
653     {
654 
655         err = create_single_kernel_helper(context, &program[i], &kernel[i], 1,
656                                           &kernelCode[i], kernelName[i]);
657         if (err)
658         {
659             log_error("Creating program for %s\n", type);
660             print_error(err, " Error creating program ");
661             return -1;
662         }
663 
664         for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++)
665         {
666             clMemWrapper buffer;
667             outptr[i] = align_malloc( ptrSizes[i] * num_elements, min_alignment);
668             if ( ! outptr[i] ){
669                 log_error( " unable to allocate %d bytes for outptr\n", (int)( ptrSizes[i] * num_elements ) );
670                 return -1;
671             }
672             inptr[i] = align_malloc( ptrSizes[i] * num_elements, min_alignment);
673             if ( ! inptr[i] ){
674                 log_error( " unable to allocate %d bytes for inptr\n", (int)( ptrSizes[i] * num_elements ) );
675                 return -1;
676             }
677 
678 
679             if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
680                 buffer =
681                     clCreateBuffer(context, flag_set[src_flag_id],
682                                    ptrSizes[i] * num_elements, inptr[i], &err);
683             else
684                 buffer = clCreateBuffer(context, flag_set[src_flag_id],
685                                         ptrSizes[i] * num_elements, NULL, &err);
686             if (err != CL_SUCCESS)
687             {
688                 print_error(err, " clCreateBuffer failed\n" );
689                 align_free( outptr[i] );
690                 align_free( inptr[i] );
691                 return -1;
692             }
693 
694             err = clSetKernelArg(kernel[i], 0, sizeof(cl_mem), (void *)&buffer);
695             if ( err != CL_SUCCESS ){
696                 print_error( err, "clSetKernelArg failed" );
697                 align_free( outptr[i] );
698                 align_free( inptr[i] );
699                 return -1;
700             }
701 
702             err = clEnqueueNDRangeKernel(queue, kernel[i], 1, NULL,
703                                          global_work_size, NULL, 0, NULL, NULL);
704             if ( err != CL_SUCCESS ){
705                 print_error( err, "clEnqueueNDRangeKernel failed" );
706                 align_free( outptr[i] );
707                 align_free( inptr[i] );
708                 return -1;
709             }
710 
711             err = clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0,
712                                       ptrSizes[i] * num_elements, outptr[i], 0,
713                                       NULL, NULL);
714             if ( err != CL_SUCCESS ){
715                 print_error( err, "clEnqueueReadBuffer failed" );
716                 align_free( outptr[i] );
717                 align_free( inptr[i] );
718                 return -1;
719             }
720 
721             if (fn(outptr[i], num_elements*(1<<i))){
722                 log_error(" %s%d test failed. cl_mem_flags src: %s\n", type,
723                           1 << i, flag_set_names[src_flag_id]);
724                 total_errors++;
725             }
726             else{
727                 log_info(" %s%d test passed. cl_mem_flags src: %s\n", type,
728                          1 << i, flag_set_names[src_flag_id]);
729             }
730 
731             err = clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0,
732                                       ptrSizes[i] * num_elements, inptr[i], 0,
733                                       NULL, NULL);
734             if (err != CL_SUCCESS)
735             {
736                 print_error( err, "clEnqueueReadBuffer failed" );
737                 align_free( outptr[i] );
738                 align_free( inptr[i] );
739                 return -1;
740             }
741 
742             if (fn(inptr[i], num_elements*(1<<i))){
743                 log_error( " %s%d test failed in-place readback\n", type, 1<<i );
744                 total_errors++;
745             }
746             else{
747                 log_info( " %s%d test passed in-place readback\n", type, 1<<i );
748             }
749 
750 
751             // cleanup
752             align_free( outptr[i] );
753             align_free( inptr[i] );
754         }
755     } // mem flag
756 
757     return total_errors;
758 
759 }   // end test_buffer_read()
760 
test_buffer_read_async(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements,size_t size,char * type,int loops,const char * kernelCode[],const char * kernelName[],int (* fn)(void *,int))761 int test_buffer_read_async( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, size_t size, char *type, int loops,
762                             const char *kernelCode[], const char *kernelName[], int (*fn)(void *,int) )
763 {
764     clProgramWrapper program[5];
765     clKernelWrapper kernel[5];
766     void        *outptr[5];
767     void        *inptr[5];
768     size_t      global_work_size[3];
769     cl_int      err;
770     int         i;
771     size_t      ptrSizes[5];
772     int         src_flag_id;
773     int         total_errors = 0;
774 
775     size_t      min_alignment = get_min_alignment(context);
776 
777     global_work_size[0] = (cl_uint)num_elements;
778 
779     ptrSizes[0] = size;
780     ptrSizes[1] = ptrSizes[0] << 1;
781     ptrSizes[2] = ptrSizes[1] << 1;
782     ptrSizes[3] = ptrSizes[2] << 1;
783     ptrSizes[4] = ptrSizes[3] << 1;
784 
785     //skip devices that don't support long
786     if (! gHasLong && strstr(type,"long") )
787     {
788         log_info( "Device does not support 64-bit integers. Skipping test.\n" );
789         return CL_SUCCESS;
790     }
791 
792     for (i = 0; i < loops; i++)
793     {
794 
795         err = create_single_kernel_helper(context, &program[i], &kernel[i], 1,
796                                           &kernelCode[i], kernelName[i]);
797         if (err)
798         {
799             log_error(" Error creating program for %s\n", type);
800             return -1;
801         }
802 
803         for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++)
804         {
805             clMemWrapper buffer;
806             clEventWrapper event;
807             outptr[i] = align_malloc(ptrSizes[i] * num_elements, min_alignment);
808             if ( ! outptr[i] ){
809                 log_error( " unable to allocate %d bytes for outptr\n", (int)(ptrSizes[i] * num_elements) );
810                 return -1;
811             }
812             memset( outptr[i], 0, ptrSizes[i] * num_elements ); // initialize to zero to tell difference
813             inptr[i] = align_malloc(ptrSizes[i] * num_elements, min_alignment);
814             if ( ! inptr[i] ){
815                 log_error( " unable to allocate %d bytes for inptr\n", (int)(ptrSizes[i] * num_elements) );
816                 return -1;
817             }
818             memset( inptr[i], 0, ptrSizes[i] * num_elements );  // initialize to zero to tell difference
819 
820 
821             if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
822                 buffer =
823                     clCreateBuffer(context, flag_set[src_flag_id],
824                                    ptrSizes[i] * num_elements, inptr[i], &err);
825             else
826                 buffer = clCreateBuffer(context, flag_set[src_flag_id],
827                                         ptrSizes[i] * num_elements, NULL, &err);
828             if ( err != CL_SUCCESS ){
829                 print_error(err, " clCreateBuffer failed\n" );
830                 align_free( outptr[i] );
831                 align_free( inptr[i] );
832                 return -1;
833             }
834 
835             err = clSetKernelArg(kernel[i], 0, sizeof(cl_mem), (void *)&buffer);
836             if ( err != CL_SUCCESS ){
837                 print_error( err, "clSetKernelArg failed" );
838                 align_free( outptr[i] );
839                 align_free( inptr[i] );
840                 return -1;
841             }
842 
843             err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, NULL, 0, NULL, NULL );
844             if ( err != CL_SUCCESS ){
845                 print_error( err, "clEnqueueNDRangeKernel failed" );
846                 align_free( outptr[i] );
847                 align_free( inptr[i] );
848                 return -1;
849             }
850 
851             err = clEnqueueReadBuffer(queue, buffer, false, 0,
852                                       ptrSizes[i] * num_elements, outptr[i], 0,
853                                       NULL, &event);
854 #ifdef CHECK_FOR_NON_WAIT
855             size_t lastIndex = (num_elements * (1 << i) - 1) * ptrSizes[0];
856             if ( ((uchar *)outptr[i])[lastIndex] ){
857                 log_error( "    clEnqueueReadBuffer() possibly returned only after inappropriately waiting for execution to be finished\n" );
858                 log_error( "    Function was run asynchornously, but last value in array was set in code line following clEnqueueReadBuffer()\n" );
859             }
860 #endif
861             if ( err != CL_SUCCESS ){
862                 print_error( err, "clEnqueueReadBuffer failed" );
863                 align_free( outptr[i] );
864                 align_free( inptr[i] );
865                 return -1;
866             }
867             err = clWaitForEvents(1, &event );
868             if ( err != CL_SUCCESS ){
869                 print_error( err, "clWaitForEvents() failed" );
870                 align_free( outptr[i] );
871                 align_free( inptr[i] );
872                 return -1;
873             }
874 
875             if ( fn(outptr[i], num_elements*(1<<i)) ){
876                 log_error(" %s%d test failed. cl_mem_flags src: %s\n", type,
877                           1 << i, flag_set_names[src_flag_id]);
878                 total_errors++;
879             }
880             else{
881                 log_info(" %s%d test passed. cl_mem_flags src: %s\n", type,
882                          1 << i, flag_set_names[src_flag_id]);
883             }
884 
885             // cleanup
886             align_free( outptr[i] );
887             align_free( inptr[i] );
888         }
889     } // mem flags
890 
891 
892     return total_errors;
893 
894 }   // end test_buffer_read_array_async()
895 
896 
test_buffer_read_array_barrier(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements,size_t size,char * type,int loops,const char * kernelCode[],const char * kernelName[],int (* fn)(void *,int))897 int test_buffer_read_array_barrier( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, size_t size, char *type, int loops,
898                                     const char *kernelCode[], const char *kernelName[], int (*fn)(void *,int) )
899 {
900     clProgramWrapper program[5];
901     clKernelWrapper kernel[5];
902     void        *outptr[5], *inptr[5];
903     size_t      global_work_size[3];
904     cl_int      err;
905     int         i;
906     size_t      ptrSizes[5];
907     int         src_flag_id;
908     int         total_errors = 0;
909 
910     size_t min_alignment = get_min_alignment(context);
911 
912     global_work_size[0] = (cl_uint)num_elements;
913 
914     ptrSizes[0] = size;
915     ptrSizes[1] = ptrSizes[0] << 1;
916     ptrSizes[2] = ptrSizes[1] << 1;
917     ptrSizes[3] = ptrSizes[2] << 1;
918     ptrSizes[4] = ptrSizes[3] << 1;
919 
920     //skip devices that don't support long
921     if (! gHasLong && strstr(type,"long") )
922     {
923         log_info( "Device does not support 64-bit integers. Skipping test.\n" );
924         return CL_SUCCESS;
925     }
926 
927     for (i = 0; i < loops; i++)
928     {
929 
930         err = create_single_kernel_helper(context, &program[i], &kernel[i], 1,
931                                           &kernelCode[i], kernelName[i]);
932         if (err)
933         {
934             log_error(" Error creating program for %s\n", type);
935             return -1;
936         }
937 
938         for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++)
939         {
940             clMemWrapper buffer;
941             clEventWrapper event;
942             outptr[i] = align_malloc(ptrSizes[i] * num_elements, min_alignment);
943             if ( ! outptr[i] ){
944                 log_error( " unable to allocate %d bytes for outptr\n", (int)(ptrSizes[i] * num_elements) );
945                 return -1;
946             }
947             memset( outptr[i], 0, ptrSizes[i] * num_elements ); // initialize to zero to tell difference
948             inptr[i] = align_malloc(ptrSizes[i] * num_elements, min_alignment);
949             if ( ! inptr[i] ){
950                 log_error( " unable to allocate %d bytes for inptr\n", (int)(ptrSizes[i] * num_elements) );
951                 return -1;
952             }
953             memset( inptr[i], 0, ptrSizes[i] * num_elements );  // initialize to zero to tell difference
954 
955             if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
956                 buffer =
957                     clCreateBuffer(context, flag_set[src_flag_id],
958                                    ptrSizes[i] * num_elements, inptr[i], &err);
959             else
960                 buffer = clCreateBuffer(context, flag_set[src_flag_id],
961                                         ptrSizes[i] * num_elements, NULL, &err);
962             if ( err != CL_SUCCESS ){
963                 print_error(err, " clCreateBuffer failed\n" );
964                 align_free( outptr[i] );
965                 align_free( inptr[i] );
966                 return -1;
967             }
968 
969             err = clSetKernelArg(kernel[i], 0, sizeof(cl_mem), (void *)&buffer);
970             if ( err != CL_SUCCESS ){
971                 print_error( err, "clSetKernelArgs failed" );
972                 align_free( outptr[i] );
973                 align_free( inptr[i] );
974                 return -1;
975             }
976 
977             err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, NULL, 0, NULL, NULL );
978             if ( err != CL_SUCCESS ){
979                 print_error( err, "clEnqueueNDRangeKernel failed" );
980                 align_free( outptr[i] );
981                 align_free( inptr[i] );
982                 return -1;
983             }
984 
985             err = clEnqueueReadBuffer(queue, buffer, false, 0,
986                                       ptrSizes[i] * num_elements,
987                                       (void *)(outptr[i]), 0, NULL, &event);
988 #ifdef CHECK_FOR_NON_WAIT
989             size_t lastIndex = (num_elements * (1 << i) - 1) * ptrSizes[0];
990             if ( ((uchar *)outptr[i])[lastIndex] ){
991                 log_error( "    clEnqueueReadBuffer() possibly returned only after inappropriately waiting for execution to be finished\n" );
992                 log_error( "    Function was run asynchornously, but last value in array was set in code line following clEnqueueReadBuffer()\n" );
993             }
994 #endif
995             if ( err != CL_SUCCESS ){
996                 print_error( err, "clEnqueueReadBuffer failed" );
997                 align_free( outptr[i] );
998                 align_free( inptr[i] );
999                 return -1;
1000             }
1001             err = clEnqueueBarrierWithWaitList(queue, 0, NULL, NULL);
1002             if ( err != CL_SUCCESS ){
1003                 print_error( err, "clEnqueueBarrierWithWaitList() failed" );
1004                 align_free( outptr[i] );
1005                 return -1;
1006             }
1007 
1008             err = clWaitForEvents(1, &event);
1009             if ( err != CL_SUCCESS ){
1010                 print_error( err, "clWaitForEvents() failed" );
1011                 align_free( outptr[i] );
1012                 align_free( inptr[i] );
1013                 return -1;
1014             }
1015 
1016             if ( fn(outptr[i], num_elements*(1<<i)) ){
1017                 log_error(" %s%d test failed. cl_mem_flags src: %s\n", type,
1018                           1 << i, flag_set_names[src_flag_id]);
1019                 total_errors++;
1020             }
1021             else{
1022                 log_info(" %s%d test passed. cl_mem_flags src: %s\n", type,
1023                          1 << i, flag_set_names[src_flag_id]);
1024             }
1025 
1026             // cleanup
1027             align_free( outptr[i] );
1028             align_free( inptr[i] );
1029         }
1030     } // cl_mem flags
1031     return total_errors;
1032 
1033 }   // end test_buffer_read_array_barrier()
1034 
1035 
1036 #define DECLARE_READ_TEST(type, realType) \
1037 int test_buffer_read_##type( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )    \
1038 { \
1039 return test_buffer_read( deviceID, context, queue, num_elements, sizeof( realType ), (char*)#type, 5, \
1040 buffer_read_##type##_kernel_code, type##_kernel_name, verify_read_##type ); \
1041 }
1042 
DECLARE_READ_TEST(int,cl_int)1043 DECLARE_READ_TEST(int, cl_int)
1044 DECLARE_READ_TEST(uint, cl_uint)
1045 DECLARE_READ_TEST(long, cl_long)
1046 DECLARE_READ_TEST(ulong, cl_ulong)
1047 DECLARE_READ_TEST(short, cl_short)
1048 DECLARE_READ_TEST(ushort, cl_ushort)
1049 DECLARE_READ_TEST(float, cl_float)
1050 DECLARE_READ_TEST(char, cl_char)
1051 DECLARE_READ_TEST(uchar, cl_uchar)
1052 
1053 int test_buffer_read_half(cl_device_id deviceID, cl_context context,
1054                           cl_command_queue queue, int num_elements)
1055 {
1056     PASSIVE_REQUIRE_FP16_SUPPORT(deviceID)
1057     return test_buffer_read( deviceID, context, queue, num_elements, sizeof( cl_float ) / 2, (char*)"half", 5,
1058                              buffer_read_half_kernel_code, half_kernel_name, verify_read_half );
1059 }
1060 
1061 
1062 #define DECLARE_ASYNC_TEST(type, realType) \
1063 int test_buffer_read_async_##type( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )    \
1064 { \
1065 return test_buffer_read_async( deviceID, context, queue, num_elements, sizeof( realType ), (char*)#type, 5, \
1066 buffer_read_##type##_kernel_code, type##_kernel_name, verify_read_##type ); \
1067 }
1068 
DECLARE_ASYNC_TEST(char,cl_char)1069 DECLARE_ASYNC_TEST(char, cl_char)
1070 DECLARE_ASYNC_TEST(uchar, cl_uchar)
1071 DECLARE_ASYNC_TEST(short, cl_short)
1072 DECLARE_ASYNC_TEST(ushort, cl_ushort)
1073 DECLARE_ASYNC_TEST(int, cl_int)
1074 DECLARE_ASYNC_TEST(uint, cl_uint)
1075 DECLARE_ASYNC_TEST(long, cl_long)
1076 DECLARE_ASYNC_TEST(ulong, cl_ulong)
1077 DECLARE_ASYNC_TEST(float, cl_float)
1078 
1079 
1080 #define DECLARE_BARRIER_TEST(type, realType) \
1081 int test_buffer_read_array_barrier_##type( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )    \
1082 { \
1083 return test_buffer_read_array_barrier( deviceID, context, queue, num_elements, sizeof( realType ), (char*)#type, 5, \
1084 buffer_read_##type##_kernel_code, type##_kernel_name, verify_read_##type ); \
1085 }
1086 
1087 DECLARE_BARRIER_TEST(int, cl_int)
1088 DECLARE_BARRIER_TEST(uint, cl_uint)
1089 DECLARE_BARRIER_TEST(long, cl_long)
1090 DECLARE_BARRIER_TEST(ulong, cl_ulong)
1091 DECLARE_BARRIER_TEST(short, cl_short)
1092 DECLARE_BARRIER_TEST(ushort, cl_ushort)
1093 DECLARE_BARRIER_TEST(char, cl_char)
1094 DECLARE_BARRIER_TEST(uchar, cl_uchar)
1095 DECLARE_BARRIER_TEST(float, cl_float)
1096 
1097 int test_buffer_read_struct(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
1098 {
1099     cl_mem      buffers[1];
1100     TestStruct  *output_ptr;
1101     cl_program  program[1];
1102     cl_kernel   kernel[1];
1103     size_t      global_work_size[3];
1104     cl_int      err;
1105     size_t      objSize = sizeof(TestStruct);
1106 
1107     size_t      min_alignment = get_min_alignment(context);
1108 
1109     global_work_size[0] = (cl_uint)num_elements;
1110 
1111     output_ptr = (TestStruct*)align_malloc(objSize * num_elements, min_alignment);
1112     if ( ! output_ptr ){
1113         log_error( " unable to allocate %d bytes for output_ptr\n", (int)(objSize * num_elements) );
1114         return -1;
1115     }
1116     buffers[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
1117                                 objSize * num_elements, NULL, &err);
1118     if ( err != CL_SUCCESS ){
1119         print_error( err, " clCreateBuffer failed\n" );
1120         align_free( output_ptr );
1121         return -1;
1122     }
1123 
1124     err = create_single_kernel_helper(  context, &program[0], &kernel[0], 1, &buffer_read_struct_kernel_code, "test_buffer_read_struct" );
1125     if ( err ){
1126         clReleaseProgram( program[0] );
1127         align_free( output_ptr );
1128         return -1;
1129     }
1130 
1131     err = clSetKernelArg( kernel[0], 0, sizeof( cl_mem ), (void *)&buffers[0] );
1132     if ( err != CL_SUCCESS){
1133         print_error( err, "clSetKernelArg failed" );
1134         clReleaseMemObject( buffers[0] );
1135         clReleaseKernel( kernel[0] );
1136         clReleaseProgram( program[0] );
1137         align_free( output_ptr );
1138         return -1;
1139     }
1140 
1141     err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, global_work_size, NULL, 0, NULL, NULL );
1142     if ( err != CL_SUCCESS ){
1143         print_error( err, "clEnqueueNDRangeKernel failed" );
1144         clReleaseMemObject( buffers[0] );
1145         clReleaseKernel( kernel[0] );
1146         clReleaseProgram( program[0] );
1147         align_free( output_ptr );
1148         return -1;
1149     }
1150 
1151     err = clEnqueueReadBuffer( queue, buffers[0], true, 0, objSize*num_elements, (void *)output_ptr, 0, NULL, NULL );
1152     if ( err != CL_SUCCESS){
1153         print_error( err, "clEnqueueReadBuffer failed" );
1154         clReleaseMemObject( buffers[0] );
1155         clReleaseKernel( kernel[0] );
1156         clReleaseProgram( program[0] );
1157         align_free( output_ptr );
1158         return -1;
1159     }
1160 
1161     if (verify_read_struct(output_ptr, num_elements)){
1162         log_error(" struct test failed\n");
1163         err = -1;
1164     }
1165     else{
1166         log_info(" struct test passed\n");
1167         err = 0;
1168     }
1169 
1170     // cleanup
1171     clReleaseMemObject( buffers[0] );
1172     clReleaseKernel( kernel[0] );
1173     clReleaseProgram( program[0] );
1174     align_free( output_ptr );
1175 
1176     return err;
1177 }
1178 
1179 
testRandomReadSize(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements,cl_uint startOfRead,size_t sizeOfRead)1180 static int testRandomReadSize( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, cl_uint startOfRead, size_t sizeOfRead )
1181 {
1182     cl_mem      buffers[3];
1183     int         *outptr[3];
1184     cl_program  program[3];
1185     cl_kernel   kernel[3];
1186     size_t      global_work_size[3];
1187     cl_int      err;
1188     int         i, j;
1189     size_t      ptrSizes[3];    // sizeof(int), sizeof(int2), sizeof(int4)
1190     int         total_errors = 0;
1191     size_t      min_alignment = get_min_alignment(context);
1192 
1193     global_work_size[0] = (cl_uint)num_elements;
1194 
1195     ptrSizes[0] = sizeof(cl_int);
1196     ptrSizes[1] = ptrSizes[0] << 1;
1197     ptrSizes[2] = ptrSizes[1] << 1;
1198     for ( i = 0; i < 3; i++ ){
1199         outptr[i] = (int *)align_malloc( ptrSizes[i] * num_elements, min_alignment);
1200         if ( ! outptr[i] ){
1201             log_error( " Unable to allocate %d bytes for outptr[%d]\n", (int)(ptrSizes[i] * num_elements), i );
1202             for ( j = 0; j < i; j++ ){
1203                 clReleaseMemObject( buffers[j] );
1204                 align_free( outptr[j] );
1205             }
1206             return -1;
1207         }
1208         buffers[i] = clCreateBuffer(context, CL_MEM_READ_WRITE,
1209                                     ptrSizes[i] * num_elements, NULL, &err);
1210         if ( err != CL_SUCCESS ){
1211             print_error(err, " clCreateBuffer failed\n" );
1212             for ( j = 0; j < i; j++ ){
1213                 clReleaseMemObject( buffers[j] );
1214                 align_free( outptr[j] );
1215             }
1216             align_free( outptr[i] );
1217             return -1;
1218         }
1219     }
1220 
1221     err = create_single_kernel_helper(  context, &program[0], &kernel[0], 1, &buffer_read_int_kernel_code[0], "test_buffer_read_int" );
1222     if ( err ){
1223         log_error( " Error creating program for int\n" );
1224         for ( i = 0; i < 3; i++ ){
1225             clReleaseMemObject( buffers[i] );
1226             align_free( outptr[i] );
1227         }
1228         return -1;
1229     }
1230 
1231     err = create_single_kernel_helper(  context, &program[1], &kernel[1], 1, &buffer_read_int_kernel_code[1], "test_buffer_read_int2" );
1232     if ( err ){
1233         log_error( " Error creating program for int2\n" );
1234         clReleaseKernel( kernel[0] );
1235         clReleaseProgram( program[0] );
1236         for ( i = 0; i < 3; i++ ){
1237             clReleaseMemObject( buffers[i] );
1238             align_free( outptr[i] );
1239         }
1240         return -1;
1241     }
1242 
1243     err = create_single_kernel_helper(  context, &program[2], &kernel[2], 1, &buffer_read_int_kernel_code[2], "test_buffer_read_int4" );
1244     if ( err ){
1245         log_error( " Error creating program for int4\n" );
1246         clReleaseKernel( kernel[0] );
1247         clReleaseProgram( program[0] );
1248         clReleaseKernel( kernel[1] );
1249         clReleaseProgram( program[1] );
1250         for ( i = 0; i < 3; i++ ){
1251             clReleaseMemObject( buffers[i] );
1252             align_free( outptr[i] );
1253         }
1254         return -1;
1255     }
1256 
1257     for (i=0; i<3; i++){
1258         err = clSetKernelArg( kernel[i], 0, sizeof( cl_mem ), (void *)&buffers[i] );
1259         if ( err != CL_SUCCESS ){
1260             print_error( err, "clSetKernelArgs failed" );
1261             clReleaseMemObject( buffers[i] );
1262             clReleaseKernel( kernel[i] );
1263             clReleaseProgram( program[i] );
1264             align_free( outptr[i] );
1265             return -1;
1266         }
1267 
1268         err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, NULL, 0, NULL, NULL );
1269         if ( err != CL_SUCCESS ){
1270             print_error( err, "clEnqueueNDRangeKernel failed" );
1271             clReleaseMemObject( buffers[i] );
1272             clReleaseKernel( kernel[i] );
1273             clReleaseProgram( program[i] );
1274             align_free( outptr[i] );
1275             return -1;
1276         }
1277 
1278         err = clEnqueueReadBuffer( queue, buffers[i], true, startOfRead*ptrSizes[i], ptrSizes[i]*sizeOfRead, (void *)(outptr[i]), 0, NULL, NULL );
1279         if ( err != CL_SUCCESS ){
1280             print_error( err, "clEnqueueReadBuffer failed" );
1281             clReleaseMemObject( buffers[i] );
1282             clReleaseKernel( kernel[i] );
1283             clReleaseProgram( program[i] );
1284             align_free( outptr[i] );
1285             return -1;
1286         }
1287 
1288         if ( verify_read_int( outptr[i], (int)sizeOfRead*(1<<i) ) ){
1289             log_error(" random size from %d, size: %d test failed on i%d\n", (int)startOfRead, (int)sizeOfRead, 1<<i);
1290             total_errors++;
1291         }
1292         else{
1293             log_info(" random size from %d, size: %d test passed on i%d\n", (int)startOfRead, (int)sizeOfRead, 1<<i);
1294         }
1295 
1296         // cleanup
1297         clReleaseMemObject( buffers[i] );
1298         clReleaseKernel( kernel[i] );
1299         clReleaseProgram( program[i] );
1300         align_free( outptr[i] );
1301     }
1302 
1303     return total_errors;
1304 
1305 }   // end testRandomReadSize()
1306 
1307 
test_buffer_read_random_size(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1308 int test_buffer_read_random_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
1309 {
1310     int     err = 0;
1311     int     i;
1312     cl_uint start;
1313     size_t  size;
1314     MTdata  d = init_genrand( gRandomSeed );
1315 
1316     // now test for random sizes of array being read
1317     for ( i = 0; i < 8; i++ ){
1318         start = (cl_uint)get_random_float( 0.f, (float)(num_elements - 8), d );
1319         size = (size_t)get_random_float( 8.f, (float)(num_elements - start), d );
1320         if (testRandomReadSize( deviceID, context, queue, num_elements, start, size ))
1321             err++;
1322     }
1323 
1324     free_mtdata(d);
1325 
1326     return err;
1327 }
1328 
1329