1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17
18 #include <stdio.h>
19 #include <string.h>
20 #include <sys/types.h>
21 #include <sys/stat.h>
22
23 #include "procs.h"
24
25
26 const char *wg_any_kernel_code =
27 "__kernel void test_wg_any(global float *input, global int *output)\n"
28 "{\n"
29 " int tid = get_global_id(0);\n"
30 "\n"
31 " int result = work_group_any((input[tid] > input[tid+1]));\n"
32 " output[tid] = result;\n"
33 "}\n";
34
35
36 static int
verify_wg_any(float * inptr,int * outptr,size_t n,size_t wg_size)37 verify_wg_any(float *inptr, int *outptr, size_t n, size_t wg_size)
38 {
39 size_t i, j;
40
41 for (i=0; i<n; i+=wg_size)
42 {
43 int predicate_any = 0x0;
44 for (j=0; j<((n-i) > wg_size ? wg_size : (n-i)); j++)
45 {
46 if (inptr[i+j] > inptr[i+j+1])
47 {
48 predicate_any = 0xFFFFFFFF;
49 break;
50 }
51 }
52 for (j=0; j<((n-i) > wg_size ? wg_size : (n-i)); j++)
53 {
54 if ( (predicate_any && (outptr[i+j] == 0)) ||
55 ((predicate_any == 0) && outptr[i+j]) )
56 {
57 log_info("work_group_any: Error at %lu: expected = %d, got = %d\n", i+j, predicate_any, outptr[i+j]);
58 return -1;
59 }
60 }
61 }
62
63 return 0;
64 }
65
66 int
test_work_group_any(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)67 test_work_group_any(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
68 {
69 cl_mem streams[2];
70 cl_float *input_ptr[1], *p;
71 cl_int *output_ptr;
72 cl_program program;
73 cl_kernel kernel;
74 size_t threads[1];
75 size_t wg_size[1];
76 size_t num_elements;
77 int err;
78 MTdata d;
79
80 err = create_single_kernel_helper(context, &program, &kernel, 1,
81 &wg_any_kernel_code, "test_wg_any");
82 if (err)
83 return -1;
84
85 // "wg_size" is limited to that of the first dimension as only a 1DRange is executed.
86 err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size);
87 test_error(err, "get_max_allowed_1d_work_group_size_on_device failed");
88
89 num_elements = n_elems;
90
91 input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * (num_elements+1));
92 output_ptr = (cl_int*)malloc(sizeof(cl_int) * (num_elements+1));
93 streams[0] =
94 clCreateBuffer(context, CL_MEM_READ_WRITE,
95 sizeof(cl_float) * (num_elements + 1), NULL, NULL);
96 if (!streams[0])
97 {
98 log_error("clCreateBuffer failed\n");
99 return -1;
100 }
101
102 streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
103 sizeof(cl_int) * num_elements, NULL, NULL);
104 if (!streams[1])
105 {
106 log_error("clCreateBuffer failed\n");
107 return -1;
108 }
109
110 p = input_ptr[0];
111 d = init_genrand( gRandomSeed );
112 for (size_t i = 0; i < (num_elements + 1); i++)
113 {
114 p[i] = get_random_float((float)(-100000.f * M_PI), (float)(100000.f * M_PI) ,d);
115 }
116 free_mtdata(d); d = NULL;
117
118 err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_float)*(num_elements+1), (void *)input_ptr[0], 0, NULL, NULL );
119 if (err != CL_SUCCESS)
120 {
121 log_error("clWriteArray failed\n");
122 return -1;
123 }
124
125 err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0] );
126 err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1] );
127 if (err != CL_SUCCESS)
128 {
129 log_error("clSetKernelArgs failed\n");
130 return -1;
131 }
132
133 // Line below is troublesome...
134 threads[0] = (size_t)n_elems;
135 err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, wg_size, 0, NULL, NULL );
136 if (err != CL_SUCCESS)
137 {
138 log_error("clEnqueueNDRangeKernel failed\n");
139 return -1;
140 }
141
142 cl_uint dead = 0xdeaddead;
143 memset_pattern4(output_ptr, &dead, sizeof(cl_float)*num_elements);
144 err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_int)*num_elements, (void *)output_ptr, 0, NULL, NULL );
145 if (err != CL_SUCCESS)
146 {
147 log_error("clEnqueueReadBuffer failed\n");
148 return -1;
149 }
150
151 if (verify_wg_any(input_ptr[0], output_ptr, num_elements, wg_size[0]))
152 {
153 log_error("work_group_any test failed\n");
154 return -1;
155 }
156 log_info("work_group_any test passed\n");
157
158 clReleaseMemObject(streams[0]);
159 clReleaseMemObject(streams[1]);
160 clReleaseKernel(kernel);
161 clReleaseProgram(program);
162 free(input_ptr[0]);
163 free(output_ptr);
164
165 return err;
166 }
167
168