1 //
2 // Copyright (c) 2022 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16
17 #include <algorithm>
18 #include <numeric>
19 #include <string>
20 #include <type_traits>
21 #include <vector>
22
23 #include "procs.h"
24 #include "harness/integer_ops_test_info.h"
25 #include "harness/testHarness.h"
26
27 template <typename T>
28 static typename std::make_unsigned<T>::type
arithmetic_shift_right(T tx,cl_uint count)29 arithmetic_shift_right(T tx, cl_uint count)
30 {
31 typedef typename std::make_unsigned<T>::type unsigned_t;
32 unsigned_t x = static_cast<unsigned_t>(tx);
33
34 // To implement an arithmetic shift right:
35 // - If the sign bit is not set, shift as usual.
36 // - Otherwise, flip all of the bits, shift, then flip back.
37 unsigned_t s = -(x >> (sizeof(x) * 8 - 1));
38 unsigned_t result = (s ^ x) >> count ^ s;
39
40 return result;
41 }
42
43 template <typename T>
44 static typename std::make_unsigned<T>::type
cpu_bit_extract_signed(T tbase,cl_uint offset,cl_uint count)45 cpu_bit_extract_signed(T tbase, cl_uint offset, cl_uint count)
46 {
47 typedef typename std::make_signed<T>::type unsigned_t;
48
49 assert(offset <= sizeof(T) * 8);
50 assert(count <= sizeof(T) * 8);
51 assert(offset + count <= sizeof(T) * 8);
52
53 unsigned_t base = static_cast<unsigned_t>(tbase);
54 unsigned_t result;
55
56 if (count == 0)
57 {
58 result = 0;
59 }
60 else
61 {
62 result = base << (sizeof(T) * 8 - offset - count);
63 result = arithmetic_shift_right(result, sizeof(T) * 8 - count);
64 }
65
66 return result;
67 }
68
69 template <typename T>
70 static typename std::make_unsigned<T>::type
cpu_bit_extract_unsigned(T tbase,cl_uint offset,cl_uint count)71 cpu_bit_extract_unsigned(T tbase, cl_uint offset, cl_uint count)
72 {
73 typedef typename std::make_unsigned<T>::type unsigned_t;
74
75 assert(offset <= sizeof(T) * 8);
76 assert(count <= sizeof(T) * 8);
77 assert(offset + count <= sizeof(T) * 8);
78
79 unsigned_t base = static_cast<unsigned_t>(tbase);
80 unsigned_t result;
81
82 if (count == 0)
83 {
84 result = 0;
85 }
86 else
87 {
88 result = base << (sizeof(T) * 8 - offset - count);
89 result = result >> (sizeof(T) * 8 - count);
90 }
91
92 return result;
93 }
94
95 template <typename T, size_t N>
96 static void
calculate_reference(std::vector<typename std::make_unsigned<T>::type> & sref,std::vector<typename std::make_unsigned<T>::type> & uref,const std::vector<T> & base)97 calculate_reference(std::vector<typename std::make_unsigned<T>::type>& sref,
98 std::vector<typename std::make_unsigned<T>::type>& uref,
99 const std::vector<T>& base)
100 {
101 sref.resize(base.size());
102 uref.resize(base.size());
103 for (size_t i = 0; i < base.size(); i++)
104 {
105 cl_uint offset = (i / N) / (sizeof(T) * 8 + 1);
106 cl_uint count = (i / N) % (sizeof(T) * 8 + 1);
107 if (offset + count > sizeof(T) * 8)
108 {
109 count = (sizeof(T) * 8) - offset;
110 }
111 sref[i] = cpu_bit_extract_signed(base[i], offset, count);
112 uref[i] = cpu_bit_extract_unsigned(base[i], offset, count);
113 }
114 }
115
116 static constexpr const char* kernel_source = R"CLC(
117 __kernel void test_bitfield_extract(__global SIGNED_TYPE* sdst, __global UNSIGNED_TYPE* udst, __global TYPE* base)
118 {
119 int index = get_global_id(0);
120 uint offset = index / (sizeof(BASETYPE) * 8 + 1);
121 uint count = index % (sizeof(BASETYPE) * 8 + 1);
122 if (offset + count > sizeof(BASETYPE) * 8) {
123 count = (sizeof(BASETYPE) * 8) - offset;
124 }
125 sdst[index] = bitfield_extract_signed(base[index], offset, count);
126 udst[index] = bitfield_extract_unsigned(base[index], offset, count);
127 }
128 )CLC";
129
130 static constexpr const char* kernel_source_vec3 = R"CLC(
131 __kernel void test_bitfield_extract(__global SIGNED_BASETYPE* sdst, __global UNSIGNED_BASETYPE* udst, __global BASETYPE* base)
132 {
133 int index = get_global_id(0);
134 uint offset = index / (sizeof(BASETYPE) * 8 + 1);
135 uint count = index % (sizeof(BASETYPE) * 8 + 1);
136 if (offset + count > sizeof(BASETYPE) * 8) {
137 count = (sizeof(BASETYPE) * 8) - offset;
138 }
139 TYPE b = vload3(index, base);
140 SIGNED_TYPE s = bitfield_extract_signed(b, offset, count);
141 UNSIGNED_TYPE u = bitfield_extract_unsigned(b, offset, count);
142 vstore3(s, index, sdst);
143 vstore3(u, index, udst);
144 }
145 )CLC";
146
147 template <typename T, size_t N>
test_vectype(cl_device_id device,cl_context context,cl_command_queue queue)148 static int test_vectype(cl_device_id device, cl_context context,
149 cl_command_queue queue)
150 {
151 // Because converting from an unsigned type to a signed type is
152 // implementation-defined if the most significant bit is set until C++ 20,
153 // compute all reference results using unsigned types.
154 typedef typename std::make_unsigned<T>::type unsigned_t;
155
156 cl_int error = CL_SUCCESS;
157
158 clProgramWrapper program;
159 clKernelWrapper kernel;
160
161 std::string buildOptions;
162 buildOptions += " -DTYPE=";
163 buildOptions +=
164 TestInfo<T>::deviceTypeName + ((N > 1) ? std::to_string(N) : "");
165 buildOptions += " -DSIGNED_TYPE=";
166 buildOptions +=
167 TestInfo<T>::deviceTypeNameSigned + ((N > 1) ? std::to_string(N) : "");
168 buildOptions += " -DUNSIGNED_TYPE=";
169 buildOptions += TestInfo<T>::deviceTypeNameUnsigned
170 + ((N > 1) ? std::to_string(N) : "");
171 buildOptions += " -DBASETYPE=";
172 buildOptions += TestInfo<T>::deviceTypeName;
173 buildOptions += " -DSIGNED_BASETYPE=";
174 buildOptions += TestInfo<T>::deviceTypeNameSigned;
175 buildOptions += " -DUNSIGNED_BASETYPE=";
176 buildOptions += TestInfo<T>::deviceTypeNameUnsigned;
177
178 const size_t ELEMENTS_TO_TEST = (sizeof(T) * 8 + 1) * (sizeof(T) * 8 + 1);
179
180 std::vector<T> base(ELEMENTS_TO_TEST * N);
181 fill_vector_with_random_data(base);
182
183 std::vector<unsigned_t> sreference;
184 std::vector<unsigned_t> ureference;
185 calculate_reference<T, N>(sreference, ureference, base);
186
187 const char* source = (N == 3) ? kernel_source_vec3 : kernel_source;
188 error = create_single_kernel_helper(context, &program, &kernel, 1, &source,
189 "test_bitfield_extract",
190 buildOptions.c_str());
191 test_error(error, "Unable to create test_bitfield_insert kernel");
192
193 clMemWrapper sdst =
194 clCreateBuffer(context, 0, sreference.size() * sizeof(T), NULL, &error);
195 test_error(error, "Unable to create signed output buffer");
196
197 clMemWrapper udst =
198 clCreateBuffer(context, 0, ureference.size() * sizeof(T), NULL, &error);
199 test_error(error, "Unable to create unsigned output buffer");
200
201 clMemWrapper src_base =
202 clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, base.size() * sizeof(T),
203 base.data(), &error);
204 test_error(error, "Unable to create base buffer");
205
206 error = clSetKernelArg(kernel, 0, sizeof(sdst), &sdst);
207 test_error(error, "Unable to set signed output buffer kernel arg");
208
209 error = clSetKernelArg(kernel, 1, sizeof(udst), &udst);
210 test_error(error, "Unable to set unsigned output buffer kernel arg");
211
212 error = clSetKernelArg(kernel, 2, sizeof(src_base), &src_base);
213 test_error(error, "Unable to set base buffer kernel arg");
214
215 size_t global_work_size[] = { sreference.size() / N };
216 error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_work_size,
217 NULL, 0, NULL, NULL);
218 test_error(error, "Unable to enqueue test kernel");
219
220 error = clFinish(queue);
221 test_error(error, "clFinish failed after test kernel");
222
223 std::vector<unsigned_t> sresults(sreference.size(), 99);
224 error = clEnqueueReadBuffer(queue, sdst, CL_TRUE, 0,
225 sresults.size() * sizeof(T), sresults.data(), 0,
226 NULL, NULL);
227 test_error(error, "Unable to read signed data after test kernel");
228
229 if (sresults != sreference)
230 {
231 log_error("Signed result buffer did not match reference buffer!\n");
232 return TEST_FAIL;
233 }
234
235 std::vector<unsigned_t> uresults(ureference.size(), 99);
236 error = clEnqueueReadBuffer(queue, udst, CL_TRUE, 0,
237 uresults.size() * sizeof(T), uresults.data(), 0,
238 NULL, NULL);
239 test_error(error, "Unable to read unsigned data after test kernel");
240
241 if (uresults != ureference)
242 {
243 log_error("Unsigned result buffer did not match reference buffer!\n");
244 return TEST_FAIL;
245 }
246
247 return TEST_PASS;
248 }
249
250 template <typename T>
test_type(cl_device_id device,cl_context context,cl_command_queue queue)251 static int test_type(cl_device_id device, cl_context context,
252 cl_command_queue queue)
253 {
254 log_info(" testing type %s\n", TestInfo<T>::deviceTypeName);
255
256 return test_vectype<T, 1>(device, context, queue)
257 | test_vectype<T, 2>(device, context, queue)
258 | test_vectype<T, 3>(device, context, queue)
259 | test_vectype<T, 4>(device, context, queue)
260 | test_vectype<T, 8>(device, context, queue)
261 | test_vectype<T, 16>(device, context, queue);
262 }
263
test_extended_bit_ops_extract(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)264 int test_extended_bit_ops_extract(cl_device_id device, cl_context context,
265 cl_command_queue queue, int num_elements)
266 {
267 if (is_extension_available(device, "cl_khr_extended_bit_ops"))
268 {
269 int result = TEST_PASS;
270
271 result |= test_type<cl_char>(device, context, queue);
272 result |= test_type<cl_uchar>(device, context, queue);
273 result |= test_type<cl_short>(device, context, queue);
274 result |= test_type<cl_ushort>(device, context, queue);
275 result |= test_type<cl_int>(device, context, queue);
276 result |= test_type<cl_uint>(device, context, queue);
277 if (gHasLong)
278 {
279 result |= test_type<cl_long>(device, context, queue);
280 result |= test_type<cl_ulong>(device, context, queue);
281 }
282 return result;
283 }
284
285 log_info("cl_khr_extended_bit_ops is not supported\n");
286 return TEST_SKIPPED_ITSELF;
287 }
288