1 //
2 // Copyright (c) 2022 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16
17 #include <algorithm>
18 #include <numeric>
19 #include <string>
20 #include <type_traits>
21 #include <vector>
22
23 #include "procs.h"
24 #include "harness/integer_ops_test_info.h"
25 #include "harness/testHarness.h"
26
27 template <typename T>
28 static typename std::make_unsigned<T>::type
cpu_bit_insert(T tbase,T tinsert,cl_uint offset,cl_uint count)29 cpu_bit_insert(T tbase, T tinsert, cl_uint offset, cl_uint count)
30 {
31 assert(offset <= sizeof(T) * 8);
32 assert(count <= sizeof(T) * 8);
33 assert(offset + count <= sizeof(T) * 8);
34
35 cl_ulong base = static_cast<cl_ulong>(tbase);
36 cl_ulong insert = static_cast<cl_ulong>(tinsert);
37
38 cl_ulong mask = (count < 64) ? ((1ULL << count) - 1) << offset : ~0ULL;
39 cl_ulong result = ((insert << offset) & mask) | (base & ~mask);
40
41 return static_cast<typename std::make_unsigned<T>::type>(result);
42 }
43
44 template <typename T, size_t N>
45 static void
calculate_reference(std::vector<typename std::make_unsigned<T>::type> & ref,const std::vector<T> & base,const std::vector<T> & insert)46 calculate_reference(std::vector<typename std::make_unsigned<T>::type>& ref,
47 const std::vector<T>& base, const std::vector<T>& insert)
48 {
49 ref.resize(base.size());
50 for (size_t i = 0; i < base.size(); i++)
51 {
52 cl_uint offset = (i / N) / (sizeof(T) * 8 + 1);
53 cl_uint count = (i / N) % (sizeof(T) * 8 + 1);
54 if (offset + count > sizeof(T) * 8)
55 {
56 count = (sizeof(T) * 8) - offset;
57 }
58 ref[i] = cpu_bit_insert(base[i], insert[i], offset, count);
59 }
60 }
61
62 static constexpr const char* kernel_source = R"CLC(
63 __kernel void test_bitfield_insert(__global TYPE* dst, __global TYPE* base, __global TYPE* insert)
64 {
65 int index = get_global_id(0);
66 uint offset = index / (sizeof(BASETYPE) * 8 + 1);
67 uint count = index % (sizeof(BASETYPE) * 8 + 1);
68 if (offset + count > sizeof(BASETYPE) * 8) {
69 count = (sizeof(BASETYPE) * 8) - offset;
70 }
71 dst[index] = bitfield_insert(base[index], insert[index], offset, count);
72 }
73 )CLC";
74
75 static constexpr const char* kernel_source_vec3 = R"CLC(
76 __kernel void test_bitfield_insert(__global BASETYPE* dst, __global BASETYPE* base, __global BASETYPE* insert)
77 {
78 int index = get_global_id(0);
79 uint offset = index / (sizeof(BASETYPE) * 8 + 1);
80 uint count = index % (sizeof(BASETYPE) * 8 + 1);
81 if (offset + count > sizeof(BASETYPE) * 8) {
82 count = (sizeof(BASETYPE) * 8) - offset;
83 }
84 TYPE b = vload3(index, base);
85 TYPE i = vload3(index, insert);
86 TYPE d = bitfield_insert(b, i, offset, count);
87 vstore3(d, index, dst);
88 }
89 )CLC";
90
91 template <typename T, size_t N>
test_vectype(cl_device_id device,cl_context context,cl_command_queue queue)92 static int test_vectype(cl_device_id device, cl_context context,
93 cl_command_queue queue)
94 {
95 // Because converting from an unsigned type to a signed type is
96 // implementation-defined if the most significant bit is set until C++ 20,
97 // compute all reference results using unsigned types.
98 typedef typename std::make_unsigned<T>::type unsigned_t;
99
100 cl_int error = CL_SUCCESS;
101
102 clProgramWrapper program;
103 clKernelWrapper kernel;
104
105 std::string buildOptions{ "-DTYPE=" };
106 buildOptions += TestInfo<T>::deviceTypeName;
107 if (N > 1)
108 {
109 buildOptions += std::to_string(N);
110 }
111 buildOptions += " -DBASETYPE=";
112 buildOptions += TestInfo<T>::deviceTypeName;
113
114 const size_t ELEMENTS_TO_TEST = (sizeof(T) * 8 + 1) * (sizeof(T) * 8 + 1);
115
116 std::vector<T> base(ELEMENTS_TO_TEST * N);
117 std::fill(base.begin(), base.end(), static_cast<T>(0xA5A5A5A5A5A5A5A5ULL));
118
119 std::vector<T> insert(ELEMENTS_TO_TEST * N);
120 fill_vector_with_random_data(insert);
121
122 std::vector<unsigned_t> reference;
123 calculate_reference<T, N>(reference, base, insert);
124
125 const char* source = (N == 3) ? kernel_source_vec3 : kernel_source;
126 error = create_single_kernel_helper(context, &program, &kernel, 1, &source,
127 "test_bitfield_insert",
128 buildOptions.c_str());
129 test_error(error, "Unable to create test_bitfield_insert kernel");
130
131 clMemWrapper dst =
132 clCreateBuffer(context, 0, reference.size() * sizeof(T), NULL, &error);
133 test_error(error, "Unable to create output buffer");
134
135 clMemWrapper src_base =
136 clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, base.size() * sizeof(T),
137 base.data(), &error);
138 test_error(error, "Unable to create base buffer");
139
140 clMemWrapper src_insert =
141 clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, insert.size() * sizeof(T),
142 insert.data(), &error);
143 test_error(error, "Unable to create insert buffer");
144
145 error = clSetKernelArg(kernel, 0, sizeof(dst), &dst);
146 test_error(error, "Unable to set output buffer kernel arg");
147
148 error = clSetKernelArg(kernel, 1, sizeof(src_base), &src_base);
149 test_error(error, "Unable to set base buffer kernel arg");
150
151 error = clSetKernelArg(kernel, 2, sizeof(src_insert), &src_insert);
152 test_error(error, "Unable to set insert buffer kernel arg");
153
154 size_t global_work_size[] = { reference.size() / N };
155 error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_work_size,
156 NULL, 0, NULL, NULL);
157 test_error(error, "Unable to enqueue test kernel");
158
159 error = clFinish(queue);
160 test_error(error, "clFinish failed after test kernel");
161
162 std::vector<unsigned_t> results(reference.size(), 99);
163 error =
164 clEnqueueReadBuffer(queue, dst, CL_TRUE, 0, results.size() * sizeof(T),
165 results.data(), 0, NULL, NULL);
166 test_error(error, "Unable to read data after test kernel");
167
168 if (results != reference)
169 {
170 log_error("Result buffer did not match reference buffer!\n");
171 return TEST_FAIL;
172 }
173
174 return TEST_PASS;
175 }
176
177 template <typename T>
test_type(cl_device_id device,cl_context context,cl_command_queue queue)178 static int test_type(cl_device_id device, cl_context context,
179 cl_command_queue queue)
180 {
181 log_info(" testing type %s\n", TestInfo<T>::deviceTypeName);
182
183 return test_vectype<T, 1>(device, context, queue)
184 | test_vectype<T, 2>(device, context, queue)
185 | test_vectype<T, 3>(device, context, queue)
186 | test_vectype<T, 4>(device, context, queue)
187 | test_vectype<T, 8>(device, context, queue)
188 | test_vectype<T, 16>(device, context, queue);
189 }
190
test_extended_bit_ops_insert(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)191 int test_extended_bit_ops_insert(cl_device_id device, cl_context context,
192 cl_command_queue queue, int num_elements)
193 {
194 if (is_extension_available(device, "cl_khr_extended_bit_ops"))
195 {
196 int result = TEST_PASS;
197
198 result |= test_type<cl_char>(device, context, queue);
199 result |= test_type<cl_uchar>(device, context, queue);
200 result |= test_type<cl_short>(device, context, queue);
201 result |= test_type<cl_ushort>(device, context, queue);
202 result |= test_type<cl_int>(device, context, queue);
203 result |= test_type<cl_uint>(device, context, queue);
204 if (gHasLong)
205 {
206 result |= test_type<cl_long>(device, context, queue);
207 result |= test_type<cl_ulong>(device, context, queue);
208 }
209 return result;
210 }
211
212 log_info("cl_khr_extended_bit_ops is not supported\n");
213 return TEST_SKIPPED_ITSELF;
214 }
215