xref: /aosp_15_r20/external/OpenCL-CTS/test_conformance/integer_ops/test_extended_bit_ops_insert.cpp (revision 6467f958c7de8070b317fc65bcb0f6472e388d82)
1 //
2 // Copyright (c) 2022 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 
17 #include <algorithm>
18 #include <numeric>
19 #include <string>
20 #include <type_traits>
21 #include <vector>
22 
23 #include "procs.h"
24 #include "harness/integer_ops_test_info.h"
25 #include "harness/testHarness.h"
26 
27 template <typename T>
28 static typename std::make_unsigned<T>::type
cpu_bit_insert(T tbase,T tinsert,cl_uint offset,cl_uint count)29 cpu_bit_insert(T tbase, T tinsert, cl_uint offset, cl_uint count)
30 {
31     assert(offset <= sizeof(T) * 8);
32     assert(count <= sizeof(T) * 8);
33     assert(offset + count <= sizeof(T) * 8);
34 
35     cl_ulong base = static_cast<cl_ulong>(tbase);
36     cl_ulong insert = static_cast<cl_ulong>(tinsert);
37 
38     cl_ulong mask = (count < 64) ? ((1ULL << count) - 1) << offset : ~0ULL;
39     cl_ulong result = ((insert << offset) & mask) | (base & ~mask);
40 
41     return static_cast<typename std::make_unsigned<T>::type>(result);
42 }
43 
44 template <typename T, size_t N>
45 static void
calculate_reference(std::vector<typename std::make_unsigned<T>::type> & ref,const std::vector<T> & base,const std::vector<T> & insert)46 calculate_reference(std::vector<typename std::make_unsigned<T>::type>& ref,
47                     const std::vector<T>& base, const std::vector<T>& insert)
48 {
49     ref.resize(base.size());
50     for (size_t i = 0; i < base.size(); i++)
51     {
52         cl_uint offset = (i / N) / (sizeof(T) * 8 + 1);
53         cl_uint count = (i / N) % (sizeof(T) * 8 + 1);
54         if (offset + count > sizeof(T) * 8)
55         {
56             count = (sizeof(T) * 8) - offset;
57         }
58         ref[i] = cpu_bit_insert(base[i], insert[i], offset, count);
59     }
60 }
61 
62 static constexpr const char* kernel_source = R"CLC(
63 __kernel void test_bitfield_insert(__global TYPE* dst, __global TYPE* base, __global TYPE* insert)
64 {
65     int index = get_global_id(0);
66     uint offset = index / (sizeof(BASETYPE) * 8 + 1);
67     uint count = index % (sizeof(BASETYPE) * 8 + 1);
68     if (offset + count > sizeof(BASETYPE) * 8) {
69         count = (sizeof(BASETYPE) * 8) - offset;
70     }
71     dst[index] = bitfield_insert(base[index], insert[index], offset, count);
72 }
73 )CLC";
74 
75 static constexpr const char* kernel_source_vec3 = R"CLC(
76 __kernel void test_bitfield_insert(__global BASETYPE* dst, __global BASETYPE* base, __global BASETYPE* insert)
77 {
78     int index = get_global_id(0);
79     uint offset = index / (sizeof(BASETYPE) * 8 + 1);
80     uint count = index % (sizeof(BASETYPE) * 8 + 1);
81     if (offset + count > sizeof(BASETYPE) * 8) {
82         count = (sizeof(BASETYPE) * 8) - offset;
83     }
84     TYPE b = vload3(index, base);
85     TYPE i = vload3(index, insert);
86     TYPE d = bitfield_insert(b, i, offset, count);
87     vstore3(d, index, dst);
88 }
89 )CLC";
90 
91 template <typename T, size_t N>
test_vectype(cl_device_id device,cl_context context,cl_command_queue queue)92 static int test_vectype(cl_device_id device, cl_context context,
93                         cl_command_queue queue)
94 {
95     // Because converting from an unsigned type to a signed type is
96     // implementation-defined if the most significant bit is set until C++ 20,
97     // compute all reference results using unsigned types.
98     typedef typename std::make_unsigned<T>::type unsigned_t;
99 
100     cl_int error = CL_SUCCESS;
101 
102     clProgramWrapper program;
103     clKernelWrapper kernel;
104 
105     std::string buildOptions{ "-DTYPE=" };
106     buildOptions += TestInfo<T>::deviceTypeName;
107     if (N > 1)
108     {
109         buildOptions += std::to_string(N);
110     }
111     buildOptions += " -DBASETYPE=";
112     buildOptions += TestInfo<T>::deviceTypeName;
113 
114     const size_t ELEMENTS_TO_TEST = (sizeof(T) * 8 + 1) * (sizeof(T) * 8 + 1);
115 
116     std::vector<T> base(ELEMENTS_TO_TEST * N);
117     std::fill(base.begin(), base.end(), static_cast<T>(0xA5A5A5A5A5A5A5A5ULL));
118 
119     std::vector<T> insert(ELEMENTS_TO_TEST * N);
120     fill_vector_with_random_data(insert);
121 
122     std::vector<unsigned_t> reference;
123     calculate_reference<T, N>(reference, base, insert);
124 
125     const char* source = (N == 3) ? kernel_source_vec3 : kernel_source;
126     error = create_single_kernel_helper(context, &program, &kernel, 1, &source,
127                                         "test_bitfield_insert",
128                                         buildOptions.c_str());
129     test_error(error, "Unable to create test_bitfield_insert kernel");
130 
131     clMemWrapper dst =
132         clCreateBuffer(context, 0, reference.size() * sizeof(T), NULL, &error);
133     test_error(error, "Unable to create output buffer");
134 
135     clMemWrapper src_base =
136         clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, base.size() * sizeof(T),
137                        base.data(), &error);
138     test_error(error, "Unable to create base buffer");
139 
140     clMemWrapper src_insert =
141         clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, insert.size() * sizeof(T),
142                        insert.data(), &error);
143     test_error(error, "Unable to create insert buffer");
144 
145     error = clSetKernelArg(kernel, 0, sizeof(dst), &dst);
146     test_error(error, "Unable to set output buffer kernel arg");
147 
148     error = clSetKernelArg(kernel, 1, sizeof(src_base), &src_base);
149     test_error(error, "Unable to set base buffer kernel arg");
150 
151     error = clSetKernelArg(kernel, 2, sizeof(src_insert), &src_insert);
152     test_error(error, "Unable to set insert buffer kernel arg");
153 
154     size_t global_work_size[] = { reference.size() / N };
155     error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_work_size,
156                                    NULL, 0, NULL, NULL);
157     test_error(error, "Unable to enqueue test kernel");
158 
159     error = clFinish(queue);
160     test_error(error, "clFinish failed after test kernel");
161 
162     std::vector<unsigned_t> results(reference.size(), 99);
163     error =
164         clEnqueueReadBuffer(queue, dst, CL_TRUE, 0, results.size() * sizeof(T),
165                             results.data(), 0, NULL, NULL);
166     test_error(error, "Unable to read data after test kernel");
167 
168     if (results != reference)
169     {
170         log_error("Result buffer did not match reference buffer!\n");
171         return TEST_FAIL;
172     }
173 
174     return TEST_PASS;
175 }
176 
177 template <typename T>
test_type(cl_device_id device,cl_context context,cl_command_queue queue)178 static int test_type(cl_device_id device, cl_context context,
179                      cl_command_queue queue)
180 {
181     log_info("    testing type %s\n", TestInfo<T>::deviceTypeName);
182 
183     return test_vectype<T, 1>(device, context, queue)
184         | test_vectype<T, 2>(device, context, queue)
185         | test_vectype<T, 3>(device, context, queue)
186         | test_vectype<T, 4>(device, context, queue)
187         | test_vectype<T, 8>(device, context, queue)
188         | test_vectype<T, 16>(device, context, queue);
189 }
190 
test_extended_bit_ops_insert(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)191 int test_extended_bit_ops_insert(cl_device_id device, cl_context context,
192                                  cl_command_queue queue, int num_elements)
193 {
194     if (is_extension_available(device, "cl_khr_extended_bit_ops"))
195     {
196         int result = TEST_PASS;
197 
198         result |= test_type<cl_char>(device, context, queue);
199         result |= test_type<cl_uchar>(device, context, queue);
200         result |= test_type<cl_short>(device, context, queue);
201         result |= test_type<cl_ushort>(device, context, queue);
202         result |= test_type<cl_int>(device, context, queue);
203         result |= test_type<cl_uint>(device, context, queue);
204         if (gHasLong)
205         {
206             result |= test_type<cl_long>(device, context, queue);
207             result |= test_type<cl_ulong>(device, context, queue);
208         }
209         return result;
210     }
211 
212     log_info("cl_khr_extended_bit_ops is not supported\n");
213     return TEST_SKIPPED_ITSELF;
214 }
215