1 #define CL_HPP_ENABLE_EXCEPTIONS
2 #define CL_HPP_TARGET_OPENCL_VERSION 200
3
4 //#define CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY
5 //#define CL_HPP_CL_1_2_DEFAULT_BUILD
6 #include <CL/opencl.hpp>
7 #include <iostream>
8 #include <vector>
9 #include <memory>
10 #include <algorithm>
11
12 const int numElements = 32;
13
main(void)14 int main(void)
15 {
16 // Filter for a 2.0 platform and set it as the default
17 std::vector<cl::Platform> platforms;
18 cl::Platform::get(&platforms);
19 cl::Platform plat;
20 for (auto &p : platforms) {
21 std::string platver = p.getInfo<CL_PLATFORM_VERSION>();
22 std::cerr << "Plat: " << platver << "\n";
23 if (platver.find("OpenCL 2.") != std::string::npos) {
24 plat = p;
25 }
26 }
27 if (plat() == 0) {
28 std::cout << "No OpenCL 2.0 platform found.\n";
29 return -1;
30 }
31
32 cl::Platform newP = cl::Platform::setDefault(plat);
33 if (newP != plat) {
34 std::cout << "Error setting default platform.";
35 return -1;
36 }
37
38 // Test command queue property construction
39 cl::CommandQueue q5(cl::QueueProperties::Profiling | cl::QueueProperties::OutOfOrder);
40
41 #if defined(CL_HPP_ENABLE_EXCEPTIONS)
42 cl::Program errorProgram(
43 std::string(
44 "sakfdjnksajfnksajnfsa")
45 , false);
46 try {
47 errorProgram.build("-cl-std=CL2.0");
48 }
49 catch (...) {
50 // Print build info for all devices
51 cl_int buildErr = CL_SUCCESS;
52 auto buildInfo = errorProgram.getBuildInfo<CL_PROGRAM_BUILD_LOG>(&buildErr);
53 std::cerr << "Errors for failed build for all devices" << std::endl;
54 for (auto &pair : buildInfo) {
55 std::cerr << "Device: " << pair.first.getInfo<CL_DEVICE_NAME>() << std::endl << pair.second << std::endl << std::endl;
56 }
57 }
58
59
60 cl::Program errorProgramException(
61 std::string(
62 "sakfdjnksajfnksajnfsa")
63 , false);
64 try {
65 errorProgramException.build("-cl-std=CL2.0");
66 }
67 catch (const cl::BuildError &err) {
68 // Print build info for all devices
69 auto buildInfo = err.getBuildLog();
70 std::cerr << "Errors for failed build for all devices from thrown exception" << std::endl;
71 for (auto &pair : buildInfo) {
72 std::cerr << "Device: " << pair.first.getInfo<CL_DEVICE_NAME>() << std::endl << pair.second << std::endl << std::endl;
73 }
74 }
75 #endif // #if defined(CL_HPP_ENABLE_EXCEPTIONS)
76
77
78 std::string kernel1{"global int globalA;"
79 "kernel void updateGlobal(){"
80 " globalA = 75;"
81 "}"};
82 std::string kernel2{
83 "typedef struct { global int *bar; } Foo; kernel void vectorAdd(global const Foo* aNum, global const int *inputA, global const int *inputB, global int *output, global int *output2, int val, write_only pipe int outPipe, queue_t childQueue){"
84 " output[get_global_id(0)] = inputA[get_global_id(0)] + inputB[get_global_id(0)] + val + *(aNum->bar);"
85 " output2[get_global_id(0)] = inputA[get_global_id(0)] + inputB[get_global_id(0)] + val + *(aNum->bar);"
86 " write_pipe(outPipe, &val);"
87 " queue_t default_queue = get_default_queue(); "
88 " ndrange_t ndrange = ndrange_1D(get_global_size(0)/2, get_global_size(0)/2); "
89 // Have a child kernel write into third quarter of output
90 " enqueue_kernel(default_queue, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, "
91 " ^{"
92 " output[get_global_size(0)*2 + get_global_id(0)] = inputA[get_global_size(0)*2+get_global_id(0)] + inputB[get_global_size(0)*2+get_global_id(0)] + globalA;"
93 " });"
94 // Have a child kernel write into last quarter of output
95 " enqueue_kernel(childQueue, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, "
96 " ^{"
97 " output[get_global_size(0)*3 + get_global_id(0)] = inputA[get_global_size(0)*3 + get_global_id(0)] + inputB[get_global_size(0)*3 + get_global_id(0)] + globalA + 2;"
98 " });"
99 "}" };
100 #if defined(CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY)
101 // Old interface style
102 cl::Program::Sources programStrings;
103 programStrings.push_back(std::pair<const char*, size_t>(kernel1.data(), kernel1.length()));
104 programStrings.push_back(std::pair<const char*, size_t>(kernel2.data(), kernel2.length()));
105 #else // #if defined(CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY)
106 // New simpler string interface style
107 std::vector<std::string> programStrings {
108 kernel1,
109 kernel2 };
110 #endif // #if defined(CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY)
111 cl::Program vectorAddProgram(
112 programStrings);
113 #if defined(CL_HPP_ENABLE_EXCEPTIONS)
114 try {
115 vectorAddProgram.build("-cl-std=CL2.0");
116 }
117 catch (...) {
118 // Print build info for all devices
119 cl_int buildErr = CL_SUCCESS;
120 auto buildInfo = vectorAddProgram.getBuildInfo<CL_PROGRAM_BUILD_LOG>(&buildErr);
121 for (auto &pair : buildInfo) {
122 std::cerr << pair.second << std::endl << std::endl;
123 }
124
125 return 1;
126 }
127 #else // #if defined(CL_HPP_ENABLE_EXCEPTIONS)
128 cl_int buildErr = vectorAddProgram.build("-cl-std=CL2.0");
129 if (buildErr != CL_SUCCESS) {
130 std::cerr << "Build error: " << buildErr << "\n";
131 return -1;
132 }
133 #endif // #if defined(CL_HPP_ENABLE_EXCEPTIONS)
134
135 typedef struct { int *bar; } Foo;
136
137 // Get and run kernel that initializes the program-scope global
138 // A test for kernels that take no arguments
139 auto program2Kernel =
140 cl::KernelFunctor<>(vectorAddProgram, "updateGlobal");
141 program2Kernel(
142 cl::EnqueueArgs(
143 cl::NDRange(1)));
144
145
146 //////////////////
147 // SVM allocations
148
149 // Store pointer to pointer here to test clSetKernelExecInfo
150 // Code using cl namespace allocators etc as a test
151 // std::shared_ptr etc should work fine too
152
153 auto anSVMInt = cl::allocate_svm<int, cl::SVMTraitCoarse<>>();
154 *anSVMInt = 5;
155 cl::SVMAllocator<int, cl::SVMTraitCoarse<>> svmAlloc;
156 std::cout << "Max alloc size: " << svmAlloc.max_size() << " bytes\n";
157 cl::SVMAllocator<Foo, cl::SVMTraitCoarse<cl::SVMTraitReadOnly<>>> svmAllocReadOnly;
158 auto fooPointer = cl::allocate_pointer<Foo>(svmAllocReadOnly);
159 fooPointer->bar = anSVMInt.get();
160
161 std::vector<int, cl::SVMAllocator<int, cl::SVMTraitCoarse<>>> inputA(numElements, 1, svmAlloc);
162
163 cl::coarse_svm_vector<int> inputB(numElements, 2, svmAlloc);
164
165 //
166 //////////////
167
168 // Traditional cl_mem allocations
169 std::vector<int> output(numElements, 0xdeadbeef);
170 cl::Buffer outputBuffer(output.begin(), output.end(), false);
171
172 std::vector<int, cl::SVMAllocator<int, cl::SVMTraitCoarse<>>> output2(numElements / 2, 0xdeadbeef);
173 cl::Pipe aPipe(sizeof(cl_int), numElements / 2);
174 // Unfortunately, there is no way to check for a default or know if a kernel needs one
175 // so the user has to create one
176 // We can't preemptively do so on device creation because they cannot then replace it
177 cl::DeviceCommandQueue defaultDeviceQueue;
178 defaultDeviceQueue = cl::DeviceCommandQueue::makeDefault();
179
180 auto vectorAddKernel =
181 cl::KernelFunctor<
182 decltype(fooPointer)&,
183 int*,
184 cl::coarse_svm_vector<int>&,
185 cl::Buffer,
186 std::vector<int, cl::SVMAllocator<int, cl::SVMTraitCoarse<>>>&,
187 int,
188 cl::Pipe&,
189 cl::DeviceCommandQueue
190 >(vectorAddProgram, "vectorAdd");
191
192
193 // Only the last of these will actually be used
194 // but this will check that the API is working for all
195 // of them
196 cl::vector<void*> ptrs{ static_cast<void*>(anSVMInt.get()) };
197 vectorAddKernel.setSVMPointers(ptrs);
198 vectorAddKernel.setSVMPointers(anSVMInt.get());
199 vectorAddKernel.setSVMPointers(anSVMInt);
200
201 // Hand control of coarse allocations to runtime
202 cl::enqueueUnmapSVM(anSVMInt);
203 cl::enqueueUnmapSVM(fooPointer);
204 cl::unmapSVM(inputB);
205 cl::unmapSVM(output2);
206
207
208 cl_int error;
209 vectorAddKernel(
210 cl::EnqueueArgs(
211 cl::NDRange(numElements/2),
212 cl::NDRange(numElements/2)),
213 fooPointer,
214 inputA.data(),
215 inputB,
216 outputBuffer,
217 output2,
218 3,
219 aPipe,
220 defaultDeviceQueue,
221 error
222 );
223
224 // Copy the cl_mem output back to the vector
225 cl::copy(outputBuffer, output.begin(), output.end());
226 // Grab the SVM output vector using a map
227 cl::mapSVM(output2);
228
229 cl::Device d = cl::Device::getDefault();
230 std::cout << "Max pipe args: " << d.getInfo<CL_DEVICE_MAX_PIPE_ARGS>() << "\n";
231 std::cout << "Max pipe active reservations: " << d.getInfo<CL_DEVICE_PIPE_MAX_ACTIVE_RESERVATIONS>() << "\n";
232 std::cout << "Max pipe packet size: " << d.getInfo<CL_DEVICE_PIPE_MAX_PACKET_SIZE>() << "\n";
233 std::cout << "Device SVM capabilities: " << d.getInfo<CL_DEVICE_SVM_CAPABILITIES>() << "\n";
234 std::cout << "\tCL_DEVICE_SVM_COARSE_GRAIN_BUFFER = " << CL_DEVICE_SVM_COARSE_GRAIN_BUFFER << "\n";
235 std::cout << "\tCL_DEVICE_SVM_FINE_GRAIN_BUFFER = " << CL_DEVICE_SVM_FINE_GRAIN_BUFFER << "\n";
236 std::cout << "\tCL_DEVICE_SVM_FINE_GRAIN_SYSTEM = " << CL_DEVICE_SVM_FINE_GRAIN_SYSTEM << "\n";
237 std::cout << "\tCL_DEVICE_SVM_ATOMICS = " << CL_DEVICE_SVM_ATOMICS << "\n";
238
239 auto v = vectorAddProgram.getInfo<CL_PROGRAM_BINARIES>();
240 auto v2 = vectorAddProgram.getInfo<CL_PROGRAM_BINARY_SIZES>();
241 std::vector<std::vector<unsigned char>> v3;
242 std::vector<size_t> v4;
243 vectorAddProgram.getInfo(CL_PROGRAM_BINARIES, &v3);
244 vectorAddProgram.getInfo(CL_PROGRAM_BINARY_SIZES, &v4);
245
246 std::cout << "Binaries: " << v.size() << "\n";
247 std::cout << "Binary sizes: " << v2.size() << "\n";
248 for (size_t s : v2) {
249 std::cout << "\t" << s << "\n";
250 }
251
252 std::cout << "Output:\n";
253 for (int i = 1; i < numElements; ++i) {
254 std::cout << "\t" << output[i] << "\n";
255 }
256 std::cout << "\n\n";
257 std::cout << "Output2:\n";
258 for (auto &e : output2) {
259 std::cout << "\t" << e << "\n";
260 }
261 std::cout << "\n\n";
262
263 return 0;
264 }
265