1*6fee86a4SJeremy Kemp #define CL_HPP_ENABLE_EXCEPTIONS
2*6fee86a4SJeremy Kemp #define CL_HPP_TARGET_OPENCL_VERSION 200
3*6fee86a4SJeremy Kemp
4*6fee86a4SJeremy Kemp //#define CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY
5*6fee86a4SJeremy Kemp //#define CL_HPP_CL_1_2_DEFAULT_BUILD
6*6fee86a4SJeremy Kemp #include <CL/opencl.hpp>
7*6fee86a4SJeremy Kemp #include <iostream>
8*6fee86a4SJeremy Kemp #include <vector>
9*6fee86a4SJeremy Kemp #include <memory>
10*6fee86a4SJeremy Kemp #include <algorithm>
11*6fee86a4SJeremy Kemp
12*6fee86a4SJeremy Kemp const int numElements = 32;
13*6fee86a4SJeremy Kemp
main(void)14*6fee86a4SJeremy Kemp int main(void)
15*6fee86a4SJeremy Kemp {
16*6fee86a4SJeremy Kemp // Filter for a 2.0 platform and set it as the default
17*6fee86a4SJeremy Kemp std::vector<cl::Platform> platforms;
18*6fee86a4SJeremy Kemp cl::Platform::get(&platforms);
19*6fee86a4SJeremy Kemp cl::Platform plat;
20*6fee86a4SJeremy Kemp for (auto &p : platforms) {
21*6fee86a4SJeremy Kemp std::string platver = p.getInfo<CL_PLATFORM_VERSION>();
22*6fee86a4SJeremy Kemp std::cerr << "Plat: " << platver << "\n";
23*6fee86a4SJeremy Kemp if (platver.find("OpenCL 2.") != std::string::npos) {
24*6fee86a4SJeremy Kemp plat = p;
25*6fee86a4SJeremy Kemp }
26*6fee86a4SJeremy Kemp }
27*6fee86a4SJeremy Kemp if (plat() == 0) {
28*6fee86a4SJeremy Kemp std::cout << "No OpenCL 2.0 platform found.\n";
29*6fee86a4SJeremy Kemp return -1;
30*6fee86a4SJeremy Kemp }
31*6fee86a4SJeremy Kemp
32*6fee86a4SJeremy Kemp cl::Platform newP = cl::Platform::setDefault(plat);
33*6fee86a4SJeremy Kemp if (newP != plat) {
34*6fee86a4SJeremy Kemp std::cout << "Error setting default platform.";
35*6fee86a4SJeremy Kemp return -1;
36*6fee86a4SJeremy Kemp }
37*6fee86a4SJeremy Kemp
38*6fee86a4SJeremy Kemp // Test command queue property construction
39*6fee86a4SJeremy Kemp cl::CommandQueue q5(cl::QueueProperties::Profiling | cl::QueueProperties::OutOfOrder);
40*6fee86a4SJeremy Kemp
41*6fee86a4SJeremy Kemp #if defined(CL_HPP_ENABLE_EXCEPTIONS)
42*6fee86a4SJeremy Kemp cl::Program errorProgram(
43*6fee86a4SJeremy Kemp std::string(
44*6fee86a4SJeremy Kemp "sakfdjnksajfnksajnfsa")
45*6fee86a4SJeremy Kemp , false);
46*6fee86a4SJeremy Kemp try {
47*6fee86a4SJeremy Kemp errorProgram.build("-cl-std=CL2.0");
48*6fee86a4SJeremy Kemp }
49*6fee86a4SJeremy Kemp catch (...) {
50*6fee86a4SJeremy Kemp // Print build info for all devices
51*6fee86a4SJeremy Kemp cl_int buildErr = CL_SUCCESS;
52*6fee86a4SJeremy Kemp auto buildInfo = errorProgram.getBuildInfo<CL_PROGRAM_BUILD_LOG>(&buildErr);
53*6fee86a4SJeremy Kemp std::cerr << "Errors for failed build for all devices" << std::endl;
54*6fee86a4SJeremy Kemp for (auto &pair : buildInfo) {
55*6fee86a4SJeremy Kemp std::cerr << "Device: " << pair.first.getInfo<CL_DEVICE_NAME>() << std::endl << pair.second << std::endl << std::endl;
56*6fee86a4SJeremy Kemp }
57*6fee86a4SJeremy Kemp }
58*6fee86a4SJeremy Kemp
59*6fee86a4SJeremy Kemp
60*6fee86a4SJeremy Kemp cl::Program errorProgramException(
61*6fee86a4SJeremy Kemp std::string(
62*6fee86a4SJeremy Kemp "sakfdjnksajfnksajnfsa")
63*6fee86a4SJeremy Kemp , false);
64*6fee86a4SJeremy Kemp try {
65*6fee86a4SJeremy Kemp errorProgramException.build("-cl-std=CL2.0");
66*6fee86a4SJeremy Kemp }
67*6fee86a4SJeremy Kemp catch (const cl::BuildError &err) {
68*6fee86a4SJeremy Kemp // Print build info for all devices
69*6fee86a4SJeremy Kemp auto buildInfo = err.getBuildLog();
70*6fee86a4SJeremy Kemp std::cerr << "Errors for failed build for all devices from thrown exception" << std::endl;
71*6fee86a4SJeremy Kemp for (auto &pair : buildInfo) {
72*6fee86a4SJeremy Kemp std::cerr << "Device: " << pair.first.getInfo<CL_DEVICE_NAME>() << std::endl << pair.second << std::endl << std::endl;
73*6fee86a4SJeremy Kemp }
74*6fee86a4SJeremy Kemp }
75*6fee86a4SJeremy Kemp #endif // #if defined(CL_HPP_ENABLE_EXCEPTIONS)
76*6fee86a4SJeremy Kemp
77*6fee86a4SJeremy Kemp
78*6fee86a4SJeremy Kemp std::string kernel1{"global int globalA;"
79*6fee86a4SJeremy Kemp "kernel void updateGlobal(){"
80*6fee86a4SJeremy Kemp " globalA = 75;"
81*6fee86a4SJeremy Kemp "}"};
82*6fee86a4SJeremy Kemp std::string kernel2{
83*6fee86a4SJeremy Kemp "typedef struct { global int *bar; } Foo; kernel void vectorAdd(global const Foo* aNum, global const int *inputA, global const int *inputB, global int *output, global int *output2, int val, write_only pipe int outPipe, queue_t childQueue){"
84*6fee86a4SJeremy Kemp " output[get_global_id(0)] = inputA[get_global_id(0)] + inputB[get_global_id(0)] + val + *(aNum->bar);"
85*6fee86a4SJeremy Kemp " output2[get_global_id(0)] = inputA[get_global_id(0)] + inputB[get_global_id(0)] + val + *(aNum->bar);"
86*6fee86a4SJeremy Kemp " write_pipe(outPipe, &val);"
87*6fee86a4SJeremy Kemp " queue_t default_queue = get_default_queue(); "
88*6fee86a4SJeremy Kemp " ndrange_t ndrange = ndrange_1D(get_global_size(0)/2, get_global_size(0)/2); "
89*6fee86a4SJeremy Kemp // Have a child kernel write into third quarter of output
90*6fee86a4SJeremy Kemp " enqueue_kernel(default_queue, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, "
91*6fee86a4SJeremy Kemp " ^{"
92*6fee86a4SJeremy Kemp " output[get_global_size(0)*2 + get_global_id(0)] = inputA[get_global_size(0)*2+get_global_id(0)] + inputB[get_global_size(0)*2+get_global_id(0)] + globalA;"
93*6fee86a4SJeremy Kemp " });"
94*6fee86a4SJeremy Kemp // Have a child kernel write into last quarter of output
95*6fee86a4SJeremy Kemp " enqueue_kernel(childQueue, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, "
96*6fee86a4SJeremy Kemp " ^{"
97*6fee86a4SJeremy Kemp " output[get_global_size(0)*3 + get_global_id(0)] = inputA[get_global_size(0)*3 + get_global_id(0)] + inputB[get_global_size(0)*3 + get_global_id(0)] + globalA + 2;"
98*6fee86a4SJeremy Kemp " });"
99*6fee86a4SJeremy Kemp "}" };
100*6fee86a4SJeremy Kemp #if defined(CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY)
101*6fee86a4SJeremy Kemp // Old interface style
102*6fee86a4SJeremy Kemp cl::Program::Sources programStrings;
103*6fee86a4SJeremy Kemp programStrings.push_back(std::pair<const char*, size_t>(kernel1.data(), kernel1.length()));
104*6fee86a4SJeremy Kemp programStrings.push_back(std::pair<const char*, size_t>(kernel2.data(), kernel2.length()));
105*6fee86a4SJeremy Kemp #else // #if defined(CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY)
106*6fee86a4SJeremy Kemp // New simpler string interface style
107*6fee86a4SJeremy Kemp std::vector<std::string> programStrings {
108*6fee86a4SJeremy Kemp kernel1,
109*6fee86a4SJeremy Kemp kernel2 };
110*6fee86a4SJeremy Kemp #endif // #if defined(CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY)
111*6fee86a4SJeremy Kemp cl::Program vectorAddProgram(
112*6fee86a4SJeremy Kemp programStrings);
113*6fee86a4SJeremy Kemp #if defined(CL_HPP_ENABLE_EXCEPTIONS)
114*6fee86a4SJeremy Kemp try {
115*6fee86a4SJeremy Kemp vectorAddProgram.build("-cl-std=CL2.0");
116*6fee86a4SJeremy Kemp }
117*6fee86a4SJeremy Kemp catch (...) {
118*6fee86a4SJeremy Kemp // Print build info for all devices
119*6fee86a4SJeremy Kemp cl_int buildErr = CL_SUCCESS;
120*6fee86a4SJeremy Kemp auto buildInfo = vectorAddProgram.getBuildInfo<CL_PROGRAM_BUILD_LOG>(&buildErr);
121*6fee86a4SJeremy Kemp for (auto &pair : buildInfo) {
122*6fee86a4SJeremy Kemp std::cerr << pair.second << std::endl << std::endl;
123*6fee86a4SJeremy Kemp }
124*6fee86a4SJeremy Kemp
125*6fee86a4SJeremy Kemp return 1;
126*6fee86a4SJeremy Kemp }
127*6fee86a4SJeremy Kemp #else // #if defined(CL_HPP_ENABLE_EXCEPTIONS)
128*6fee86a4SJeremy Kemp cl_int buildErr = vectorAddProgram.build("-cl-std=CL2.0");
129*6fee86a4SJeremy Kemp if (buildErr != CL_SUCCESS) {
130*6fee86a4SJeremy Kemp std::cerr << "Build error: " << buildErr << "\n";
131*6fee86a4SJeremy Kemp return -1;
132*6fee86a4SJeremy Kemp }
133*6fee86a4SJeremy Kemp #endif // #if defined(CL_HPP_ENABLE_EXCEPTIONS)
134*6fee86a4SJeremy Kemp
135*6fee86a4SJeremy Kemp typedef struct { int *bar; } Foo;
136*6fee86a4SJeremy Kemp
137*6fee86a4SJeremy Kemp // Get and run kernel that initializes the program-scope global
138*6fee86a4SJeremy Kemp // A test for kernels that take no arguments
139*6fee86a4SJeremy Kemp auto program2Kernel =
140*6fee86a4SJeremy Kemp cl::KernelFunctor<>(vectorAddProgram, "updateGlobal");
141*6fee86a4SJeremy Kemp program2Kernel(
142*6fee86a4SJeremy Kemp cl::EnqueueArgs(
143*6fee86a4SJeremy Kemp cl::NDRange(1)));
144*6fee86a4SJeremy Kemp
145*6fee86a4SJeremy Kemp
146*6fee86a4SJeremy Kemp //////////////////
147*6fee86a4SJeremy Kemp // SVM allocations
148*6fee86a4SJeremy Kemp
149*6fee86a4SJeremy Kemp // Store pointer to pointer here to test clSetKernelExecInfo
150*6fee86a4SJeremy Kemp // Code using cl namespace allocators etc as a test
151*6fee86a4SJeremy Kemp // std::shared_ptr etc should work fine too
152*6fee86a4SJeremy Kemp
153*6fee86a4SJeremy Kemp auto anSVMInt = cl::allocate_svm<int, cl::SVMTraitCoarse<>>();
154*6fee86a4SJeremy Kemp *anSVMInt = 5;
155*6fee86a4SJeremy Kemp cl::SVMAllocator<int, cl::SVMTraitCoarse<>> svmAlloc;
156*6fee86a4SJeremy Kemp std::cout << "Max alloc size: " << svmAlloc.max_size() << " bytes\n";
157*6fee86a4SJeremy Kemp cl::SVMAllocator<Foo, cl::SVMTraitCoarse<cl::SVMTraitReadOnly<>>> svmAllocReadOnly;
158*6fee86a4SJeremy Kemp auto fooPointer = cl::allocate_pointer<Foo>(svmAllocReadOnly);
159*6fee86a4SJeremy Kemp fooPointer->bar = anSVMInt.get();
160*6fee86a4SJeremy Kemp
161*6fee86a4SJeremy Kemp std::vector<int, cl::SVMAllocator<int, cl::SVMTraitCoarse<>>> inputA(numElements, 1, svmAlloc);
162*6fee86a4SJeremy Kemp
163*6fee86a4SJeremy Kemp cl::coarse_svm_vector<int> inputB(numElements, 2, svmAlloc);
164*6fee86a4SJeremy Kemp
165*6fee86a4SJeremy Kemp //
166*6fee86a4SJeremy Kemp //////////////
167*6fee86a4SJeremy Kemp
168*6fee86a4SJeremy Kemp // Traditional cl_mem allocations
169*6fee86a4SJeremy Kemp std::vector<int> output(numElements, 0xdeadbeef);
170*6fee86a4SJeremy Kemp cl::Buffer outputBuffer(output.begin(), output.end(), false);
171*6fee86a4SJeremy Kemp
172*6fee86a4SJeremy Kemp std::vector<int, cl::SVMAllocator<int, cl::SVMTraitCoarse<>>> output2(numElements / 2, 0xdeadbeef);
173*6fee86a4SJeremy Kemp cl::Pipe aPipe(sizeof(cl_int), numElements / 2);
174*6fee86a4SJeremy Kemp // Unfortunately, there is no way to check for a default or know if a kernel needs one
175*6fee86a4SJeremy Kemp // so the user has to create one
176*6fee86a4SJeremy Kemp // We can't preemptively do so on device creation because they cannot then replace it
177*6fee86a4SJeremy Kemp cl::DeviceCommandQueue defaultDeviceQueue;
178*6fee86a4SJeremy Kemp defaultDeviceQueue = cl::DeviceCommandQueue::makeDefault();
179*6fee86a4SJeremy Kemp
180*6fee86a4SJeremy Kemp auto vectorAddKernel =
181*6fee86a4SJeremy Kemp cl::KernelFunctor<
182*6fee86a4SJeremy Kemp decltype(fooPointer)&,
183*6fee86a4SJeremy Kemp int*,
184*6fee86a4SJeremy Kemp cl::coarse_svm_vector<int>&,
185*6fee86a4SJeremy Kemp cl::Buffer,
186*6fee86a4SJeremy Kemp std::vector<int, cl::SVMAllocator<int, cl::SVMTraitCoarse<>>>&,
187*6fee86a4SJeremy Kemp int,
188*6fee86a4SJeremy Kemp cl::Pipe&,
189*6fee86a4SJeremy Kemp cl::DeviceCommandQueue
190*6fee86a4SJeremy Kemp >(vectorAddProgram, "vectorAdd");
191*6fee86a4SJeremy Kemp
192*6fee86a4SJeremy Kemp
193*6fee86a4SJeremy Kemp // Only the last of these will actually be used
194*6fee86a4SJeremy Kemp // but this will check that the API is working for all
195*6fee86a4SJeremy Kemp // of them
196*6fee86a4SJeremy Kemp cl::vector<void*> ptrs{ static_cast<void*>(anSVMInt.get()) };
197*6fee86a4SJeremy Kemp vectorAddKernel.setSVMPointers(ptrs);
198*6fee86a4SJeremy Kemp vectorAddKernel.setSVMPointers(anSVMInt.get());
199*6fee86a4SJeremy Kemp vectorAddKernel.setSVMPointers(anSVMInt);
200*6fee86a4SJeremy Kemp
201*6fee86a4SJeremy Kemp // Hand control of coarse allocations to runtime
202*6fee86a4SJeremy Kemp cl::enqueueUnmapSVM(anSVMInt);
203*6fee86a4SJeremy Kemp cl::enqueueUnmapSVM(fooPointer);
204*6fee86a4SJeremy Kemp cl::unmapSVM(inputB);
205*6fee86a4SJeremy Kemp cl::unmapSVM(output2);
206*6fee86a4SJeremy Kemp
207*6fee86a4SJeremy Kemp
208*6fee86a4SJeremy Kemp cl_int error;
209*6fee86a4SJeremy Kemp vectorAddKernel(
210*6fee86a4SJeremy Kemp cl::EnqueueArgs(
211*6fee86a4SJeremy Kemp cl::NDRange(numElements/2),
212*6fee86a4SJeremy Kemp cl::NDRange(numElements/2)),
213*6fee86a4SJeremy Kemp fooPointer,
214*6fee86a4SJeremy Kemp inputA.data(),
215*6fee86a4SJeremy Kemp inputB,
216*6fee86a4SJeremy Kemp outputBuffer,
217*6fee86a4SJeremy Kemp output2,
218*6fee86a4SJeremy Kemp 3,
219*6fee86a4SJeremy Kemp aPipe,
220*6fee86a4SJeremy Kemp defaultDeviceQueue,
221*6fee86a4SJeremy Kemp error
222*6fee86a4SJeremy Kemp );
223*6fee86a4SJeremy Kemp
224*6fee86a4SJeremy Kemp // Copy the cl_mem output back to the vector
225*6fee86a4SJeremy Kemp cl::copy(outputBuffer, output.begin(), output.end());
226*6fee86a4SJeremy Kemp // Grab the SVM output vector using a map
227*6fee86a4SJeremy Kemp cl::mapSVM(output2);
228*6fee86a4SJeremy Kemp
229*6fee86a4SJeremy Kemp cl::Device d = cl::Device::getDefault();
230*6fee86a4SJeremy Kemp std::cout << "Max pipe args: " << d.getInfo<CL_DEVICE_MAX_PIPE_ARGS>() << "\n";
231*6fee86a4SJeremy Kemp std::cout << "Max pipe active reservations: " << d.getInfo<CL_DEVICE_PIPE_MAX_ACTIVE_RESERVATIONS>() << "\n";
232*6fee86a4SJeremy Kemp std::cout << "Max pipe packet size: " << d.getInfo<CL_DEVICE_PIPE_MAX_PACKET_SIZE>() << "\n";
233*6fee86a4SJeremy Kemp std::cout << "Device SVM capabilities: " << d.getInfo<CL_DEVICE_SVM_CAPABILITIES>() << "\n";
234*6fee86a4SJeremy Kemp std::cout << "\tCL_DEVICE_SVM_COARSE_GRAIN_BUFFER = " << CL_DEVICE_SVM_COARSE_GRAIN_BUFFER << "\n";
235*6fee86a4SJeremy Kemp std::cout << "\tCL_DEVICE_SVM_FINE_GRAIN_BUFFER = " << CL_DEVICE_SVM_FINE_GRAIN_BUFFER << "\n";
236*6fee86a4SJeremy Kemp std::cout << "\tCL_DEVICE_SVM_FINE_GRAIN_SYSTEM = " << CL_DEVICE_SVM_FINE_GRAIN_SYSTEM << "\n";
237*6fee86a4SJeremy Kemp std::cout << "\tCL_DEVICE_SVM_ATOMICS = " << CL_DEVICE_SVM_ATOMICS << "\n";
238*6fee86a4SJeremy Kemp
239*6fee86a4SJeremy Kemp auto v = vectorAddProgram.getInfo<CL_PROGRAM_BINARIES>();
240*6fee86a4SJeremy Kemp auto v2 = vectorAddProgram.getInfo<CL_PROGRAM_BINARY_SIZES>();
241*6fee86a4SJeremy Kemp std::vector<std::vector<unsigned char>> v3;
242*6fee86a4SJeremy Kemp std::vector<size_t> v4;
243*6fee86a4SJeremy Kemp vectorAddProgram.getInfo(CL_PROGRAM_BINARIES, &v3);
244*6fee86a4SJeremy Kemp vectorAddProgram.getInfo(CL_PROGRAM_BINARY_SIZES, &v4);
245*6fee86a4SJeremy Kemp
246*6fee86a4SJeremy Kemp std::cout << "Binaries: " << v.size() << "\n";
247*6fee86a4SJeremy Kemp std::cout << "Binary sizes: " << v2.size() << "\n";
248*6fee86a4SJeremy Kemp for (size_t s : v2) {
249*6fee86a4SJeremy Kemp std::cout << "\t" << s << "\n";
250*6fee86a4SJeremy Kemp }
251*6fee86a4SJeremy Kemp
252*6fee86a4SJeremy Kemp std::cout << "Output:\n";
253*6fee86a4SJeremy Kemp for (int i = 1; i < numElements; ++i) {
254*6fee86a4SJeremy Kemp std::cout << "\t" << output[i] << "\n";
255*6fee86a4SJeremy Kemp }
256*6fee86a4SJeremy Kemp std::cout << "\n\n";
257*6fee86a4SJeremy Kemp std::cout << "Output2:\n";
258*6fee86a4SJeremy Kemp for (auto &e : output2) {
259*6fee86a4SJeremy Kemp std::cout << "\t" << e << "\n";
260*6fee86a4SJeremy Kemp }
261*6fee86a4SJeremy Kemp std::cout << "\n\n";
262*6fee86a4SJeremy Kemp
263*6fee86a4SJeremy Kemp return 0;
264*6fee86a4SJeremy Kemp }
265