#define CL_HPP_ENABLE_EXCEPTIONS
#define CL_HPP_TARGET_OPENCL_VERSION 200

#include <CL/opencl.hpp>

#include <algorithm>
#include <exception>
#include <iostream>
#include <memory>
#include <string>
#include <vector>

// Element count shared by the host-side input and output containers; the
// pipe capacity and the parent kernel's ND-range are both derived from it
// as numElements / 2.
constexpr int numElements{32};

main(void)12 int main(void)
13 {
14 // Filter for a 2.0 or newer platform and set it as the default
15 std::vector<cl::Platform> platforms;
16 cl::Platform::get(&platforms);
17 cl::Platform plat;
18 for (auto &p : platforms) {
19 std::string platver = p.getInfo<CL_PLATFORM_VERSION>();
20 if (platver.find("OpenCL 2.") != std::string::npos ||
21 platver.find("OpenCL 3.") != std::string::npos) {
22 // Note: an OpenCL 3.x platform may not support all required features!
23 plat = p;
24 }
25 }
26 if (plat() == 0) {
27 std::cout << "No OpenCL 2.0 or newer platform found.\n";
28 return -1;
29 }
30
31 cl::Platform newP = cl::Platform::setDefault(plat);
32 if (newP != plat) {
33 std::cout << "Error setting default platform.\n";
34 return -1;
35 }
36
37 // C++11 raw string literal for the first kernel
38 std::string kernel1{R"CLC(
39 global int globalA;
40 kernel void updateGlobal()
41 {
42 globalA = 75;
43 }
44 )CLC"};
45
46 // Raw string literal for the second kernel
47 std::string kernel2{R"CLC(
48 typedef struct { global int *bar; } Foo;
49 kernel void vectorAdd(global const Foo* aNum, global const int *inputA, global const int *inputB,
50 global int *output, int val, write_only pipe int outPipe, queue_t childQueue)
51 {
52 output[get_global_id(0)] = inputA[get_global_id(0)] + inputB[get_global_id(0)] + val + *(aNum->bar);
53 write_pipe(outPipe, &val);
54 queue_t default_queue = get_default_queue();
55 ndrange_t ndrange = ndrange_1D(get_global_size(0)/2, get_global_size(0)/2);
56
57 // Have a child kernel write into third quarter of output
58 enqueue_kernel(default_queue, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange,
59 ^{
60 output[get_global_size(0)*2 + get_global_id(0)] =
61 inputA[get_global_size(0)*2 + get_global_id(0)] + inputB[get_global_size(0)*2 + get_global_id(0)] + globalA;
62 });
63
64 // Have a child kernel write into last quarter of output
65 enqueue_kernel(childQueue, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange,
66 ^{
67 output[get_global_size(0)*3 + get_global_id(0)] =
68 inputA[get_global_size(0)*3 + get_global_id(0)] + inputB[get_global_size(0)*3 + get_global_id(0)] + globalA + 2;
69 });
70 }
71 )CLC"};
72
73 std::vector<std::string> programStrings;
74 programStrings.push_back(kernel1);
75 programStrings.push_back(kernel2);
76
77 cl::Program vectorAddProgram(programStrings);
78 try {
79 vectorAddProgram.build("-cl-std=CL2.0");
80 }
81 catch (...) {
82 // Print build info for all devices
83 cl_int buildErr = CL_SUCCESS;
84 auto buildInfo = vectorAddProgram.getBuildInfo<CL_PROGRAM_BUILD_LOG>(&buildErr);
85 for (auto &pair : buildInfo) {
86 std::cerr << pair.second << std::endl << std::endl;
87 }
88
89 return 1;
90 }
91
92 typedef struct { int *bar; } Foo;
93
94 // Get and run kernel that initializes the program-scope global
95 // A test for kernels that take no arguments
96 auto program2Kernel =
97 cl::KernelFunctor<>(vectorAddProgram, "updateGlobal");
98 program2Kernel(
99 cl::EnqueueArgs(
100 cl::NDRange(1)));
101
102 //////////////////
103 // SVM allocations
104
105 auto anSVMInt = cl::allocate_svm<int, cl::SVMTraitCoarse<>>();
106 *anSVMInt = 5;
107 cl::SVMAllocator<Foo, cl::SVMTraitCoarse<cl::SVMTraitReadOnly<>>> svmAllocReadOnly;
108 auto fooPointer = cl::allocate_pointer<Foo>(svmAllocReadOnly);
109 fooPointer->bar = anSVMInt.get();
110 cl::SVMAllocator<int, cl::SVMTraitCoarse<>> svmAlloc;
111 std::vector<int, cl::SVMAllocator<int, cl::SVMTraitCoarse<>>> inputA(numElements, 1, svmAlloc);
112 cl::coarse_svm_vector<int> inputB(numElements, 2, svmAlloc);
113
114 //////////////
115 // Traditional cl_mem allocations
116
117 std::vector<int> output(numElements, 0xdeadbeef);
118 cl::Buffer outputBuffer(output.begin(), output.end(), false);
119 cl::Pipe aPipe(sizeof(cl_int), numElements / 2);
120
121 // Default command queue, also passed in as a parameter
122 cl::DeviceCommandQueue defaultDeviceQueue = cl::DeviceCommandQueue::makeDefault(
123 cl::Context::getDefault(), cl::Device::getDefault());
124
125 auto vectorAddKernel =
126 cl::KernelFunctor<
127 decltype(fooPointer)&,
128 int*,
129 cl::coarse_svm_vector<int>&,
130 cl::Buffer,
131 int,
132 cl::Pipe&,
133 cl::DeviceCommandQueue
134 >(vectorAddProgram, "vectorAdd");
135
136 // Ensure that the additional SVM pointer is available to the kernel
137 // This one was not passed as a parameter
138 vectorAddKernel.setSVMPointers(anSVMInt);
139
140 cl_int error;
141 vectorAddKernel(
142 cl::EnqueueArgs(
143 cl::NDRange(numElements/2),
144 cl::NDRange(numElements/2)),
145 fooPointer,
146 inputA.data(),
147 inputB,
148 outputBuffer,
149 3,
150 aPipe,
151 defaultDeviceQueue,
152 error
153 );
154
155 cl::copy(outputBuffer, output.begin(), output.end());
156
157 cl::Device d = cl::Device::getDefault();
158
159 std::cout << "Output:\n";
160 for (int i = 1; i < numElements; ++i) {
161 std::cout << "\t" << output[i] << "\n";
162 }
163 std::cout << "\n\n";
164
165 return 0;
166 }
167