xref: /aosp_15_r20/external/OpenCL-CLHPP/examples/src/trivial.cpp (revision 6fee86a4f833e4f32f25770a262884407554133d)
1 #define CL_HPP_ENABLE_EXCEPTIONS
2 #define CL_HPP_TARGET_OPENCL_VERSION 200
3 
4 //#define CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY
5 //#define CL_HPP_CL_1_2_DEFAULT_BUILD
6 #include <CL/opencl.hpp>
7 #include <iostream>
8 #include <vector>
9 #include <memory>
10 #include <algorithm>
11 
12 const int numElements = 32;
13 
main(void)14 int main(void)
15 {
16     // Filter for a 2.0 platform and set it as the default
17     std::vector<cl::Platform> platforms;
18     cl::Platform::get(&platforms);
19     cl::Platform plat;
20     for (auto &p : platforms) {
21         std::string platver = p.getInfo<CL_PLATFORM_VERSION>();
22         std::cerr << "Plat: " << platver << "\n";
23         if (platver.find("OpenCL 2.") != std::string::npos) {
24             plat = p;
25         }
26     }
27     if (plat() == 0)  {
28         std::cout << "No OpenCL 2.0 platform found.\n";
29         return -1;
30     }
31 
32     cl::Platform newP = cl::Platform::setDefault(plat);
33     if (newP != plat) {
34         std::cout << "Error setting default platform.";
35         return -1;
36     }
37 
38     // Test command queue property construction
39     cl::CommandQueue q5(cl::QueueProperties::Profiling | cl::QueueProperties::OutOfOrder);
40 
41 #if defined(CL_HPP_ENABLE_EXCEPTIONS)
42     cl::Program errorProgram(
43         std::string(
44         "sakfdjnksajfnksajnfsa")
45         , false);
46     try {
47         errorProgram.build("-cl-std=CL2.0");
48     }
49     catch (...) {
50         // Print build info for all devices
51         cl_int buildErr = CL_SUCCESS;
52         auto buildInfo = errorProgram.getBuildInfo<CL_PROGRAM_BUILD_LOG>(&buildErr);
53         std::cerr << "Errors for failed build for all devices" << std::endl;
54         for (auto &pair : buildInfo) {
55             std::cerr << "Device: " << pair.first.getInfo<CL_DEVICE_NAME>() << std::endl << pair.second << std::endl << std::endl;
56         }
57     }
58 
59 
60     cl::Program errorProgramException(
61         std::string(
62         "sakfdjnksajfnksajnfsa")
63         , false);
64     try {
65         errorProgramException.build("-cl-std=CL2.0");
66     }
67     catch (const cl::BuildError &err) {
68         // Print build info for all devices
69         auto buildInfo = err.getBuildLog();
70         std::cerr << "Errors for failed build for all devices from thrown exception" << std::endl;
71         for (auto &pair : buildInfo) {
72             std::cerr << "Device: " << pair.first.getInfo<CL_DEVICE_NAME>() << std::endl << pair.second << std::endl << std::endl;
73         }
74     }
75 #endif // #if defined(CL_HPP_ENABLE_EXCEPTIONS)
76 
77 
78     std::string kernel1{"global int globalA;"
79         "kernel void updateGlobal(){"
80         "  globalA = 75;"
81         "}"};
82     std::string kernel2{
83         "typedef struct { global int *bar; } Foo; kernel void vectorAdd(global const Foo* aNum, global const int *inputA, global const int *inputB, global int *output, global int *output2, int val, write_only pipe int outPipe, queue_t childQueue){"
84         "  output[get_global_id(0)] = inputA[get_global_id(0)] + inputB[get_global_id(0)] + val + *(aNum->bar);"
85         "  output2[get_global_id(0)] = inputA[get_global_id(0)] + inputB[get_global_id(0)] + val + *(aNum->bar);"
86         "  write_pipe(outPipe, &val);"
87         "  queue_t default_queue = get_default_queue(); "
88         "  ndrange_t ndrange = ndrange_1D(get_global_size(0)/2, get_global_size(0)/2); "
89         // Have a child kernel write into third quarter of output
90         "  enqueue_kernel(default_queue, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, "
91         "    ^{"
92         "      output[get_global_size(0)*2 + get_global_id(0)] = inputA[get_global_size(0)*2+get_global_id(0)] + inputB[get_global_size(0)*2+get_global_id(0)] + globalA;"
93         "    });"
94         // Have a child kernel write into last quarter of output
95         "  enqueue_kernel(childQueue, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, "
96         "    ^{"
97         "      output[get_global_size(0)*3 + get_global_id(0)] = inputA[get_global_size(0)*3 + get_global_id(0)] + inputB[get_global_size(0)*3 + get_global_id(0)] + globalA + 2;"
98         "    });"
99         "}" };
100 #if defined(CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY)
101     // Old interface style
102     cl::Program::Sources programStrings;
103     programStrings.push_back(std::pair<const char*, size_t>(kernel1.data(), kernel1.length()));
104     programStrings.push_back(std::pair<const char*, size_t>(kernel2.data(), kernel2.length()));
105 #else // #if defined(CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY)
106     // New simpler string interface style
107     std::vector<std::string> programStrings {
108         kernel1,
109         kernel2 };
110 #endif // #if defined(CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY)
111     cl::Program vectorAddProgram(
112         programStrings);
113 #if defined(CL_HPP_ENABLE_EXCEPTIONS)
114     try {
115         vectorAddProgram.build("-cl-std=CL2.0");
116     }
117     catch (...) {
118         // Print build info for all devices
119         cl_int buildErr = CL_SUCCESS;
120         auto buildInfo = vectorAddProgram.getBuildInfo<CL_PROGRAM_BUILD_LOG>(&buildErr);
121         for (auto &pair : buildInfo) {
122             std::cerr << pair.second << std::endl << std::endl;
123         }
124 
125         return 1;
126     }
127 #else // #if defined(CL_HPP_ENABLE_EXCEPTIONS)
128     cl_int buildErr = vectorAddProgram.build("-cl-std=CL2.0");
129     if (buildErr != CL_SUCCESS) {
130         std::cerr << "Build error: " << buildErr << "\n";
131         return -1;
132     }
133 #endif // #if defined(CL_HPP_ENABLE_EXCEPTIONS)
134 
135     typedef struct { int *bar; } Foo;
136 
137     // Get and run kernel that initializes the program-scope global
138     // A test for kernels that take no arguments
139     auto program2Kernel =
140         cl::KernelFunctor<>(vectorAddProgram, "updateGlobal");
141     program2Kernel(
142         cl::EnqueueArgs(
143         cl::NDRange(1)));
144 
145 
146     //////////////////
147     // SVM allocations
148 
149     // Store pointer to pointer here to test clSetKernelExecInfo
150     // Code using cl namespace allocators etc as a test
151     // std::shared_ptr etc should work fine too
152 
153     auto anSVMInt = cl::allocate_svm<int, cl::SVMTraitCoarse<>>();
154     *anSVMInt = 5;
155     cl::SVMAllocator<int, cl::SVMTraitCoarse<>> svmAlloc;
156     std::cout << "Max alloc size: " << svmAlloc.max_size() << " bytes\n";
157     cl::SVMAllocator<Foo, cl::SVMTraitCoarse<cl::SVMTraitReadOnly<>>> svmAllocReadOnly;
158     auto fooPointer = cl::allocate_pointer<Foo>(svmAllocReadOnly);
159     fooPointer->bar = anSVMInt.get();
160 
161     std::vector<int, cl::SVMAllocator<int, cl::SVMTraitCoarse<>>> inputA(numElements, 1, svmAlloc);
162 
163     cl::coarse_svm_vector<int> inputB(numElements, 2, svmAlloc);
164 
165     //
166     //////////////
167 
168     // Traditional cl_mem allocations
169     std::vector<int> output(numElements, 0xdeadbeef);
170     cl::Buffer outputBuffer(output.begin(), output.end(), false);
171 
172     std::vector<int, cl::SVMAllocator<int, cl::SVMTraitCoarse<>>> output2(numElements / 2, 0xdeadbeef);
173     cl::Pipe aPipe(sizeof(cl_int), numElements / 2);
174     // Unfortunately, there is no way to check for a default or know if a kernel needs one
175     // so the user has to create one
176     // We can't preemptively do so on device creation because they cannot then replace it
177     cl::DeviceCommandQueue defaultDeviceQueue;
178     defaultDeviceQueue = cl::DeviceCommandQueue::makeDefault();
179 
180     auto vectorAddKernel =
181         cl::KernelFunctor<
182             decltype(fooPointer)&,
183             int*,
184             cl::coarse_svm_vector<int>&,
185             cl::Buffer,
186             std::vector<int, cl::SVMAllocator<int, cl::SVMTraitCoarse<>>>&,
187             int,
188             cl::Pipe&,
189             cl::DeviceCommandQueue
190         >(vectorAddProgram, "vectorAdd");
191 
192 
193     // Only the last of these will actually be used
194     // but this will check that the API is working for all
195     // of them
196     cl::vector<void*> ptrs{ static_cast<void*>(anSVMInt.get()) };
197     vectorAddKernel.setSVMPointers(ptrs);
198     vectorAddKernel.setSVMPointers(anSVMInt.get());
199     vectorAddKernel.setSVMPointers(anSVMInt);
200 
201     // Hand control of coarse allocations to runtime
202     cl::enqueueUnmapSVM(anSVMInt);
203     cl::enqueueUnmapSVM(fooPointer);
204     cl::unmapSVM(inputB);
205     cl::unmapSVM(output2);
206 
207 
208 	cl_int error;
209 	vectorAddKernel(
210         cl::EnqueueArgs(
211             cl::NDRange(numElements/2),
212             cl::NDRange(numElements/2)),
213         fooPointer,
214         inputA.data(),
215         inputB,
216         outputBuffer,
217         output2,
218         3,
219         aPipe,
220         defaultDeviceQueue,
221 		error
222         );
223 
224     // Copy the cl_mem output back to the vector
225     cl::copy(outputBuffer, output.begin(), output.end());
226     // Grab the SVM output vector using a map
227     cl::mapSVM(output2);
228 
229     cl::Device d = cl::Device::getDefault();
230     std::cout << "Max pipe args: " << d.getInfo<CL_DEVICE_MAX_PIPE_ARGS>() << "\n";
231     std::cout << "Max pipe active reservations: " << d.getInfo<CL_DEVICE_PIPE_MAX_ACTIVE_RESERVATIONS>() << "\n";
232     std::cout << "Max pipe packet size: " << d.getInfo<CL_DEVICE_PIPE_MAX_PACKET_SIZE>() << "\n";
233     std::cout << "Device SVM capabilities: " << d.getInfo<CL_DEVICE_SVM_CAPABILITIES>() << "\n";
234     std::cout << "\tCL_DEVICE_SVM_COARSE_GRAIN_BUFFER = " << CL_DEVICE_SVM_COARSE_GRAIN_BUFFER << "\n";
235     std::cout << "\tCL_DEVICE_SVM_FINE_GRAIN_BUFFER = " << CL_DEVICE_SVM_FINE_GRAIN_BUFFER << "\n";
236     std::cout << "\tCL_DEVICE_SVM_FINE_GRAIN_SYSTEM = " << CL_DEVICE_SVM_FINE_GRAIN_SYSTEM << "\n";
237     std::cout << "\tCL_DEVICE_SVM_ATOMICS = " << CL_DEVICE_SVM_ATOMICS << "\n";
238 
239     auto v = vectorAddProgram.getInfo<CL_PROGRAM_BINARIES>();
240     auto v2 = vectorAddProgram.getInfo<CL_PROGRAM_BINARY_SIZES>();
241     std::vector<std::vector<unsigned char>> v3;
242     std::vector<size_t> v4;
243     vectorAddProgram.getInfo(CL_PROGRAM_BINARIES, &v3);
244     vectorAddProgram.getInfo(CL_PROGRAM_BINARY_SIZES, &v4);
245 
246     std::cout << "Binaries: " << v.size() << "\n";
247     std::cout << "Binary sizes: " << v2.size() << "\n";
248     for (size_t s : v2) {
249         std::cout << "\t" << s << "\n";
250     }
251 
252     std::cout << "Output:\n";
253     for (int i = 1; i < numElements; ++i) {
254         std::cout << "\t" << output[i] << "\n";
255     }
256     std::cout << "\n\n";
257     std::cout << "Output2:\n";
258     for (auto &e : output2) {
259         std::cout << "\t" << e << "\n";
260     }
261     std::cout << "\n\n";
262 
263     return 0;
264 }
265