// xref: /aosp_15_r20/external/OpenCL-CLHPP/examples/src/headerexample.cpp (revision 6fee86a4f833e4f32f25770a262884407554133d)
1 #define CL_HPP_ENABLE_EXCEPTIONS
2 #define CL_HPP_TARGET_OPENCL_VERSION 200
3 
4 #include <CL/opencl.hpp>
5 #include <iostream>
6 #include <vector>
7 #include <memory>
8 #include <algorithm>
9 
// Number of ints in each input vector and in the output buffer.
// The host launches the parent kernel over numElements/2 work-items.
constexpr int numElements = 32;
11 
main(void)12 int main(void)
13 {
14     // Filter for a 2.0 or newer platform and set it as the default
15     std::vector<cl::Platform> platforms;
16     cl::Platform::get(&platforms);
17     cl::Platform plat;
18     for (auto &p : platforms) {
19         std::string platver = p.getInfo<CL_PLATFORM_VERSION>();
20         if (platver.find("OpenCL 2.") != std::string::npos ||
21             platver.find("OpenCL 3.") != std::string::npos) {
22             // Note: an OpenCL 3.x platform may not support all required features!
23             plat = p;
24         }
25     }
26     if (plat() == 0) {
27         std::cout << "No OpenCL 2.0 or newer platform found.\n";
28         return -1;
29     }
30 
31     cl::Platform newP = cl::Platform::setDefault(plat);
32     if (newP != plat) {
33         std::cout << "Error setting default platform.\n";
34         return -1;
35     }
36 
37     // C++11 raw string literal for the first kernel
38     std::string kernel1{R"CLC(
39         global int globalA;
40         kernel void updateGlobal()
41         {
42           globalA = 75;
43         }
44     )CLC"};
45 
46     // Raw string literal for the second kernel
47     std::string kernel2{R"CLC(
48         typedef struct { global int *bar; } Foo;
49         kernel void vectorAdd(global const Foo* aNum, global const int *inputA, global const int *inputB,
50                               global int *output, int val, write_only pipe int outPipe, queue_t childQueue)
51         {
52           output[get_global_id(0)] = inputA[get_global_id(0)] + inputB[get_global_id(0)] + val + *(aNum->bar);
53           write_pipe(outPipe, &val);
54           queue_t default_queue = get_default_queue();
55           ndrange_t ndrange = ndrange_1D(get_global_size(0)/2, get_global_size(0)/2);
56 
57           // Have a child kernel write into third quarter of output
58           enqueue_kernel(default_queue, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange,
59             ^{
60                 output[get_global_size(0)*2 + get_global_id(0)] =
61                   inputA[get_global_size(0)*2 + get_global_id(0)] + inputB[get_global_size(0)*2 + get_global_id(0)] + globalA;
62             });
63 
64           // Have a child kernel write into last quarter of output
65           enqueue_kernel(childQueue, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange,
66             ^{
67                 output[get_global_size(0)*3 + get_global_id(0)] =
68                   inputA[get_global_size(0)*3 + get_global_id(0)] + inputB[get_global_size(0)*3 + get_global_id(0)] + globalA + 2;
69             });
70         }
71     )CLC"};
72 
73     std::vector<std::string> programStrings;
74     programStrings.push_back(kernel1);
75     programStrings.push_back(kernel2);
76 
77     cl::Program vectorAddProgram(programStrings);
78     try {
79         vectorAddProgram.build("-cl-std=CL2.0");
80     }
81     catch (...) {
82         // Print build info for all devices
83         cl_int buildErr = CL_SUCCESS;
84         auto buildInfo = vectorAddProgram.getBuildInfo<CL_PROGRAM_BUILD_LOG>(&buildErr);
85         for (auto &pair : buildInfo) {
86             std::cerr << pair.second << std::endl << std::endl;
87         }
88 
89         return 1;
90     }
91 
92     typedef struct { int *bar; } Foo;
93 
94     // Get and run kernel that initializes the program-scope global
95     // A test for kernels that take no arguments
96     auto program2Kernel =
97         cl::KernelFunctor<>(vectorAddProgram, "updateGlobal");
98     program2Kernel(
99         cl::EnqueueArgs(
100         cl::NDRange(1)));
101 
102     //////////////////
103     // SVM allocations
104 
105     auto anSVMInt = cl::allocate_svm<int, cl::SVMTraitCoarse<>>();
106     *anSVMInt = 5;
107     cl::SVMAllocator<Foo, cl::SVMTraitCoarse<cl::SVMTraitReadOnly<>>> svmAllocReadOnly;
108     auto fooPointer = cl::allocate_pointer<Foo>(svmAllocReadOnly);
109     fooPointer->bar = anSVMInt.get();
110     cl::SVMAllocator<int, cl::SVMTraitCoarse<>> svmAlloc;
111     std::vector<int, cl::SVMAllocator<int, cl::SVMTraitCoarse<>>> inputA(numElements, 1, svmAlloc);
112     cl::coarse_svm_vector<int> inputB(numElements, 2, svmAlloc);
113 
114     //////////////
115     // Traditional cl_mem allocations
116 
117     std::vector<int> output(numElements, 0xdeadbeef);
118     cl::Buffer outputBuffer(output.begin(), output.end(), false);
119     cl::Pipe aPipe(sizeof(cl_int), numElements / 2);
120 
121     // Default command queue, also passed in as a parameter
122     cl::DeviceCommandQueue defaultDeviceQueue = cl::DeviceCommandQueue::makeDefault(
123         cl::Context::getDefault(), cl::Device::getDefault());
124 
125     auto vectorAddKernel =
126         cl::KernelFunctor<
127             decltype(fooPointer)&,
128             int*,
129             cl::coarse_svm_vector<int>&,
130             cl::Buffer,
131             int,
132             cl::Pipe&,
133             cl::DeviceCommandQueue
134             >(vectorAddProgram, "vectorAdd");
135 
136     // Ensure that the additional SVM pointer is available to the kernel
137     // This one was not passed as a parameter
138     vectorAddKernel.setSVMPointers(anSVMInt);
139 
140     cl_int error;
141     vectorAddKernel(
142         cl::EnqueueArgs(
143             cl::NDRange(numElements/2),
144             cl::NDRange(numElements/2)),
145         fooPointer,
146         inputA.data(),
147         inputB,
148         outputBuffer,
149         3,
150         aPipe,
151         defaultDeviceQueue,
152         error
153         );
154 
155     cl::copy(outputBuffer, output.begin(), output.end());
156 
157     cl::Device d = cl::Device::getDefault();
158 
159     std::cout << "Output:\n";
160     for (int i = 1; i < numElements; ++i) {
161         std::cout << "\t" << output[i] << "\n";
162     }
163     std::cout << "\n\n";
164 
165     return 0;
166 }
167