#define CL_HPP_ENABLE_EXCEPTIONS #define CL_HPP_TARGET_OPENCL_VERSION 200 #include #include #include #include #include const int numElements = 32; int main(void) { // Filter for a 2.0 or newer platform and set it as the default std::vector platforms; cl::Platform::get(&platforms); cl::Platform plat; for (auto &p : platforms) { std::string platver = p.getInfo(); if (platver.find("OpenCL 2.") != std::string::npos || platver.find("OpenCL 3.") != std::string::npos) { // Note: an OpenCL 3.x platform may not support all required features! plat = p; } } if (plat() == 0) { std::cout << "No OpenCL 2.0 or newer platform found.\n"; return -1; } cl::Platform newP = cl::Platform::setDefault(plat); if (newP != plat) { std::cout << "Error setting default platform.\n"; return -1; } // C++11 raw string literal for the first kernel std::string kernel1{R"CLC( global int globalA; kernel void updateGlobal() { globalA = 75; } )CLC"}; // Raw string literal for the second kernel std::string kernel2{R"CLC( typedef struct { global int *bar; } Foo; kernel void vectorAdd(global const Foo* aNum, global const int *inputA, global const int *inputB, global int *output, int val, write_only pipe int outPipe, queue_t childQueue) { output[get_global_id(0)] = inputA[get_global_id(0)] + inputB[get_global_id(0)] + val + *(aNum->bar); write_pipe(outPipe, &val); queue_t default_queue = get_default_queue(); ndrange_t ndrange = ndrange_1D(get_global_size(0)/2, get_global_size(0)/2); // Have a child kernel write into third quarter of output enqueue_kernel(default_queue, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, ^{ output[get_global_size(0)*2 + get_global_id(0)] = inputA[get_global_size(0)*2 + get_global_id(0)] + inputB[get_global_size(0)*2 + get_global_id(0)] + globalA; }); // Have a child kernel write into last quarter of output enqueue_kernel(childQueue, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, ^{ output[get_global_size(0)*3 + get_global_id(0)] = inputA[get_global_size(0)*3 + get_global_id(0)] + inputB[get_global_size(0)*3 + get_global_id(0)] + globalA + 2; }); } )CLC"}; std::vector programStrings; programStrings.push_back(kernel1); programStrings.push_back(kernel2); cl::Program vectorAddProgram(programStrings); try { vectorAddProgram.build("-cl-std=CL2.0"); } catch (...) { // Print build info for all devices cl_int buildErr = CL_SUCCESS; auto buildInfo = vectorAddProgram.getBuildInfo(&buildErr); for (auto &pair : buildInfo) { std::cerr << pair.second << std::endl << std::endl; } return 1; } typedef struct { int *bar; } Foo; // Get and run kernel that initializes the program-scope global // A test for kernels that take no arguments auto program2Kernel = cl::KernelFunctor<>(vectorAddProgram, "updateGlobal"); program2Kernel( cl::EnqueueArgs( cl::NDRange(1))); ////////////////// // SVM allocations auto anSVMInt = cl::allocate_svm>(); *anSVMInt = 5; cl::SVMAllocator>> svmAllocReadOnly; auto fooPointer = cl::allocate_pointer(svmAllocReadOnly); fooPointer->bar = anSVMInt.get(); cl::SVMAllocator> svmAlloc; std::vector>> inputA(numElements, 1, svmAlloc); cl::coarse_svm_vector inputB(numElements, 2, svmAlloc); ////////////// // Traditional cl_mem allocations std::vector output(numElements, 0xdeadbeef); cl::Buffer outputBuffer(output.begin(), output.end(), false); cl::Pipe aPipe(sizeof(cl_int), numElements / 2); // Default command queue, also passed in as a parameter cl::DeviceCommandQueue defaultDeviceQueue = cl::DeviceCommandQueue::makeDefault( cl::Context::getDefault(), cl::Device::getDefault()); auto vectorAddKernel = cl::KernelFunctor< decltype(fooPointer)&, int*, cl::coarse_svm_vector&, cl::Buffer, int, cl::Pipe&, cl::DeviceCommandQueue >(vectorAddProgram, "vectorAdd"); // Ensure that the additional SVM pointer is available to the kernel // This one was not passed as a parameter vectorAddKernel.setSVMPointers(anSVMInt); cl_int error; vectorAddKernel( cl::EnqueueArgs( cl::NDRange(numElements/2), cl::NDRange(numElements/2)), fooPointer, inputA.data(), inputB, outputBuffer, 3, aPipe, defaultDeviceQueue, error ); cl::copy(outputBuffer, output.begin(), output.end()); cl::Device d = cl::Device::getDefault(); std::cout << "Output:\n"; for (int i = 1; i < numElements; ++i) { std::cout << "\t" << output[i] << "\n"; } std::cout << "\n\n"; return 0; }