xref: /aosp_15_r20/external/OpenCL-CLHPP/examples/src/headerexample.cpp (revision 6fee86a4f833e4f32f25770a262884407554133d)
1*6fee86a4SJeremy Kemp #define CL_HPP_ENABLE_EXCEPTIONS
2*6fee86a4SJeremy Kemp #define CL_HPP_TARGET_OPENCL_VERSION 200
3*6fee86a4SJeremy Kemp 
4*6fee86a4SJeremy Kemp #include <CL/opencl.hpp>
5*6fee86a4SJeremy Kemp #include <iostream>
6*6fee86a4SJeremy Kemp #include <vector>
7*6fee86a4SJeremy Kemp #include <memory>
8*6fee86a4SJeremy Kemp #include <algorithm>
9*6fee86a4SJeremy Kemp 
10*6fee86a4SJeremy Kemp const int numElements = 32;
11*6fee86a4SJeremy Kemp 
main(void)12*6fee86a4SJeremy Kemp int main(void)
13*6fee86a4SJeremy Kemp {
14*6fee86a4SJeremy Kemp     // Filter for a 2.0 or newer platform and set it as the default
15*6fee86a4SJeremy Kemp     std::vector<cl::Platform> platforms;
16*6fee86a4SJeremy Kemp     cl::Platform::get(&platforms);
17*6fee86a4SJeremy Kemp     cl::Platform plat;
18*6fee86a4SJeremy Kemp     for (auto &p : platforms) {
19*6fee86a4SJeremy Kemp         std::string platver = p.getInfo<CL_PLATFORM_VERSION>();
20*6fee86a4SJeremy Kemp         if (platver.find("OpenCL 2.") != std::string::npos ||
21*6fee86a4SJeremy Kemp             platver.find("OpenCL 3.") != std::string::npos) {
22*6fee86a4SJeremy Kemp             // Note: an OpenCL 3.x platform may not support all required features!
23*6fee86a4SJeremy Kemp             plat = p;
24*6fee86a4SJeremy Kemp         }
25*6fee86a4SJeremy Kemp     }
26*6fee86a4SJeremy Kemp     if (plat() == 0) {
27*6fee86a4SJeremy Kemp         std::cout << "No OpenCL 2.0 or newer platform found.\n";
28*6fee86a4SJeremy Kemp         return -1;
29*6fee86a4SJeremy Kemp     }
30*6fee86a4SJeremy Kemp 
31*6fee86a4SJeremy Kemp     cl::Platform newP = cl::Platform::setDefault(plat);
32*6fee86a4SJeremy Kemp     if (newP != plat) {
33*6fee86a4SJeremy Kemp         std::cout << "Error setting default platform.\n";
34*6fee86a4SJeremy Kemp         return -1;
35*6fee86a4SJeremy Kemp     }
36*6fee86a4SJeremy Kemp 
37*6fee86a4SJeremy Kemp     // C++11 raw string literal for the first kernel
38*6fee86a4SJeremy Kemp     std::string kernel1{R"CLC(
39*6fee86a4SJeremy Kemp         global int globalA;
40*6fee86a4SJeremy Kemp         kernel void updateGlobal()
41*6fee86a4SJeremy Kemp         {
42*6fee86a4SJeremy Kemp           globalA = 75;
43*6fee86a4SJeremy Kemp         }
44*6fee86a4SJeremy Kemp     )CLC"};
45*6fee86a4SJeremy Kemp 
46*6fee86a4SJeremy Kemp     // Raw string literal for the second kernel
47*6fee86a4SJeremy Kemp     std::string kernel2{R"CLC(
48*6fee86a4SJeremy Kemp         typedef struct { global int *bar; } Foo;
49*6fee86a4SJeremy Kemp         kernel void vectorAdd(global const Foo* aNum, global const int *inputA, global const int *inputB,
50*6fee86a4SJeremy Kemp                               global int *output, int val, write_only pipe int outPipe, queue_t childQueue)
51*6fee86a4SJeremy Kemp         {
52*6fee86a4SJeremy Kemp           output[get_global_id(0)] = inputA[get_global_id(0)] + inputB[get_global_id(0)] + val + *(aNum->bar);
53*6fee86a4SJeremy Kemp           write_pipe(outPipe, &val);
54*6fee86a4SJeremy Kemp           queue_t default_queue = get_default_queue();
55*6fee86a4SJeremy Kemp           ndrange_t ndrange = ndrange_1D(get_global_size(0)/2, get_global_size(0)/2);
56*6fee86a4SJeremy Kemp 
57*6fee86a4SJeremy Kemp           // Have a child kernel write into third quarter of output
58*6fee86a4SJeremy Kemp           enqueue_kernel(default_queue, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange,
59*6fee86a4SJeremy Kemp             ^{
60*6fee86a4SJeremy Kemp                 output[get_global_size(0)*2 + get_global_id(0)] =
61*6fee86a4SJeremy Kemp                   inputA[get_global_size(0)*2 + get_global_id(0)] + inputB[get_global_size(0)*2 + get_global_id(0)] + globalA;
62*6fee86a4SJeremy Kemp             });
63*6fee86a4SJeremy Kemp 
64*6fee86a4SJeremy Kemp           // Have a child kernel write into last quarter of output
65*6fee86a4SJeremy Kemp           enqueue_kernel(childQueue, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange,
66*6fee86a4SJeremy Kemp             ^{
67*6fee86a4SJeremy Kemp                 output[get_global_size(0)*3 + get_global_id(0)] =
68*6fee86a4SJeremy Kemp                   inputA[get_global_size(0)*3 + get_global_id(0)] + inputB[get_global_size(0)*3 + get_global_id(0)] + globalA + 2;
69*6fee86a4SJeremy Kemp             });
70*6fee86a4SJeremy Kemp         }
71*6fee86a4SJeremy Kemp     )CLC"};
72*6fee86a4SJeremy Kemp 
73*6fee86a4SJeremy Kemp     std::vector<std::string> programStrings;
74*6fee86a4SJeremy Kemp     programStrings.push_back(kernel1);
75*6fee86a4SJeremy Kemp     programStrings.push_back(kernel2);
76*6fee86a4SJeremy Kemp 
77*6fee86a4SJeremy Kemp     cl::Program vectorAddProgram(programStrings);
78*6fee86a4SJeremy Kemp     try {
79*6fee86a4SJeremy Kemp         vectorAddProgram.build("-cl-std=CL2.0");
80*6fee86a4SJeremy Kemp     }
81*6fee86a4SJeremy Kemp     catch (...) {
82*6fee86a4SJeremy Kemp         // Print build info for all devices
83*6fee86a4SJeremy Kemp         cl_int buildErr = CL_SUCCESS;
84*6fee86a4SJeremy Kemp         auto buildInfo = vectorAddProgram.getBuildInfo<CL_PROGRAM_BUILD_LOG>(&buildErr);
85*6fee86a4SJeremy Kemp         for (auto &pair : buildInfo) {
86*6fee86a4SJeremy Kemp             std::cerr << pair.second << std::endl << std::endl;
87*6fee86a4SJeremy Kemp         }
88*6fee86a4SJeremy Kemp 
89*6fee86a4SJeremy Kemp         return 1;
90*6fee86a4SJeremy Kemp     }
91*6fee86a4SJeremy Kemp 
92*6fee86a4SJeremy Kemp     typedef struct { int *bar; } Foo;
93*6fee86a4SJeremy Kemp 
94*6fee86a4SJeremy Kemp     // Get and run kernel that initializes the program-scope global
95*6fee86a4SJeremy Kemp     // A test for kernels that take no arguments
96*6fee86a4SJeremy Kemp     auto program2Kernel =
97*6fee86a4SJeremy Kemp         cl::KernelFunctor<>(vectorAddProgram, "updateGlobal");
98*6fee86a4SJeremy Kemp     program2Kernel(
99*6fee86a4SJeremy Kemp         cl::EnqueueArgs(
100*6fee86a4SJeremy Kemp         cl::NDRange(1)));
101*6fee86a4SJeremy Kemp 
102*6fee86a4SJeremy Kemp     //////////////////
103*6fee86a4SJeremy Kemp     // SVM allocations
104*6fee86a4SJeremy Kemp 
105*6fee86a4SJeremy Kemp     auto anSVMInt = cl::allocate_svm<int, cl::SVMTraitCoarse<>>();
106*6fee86a4SJeremy Kemp     *anSVMInt = 5;
107*6fee86a4SJeremy Kemp     cl::SVMAllocator<Foo, cl::SVMTraitCoarse<cl::SVMTraitReadOnly<>>> svmAllocReadOnly;
108*6fee86a4SJeremy Kemp     auto fooPointer = cl::allocate_pointer<Foo>(svmAllocReadOnly);
109*6fee86a4SJeremy Kemp     fooPointer->bar = anSVMInt.get();
110*6fee86a4SJeremy Kemp     cl::SVMAllocator<int, cl::SVMTraitCoarse<>> svmAlloc;
111*6fee86a4SJeremy Kemp     std::vector<int, cl::SVMAllocator<int, cl::SVMTraitCoarse<>>> inputA(numElements, 1, svmAlloc);
112*6fee86a4SJeremy Kemp     cl::coarse_svm_vector<int> inputB(numElements, 2, svmAlloc);
113*6fee86a4SJeremy Kemp 
114*6fee86a4SJeremy Kemp     //////////////
115*6fee86a4SJeremy Kemp     // Traditional cl_mem allocations
116*6fee86a4SJeremy Kemp 
117*6fee86a4SJeremy Kemp     std::vector<int> output(numElements, 0xdeadbeef);
118*6fee86a4SJeremy Kemp     cl::Buffer outputBuffer(output.begin(), output.end(), false);
119*6fee86a4SJeremy Kemp     cl::Pipe aPipe(sizeof(cl_int), numElements / 2);
120*6fee86a4SJeremy Kemp 
121*6fee86a4SJeremy Kemp     // Default command queue, also passed in as a parameter
122*6fee86a4SJeremy Kemp     cl::DeviceCommandQueue defaultDeviceQueue = cl::DeviceCommandQueue::makeDefault(
123*6fee86a4SJeremy Kemp         cl::Context::getDefault(), cl::Device::getDefault());
124*6fee86a4SJeremy Kemp 
125*6fee86a4SJeremy Kemp     auto vectorAddKernel =
126*6fee86a4SJeremy Kemp         cl::KernelFunctor<
127*6fee86a4SJeremy Kemp             decltype(fooPointer)&,
128*6fee86a4SJeremy Kemp             int*,
129*6fee86a4SJeremy Kemp             cl::coarse_svm_vector<int>&,
130*6fee86a4SJeremy Kemp             cl::Buffer,
131*6fee86a4SJeremy Kemp             int,
132*6fee86a4SJeremy Kemp             cl::Pipe&,
133*6fee86a4SJeremy Kemp             cl::DeviceCommandQueue
134*6fee86a4SJeremy Kemp             >(vectorAddProgram, "vectorAdd");
135*6fee86a4SJeremy Kemp 
136*6fee86a4SJeremy Kemp     // Ensure that the additional SVM pointer is available to the kernel
137*6fee86a4SJeremy Kemp     // This one was not passed as a parameter
138*6fee86a4SJeremy Kemp     vectorAddKernel.setSVMPointers(anSVMInt);
139*6fee86a4SJeremy Kemp 
140*6fee86a4SJeremy Kemp     cl_int error;
141*6fee86a4SJeremy Kemp     vectorAddKernel(
142*6fee86a4SJeremy Kemp         cl::EnqueueArgs(
143*6fee86a4SJeremy Kemp             cl::NDRange(numElements/2),
144*6fee86a4SJeremy Kemp             cl::NDRange(numElements/2)),
145*6fee86a4SJeremy Kemp         fooPointer,
146*6fee86a4SJeremy Kemp         inputA.data(),
147*6fee86a4SJeremy Kemp         inputB,
148*6fee86a4SJeremy Kemp         outputBuffer,
149*6fee86a4SJeremy Kemp         3,
150*6fee86a4SJeremy Kemp         aPipe,
151*6fee86a4SJeremy Kemp         defaultDeviceQueue,
152*6fee86a4SJeremy Kemp         error
153*6fee86a4SJeremy Kemp         );
154*6fee86a4SJeremy Kemp 
155*6fee86a4SJeremy Kemp     cl::copy(outputBuffer, output.begin(), output.end());
156*6fee86a4SJeremy Kemp 
157*6fee86a4SJeremy Kemp     cl::Device d = cl::Device::getDefault();
158*6fee86a4SJeremy Kemp 
159*6fee86a4SJeremy Kemp     std::cout << "Output:\n";
160*6fee86a4SJeremy Kemp     for (int i = 1; i < numElements; ++i) {
161*6fee86a4SJeremy Kemp         std::cout << "\t" << output[i] << "\n";
162*6fee86a4SJeremy Kemp     }
163*6fee86a4SJeremy Kemp     std::cout << "\n\n";
164*6fee86a4SJeremy Kemp 
165*6fee86a4SJeremy Kemp     return 0;
166*6fee86a4SJeremy Kemp }
167