1*6fee86a4SJeremy Kemp #define CL_HPP_ENABLE_EXCEPTIONS
2*6fee86a4SJeremy Kemp #define CL_HPP_TARGET_OPENCL_VERSION 200
3*6fee86a4SJeremy Kemp
4*6fee86a4SJeremy Kemp #include <CL/opencl.hpp>
5*6fee86a4SJeremy Kemp #include <iostream>
6*6fee86a4SJeremy Kemp #include <vector>
7*6fee86a4SJeremy Kemp #include <memory>
8*6fee86a4SJeremy Kemp #include <algorithm>
9*6fee86a4SJeremy Kemp
10*6fee86a4SJeremy Kemp const int numElements = 32;
11*6fee86a4SJeremy Kemp
main(void)12*6fee86a4SJeremy Kemp int main(void)
13*6fee86a4SJeremy Kemp {
14*6fee86a4SJeremy Kemp // Filter for a 2.0 or newer platform and set it as the default
15*6fee86a4SJeremy Kemp std::vector<cl::Platform> platforms;
16*6fee86a4SJeremy Kemp cl::Platform::get(&platforms);
17*6fee86a4SJeremy Kemp cl::Platform plat;
18*6fee86a4SJeremy Kemp for (auto &p : platforms) {
19*6fee86a4SJeremy Kemp std::string platver = p.getInfo<CL_PLATFORM_VERSION>();
20*6fee86a4SJeremy Kemp if (platver.find("OpenCL 2.") != std::string::npos ||
21*6fee86a4SJeremy Kemp platver.find("OpenCL 3.") != std::string::npos) {
22*6fee86a4SJeremy Kemp // Note: an OpenCL 3.x platform may not support all required features!
23*6fee86a4SJeremy Kemp plat = p;
24*6fee86a4SJeremy Kemp }
25*6fee86a4SJeremy Kemp }
26*6fee86a4SJeremy Kemp if (plat() == 0) {
27*6fee86a4SJeremy Kemp std::cout << "No OpenCL 2.0 or newer platform found.\n";
28*6fee86a4SJeremy Kemp return -1;
29*6fee86a4SJeremy Kemp }
30*6fee86a4SJeremy Kemp
31*6fee86a4SJeremy Kemp cl::Platform newP = cl::Platform::setDefault(plat);
32*6fee86a4SJeremy Kemp if (newP != plat) {
33*6fee86a4SJeremy Kemp std::cout << "Error setting default platform.\n";
34*6fee86a4SJeremy Kemp return -1;
35*6fee86a4SJeremy Kemp }
36*6fee86a4SJeremy Kemp
37*6fee86a4SJeremy Kemp // C++11 raw string literal for the first kernel
38*6fee86a4SJeremy Kemp std::string kernel1{R"CLC(
39*6fee86a4SJeremy Kemp global int globalA;
40*6fee86a4SJeremy Kemp kernel void updateGlobal()
41*6fee86a4SJeremy Kemp {
42*6fee86a4SJeremy Kemp globalA = 75;
43*6fee86a4SJeremy Kemp }
44*6fee86a4SJeremy Kemp )CLC"};
45*6fee86a4SJeremy Kemp
46*6fee86a4SJeremy Kemp // Raw string literal for the second kernel
47*6fee86a4SJeremy Kemp std::string kernel2{R"CLC(
48*6fee86a4SJeremy Kemp typedef struct { global int *bar; } Foo;
49*6fee86a4SJeremy Kemp kernel void vectorAdd(global const Foo* aNum, global const int *inputA, global const int *inputB,
50*6fee86a4SJeremy Kemp global int *output, int val, write_only pipe int outPipe, queue_t childQueue)
51*6fee86a4SJeremy Kemp {
52*6fee86a4SJeremy Kemp output[get_global_id(0)] = inputA[get_global_id(0)] + inputB[get_global_id(0)] + val + *(aNum->bar);
53*6fee86a4SJeremy Kemp write_pipe(outPipe, &val);
54*6fee86a4SJeremy Kemp queue_t default_queue = get_default_queue();
55*6fee86a4SJeremy Kemp ndrange_t ndrange = ndrange_1D(get_global_size(0)/2, get_global_size(0)/2);
56*6fee86a4SJeremy Kemp
57*6fee86a4SJeremy Kemp // Have a child kernel write into third quarter of output
58*6fee86a4SJeremy Kemp enqueue_kernel(default_queue, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange,
59*6fee86a4SJeremy Kemp ^{
60*6fee86a4SJeremy Kemp output[get_global_size(0)*2 + get_global_id(0)] =
61*6fee86a4SJeremy Kemp inputA[get_global_size(0)*2 + get_global_id(0)] + inputB[get_global_size(0)*2 + get_global_id(0)] + globalA;
62*6fee86a4SJeremy Kemp });
63*6fee86a4SJeremy Kemp
64*6fee86a4SJeremy Kemp // Have a child kernel write into last quarter of output
65*6fee86a4SJeremy Kemp enqueue_kernel(childQueue, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange,
66*6fee86a4SJeremy Kemp ^{
67*6fee86a4SJeremy Kemp output[get_global_size(0)*3 + get_global_id(0)] =
68*6fee86a4SJeremy Kemp inputA[get_global_size(0)*3 + get_global_id(0)] + inputB[get_global_size(0)*3 + get_global_id(0)] + globalA + 2;
69*6fee86a4SJeremy Kemp });
70*6fee86a4SJeremy Kemp }
71*6fee86a4SJeremy Kemp )CLC"};
72*6fee86a4SJeremy Kemp
73*6fee86a4SJeremy Kemp std::vector<std::string> programStrings;
74*6fee86a4SJeremy Kemp programStrings.push_back(kernel1);
75*6fee86a4SJeremy Kemp programStrings.push_back(kernel2);
76*6fee86a4SJeremy Kemp
77*6fee86a4SJeremy Kemp cl::Program vectorAddProgram(programStrings);
78*6fee86a4SJeremy Kemp try {
79*6fee86a4SJeremy Kemp vectorAddProgram.build("-cl-std=CL2.0");
80*6fee86a4SJeremy Kemp }
81*6fee86a4SJeremy Kemp catch (...) {
82*6fee86a4SJeremy Kemp // Print build info for all devices
83*6fee86a4SJeremy Kemp cl_int buildErr = CL_SUCCESS;
84*6fee86a4SJeremy Kemp auto buildInfo = vectorAddProgram.getBuildInfo<CL_PROGRAM_BUILD_LOG>(&buildErr);
85*6fee86a4SJeremy Kemp for (auto &pair : buildInfo) {
86*6fee86a4SJeremy Kemp std::cerr << pair.second << std::endl << std::endl;
87*6fee86a4SJeremy Kemp }
88*6fee86a4SJeremy Kemp
89*6fee86a4SJeremy Kemp return 1;
90*6fee86a4SJeremy Kemp }
91*6fee86a4SJeremy Kemp
92*6fee86a4SJeremy Kemp typedef struct { int *bar; } Foo;
93*6fee86a4SJeremy Kemp
94*6fee86a4SJeremy Kemp // Get and run kernel that initializes the program-scope global
95*6fee86a4SJeremy Kemp // A test for kernels that take no arguments
96*6fee86a4SJeremy Kemp auto program2Kernel =
97*6fee86a4SJeremy Kemp cl::KernelFunctor<>(vectorAddProgram, "updateGlobal");
98*6fee86a4SJeremy Kemp program2Kernel(
99*6fee86a4SJeremy Kemp cl::EnqueueArgs(
100*6fee86a4SJeremy Kemp cl::NDRange(1)));
101*6fee86a4SJeremy Kemp
102*6fee86a4SJeremy Kemp //////////////////
103*6fee86a4SJeremy Kemp // SVM allocations
104*6fee86a4SJeremy Kemp
105*6fee86a4SJeremy Kemp auto anSVMInt = cl::allocate_svm<int, cl::SVMTraitCoarse<>>();
106*6fee86a4SJeremy Kemp *anSVMInt = 5;
107*6fee86a4SJeremy Kemp cl::SVMAllocator<Foo, cl::SVMTraitCoarse<cl::SVMTraitReadOnly<>>> svmAllocReadOnly;
108*6fee86a4SJeremy Kemp auto fooPointer = cl::allocate_pointer<Foo>(svmAllocReadOnly);
109*6fee86a4SJeremy Kemp fooPointer->bar = anSVMInt.get();
110*6fee86a4SJeremy Kemp cl::SVMAllocator<int, cl::SVMTraitCoarse<>> svmAlloc;
111*6fee86a4SJeremy Kemp std::vector<int, cl::SVMAllocator<int, cl::SVMTraitCoarse<>>> inputA(numElements, 1, svmAlloc);
112*6fee86a4SJeremy Kemp cl::coarse_svm_vector<int> inputB(numElements, 2, svmAlloc);
113*6fee86a4SJeremy Kemp
114*6fee86a4SJeremy Kemp //////////////
115*6fee86a4SJeremy Kemp // Traditional cl_mem allocations
116*6fee86a4SJeremy Kemp
117*6fee86a4SJeremy Kemp std::vector<int> output(numElements, 0xdeadbeef);
118*6fee86a4SJeremy Kemp cl::Buffer outputBuffer(output.begin(), output.end(), false);
119*6fee86a4SJeremy Kemp cl::Pipe aPipe(sizeof(cl_int), numElements / 2);
120*6fee86a4SJeremy Kemp
121*6fee86a4SJeremy Kemp // Default command queue, also passed in as a parameter
122*6fee86a4SJeremy Kemp cl::DeviceCommandQueue defaultDeviceQueue = cl::DeviceCommandQueue::makeDefault(
123*6fee86a4SJeremy Kemp cl::Context::getDefault(), cl::Device::getDefault());
124*6fee86a4SJeremy Kemp
125*6fee86a4SJeremy Kemp auto vectorAddKernel =
126*6fee86a4SJeremy Kemp cl::KernelFunctor<
127*6fee86a4SJeremy Kemp decltype(fooPointer)&,
128*6fee86a4SJeremy Kemp int*,
129*6fee86a4SJeremy Kemp cl::coarse_svm_vector<int>&,
130*6fee86a4SJeremy Kemp cl::Buffer,
131*6fee86a4SJeremy Kemp int,
132*6fee86a4SJeremy Kemp cl::Pipe&,
133*6fee86a4SJeremy Kemp cl::DeviceCommandQueue
134*6fee86a4SJeremy Kemp >(vectorAddProgram, "vectorAdd");
135*6fee86a4SJeremy Kemp
136*6fee86a4SJeremy Kemp // Ensure that the additional SVM pointer is available to the kernel
137*6fee86a4SJeremy Kemp // This one was not passed as a parameter
138*6fee86a4SJeremy Kemp vectorAddKernel.setSVMPointers(anSVMInt);
139*6fee86a4SJeremy Kemp
140*6fee86a4SJeremy Kemp cl_int error;
141*6fee86a4SJeremy Kemp vectorAddKernel(
142*6fee86a4SJeremy Kemp cl::EnqueueArgs(
143*6fee86a4SJeremy Kemp cl::NDRange(numElements/2),
144*6fee86a4SJeremy Kemp cl::NDRange(numElements/2)),
145*6fee86a4SJeremy Kemp fooPointer,
146*6fee86a4SJeremy Kemp inputA.data(),
147*6fee86a4SJeremy Kemp inputB,
148*6fee86a4SJeremy Kemp outputBuffer,
149*6fee86a4SJeremy Kemp 3,
150*6fee86a4SJeremy Kemp aPipe,
151*6fee86a4SJeremy Kemp defaultDeviceQueue,
152*6fee86a4SJeremy Kemp error
153*6fee86a4SJeremy Kemp );
154*6fee86a4SJeremy Kemp
155*6fee86a4SJeremy Kemp cl::copy(outputBuffer, output.begin(), output.end());
156*6fee86a4SJeremy Kemp
157*6fee86a4SJeremy Kemp cl::Device d = cl::Device::getDefault();
158*6fee86a4SJeremy Kemp
159*6fee86a4SJeremy Kemp std::cout << "Output:\n";
160*6fee86a4SJeremy Kemp for (int i = 1; i < numElements; ++i) {
161*6fee86a4SJeremy Kemp std::cout << "\t" << output[i] << "\n";
162*6fee86a4SJeremy Kemp }
163*6fee86a4SJeremy Kemp std::cout << "\n\n";
164*6fee86a4SJeremy Kemp
165*6fee86a4SJeremy Kemp return 0;
166*6fee86a4SJeremy Kemp }
167