1 #include <clpeak.h>
2 #include <cstring>
3
// Turns its whole argument list into a single string literal.
#define MSTRINGIFY(...) #__VA_ARGS__

// OpenCL program source that runAll() passes to cl::Program. The kernel files
// are textually included inside the initializer, so each one has to expand to
// a string-literal expression; adjacent literals are then concatenated by the
// compiler into one string.
// NOTE(review): presumably every .cl file wraps its contents in
// MSTRINGIFY(...) -- confirm against the kernel files.
static const std::string stringifiedKernels =
#include "global_bandwidth_kernels.cl"
#include "compute_sp_kernels.cl"
#include "compute_hp_kernels.cl"
#include "compute_dp_kernels.cl"
#include "compute_int24_kernels.cl"
#include "compute_integer_kernels.cl"
#include "compute_char_kernels.cl"
#include "compute_short_kernels.cl"
    ;
16
#ifdef USE_STUB_OPENCL
// Prototype of the stub OpenCL reset routine. It has C linkage and is defined
// outside this file; runAll() calls it before enumerating platforms whenever
// USE_STUB_OPENCL is defined.
extern "C"
{
  void stubOpenclReset();
}
#endif
24
clPeak()25 clPeak::clPeak() : forcePlatform(false), forceDevice(false), forceTest(false), useEventTimer(false),
26 isGlobalBW(true), isComputeHP(true), isComputeSP(true), isComputeDP(true), isComputeIntFast(true), isComputeInt(true),
27 isComputeChar(true), isComputeShort(true),
28 isTransferBW(true), isKernelLatency(true),
29 specifiedPlatform(0), specifiedDevice(0),
30 forcePlatformName(false), forceDeviceName(false),
31 specifiedPlatformName(0), specifiedDeviceName(0), specifiedTestName(0)
32 {
33 }
34
~clPeak()35 clPeak::~clPeak()
36 {
37 if (log)
38 {
39 delete log;
40 }
41 }
42
runAll()43 int clPeak::runAll()
44 {
45 try
46 {
47 #ifdef USE_STUB_OPENCL
48 stubOpenclReset();
49 #endif
50 vector<cl::Platform> platforms;
51 cl::Platform::get(&platforms);
52
53 log->xmlOpenTag("clpeak");
54 log->xmlAppendAttribs("os", OS_NAME);
55 for (size_t p = 0; p < platforms.size(); p++)
56 {
57 if (forcePlatform && (p != specifiedPlatform))
58 continue;
59
60 std::string platformName = platforms[p].getInfo<CL_PLATFORM_NAME>();
61 trimString(platformName);
62
63 if (forcePlatformName && (!strcmp(platformName.c_str(), specifiedPlatformName) == 0))
64 continue;
65
66 log->print(NEWLINE "Platform: " + platformName + NEWLINE);
67 log->xmlOpenTag("platform");
68 log->xmlAppendAttribs("name", platformName);
69
70 cl_context_properties cps[3] = {
71 CL_CONTEXT_PLATFORM,
72 (cl_context_properties)(platforms[p])(),
73 0};
74
75 cl::Context ctx(CL_DEVICE_TYPE_ALL, cps);
76 vector<cl::Device> devices = ctx.getInfo<CL_CONTEXT_DEVICES>();
77 cl::Program::Sources source(1, stringifiedKernels);
78 cl::Program prog = cl::Program(ctx, source);
79
80 for (size_t d = 0; d < devices.size(); d++)
81 {
82 if (forceDevice && (d != specifiedDevice))
83 continue;
84
85 device_info_t devInfo = getDeviceInfo(devices[d]);
86
87 if (forceDeviceName && (!strcmp(devInfo.deviceName.c_str(), specifiedDeviceName) == 0))
88 continue;
89
90 log->print(TAB "Device: " + devInfo.deviceName + NEWLINE);
91 log->print(TAB TAB "Driver version : ");
92 log->print(devInfo.driverVersion);
93 log->print(" (" OS_NAME ")" NEWLINE);
94 log->print(TAB TAB "Compute units : ");
95 log->print(devInfo.numCUs);
96 log->print(NEWLINE);
97 log->print(TAB TAB "Clock frequency : ");
98 log->print(devInfo.maxClockFreq);
99 log->print(" MHz" NEWLINE);
100 log->xmlOpenTag("device");
101 log->xmlAppendAttribs("name", devInfo.deviceName);
102 log->xmlAppendAttribs("driver_version", devInfo.driverVersion);
103 log->xmlAppendAttribs("compute_units", devInfo.numCUs);
104 log->xmlAppendAttribs("clock_frequency", devInfo.maxClockFreq);
105 log->xmlAppendAttribs("clock_frequency_unit", "MHz");
106
107 try
108 {
109 vector<cl::Device> dev = {devices[d]};
110 prog.build(dev, BUILD_OPTIONS);
111 }
112 catch (cl::Error &error)
113 {
114 UNUSED(error);
115 log->print(TAB TAB "Build Log: " + prog.getBuildInfo<CL_PROGRAM_BUILD_LOG>(devices[d]) + NEWLINE NEWLINE);
116 continue;
117 }
118
119 cl::CommandQueue queue = cl::CommandQueue(ctx, devices[d], CL_QUEUE_PROFILING_ENABLE);
120
121 runGlobalBandwidthTest(queue, prog, devInfo);
122 runComputeSP(queue, prog, devInfo);
123 runComputeHP(queue, prog, devInfo);
124 runComputeDP(queue, prog, devInfo);
125 runComputeInteger(queue, prog, devInfo);
126 runComputeIntFast(queue, prog, devInfo);
127 runComputeChar(queue, prog, devInfo);
128 runComputeShort(queue, prog, devInfo);
129 runTransferBandwidthTest(queue, prog, devInfo);
130 runKernelLatency(queue, prog, devInfo);
131
132 log->print(NEWLINE);
133 log->xmlCloseTag(); // device
134 }
135 log->xmlCloseTag(); // platform
136 }
137 log->xmlCloseTag(); // clpeak
138 }
139 catch (cl::Error &error)
140 {
141 stringstream ss;
142 ss << error.what() << " (" << error.err() << ")" NEWLINE;
143
144 log->print(ss.str());
145
146 // skip error for no platform
147 if (strcmp(error.what(), "clGetPlatformIDs") == 0)
148 {
149 log->print("no platforms found" NEWLINE);
150 }
151 else
152 {
153 return -1;
154 }
155 }
156
157 return 0;
158 }
159
run_kernel(cl::CommandQueue & queue,cl::Kernel & kernel,cl::NDRange & globalSize,cl::NDRange & localSize,uint iters)160 float clPeak::run_kernel(cl::CommandQueue &queue, cl::Kernel &kernel, cl::NDRange &globalSize, cl::NDRange &localSize, uint iters)
161 {
162 float timed = 0;
163
164 // Dummy calls
165 queue.enqueueNDRangeKernel(kernel, cl::NullRange, globalSize, localSize);
166 queue.enqueueNDRangeKernel(kernel, cl::NullRange, globalSize, localSize);
167 queue.finish();
168
169 if (useEventTimer)
170 {
171 for (uint i = 0; i < iters; i++)
172 {
173 cl::Event timeEvent;
174
175 queue.enqueueNDRangeKernel(kernel, cl::NullRange, globalSize, localSize, NULL, &timeEvent);
176 queue.finish();
177 timed += timeInUS(timeEvent);
178 }
179 }
180 else // std timer
181 {
182 Timer timer;
183
184 timer.start();
185 for (uint i = 0; i < iters; i++)
186 {
187 queue.enqueueNDRangeKernel(kernel, cl::NullRange, globalSize, localSize);
188 queue.flush();
189 }
190 queue.finish();
191 timed = timer.stopAndTime();
192 }
193
194 return (timed / static_cast<float>(iters));
195 }
196