1*1cd03ba3SJeremy Kemp #include <clpeak.h>
2*1cd03ba3SJeremy Kemp #include <cstring>
3*1cd03ba3SJeremy Kemp
// Turns its whole (variadic) argument list into a single string literal using
// the preprocessor stringification operator.
#define MSTRINGIFY(...) #__VA_ARGS__
5*1cd03ba3SJeremy Kemp
// Source text of all OpenCL kernels, handed to cl::Program::Sources in
// clPeak::runAll(). The initializer is assembled from the included .cl files.
// NOTE(review): presumably each .cl file wraps its contents in MSTRINGIFY(...)
// so that the resulting adjacent string literals are concatenated by the
// compiler into one string -- confirm against the .cl files.
static const std::string stringifiedKernels =
#include "global_bandwidth_kernels.cl"
#include "compute_sp_kernels.cl"
#include "compute_hp_kernels.cl"
#include "compute_dp_kernels.cl"
#include "compute_int24_kernels.cl"
#include "compute_integer_kernels.cl"
#include "compute_char_kernels.cl"
#include "compute_short_kernels.cl"
    ;
16*1cd03ba3SJeremy Kemp
#ifdef USE_STUB_OPENCL
// Declaration of the C-linkage reset hook that clPeak::runAll() calls before
// touching any OpenCL API when the build defines USE_STUB_OPENCL.
// NOTE(review): presumably provided by a stub OpenCL library -- the definition
// is not in this file.
extern "C"
{
  void stubOpenclReset();
}
#endif
24*1cd03ba3SJeremy Kemp
clPeak()25*1cd03ba3SJeremy Kemp clPeak::clPeak() : forcePlatform(false), forceDevice(false), forceTest(false), useEventTimer(false),
26*1cd03ba3SJeremy Kemp isGlobalBW(true), isComputeHP(true), isComputeSP(true), isComputeDP(true), isComputeIntFast(true), isComputeInt(true),
27*1cd03ba3SJeremy Kemp isComputeChar(true), isComputeShort(true),
28*1cd03ba3SJeremy Kemp isTransferBW(true), isKernelLatency(true),
29*1cd03ba3SJeremy Kemp specifiedPlatform(0), specifiedDevice(0),
30*1cd03ba3SJeremy Kemp forcePlatformName(false), forceDeviceName(false),
31*1cd03ba3SJeremy Kemp specifiedPlatformName(0), specifiedDeviceName(0), specifiedTestName(0)
32*1cd03ba3SJeremy Kemp {
33*1cd03ba3SJeremy Kemp }
34*1cd03ba3SJeremy Kemp
~clPeak()35*1cd03ba3SJeremy Kemp clPeak::~clPeak()
36*1cd03ba3SJeremy Kemp {
37*1cd03ba3SJeremy Kemp if (log)
38*1cd03ba3SJeremy Kemp {
39*1cd03ba3SJeremy Kemp delete log;
40*1cd03ba3SJeremy Kemp }
41*1cd03ba3SJeremy Kemp }
42*1cd03ba3SJeremy Kemp
runAll()43*1cd03ba3SJeremy Kemp int clPeak::runAll()
44*1cd03ba3SJeremy Kemp {
45*1cd03ba3SJeremy Kemp try
46*1cd03ba3SJeremy Kemp {
47*1cd03ba3SJeremy Kemp #ifdef USE_STUB_OPENCL
48*1cd03ba3SJeremy Kemp stubOpenclReset();
49*1cd03ba3SJeremy Kemp #endif
50*1cd03ba3SJeremy Kemp vector<cl::Platform> platforms;
51*1cd03ba3SJeremy Kemp cl::Platform::get(&platforms);
52*1cd03ba3SJeremy Kemp
53*1cd03ba3SJeremy Kemp log->xmlOpenTag("clpeak");
54*1cd03ba3SJeremy Kemp log->xmlAppendAttribs("os", OS_NAME);
55*1cd03ba3SJeremy Kemp for (size_t p = 0; p < platforms.size(); p++)
56*1cd03ba3SJeremy Kemp {
57*1cd03ba3SJeremy Kemp if (forcePlatform && (p != specifiedPlatform))
58*1cd03ba3SJeremy Kemp continue;
59*1cd03ba3SJeremy Kemp
60*1cd03ba3SJeremy Kemp std::string platformName = platforms[p].getInfo<CL_PLATFORM_NAME>();
61*1cd03ba3SJeremy Kemp trimString(platformName);
62*1cd03ba3SJeremy Kemp
63*1cd03ba3SJeremy Kemp if (forcePlatformName && (!strcmp(platformName.c_str(), specifiedPlatformName) == 0))
64*1cd03ba3SJeremy Kemp continue;
65*1cd03ba3SJeremy Kemp
66*1cd03ba3SJeremy Kemp log->print(NEWLINE "Platform: " + platformName + NEWLINE);
67*1cd03ba3SJeremy Kemp log->xmlOpenTag("platform");
68*1cd03ba3SJeremy Kemp log->xmlAppendAttribs("name", platformName);
69*1cd03ba3SJeremy Kemp
70*1cd03ba3SJeremy Kemp cl_context_properties cps[3] = {
71*1cd03ba3SJeremy Kemp CL_CONTEXT_PLATFORM,
72*1cd03ba3SJeremy Kemp (cl_context_properties)(platforms[p])(),
73*1cd03ba3SJeremy Kemp 0};
74*1cd03ba3SJeremy Kemp
75*1cd03ba3SJeremy Kemp cl::Context ctx(CL_DEVICE_TYPE_ALL, cps);
76*1cd03ba3SJeremy Kemp vector<cl::Device> devices = ctx.getInfo<CL_CONTEXT_DEVICES>();
77*1cd03ba3SJeremy Kemp cl::Program::Sources source(1, stringifiedKernels);
78*1cd03ba3SJeremy Kemp cl::Program prog = cl::Program(ctx, source);
79*1cd03ba3SJeremy Kemp
80*1cd03ba3SJeremy Kemp for (size_t d = 0; d < devices.size(); d++)
81*1cd03ba3SJeremy Kemp {
82*1cd03ba3SJeremy Kemp if (forceDevice && (d != specifiedDevice))
83*1cd03ba3SJeremy Kemp continue;
84*1cd03ba3SJeremy Kemp
85*1cd03ba3SJeremy Kemp device_info_t devInfo = getDeviceInfo(devices[d]);
86*1cd03ba3SJeremy Kemp
87*1cd03ba3SJeremy Kemp if (forceDeviceName && (!strcmp(devInfo.deviceName.c_str(), specifiedDeviceName) == 0))
88*1cd03ba3SJeremy Kemp continue;
89*1cd03ba3SJeremy Kemp
90*1cd03ba3SJeremy Kemp log->print(TAB "Device: " + devInfo.deviceName + NEWLINE);
91*1cd03ba3SJeremy Kemp log->print(TAB TAB "Driver version : ");
92*1cd03ba3SJeremy Kemp log->print(devInfo.driverVersion);
93*1cd03ba3SJeremy Kemp log->print(" (" OS_NAME ")" NEWLINE);
94*1cd03ba3SJeremy Kemp log->print(TAB TAB "Compute units : ");
95*1cd03ba3SJeremy Kemp log->print(devInfo.numCUs);
96*1cd03ba3SJeremy Kemp log->print(NEWLINE);
97*1cd03ba3SJeremy Kemp log->print(TAB TAB "Clock frequency : ");
98*1cd03ba3SJeremy Kemp log->print(devInfo.maxClockFreq);
99*1cd03ba3SJeremy Kemp log->print(" MHz" NEWLINE);
100*1cd03ba3SJeremy Kemp log->xmlOpenTag("device");
101*1cd03ba3SJeremy Kemp log->xmlAppendAttribs("name", devInfo.deviceName);
102*1cd03ba3SJeremy Kemp log->xmlAppendAttribs("driver_version", devInfo.driverVersion);
103*1cd03ba3SJeremy Kemp log->xmlAppendAttribs("compute_units", devInfo.numCUs);
104*1cd03ba3SJeremy Kemp log->xmlAppendAttribs("clock_frequency", devInfo.maxClockFreq);
105*1cd03ba3SJeremy Kemp log->xmlAppendAttribs("clock_frequency_unit", "MHz");
106*1cd03ba3SJeremy Kemp
107*1cd03ba3SJeremy Kemp try
108*1cd03ba3SJeremy Kemp {
109*1cd03ba3SJeremy Kemp vector<cl::Device> dev = {devices[d]};
110*1cd03ba3SJeremy Kemp prog.build(dev, BUILD_OPTIONS);
111*1cd03ba3SJeremy Kemp }
112*1cd03ba3SJeremy Kemp catch (cl::Error &error)
113*1cd03ba3SJeremy Kemp {
114*1cd03ba3SJeremy Kemp UNUSED(error);
115*1cd03ba3SJeremy Kemp log->print(TAB TAB "Build Log: " + prog.getBuildInfo<CL_PROGRAM_BUILD_LOG>(devices[d]) + NEWLINE NEWLINE);
116*1cd03ba3SJeremy Kemp continue;
117*1cd03ba3SJeremy Kemp }
118*1cd03ba3SJeremy Kemp
119*1cd03ba3SJeremy Kemp cl::CommandQueue queue = cl::CommandQueue(ctx, devices[d], CL_QUEUE_PROFILING_ENABLE);
120*1cd03ba3SJeremy Kemp
121*1cd03ba3SJeremy Kemp runGlobalBandwidthTest(queue, prog, devInfo);
122*1cd03ba3SJeremy Kemp runComputeSP(queue, prog, devInfo);
123*1cd03ba3SJeremy Kemp runComputeHP(queue, prog, devInfo);
124*1cd03ba3SJeremy Kemp runComputeDP(queue, prog, devInfo);
125*1cd03ba3SJeremy Kemp runComputeInteger(queue, prog, devInfo);
126*1cd03ba3SJeremy Kemp runComputeIntFast(queue, prog, devInfo);
127*1cd03ba3SJeremy Kemp runComputeChar(queue, prog, devInfo);
128*1cd03ba3SJeremy Kemp runComputeShort(queue, prog, devInfo);
129*1cd03ba3SJeremy Kemp runTransferBandwidthTest(queue, prog, devInfo);
130*1cd03ba3SJeremy Kemp runKernelLatency(queue, prog, devInfo);
131*1cd03ba3SJeremy Kemp
132*1cd03ba3SJeremy Kemp log->print(NEWLINE);
133*1cd03ba3SJeremy Kemp log->xmlCloseTag(); // device
134*1cd03ba3SJeremy Kemp }
135*1cd03ba3SJeremy Kemp log->xmlCloseTag(); // platform
136*1cd03ba3SJeremy Kemp }
137*1cd03ba3SJeremy Kemp log->xmlCloseTag(); // clpeak
138*1cd03ba3SJeremy Kemp }
139*1cd03ba3SJeremy Kemp catch (cl::Error &error)
140*1cd03ba3SJeremy Kemp {
141*1cd03ba3SJeremy Kemp stringstream ss;
142*1cd03ba3SJeremy Kemp ss << error.what() << " (" << error.err() << ")" NEWLINE;
143*1cd03ba3SJeremy Kemp
144*1cd03ba3SJeremy Kemp log->print(ss.str());
145*1cd03ba3SJeremy Kemp
146*1cd03ba3SJeremy Kemp // skip error for no platform
147*1cd03ba3SJeremy Kemp if (strcmp(error.what(), "clGetPlatformIDs") == 0)
148*1cd03ba3SJeremy Kemp {
149*1cd03ba3SJeremy Kemp log->print("no platforms found" NEWLINE);
150*1cd03ba3SJeremy Kemp }
151*1cd03ba3SJeremy Kemp else
152*1cd03ba3SJeremy Kemp {
153*1cd03ba3SJeremy Kemp return -1;
154*1cd03ba3SJeremy Kemp }
155*1cd03ba3SJeremy Kemp }
156*1cd03ba3SJeremy Kemp
157*1cd03ba3SJeremy Kemp return 0;
158*1cd03ba3SJeremy Kemp }
159*1cd03ba3SJeremy Kemp
run_kernel(cl::CommandQueue & queue,cl::Kernel & kernel,cl::NDRange & globalSize,cl::NDRange & localSize,uint iters)160*1cd03ba3SJeremy Kemp float clPeak::run_kernel(cl::CommandQueue &queue, cl::Kernel &kernel, cl::NDRange &globalSize, cl::NDRange &localSize, uint iters)
161*1cd03ba3SJeremy Kemp {
162*1cd03ba3SJeremy Kemp float timed = 0;
163*1cd03ba3SJeremy Kemp
164*1cd03ba3SJeremy Kemp // Dummy calls
165*1cd03ba3SJeremy Kemp queue.enqueueNDRangeKernel(kernel, cl::NullRange, globalSize, localSize);
166*1cd03ba3SJeremy Kemp queue.enqueueNDRangeKernel(kernel, cl::NullRange, globalSize, localSize);
167*1cd03ba3SJeremy Kemp queue.finish();
168*1cd03ba3SJeremy Kemp
169*1cd03ba3SJeremy Kemp if (useEventTimer)
170*1cd03ba3SJeremy Kemp {
171*1cd03ba3SJeremy Kemp for (uint i = 0; i < iters; i++)
172*1cd03ba3SJeremy Kemp {
173*1cd03ba3SJeremy Kemp cl::Event timeEvent;
174*1cd03ba3SJeremy Kemp
175*1cd03ba3SJeremy Kemp queue.enqueueNDRangeKernel(kernel, cl::NullRange, globalSize, localSize, NULL, &timeEvent);
176*1cd03ba3SJeremy Kemp queue.finish();
177*1cd03ba3SJeremy Kemp timed += timeInUS(timeEvent);
178*1cd03ba3SJeremy Kemp }
179*1cd03ba3SJeremy Kemp }
180*1cd03ba3SJeremy Kemp else // std timer
181*1cd03ba3SJeremy Kemp {
182*1cd03ba3SJeremy Kemp Timer timer;
183*1cd03ba3SJeremy Kemp
184*1cd03ba3SJeremy Kemp timer.start();
185*1cd03ba3SJeremy Kemp for (uint i = 0; i < iters; i++)
186*1cd03ba3SJeremy Kemp {
187*1cd03ba3SJeremy Kemp queue.enqueueNDRangeKernel(kernel, cl::NullRange, globalSize, localSize);
188*1cd03ba3SJeremy Kemp queue.flush();
189*1cd03ba3SJeremy Kemp }
190*1cd03ba3SJeremy Kemp queue.finish();
191*1cd03ba3SJeremy Kemp timed = timer.stopAndTime();
192*1cd03ba3SJeremy Kemp }
193*1cd03ba3SJeremy Kemp
194*1cd03ba3SJeremy Kemp return (timed / static_cast<float>(iters));
195*1cd03ba3SJeremy Kemp }
196