xref: /aosp_15_r20/external/clpeak/src/clpeak.cpp (revision 1cd03ba3888297bc945f2c84574e105e3ced3e34)
1*1cd03ba3SJeremy Kemp #include <clpeak.h>
2*1cd03ba3SJeremy Kemp #include <cstring>
3*1cd03ba3SJeremy Kemp 
// Turns its (variadic) argument list into a single string literal.
#define MSTRINGIFY(...) #__VA_ARGS__

// OpenCL source for all benchmark kernels, assembled at compile time by
// splicing the .cl files directly into this initializer expression.
// Presumably each .cl file wraps its contents in MSTRINGIFY(...) so that the
// includes expand to adjacent string literals -- confirm against the .cl files.
// The resulting string is handed to cl::Program::Sources in clPeak::runAll().
static const std::string stringifiedKernels =
#include "global_bandwidth_kernels.cl"
#include "compute_sp_kernels.cl"
#include "compute_hp_kernels.cl"
#include "compute_dp_kernels.cl"
#include "compute_int24_kernels.cl"
#include "compute_integer_kernels.cl"
#include "compute_char_kernels.cl"
#include "compute_short_kernels.cl"
    ; // terminates the stringifiedKernels definition
16*1cd03ba3SJeremy Kemp 
#ifdef USE_STUB_OPENCL
// Prototype of the stub OpenCL reset hook. It is declared with C linkage and
// only when USE_STUB_OPENCL is defined; clPeak::runAll() calls it before
// querying platforms. Its definition is not part of this file.
extern "C"
{
  void stubOpenclReset();
}
#endif
24*1cd03ba3SJeremy Kemp 
clPeak()25*1cd03ba3SJeremy Kemp clPeak::clPeak() : forcePlatform(false), forceDevice(false), forceTest(false), useEventTimer(false),
26*1cd03ba3SJeremy Kemp                    isGlobalBW(true), isComputeHP(true), isComputeSP(true), isComputeDP(true), isComputeIntFast(true), isComputeInt(true),
27*1cd03ba3SJeremy Kemp                    isComputeChar(true), isComputeShort(true),
28*1cd03ba3SJeremy Kemp                    isTransferBW(true), isKernelLatency(true),
29*1cd03ba3SJeremy Kemp                    specifiedPlatform(0), specifiedDevice(0),
30*1cd03ba3SJeremy Kemp                    forcePlatformName(false), forceDeviceName(false),
31*1cd03ba3SJeremy Kemp                    specifiedPlatformName(0), specifiedDeviceName(0), specifiedTestName(0)
32*1cd03ba3SJeremy Kemp {
33*1cd03ba3SJeremy Kemp }
34*1cd03ba3SJeremy Kemp 
~clPeak()35*1cd03ba3SJeremy Kemp clPeak::~clPeak()
36*1cd03ba3SJeremy Kemp {
37*1cd03ba3SJeremy Kemp   if (log)
38*1cd03ba3SJeremy Kemp   {
39*1cd03ba3SJeremy Kemp     delete log;
40*1cd03ba3SJeremy Kemp   }
41*1cd03ba3SJeremy Kemp }
42*1cd03ba3SJeremy Kemp 
runAll()43*1cd03ba3SJeremy Kemp int clPeak::runAll()
44*1cd03ba3SJeremy Kemp {
45*1cd03ba3SJeremy Kemp   try
46*1cd03ba3SJeremy Kemp   {
47*1cd03ba3SJeremy Kemp #ifdef USE_STUB_OPENCL
48*1cd03ba3SJeremy Kemp     stubOpenclReset();
49*1cd03ba3SJeremy Kemp #endif
50*1cd03ba3SJeremy Kemp     vector<cl::Platform> platforms;
51*1cd03ba3SJeremy Kemp     cl::Platform::get(&platforms);
52*1cd03ba3SJeremy Kemp 
53*1cd03ba3SJeremy Kemp     log->xmlOpenTag("clpeak");
54*1cd03ba3SJeremy Kemp     log->xmlAppendAttribs("os", OS_NAME);
55*1cd03ba3SJeremy Kemp     for (size_t p = 0; p < platforms.size(); p++)
56*1cd03ba3SJeremy Kemp     {
57*1cd03ba3SJeremy Kemp       if (forcePlatform && (p != specifiedPlatform))
58*1cd03ba3SJeremy Kemp         continue;
59*1cd03ba3SJeremy Kemp 
60*1cd03ba3SJeremy Kemp       std::string platformName = platforms[p].getInfo<CL_PLATFORM_NAME>();
61*1cd03ba3SJeremy Kemp       trimString(platformName);
62*1cd03ba3SJeremy Kemp 
63*1cd03ba3SJeremy Kemp       if (forcePlatformName && (!strcmp(platformName.c_str(), specifiedPlatformName) == 0))
64*1cd03ba3SJeremy Kemp         continue;
65*1cd03ba3SJeremy Kemp 
66*1cd03ba3SJeremy Kemp       log->print(NEWLINE "Platform: " + platformName + NEWLINE);
67*1cd03ba3SJeremy Kemp       log->xmlOpenTag("platform");
68*1cd03ba3SJeremy Kemp       log->xmlAppendAttribs("name", platformName);
69*1cd03ba3SJeremy Kemp 
70*1cd03ba3SJeremy Kemp       cl_context_properties cps[3] = {
71*1cd03ba3SJeremy Kemp           CL_CONTEXT_PLATFORM,
72*1cd03ba3SJeremy Kemp           (cl_context_properties)(platforms[p])(),
73*1cd03ba3SJeremy Kemp           0};
74*1cd03ba3SJeremy Kemp 
75*1cd03ba3SJeremy Kemp       cl::Context ctx(CL_DEVICE_TYPE_ALL, cps);
76*1cd03ba3SJeremy Kemp       vector<cl::Device> devices = ctx.getInfo<CL_CONTEXT_DEVICES>();
77*1cd03ba3SJeremy Kemp       cl::Program::Sources source(1, stringifiedKernels);
78*1cd03ba3SJeremy Kemp       cl::Program prog = cl::Program(ctx, source);
79*1cd03ba3SJeremy Kemp 
80*1cd03ba3SJeremy Kemp       for (size_t d = 0; d < devices.size(); d++)
81*1cd03ba3SJeremy Kemp       {
82*1cd03ba3SJeremy Kemp         if (forceDevice && (d != specifiedDevice))
83*1cd03ba3SJeremy Kemp           continue;
84*1cd03ba3SJeremy Kemp 
85*1cd03ba3SJeremy Kemp         device_info_t devInfo = getDeviceInfo(devices[d]);
86*1cd03ba3SJeremy Kemp 
87*1cd03ba3SJeremy Kemp         if (forceDeviceName && (!strcmp(devInfo.deviceName.c_str(), specifiedDeviceName) == 0))
88*1cd03ba3SJeremy Kemp           continue;
89*1cd03ba3SJeremy Kemp 
90*1cd03ba3SJeremy Kemp         log->print(TAB "Device: " + devInfo.deviceName + NEWLINE);
91*1cd03ba3SJeremy Kemp         log->print(TAB TAB "Driver version  : ");
92*1cd03ba3SJeremy Kemp         log->print(devInfo.driverVersion);
93*1cd03ba3SJeremy Kemp         log->print(" (" OS_NAME ")" NEWLINE);
94*1cd03ba3SJeremy Kemp         log->print(TAB TAB "Compute units   : ");
95*1cd03ba3SJeremy Kemp         log->print(devInfo.numCUs);
96*1cd03ba3SJeremy Kemp         log->print(NEWLINE);
97*1cd03ba3SJeremy Kemp         log->print(TAB TAB "Clock frequency : ");
98*1cd03ba3SJeremy Kemp         log->print(devInfo.maxClockFreq);
99*1cd03ba3SJeremy Kemp         log->print(" MHz" NEWLINE);
100*1cd03ba3SJeremy Kemp         log->xmlOpenTag("device");
101*1cd03ba3SJeremy Kemp         log->xmlAppendAttribs("name", devInfo.deviceName);
102*1cd03ba3SJeremy Kemp         log->xmlAppendAttribs("driver_version", devInfo.driverVersion);
103*1cd03ba3SJeremy Kemp         log->xmlAppendAttribs("compute_units", devInfo.numCUs);
104*1cd03ba3SJeremy Kemp         log->xmlAppendAttribs("clock_frequency", devInfo.maxClockFreq);
105*1cd03ba3SJeremy Kemp         log->xmlAppendAttribs("clock_frequency_unit", "MHz");
106*1cd03ba3SJeremy Kemp 
107*1cd03ba3SJeremy Kemp         try
108*1cd03ba3SJeremy Kemp         {
109*1cd03ba3SJeremy Kemp           vector<cl::Device> dev = {devices[d]};
110*1cd03ba3SJeremy Kemp           prog.build(dev, BUILD_OPTIONS);
111*1cd03ba3SJeremy Kemp         }
112*1cd03ba3SJeremy Kemp         catch (cl::Error &error)
113*1cd03ba3SJeremy Kemp         {
114*1cd03ba3SJeremy Kemp           UNUSED(error);
115*1cd03ba3SJeremy Kemp           log->print(TAB TAB "Build Log: " + prog.getBuildInfo<CL_PROGRAM_BUILD_LOG>(devices[d]) + NEWLINE NEWLINE);
116*1cd03ba3SJeremy Kemp           continue;
117*1cd03ba3SJeremy Kemp         }
118*1cd03ba3SJeremy Kemp 
119*1cd03ba3SJeremy Kemp         cl::CommandQueue queue = cl::CommandQueue(ctx, devices[d], CL_QUEUE_PROFILING_ENABLE);
120*1cd03ba3SJeremy Kemp 
121*1cd03ba3SJeremy Kemp         runGlobalBandwidthTest(queue, prog, devInfo);
122*1cd03ba3SJeremy Kemp         runComputeSP(queue, prog, devInfo);
123*1cd03ba3SJeremy Kemp         runComputeHP(queue, prog, devInfo);
124*1cd03ba3SJeremy Kemp         runComputeDP(queue, prog, devInfo);
125*1cd03ba3SJeremy Kemp         runComputeInteger(queue, prog, devInfo);
126*1cd03ba3SJeremy Kemp         runComputeIntFast(queue, prog, devInfo);
127*1cd03ba3SJeremy Kemp         runComputeChar(queue, prog, devInfo);
128*1cd03ba3SJeremy Kemp         runComputeShort(queue, prog, devInfo);
129*1cd03ba3SJeremy Kemp         runTransferBandwidthTest(queue, prog, devInfo);
130*1cd03ba3SJeremy Kemp         runKernelLatency(queue, prog, devInfo);
131*1cd03ba3SJeremy Kemp 
132*1cd03ba3SJeremy Kemp         log->print(NEWLINE);
133*1cd03ba3SJeremy Kemp         log->xmlCloseTag(); // device
134*1cd03ba3SJeremy Kemp       }
135*1cd03ba3SJeremy Kemp       log->xmlCloseTag(); // platform
136*1cd03ba3SJeremy Kemp     }
137*1cd03ba3SJeremy Kemp     log->xmlCloseTag(); // clpeak
138*1cd03ba3SJeremy Kemp   }
139*1cd03ba3SJeremy Kemp   catch (cl::Error &error)
140*1cd03ba3SJeremy Kemp   {
141*1cd03ba3SJeremy Kemp     stringstream ss;
142*1cd03ba3SJeremy Kemp     ss << error.what() << " (" << error.err() << ")" NEWLINE;
143*1cd03ba3SJeremy Kemp 
144*1cd03ba3SJeremy Kemp     log->print(ss.str());
145*1cd03ba3SJeremy Kemp 
146*1cd03ba3SJeremy Kemp     // skip error for no platform
147*1cd03ba3SJeremy Kemp     if (strcmp(error.what(), "clGetPlatformIDs") == 0)
148*1cd03ba3SJeremy Kemp     {
149*1cd03ba3SJeremy Kemp       log->print("no platforms found" NEWLINE);
150*1cd03ba3SJeremy Kemp     }
151*1cd03ba3SJeremy Kemp     else
152*1cd03ba3SJeremy Kemp     {
153*1cd03ba3SJeremy Kemp       return -1;
154*1cd03ba3SJeremy Kemp     }
155*1cd03ba3SJeremy Kemp   }
156*1cd03ba3SJeremy Kemp 
157*1cd03ba3SJeremy Kemp   return 0;
158*1cd03ba3SJeremy Kemp }
159*1cd03ba3SJeremy Kemp 
run_kernel(cl::CommandQueue & queue,cl::Kernel & kernel,cl::NDRange & globalSize,cl::NDRange & localSize,uint iters)160*1cd03ba3SJeremy Kemp float clPeak::run_kernel(cl::CommandQueue &queue, cl::Kernel &kernel, cl::NDRange &globalSize, cl::NDRange &localSize, uint iters)
161*1cd03ba3SJeremy Kemp {
162*1cd03ba3SJeremy Kemp   float timed = 0;
163*1cd03ba3SJeremy Kemp 
164*1cd03ba3SJeremy Kemp   // Dummy calls
165*1cd03ba3SJeremy Kemp   queue.enqueueNDRangeKernel(kernel, cl::NullRange, globalSize, localSize);
166*1cd03ba3SJeremy Kemp   queue.enqueueNDRangeKernel(kernel, cl::NullRange, globalSize, localSize);
167*1cd03ba3SJeremy Kemp   queue.finish();
168*1cd03ba3SJeremy Kemp 
169*1cd03ba3SJeremy Kemp   if (useEventTimer)
170*1cd03ba3SJeremy Kemp   {
171*1cd03ba3SJeremy Kemp     for (uint i = 0; i < iters; i++)
172*1cd03ba3SJeremy Kemp     {
173*1cd03ba3SJeremy Kemp       cl::Event timeEvent;
174*1cd03ba3SJeremy Kemp 
175*1cd03ba3SJeremy Kemp       queue.enqueueNDRangeKernel(kernel, cl::NullRange, globalSize, localSize, NULL, &timeEvent);
176*1cd03ba3SJeremy Kemp       queue.finish();
177*1cd03ba3SJeremy Kemp       timed += timeInUS(timeEvent);
178*1cd03ba3SJeremy Kemp     }
179*1cd03ba3SJeremy Kemp   }
180*1cd03ba3SJeremy Kemp   else // std timer
181*1cd03ba3SJeremy Kemp   {
182*1cd03ba3SJeremy Kemp     Timer timer;
183*1cd03ba3SJeremy Kemp 
184*1cd03ba3SJeremy Kemp     timer.start();
185*1cd03ba3SJeremy Kemp     for (uint i = 0; i < iters; i++)
186*1cd03ba3SJeremy Kemp     {
187*1cd03ba3SJeremy Kemp       queue.enqueueNDRangeKernel(kernel, cl::NullRange, globalSize, localSize);
188*1cd03ba3SJeremy Kemp       queue.flush();
189*1cd03ba3SJeremy Kemp     }
190*1cd03ba3SJeremy Kemp     queue.finish();
191*1cd03ba3SJeremy Kemp     timed = timer.stopAndTime();
192*1cd03ba3SJeremy Kemp   }
193*1cd03ba3SJeremy Kemp 
194*1cd03ba3SJeremy Kemp   return (timed / static_cast<float>(iters));
195*1cd03ba3SJeremy Kemp }
196