xref: /aosp_15_r20/external/clpeak/src/clpeak.cpp (revision 1cd03ba3888297bc945f2c84574e105e3ced3e34)
1 #include <clpeak.h>
2 #include <cstring>
3 
// Stringification helper: expands to its entire argument list as one string literal.
4 #define MSTRINGIFY(...) #__VA_ARGS__
5 
// OpenCL program source used by runAll() to create the cl::Program. The
// initializer is whatever the .cl files below expand to when included here.
// NOTE(review): presumably each .cl file wraps its kernel text in
// MSTRINGIFY(...), so the includes yield adjacent string literals that the
// compiler concatenates - confirm against the .cl files.
6 static const std::string stringifiedKernels =
7 #include "global_bandwidth_kernels.cl"
8 #include "compute_sp_kernels.cl"
9 #include "compute_hp_kernels.cl"
10 #include "compute_dp_kernels.cl"
11 #include "compute_int24_kernels.cl"
12 #include "compute_integer_kernels.cl"
13 #include "compute_char_kernels.cl"
14 #include "compute_short_kernels.cl"
15     ;
16 
17 #ifdef USE_STUB_OPENCL
18 // Forward declaration (C linkage) of the hook that runAll() invokes before
// touching any OpenCL API when built with USE_STUB_OPENCL. Its definition is
// not part of the visible portion of this file.
19 extern "C"
20 {
21   void stubOpenclReset();
22 }
23 #endif
24 
clPeak()25 clPeak::clPeak() : forcePlatform(false), forceDevice(false), forceTest(false), useEventTimer(false),
26                    isGlobalBW(true), isComputeHP(true), isComputeSP(true), isComputeDP(true), isComputeIntFast(true), isComputeInt(true),
27                    isComputeChar(true), isComputeShort(true),
28                    isTransferBW(true), isKernelLatency(true),
29                    specifiedPlatform(0), specifiedDevice(0),
30                    forcePlatformName(false), forceDeviceName(false),
31                    specifiedPlatformName(0), specifiedDeviceName(0), specifiedTestName(0)
32 {
33 }
34 
~clPeak()35 clPeak::~clPeak()
36 {
37   if (log)
38   {
39     delete log;
40   }
41 }
42 
runAll()43 int clPeak::runAll()
44 {
45   try
46   {
47 #ifdef USE_STUB_OPENCL
48     stubOpenclReset();
49 #endif
50     vector<cl::Platform> platforms;
51     cl::Platform::get(&platforms);
52 
53     log->xmlOpenTag("clpeak");
54     log->xmlAppendAttribs("os", OS_NAME);
55     for (size_t p = 0; p < platforms.size(); p++)
56     {
57       if (forcePlatform && (p != specifiedPlatform))
58         continue;
59 
60       std::string platformName = platforms[p].getInfo<CL_PLATFORM_NAME>();
61       trimString(platformName);
62 
63       if (forcePlatformName && (!strcmp(platformName.c_str(), specifiedPlatformName) == 0))
64         continue;
65 
66       log->print(NEWLINE "Platform: " + platformName + NEWLINE);
67       log->xmlOpenTag("platform");
68       log->xmlAppendAttribs("name", platformName);
69 
70       cl_context_properties cps[3] = {
71           CL_CONTEXT_PLATFORM,
72           (cl_context_properties)(platforms[p])(),
73           0};
74 
75       cl::Context ctx(CL_DEVICE_TYPE_ALL, cps);
76       vector<cl::Device> devices = ctx.getInfo<CL_CONTEXT_DEVICES>();
77       cl::Program::Sources source(1, stringifiedKernels);
78       cl::Program prog = cl::Program(ctx, source);
79 
80       for (size_t d = 0; d < devices.size(); d++)
81       {
82         if (forceDevice && (d != specifiedDevice))
83           continue;
84 
85         device_info_t devInfo = getDeviceInfo(devices[d]);
86 
87         if (forceDeviceName && (!strcmp(devInfo.deviceName.c_str(), specifiedDeviceName) == 0))
88           continue;
89 
90         log->print(TAB "Device: " + devInfo.deviceName + NEWLINE);
91         log->print(TAB TAB "Driver version  : ");
92         log->print(devInfo.driverVersion);
93         log->print(" (" OS_NAME ")" NEWLINE);
94         log->print(TAB TAB "Compute units   : ");
95         log->print(devInfo.numCUs);
96         log->print(NEWLINE);
97         log->print(TAB TAB "Clock frequency : ");
98         log->print(devInfo.maxClockFreq);
99         log->print(" MHz" NEWLINE);
100         log->xmlOpenTag("device");
101         log->xmlAppendAttribs("name", devInfo.deviceName);
102         log->xmlAppendAttribs("driver_version", devInfo.driverVersion);
103         log->xmlAppendAttribs("compute_units", devInfo.numCUs);
104         log->xmlAppendAttribs("clock_frequency", devInfo.maxClockFreq);
105         log->xmlAppendAttribs("clock_frequency_unit", "MHz");
106 
107         try
108         {
109           vector<cl::Device> dev = {devices[d]};
110           prog.build(dev, BUILD_OPTIONS);
111         }
112         catch (cl::Error &error)
113         {
114           UNUSED(error);
115           log->print(TAB TAB "Build Log: " + prog.getBuildInfo<CL_PROGRAM_BUILD_LOG>(devices[d]) + NEWLINE NEWLINE);
116           continue;
117         }
118 
119         cl::CommandQueue queue = cl::CommandQueue(ctx, devices[d], CL_QUEUE_PROFILING_ENABLE);
120 
121         runGlobalBandwidthTest(queue, prog, devInfo);
122         runComputeSP(queue, prog, devInfo);
123         runComputeHP(queue, prog, devInfo);
124         runComputeDP(queue, prog, devInfo);
125         runComputeInteger(queue, prog, devInfo);
126         runComputeIntFast(queue, prog, devInfo);
127         runComputeChar(queue, prog, devInfo);
128         runComputeShort(queue, prog, devInfo);
129         runTransferBandwidthTest(queue, prog, devInfo);
130         runKernelLatency(queue, prog, devInfo);
131 
132         log->print(NEWLINE);
133         log->xmlCloseTag(); // device
134       }
135       log->xmlCloseTag(); // platform
136     }
137     log->xmlCloseTag(); // clpeak
138   }
139   catch (cl::Error &error)
140   {
141     stringstream ss;
142     ss << error.what() << " (" << error.err() << ")" NEWLINE;
143 
144     log->print(ss.str());
145 
146     // skip error for no platform
147     if (strcmp(error.what(), "clGetPlatformIDs") == 0)
148     {
149       log->print("no platforms found" NEWLINE);
150     }
151     else
152     {
153       return -1;
154     }
155   }
156 
157   return 0;
158 }
159 
run_kernel(cl::CommandQueue & queue,cl::Kernel & kernel,cl::NDRange & globalSize,cl::NDRange & localSize,uint iters)160 float clPeak::run_kernel(cl::CommandQueue &queue, cl::Kernel &kernel, cl::NDRange &globalSize, cl::NDRange &localSize, uint iters)
161 {
162   float timed = 0;
163 
164   // Dummy calls
165   queue.enqueueNDRangeKernel(kernel, cl::NullRange, globalSize, localSize);
166   queue.enqueueNDRangeKernel(kernel, cl::NullRange, globalSize, localSize);
167   queue.finish();
168 
169   if (useEventTimer)
170   {
171     for (uint i = 0; i < iters; i++)
172     {
173       cl::Event timeEvent;
174 
175       queue.enqueueNDRangeKernel(kernel, cl::NullRange, globalSize, localSize, NULL, &timeEvent);
176       queue.finish();
177       timed += timeInUS(timeEvent);
178     }
179   }
180   else // std timer
181   {
182     Timer timer;
183 
184     timer.start();
185     for (uint i = 0; i < iters; i++)
186     {
187       queue.enqueueNDRangeKernel(kernel, cl::NullRange, globalSize, localSize);
188       queue.flush();
189     }
190     queue.finish();
191     timed = timer.stopAndTime();
192   }
193 
194   return (timed / static_cast<float>(iters));
195 }
196