xref: /aosp_15_r20/external/clpeak/src/common.cpp (revision 1cd03ba3888297bc945f2c84574e105e3ced3e34)
1 #include <common.h>
2 #include <math.h>
3 #include <iostream>
4 #include <string>
5 
6 using namespace std;
7 
getDeviceInfo(cl::Device & d)8 device_info_t getDeviceInfo(cl::Device &d)
9 {
10   device_info_t devInfo;
11 
12   devInfo.deviceName = d.getInfo<CL_DEVICE_NAME>();
13   devInfo.driverVersion = d.getInfo<CL_DRIVER_VERSION>();
14   trimString(devInfo.deviceName);
15   trimString(devInfo.driverVersion);
16 
17   devInfo.numCUs = (uint)d.getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>();
18   vector<size_t> maxWIPerDim;
19   maxWIPerDim = d.getInfo<CL_DEVICE_MAX_WORK_ITEM_SIZES>();
20   devInfo.maxWGSize = (uint)maxWIPerDim[0];
21 
22   // Limiting max work-group size to 256
23 #define MAX_WG_SIZE 256
24   devInfo.maxWGSize = std::min(devInfo.maxWGSize, (uint)MAX_WG_SIZE);
25 
26   // FIXME limit max-workgroup size for qualcomm platform to 128
27   // Kernel launch fails for workgroup size 256(CL_DEVICE_MAX_WORK_ITEM_SIZES)
28   string vendor = d.getInfo<CL_DEVICE_VENDOR>();
29   if ((vendor.find("QUALCOMM") != std::string::npos) ||
30       (vendor.find("qualcomm") != std::string::npos))
31   {
32     devInfo.maxWGSize = std::min(devInfo.maxWGSize, (uint)128);
33   }
34 
35   devInfo.maxAllocSize = static_cast<uint64_t>(d.getInfo<CL_DEVICE_MAX_MEM_ALLOC_SIZE>());
36   devInfo.maxGlobalSize = static_cast<uint64_t>(d.getInfo<CL_DEVICE_GLOBAL_MEM_SIZE>());
37   devInfo.maxClockFreq = static_cast<uint>(d.getInfo<CL_DEVICE_MAX_CLOCK_FREQUENCY>());
38   devInfo.doubleSupported = false;
39   devInfo.halfSupported = false;
40 
41   std::string extns = d.getInfo<CL_DEVICE_EXTENSIONS>();
42 
43   if ((extns.find("cl_khr_fp16") != std::string::npos))
44     devInfo.halfSupported = true;
45 
46   if ((extns.find("cl_khr_fp64") != std::string::npos) || (extns.find("cl_amd_fp64") != std::string::npos))
47     devInfo.doubleSupported = true;
48 
49   devInfo.deviceType = d.getInfo<CL_DEVICE_TYPE>();
50 
51   if (devInfo.deviceType & CL_DEVICE_TYPE_CPU)
52   {
53     devInfo.gloalBWIters = 20;
54     devInfo.globalBWMaxSize = 1 << 27;
55     devInfo.computeWgsPerCU = 512;
56     devInfo.computeDPWgsPerCU = 256;
57     devInfo.computeIters = 10;
58     devInfo.transferBWMaxSize = 1 << 27;
59   }
60   else
61   { // GPU
62     devInfo.gloalBWIters = 50;
63     devInfo.globalBWMaxSize = 1 << 29;
64     devInfo.computeWgsPerCU = 2048;
65     devInfo.computeDPWgsPerCU = 512;
66     devInfo.computeIters = 30;
67     devInfo.transferBWMaxSize = 1 << 29;
68   }
69   devInfo.transferBWIters = 20;
70   devInfo.kernelLatencyIters = 20000;
71 
72   return devInfo;
73 }
74 
timeInUS(cl::Event & timeEvent)75 float timeInUS(cl::Event &timeEvent)
76 {
77   cl_ulong start = timeEvent.getProfilingInfo<CL_PROFILING_COMMAND_START>() / 1000;
78   cl_ulong end = timeEvent.getProfilingInfo<CL_PROFILING_COMMAND_END>() / 1000;
79 
80   return (float)((int)end - (int)start);
81 }
82 
start()83 void Timer::start()
84 {
85   tick = chrono::high_resolution_clock::now();
86 }
87 
stopAndTime()88 float Timer::stopAndTime()
89 {
90   tock = chrono::high_resolution_clock::now();
91   return (float)(chrono::duration_cast<chrono::microseconds>(tock - tick).count());
92 }
93 
/**
 * Fills ptr[0 .. N) so that every element holds its own index as a float.
 *
 * The C random generator is reseeded from the wall clock on every call even
 * though no random values are written here.
 *
 * @param ptr  Destination buffer with room for at least N floats.
 * @param N    Number of elements to write.
 */
void populate(float *ptr, uint64_t N)
{
  srand(static_cast<unsigned int>(time(NULL)));

  uint64_t idx = 0;
  while (idx < N)
  {
    ptr[idx] = static_cast<float>(idx);
    ++idx;
  }
}
104 
/**
 * Fills ptr[0 .. N) so that every element holds its own index as a double.
 *
 * The C random generator is reseeded from the wall clock on every call even
 * though no random values are written here.
 *
 * @param ptr  Destination buffer with room for at least N doubles.
 * @param N    Number of elements to write.
 */
void populate(double *ptr, uint64_t N)
{
  srand(static_cast<unsigned int>(time(NULL)));

  uint64_t idx = 0;
  while (idx < N)
  {
    ptr[idx] = static_cast<double>(idx);
    ++idx;
  }
}
115 
/**
 * Clamps number to maxValue and rounds the result DOWN to a multiple of base.
 *
 * @param number    Value to round.
 * @param base      Granularity of the result. A base of 0 yields 0 (the only
 *                  multiple of zero) instead of dividing by zero.
 * @param maxValue  Upper bound applied to number before rounding.
 * @return          Largest multiple of base that is <= min(number, maxValue).
 */
uint64_t roundToMultipleOf(uint64_t number, uint64_t base, uint64_t maxValue)
{
  if (base == 0)
  {
    return 0;
  }

  const uint64_t clamped = (number > maxValue) ? maxValue : number;
  return clamped - (clamped % base);
}
121 
/**
 * Truncates str at its first embedded NUL character, dropping the NUL and
 * everything after it. Strings without a NUL are left unchanged.
 *
 * @param str  String to truncate in place.
 */
void trimString(std::string &str)
{
  const std::string::size_type nulAt = str.find('\0');
  if (nulAt == std::string::npos)
  {
    return;
  }

  str.resize(nulAt);
}
131