xref: /aosp_15_r20/external/clpeak/src/kernel_latency.cpp (revision 1cd03ba3888297bc945f2c84574e105e3ced3e34)
1 #include <clpeak.h>
2 
3 #define FETCH_PER_WI 16
4 
runKernelLatency(cl::CommandQueue & queue,cl::Program & prog,device_info_t & devInfo)5 int clPeak::runKernelLatency(cl::CommandQueue &queue, cl::Program &prog, device_info_t &devInfo)
6 {
7   if (!isKernelLatency)
8     return 0;
9 
10   cl::Context ctx = queue.getInfo<CL_QUEUE_CONTEXT>();
11   cl_uint numItems = (devInfo.maxWGSize) * (devInfo.numCUs) * FETCH_PER_WI;
12   cl::NDRange globalSize = (numItems / FETCH_PER_WI);
13   cl::NDRange localSize = devInfo.maxWGSize;
14   uint iters = devInfo.kernelLatencyIters;
15   float latency;
16 
17   try
18   {
19     log->print(NEWLINE TAB TAB "Kernel launch latency : ");
20     log->xmlOpenTag("kernel_launch_latency");
21     log->xmlAppendAttribs("unit", "us");
22 
23     cl::Buffer inputBuf = cl::Buffer(ctx, CL_MEM_READ_ONLY, (numItems * sizeof(float)));
24     cl::Buffer outputBuf = cl::Buffer(ctx, CL_MEM_WRITE_ONLY, (numItems * sizeof(float)));
25 
26     cl::Kernel kernel_v1(prog, "global_bandwidth_v1_local_offset");
27     kernel_v1.setArg(0, inputBuf), kernel_v1.setArg(1, outputBuf);
28 
29     // Dummy calls
30     queue.enqueueNDRangeKernel(kernel_v1, cl::NullRange, globalSize, localSize);
31     queue.enqueueNDRangeKernel(kernel_v1, cl::NullRange, globalSize, localSize);
32     queue.finish();
33 
34     latency = 0;
35     for (uint i = 0; i < iters; i++)
36     {
37       cl::Event timeEvent;
38       queue.enqueueNDRangeKernel(kernel_v1, cl::NullRange, globalSize, localSize, NULL, &timeEvent);
39       queue.finish();
40       cl_ulong start = timeEvent.getProfilingInfo<CL_PROFILING_COMMAND_QUEUED>() / 1000;
41       cl_ulong end = timeEvent.getProfilingInfo<CL_PROFILING_COMMAND_START>() / 1000;
42       latency += (float)((int)end - (int)start);
43     }
44     latency /= static_cast<float>(iters);
45 
46     log->print(latency);
47     log->print(" us" NEWLINE);
48     log->xmlSetContent(latency);
49     log->xmlCloseTag();
50   }
51   catch (cl::Error &error)
52   {
53     stringstream ss;
54     ss << error.what() << " (" << error.err() << ")" NEWLINE
55        << TAB TAB TAB "Tests skipped" NEWLINE;
56     log->print(ss.str());
57     return -1;
58   }
59 
60   return 0;
61 }
62