1 #include <clpeak.h>
2
// Scale factor between buffer elements and work-items in runKernelLatency:
// buffers hold maxWGSize * numCUs * FETCH_PER_WI floats, while the launch uses
// numItems / FETCH_PER_WI work-items.
// NOTE(review): presumably the number of elements each work-item fetches in the
// kernel — confirm against the kernel source.
3 #define FETCH_PER_WI 16
4
runKernelLatency(cl::CommandQueue & queue,cl::Program & prog,device_info_t & devInfo)5 int clPeak::runKernelLatency(cl::CommandQueue &queue, cl::Program &prog, device_info_t &devInfo)
6 {
7 if (!isKernelLatency)
8 return 0;
9
10 cl::Context ctx = queue.getInfo<CL_QUEUE_CONTEXT>();
11 cl_uint numItems = (devInfo.maxWGSize) * (devInfo.numCUs) * FETCH_PER_WI;
12 cl::NDRange globalSize = (numItems / FETCH_PER_WI);
13 cl::NDRange localSize = devInfo.maxWGSize;
14 uint iters = devInfo.kernelLatencyIters;
15 float latency;
16
17 try
18 {
19 log->print(NEWLINE TAB TAB "Kernel launch latency : ");
20 log->xmlOpenTag("kernel_launch_latency");
21 log->xmlAppendAttribs("unit", "us");
22
23 cl::Buffer inputBuf = cl::Buffer(ctx, CL_MEM_READ_ONLY, (numItems * sizeof(float)));
24 cl::Buffer outputBuf = cl::Buffer(ctx, CL_MEM_WRITE_ONLY, (numItems * sizeof(float)));
25
26 cl::Kernel kernel_v1(prog, "global_bandwidth_v1_local_offset");
27 kernel_v1.setArg(0, inputBuf), kernel_v1.setArg(1, outputBuf);
28
29 // Dummy calls
30 queue.enqueueNDRangeKernel(kernel_v1, cl::NullRange, globalSize, localSize);
31 queue.enqueueNDRangeKernel(kernel_v1, cl::NullRange, globalSize, localSize);
32 queue.finish();
33
34 latency = 0;
35 for (uint i = 0; i < iters; i++)
36 {
37 cl::Event timeEvent;
38 queue.enqueueNDRangeKernel(kernel_v1, cl::NullRange, globalSize, localSize, NULL, &timeEvent);
39 queue.finish();
40 cl_ulong start = timeEvent.getProfilingInfo<CL_PROFILING_COMMAND_QUEUED>() / 1000;
41 cl_ulong end = timeEvent.getProfilingInfo<CL_PROFILING_COMMAND_START>() / 1000;
42 latency += (float)((int)end - (int)start);
43 }
44 latency /= static_cast<float>(iters);
45
46 log->print(latency);
47 log->print(" us" NEWLINE);
48 log->xmlSetContent(latency);
49 log->xmlCloseTag();
50 }
51 catch (cl::Error &error)
52 {
53 stringstream ss;
54 ss << error.what() << " (" << error.err() << ")" NEWLINE
55 << TAB TAB TAB "Tests skipped" NEWLINE;
56 log->print(ss.str());
57 return -1;
58 }
59
60 return 0;
61 }
62