xref: /aosp_15_r20/external/armnn/src/backends/cl/OpenClTimer.cpp (revision 89c4ff92f2867872bb9e2354d150bf0c8c502810)
1 //
2 // Copyright © 2017 Arm Ltd. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 
6 #include "OpenClTimer.hpp"
7 
8 #include <armnn/utility/IgnoreUnused.hpp>
9 
10 #include <string>
11 #include <sstream>
12 
13 
14 namespace armnn
15 {
16 
OpenClTimer()17 OpenClTimer::OpenClTimer()
18 {
19 }
20 
Start()21 void OpenClTimer::Start()
22 {
23     m_Kernels.clear();
24 
25     auto interceptor = [this](  cl_command_queue command_queue,
26                                 cl_kernel        kernel,
27                                 cl_uint          work_dim,
28                                 const size_t    *gwo,
29                                 const size_t    *gws,
30                                 const size_t    *lws,
31                                 cl_uint          num_events_in_wait_list,
32                                 const cl_event * event_wait_list,
33                                 cl_event *       event)
34         {
35             IgnoreUnused(event);
36             cl_int retVal = 0;
37 
38             // Get the name of the kernel
39             cl::Kernel retainedKernel(kernel, true);
40             std::stringstream ss;
41             ss << retainedKernel.getInfo<CL_KERNEL_FUNCTION_NAME>();
42 
43             // Embed workgroup sizes into the name
44             if(gws != nullptr)
45             {
46                 ss << " GWS[" << gws[0] << "," << gws[1] << "," << gws[2] << "]";
47             }
48             if(lws != nullptr)
49             {
50                 ss << " LWS[" << lws[0] << "," << lws[1] << "," << lws[2] << "]";
51             }
52 
53             cl_event customEvent;
54 
55             // Forward to original OpenCl function
56             retVal = m_OriginalEnqueueFunction( command_queue,
57                                                 kernel,
58                                                 work_dim,
59                                                 gwo,
60                                                 gws,
61                                                 lws,
62                                                 num_events_in_wait_list,
63                                                 event_wait_list,
64                                                 &customEvent);
65 
66             // Store the Kernel info for later GetMeasurements() call
67             m_Kernels.emplace_back(ss.str(), customEvent);
68 
69             if(event != nullptr)
70             {
71                 //return cl_event from the intercepted call
72                 clRetainEvent(customEvent);
73                 *event = customEvent;
74             }
75 
76             return retVal;
77         };
78 
79     m_OriginalEnqueueFunction = CLSymbols::get().clEnqueueNDRangeKernel_ptr;
80     CLSymbols::get().clEnqueueNDRangeKernel_ptr = interceptor;
81 }
82 
Stop()83 void OpenClTimer::Stop()
84 {
85     CLSymbols::get().clEnqueueNDRangeKernel_ptr = m_OriginalEnqueueFunction;
86 }
87 
HasKernelMeasurements() const88 bool OpenClTimer::HasKernelMeasurements() const
89 {
90     return m_Kernels.size() > 0;
91 }
92 
GetMeasurements() const93 std::vector<Measurement> OpenClTimer::GetMeasurements() const
94 {
95     std::vector<Measurement> measurements;
96 
97     cl_command_queue_properties clQueueProperties = CLScheduler::get().queue().getInfo<CL_QUEUE_PROPERTIES>();
98 
99     int idx = 0;
100     for (auto& kernel : m_Kernels)
101     {
102         std::string name = std::string(this->GetName()) + "/" + std::to_string(idx++) + ": " + kernel.m_Name;
103 
104         double timeUs = 0.0;
105         if((clQueueProperties & CL_QUEUE_PROFILING_ENABLE) != 0)
106         {
107             // Wait for the event to finish before accessing profile results.
108             kernel.m_Event.wait();
109 
110             cl_ulong start = kernel.m_Event.getProfilingInfo<CL_PROFILING_COMMAND_START>();
111             cl_ulong end   = kernel.m_Event.getProfilingInfo<CL_PROFILING_COMMAND_END>();
112             timeUs = static_cast<double>(end - start) / 1000.0;
113         }
114 
115         measurements.emplace_back(name, timeUs, Measurement::Unit::TIME_US);
116     }
117 
118     return measurements;
119 }
120 
121 } //namespace armnn
122