xref: /aosp_15_r20/external/mesa3d/src/tool/pps/pps_datasource.cc (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2019-2021 Collabora, Ltd.
3  * Author: Antonio Caggiano <[email protected]>
4  * Author: Rohan Garg <[email protected]>
5  * Author: Robert Beckett <[email protected]>
6  * Author: Corentin Noël <[email protected]>
7  *
8  * SPDX-License-Identifier: MIT
9  */
10 
11 #include "pps_datasource.h"
12 #include "pps_driver.h"
13 
14 #include <condition_variable>
15 #include <thread>
16 #include <variant>
17 #include <inttypes.h>
18 
19 // Minimum supported sampling period in nanoseconds
20 #define MIN_SAMPLING_PERIOD_NS 50000
21 
22 #define CORRELATION_TIMESTAMP_PERIOD (1000000000ull)
23 
24 namespace pps
25 {
/// Driver name chosen at registration; only DRM devices with this name are
/// considered when the data source is set up.
static std::string driver_name;

/// Mutex protecting `started`; also the mutex `started_cv` waits on.
static std::mutex started_m;
/// Notified after `started` has been set to true.
static std::condition_variable started_cv;
/// True between OnStart and OnStop of the data source.
static bool started{false};
32 
/// Converts a nanosecond duration into fractional milliseconds.
/// @param t Duration in nanoseconds
/// @return The same duration expressed in milliseconds as a float
float ms(const std::chrono::nanoseconds &t)
{
   constexpr float ns_per_ms = 1000000.0f;
   return static_cast<float>(t.count()) / ns_per_ms;
}
37 
OnSetup(const SetupArgs & args)38 void GpuDataSource::OnSetup(const SetupArgs &args)
39 {
40    // Create drivers for all supported devices
41    auto drm_devices = DrmDevice::create_all();
42    for (auto &drm_device : drm_devices) {
43       if (drm_device.name != driver_name)
44          continue;
45 
46       if (auto driver = Driver::get_driver(std::move(drm_device))) {
47          if (!driver->init_perfcnt()) {
48             // Skip failing driver
49             PPS_LOG_ERROR("Failed to initialize %s driver", driver->drm_device.name.c_str());
50             continue;
51          }
52 
53          this->driver = driver;
54       }
55    }
56    if (driver == nullptr) {
57       PPS_LOG_FATAL("No DRM devices supported");
58    }
59 
60    // Parse perfetto config
61    const std::string &config_raw = args.config->gpu_counter_config_raw();
62    perfetto::protos::pbzero::GpuCounterConfig::Decoder config(config_raw);
63 
64    if (config.has_counter_ids()) {
65       // Get enabled counters
66       PPS_LOG_IMPORTANT("Selecting counters");
67       for (auto it = config.counter_ids(); it; ++it) {
68          uint32_t counter_id = it->as_uint32();
69          driver->enable_counter(counter_id);
70       }
71    } else {
72       // Enable all counters
73       driver->enable_all_counters();
74    }
75 
76    // Get sampling period
77    auto min_sampling_period = std::chrono::nanoseconds(MIN_SAMPLING_PERIOD_NS);
78 
79    auto dev_supported = std::chrono::nanoseconds(driver->get_min_sampling_period_ns());
80    if (dev_supported > min_sampling_period) {
81       min_sampling_period = dev_supported;
82    }
83 
84    time_to_sleep = std::max(time_to_sleep, min_sampling_period);
85 
86    if (config.has_counter_period_ns()) {
87       auto requested_sampling_period = std::chrono::nanoseconds(config.counter_period_ns());
88       if (requested_sampling_period < min_sampling_period) {
89          PPS_LOG_ERROR("Sampling period should be greater than %" PRIu64 " ns (%.2f ms)",
90             uint64_t(min_sampling_period.count()),
91             ms(min_sampling_period));
92       } else {
93          time_to_sleep = requested_sampling_period;
94       }
95    }
96    PPS_LOG("Sampling period set to %" PRIu64 " ns", uint64_t(time_to_sleep.count()));
97 }
98 
OnStart(const StartArgs & args)99 void GpuDataSource::OnStart(const StartArgs &args)
100 {
101    driver->enable_perfcnt(time_to_sleep.count());
102 
103    state = State::Start;
104    got_first_counters = false;
105 
106    {
107       std::lock_guard<std::mutex> lock(started_m);
108       started = true;
109    }
110    started_cv.notify_all();
111 }
112 
close_callback(GpuDataSource::TraceContext ctx)113 void close_callback(GpuDataSource::TraceContext ctx)
114 {
115    auto packet = ctx.NewTracePacket();
116    packet->set_timestamp_clock_id(perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME);
117    packet->set_timestamp(perfetto::base::GetBootTimeNs().count());
118    packet->Finalize();
119    ctx.Flush();
120    PPS_LOG("Context flushed");
121 }
122 
OnStop(const StopArgs & args)123 void GpuDataSource::OnStop(const StopArgs &args)
124 {
125    state = State::Stop;
126    auto stop_closure = args.HandleStopAsynchronously();
127    Trace(close_callback);
128    stop_closure();
129 
130    driver->disable_perfcnt();
131    driver = nullptr;
132 
133    std::lock_guard<std::mutex> lock(started_m);
134    started = false;
135 }
136 
wait_started()137 void GpuDataSource::wait_started()
138 {
139    std::unique_lock<std::mutex> lock(started_m);
140    if (!started) {
141       PPS_LOG("Waiting for start");
142       started_cv.wait(lock, [] { return started; });
143    }
144 }
145 
register_data_source(const std::string & _driver_name)146 void GpuDataSource::register_data_source(const std::string &_driver_name)
147 {
148    driver_name = _driver_name;
149    static perfetto::DataSourceDescriptor dsd;
150    dsd.set_name("gpu.counters." + driver_name);
151    Register(dsd);
152 }
153 
add_group(perfetto::protos::pbzero::GpuCounterDescriptor * desc,const CounterGroup & group,const std::string & prefix,int32_t gpu_num)154 void add_group(perfetto::protos::pbzero::GpuCounterDescriptor *desc,
155    const CounterGroup &group,
156    const std::string &prefix,
157    int32_t gpu_num)
158 {
159    if (!group.counters.empty()) {
160       // Define a block for each group containing counters
161       auto block_desc = desc->add_blocks();
162       block_desc->set_name(prefix + "." + group.name);
163       block_desc->set_block_id(group.id);
164 
165       // Associate counters to blocks
166       for (auto id : group.counters) {
167          block_desc->add_counter_ids(id);
168       }
169    }
170 
171    for (auto const &sub : group.subgroups) {
172       // Perfetto doesnt currently support nested groups.
173       // Flatten group hierarchy, using dot separator
174       add_group(desc, sub, prefix + "." + group.name, gpu_num);
175    }
176 }
177 
add_descriptors(perfetto::protos::pbzero::GpuCounterEvent * event,std::vector<CounterGroup> const & groups,std::vector<Counter> const & counters,Driver & driver)178 void add_descriptors(perfetto::protos::pbzero::GpuCounterEvent *event,
179    std::vector<CounterGroup> const &groups,
180    std::vector<Counter> const &counters,
181    Driver &driver)
182 {
183    // Start a counter descriptor
184    auto desc = event->set_counter_descriptor();
185 
186    // Add the groups
187    for (auto const &group : groups) {
188       add_group(desc, group, driver.drm_device.name, driver.drm_device.gpu_num);
189    }
190 
191    // Add the counters
192    for (auto const &counter : counters) {
193       auto spec = desc->add_specs();
194       spec->set_counter_id(counter.id);
195       spec->set_name(counter.name);
196 
197       auto units = perfetto::protos::pbzero::GpuCounterDescriptor::NONE;
198       switch (counter.units) {
199       case Counter::Units::Percent:
200          units = perfetto::protos::pbzero::GpuCounterDescriptor::PERCENT;
201          break;
202       case Counter::Units::Byte:
203          units = perfetto::protos::pbzero::GpuCounterDescriptor::BYTE;
204          break;
205       case Counter::Units::Hertz:
206          units = perfetto::protos::pbzero::GpuCounterDescriptor::HERTZ;
207          break;
208       case Counter::Units::None:
209          units = perfetto::protos::pbzero::GpuCounterDescriptor::NONE;
210          break;
211       default:
212          assert(false && "Missing counter units type!");
213          break;
214       }
215       spec->add_numerator_units(units);
216    }
217 }
218 
add_samples(perfetto::protos::pbzero::GpuCounterEvent & event,const Driver & driver)219 void add_samples(perfetto::protos::pbzero::GpuCounterEvent &event, const Driver &driver)
220 {
221    if (driver.enabled_counters.size() == 0) {
222       PPS_LOG_FATAL("There are no counters enabled");
223    }
224 
225    for (const auto &counter : driver.enabled_counters) {
226       auto counter_event = event.add_counters();
227 
228       counter_event->set_counter_id(counter.id);
229 
230       auto value = counter.get_value(driver);
231       if (auto d_value = std::get_if<double>(&value)) {
232          counter_event->set_double_value(*d_value);
233       } else if (auto i_value = std::get_if<int64_t>(&value)) {
234          counter_event->set_int_value(*i_value);
235       } else {
236          PPS_LOG_ERROR("Failed to get value for counter %s", counter.name.c_str());
237       }
238    }
239 }
240 
add_timestamp(perfetto::protos::pbzero::ClockSnapshot * event,const Driver * driver)241 void add_timestamp(perfetto::protos::pbzero::ClockSnapshot *event, const Driver *driver)
242 {
243    uint32_t gpu_clock_id = driver->gpu_clock_id();
244    if (perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME == gpu_clock_id)
245       return;
246 
247    // Send a correlation event between GPU & CPU timestamps
248    uint64_t cpu_ts, gpu_ts;
249 
250    // Try to use the optimized driver correlation if available, otherwise do a
251    // separate CPU & GPU sample
252    if (!driver->cpu_gpu_timestamp(cpu_ts, gpu_ts)) {
253       cpu_ts = perfetto::base::GetBootTimeNs().count();
254       gpu_ts = driver->gpu_timestamp();
255    }
256 
257    {
258       auto clock = event->add_clocks();
259 
260       clock->set_clock_id(perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME);
261       clock->set_timestamp(cpu_ts);
262    }
263 
264    {
265       auto clock = event->add_clocks();
266 
267       clock->set_clock_id(gpu_clock_id);
268       clock->set_timestamp(gpu_ts);
269    }
270 }
271 
trace(TraceContext & ctx)272 void GpuDataSource::trace(TraceContext &ctx)
273 {
274    using namespace perfetto::protos::pbzero;
275 
276    if (auto state = ctx.GetIncrementalState(); state->was_cleared) {
277       descriptor_timestamp = perfetto::base::GetBootTimeNs().count();
278 
279       {
280          // Mark any incremental state before this point invalid
281          auto packet = ctx.NewTracePacket();
282          packet->set_timestamp_clock_id(perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME);
283          packet->set_timestamp(descriptor_timestamp);
284          packet->set_sequence_flags(TracePacket::SEQ_INCREMENTAL_STATE_CLEARED);
285       }
286 
287       descriptor_timestamp = perfetto::base::GetBootTimeNs().count();
288       {
289          // Counter descriptions
290          auto packet = ctx.NewTracePacket();
291          packet->set_timestamp_clock_id(perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME);
292          packet->set_timestamp(descriptor_timestamp);
293          auto event = packet->set_gpu_counter_event();
294          event->set_gpu_id(driver->drm_device.gpu_num);
295 
296          auto &groups = driver->groups;
297          auto &counters = driver->enabled_counters;
298          add_descriptors(event, groups, counters, *driver);
299       }
300 
301       {
302          // Initial timestamp correlation event
303          auto packet = ctx.NewTracePacket();
304          packet->set_timestamp_clock_id(perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME);
305          packet->set_timestamp(descriptor_timestamp);
306          last_correlation_timestamp = perfetto::base::GetBootTimeNs().count();
307          auto event = packet->set_clock_snapshot();
308          add_timestamp(event, driver);
309       }
310 
311       // Capture GPU timestamp of the first packet. Anything prior to this can
312       // be discarded.
313       descriptor_gpu_timestamp = driver->gpu_timestamp();
314       state->was_cleared = false;
315    }
316 
317    if (driver->dump_perfcnt()) {
318       while (auto gpu_timestamp = driver->next()) {
319          if (gpu_timestamp <= descriptor_gpu_timestamp) {
320             // Do not send counter values before counter descriptors
321             PPS_LOG_ERROR("Skipping counter values coming before descriptors");
322             continue;
323          }
324 
325          if (!got_first_counters) {
326             PPS_LOG("Got first counters at gpu_ts=0x%016" PRIx64, gpu_timestamp);
327             got_first_counters = true;
328          }
329 
330          auto packet = ctx.NewTracePacket();
331          packet->set_timestamp_clock_id(driver->gpu_clock_id());
332          packet->set_timestamp(gpu_timestamp);
333 
334          auto event = packet->set_gpu_counter_event();
335          event->set_gpu_id(driver->drm_device.gpu_num);
336 
337          add_samples(*event, *driver);
338       }
339    }
340 
341    uint64_t cpu_ts = perfetto::base::GetBootTimeNs().count();
342    if ((cpu_ts - last_correlation_timestamp) > CORRELATION_TIMESTAMP_PERIOD) {
343       auto packet = ctx.NewTracePacket();
344       packet->set_timestamp_clock_id(perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME);
345       packet->set_timestamp(cpu_ts);
346       auto event = packet->set_clock_snapshot();
347       add_timestamp(event, driver);
348       last_correlation_timestamp = cpu_ts;
349    }
350 }
351 
trace_callback(TraceContext ctx)352 void GpuDataSource::trace_callback(TraceContext ctx)
353 {
354    using namespace std::chrono;
355 
356    nanoseconds sleep_time = nanoseconds(0);
357 
358    if (auto data_source = ctx.GetDataSourceLocked()) {
359       if (data_source->time_to_sleep > data_source->time_to_trace) {
360          sleep_time = data_source->time_to_sleep - data_source->time_to_trace;
361       }
362    }
363 
364    // Wait sampling period before tracing
365    std::this_thread::sleep_for(sleep_time);
366 
367    auto time_zero = perfetto::base::GetBootTimeNs();
368    if (auto data_source = ctx.GetDataSourceLocked()) {
369       // Check data source is still running
370       if (data_source->state == pps::State::Start) {
371          data_source->trace(ctx);
372          data_source->time_to_trace = perfetto::base::GetBootTimeNs() - time_zero;
373       }
374    } else {
375       PPS_LOG("Tracing finished");
376    }
377 }
378 
379 } // namespace pps
380