xref: /aosp_15_r20/external/mesa3d/src/freedreno/ds/fd_pps_driver.cc (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2021 Google, Inc.
3  * SPDX-License-Identifier: MIT
4  */
5 
6 #include "fd_pps_driver.h"
7 
8 #include <cstring>
9 #include <iostream>
10 #include <perfetto.h>
11 
12 #include "common/freedreno_dev_info.h"
13 #include "drm/freedreno_drmif.h"
14 #include "drm/freedreno_ringbuffer.h"
15 #include "perfcntrs/freedreno_dt.h"
16 #include "perfcntrs/freedreno_perfcntr.h"
17 
18 #include "pps/pps.h"
19 #include "pps/pps_algorithm.h"
20 
21 namespace pps
22 {
23 
/**
 * Divide @a by @b in double precision.
 *
 * Returns 0 when the divisor is zero so that ratios computed over an
 * idle sampling period come out as zero.
 */
double
safe_div(uint64_t a, uint64_t b)
{
   return (b != 0) ? (a / static_cast<double>(b)) : 0.0;
}
32 
/**
 * Express @a as a percentage of @b.
 *
 * Returns 0 when @b is zero or when @a exceeds @b: the counters
 * occasionally report bogus values, and clamping those samples to zero
 * keeps the timeline free of readings above 100%.
 */
float
percent(uint64_t a, uint64_t b)
{
   const bool bogus_sample = (b == 0) || (a > b);

   if (bogus_sample)
      return 0;

   const double ratio = a / static_cast<double>(b);

   return 100.f * ratio;
}
44 
45 bool
is_dump_perfcnt_preemptible() const46 FreedrenoDriver::is_dump_perfcnt_preemptible() const
47 {
48    return false;
49 }
50 
51 uint64_t
get_min_sampling_period_ns()52 FreedrenoDriver::get_min_sampling_period_ns()
53 {
54    return 100000;
55 }
56 
57 /*
58 TODO this sees like it would be largely the same for a5xx as well
59 (ie. same countable names)..
60  */
61 void
setup_a6xx_counters()62 FreedrenoDriver::setup_a6xx_counters()
63 {
64    /* TODO is there a reason to want more than one group? */
65    CounterGroup group = {};
66    group.name = "counters";
67    groups.clear();
68    counters.clear();
69    countables.clear();
70    enabled_counters.clear();
71    groups.emplace_back(std::move(group));
72 
73    /*
74     * Create the countables that we'll be using.
75     */
76 
77    auto PERF_CP_ALWAYS_COUNT = countable("PERF_CP_ALWAYS_COUNT");
78    auto PERF_CP_BUSY_CYCLES  = countable("PERF_CP_BUSY_CYCLES");
79    auto PERF_RB_3D_PIXELS    = countable("PERF_RB_3D_PIXELS");
80    auto PERF_TP_L1_CACHELINE_MISSES = countable("PERF_TP_L1_CACHELINE_MISSES");
81    auto PERF_TP_L1_CACHELINE_REQUESTS = countable("PERF_TP_L1_CACHELINE_REQUESTS");
82 
83    auto PERF_TP_OUTPUT_PIXELS  = countable("PERF_TP_OUTPUT_PIXELS");
84    auto PERF_TP_OUTPUT_PIXELS_ANISO  = countable("PERF_TP_OUTPUT_PIXELS_ANISO");
85    auto PERF_TP_OUTPUT_PIXELS_BILINEAR = countable("PERF_TP_OUTPUT_PIXELS_BILINEAR");
86    auto PERF_TP_OUTPUT_PIXELS_POINT = countable("PERF_TP_OUTPUT_PIXELS_POINT");
87    auto PERF_TP_OUTPUT_PIXELS_ZERO_LOD = countable("PERF_TP_OUTPUT_PIXELS_ZERO_LOD");
88 
89    auto PERF_TSE_INPUT_PRIM  = countable("PERF_TSE_INPUT_PRIM");
90    auto PERF_TSE_CLIPPED_PRIM  = countable("PERF_TSE_CLIPPED_PRIM");
91    auto PERF_TSE_TRIVAL_REJ_PRIM  = countable("PERF_TSE_TRIVAL_REJ_PRIM");
92    auto PERF_TSE_OUTPUT_VISIBLE_PRIM = countable("PERF_TSE_OUTPUT_VISIBLE_PRIM");
93 
94    auto PERF_SP_BUSY_CYCLES  = countable("PERF_SP_BUSY_CYCLES");
95    auto PERF_SP_ALU_WORKING_CYCLES = countable("PERF_SP_ALU_WORKING_CYCLES");
96    auto PERF_SP_EFU_WORKING_CYCLES = countable("PERF_SP_EFU_WORKING_CYCLES");
97    auto PERF_SP_VS_STAGE_EFU_INSTRUCTIONS = countable("PERF_SP_VS_STAGE_EFU_INSTRUCTIONS");
98    auto PERF_SP_VS_STAGE_FULL_ALU_INSTRUCTIONS = countable("PERF_SP_VS_STAGE_FULL_ALU_INSTRUCTIONS");
99    auto PERF_SP_VS_STAGE_TEX_INSTRUCTIONS = countable("PERF_SP_VS_STAGE_TEX_INSTRUCTIONS");
100    auto PERF_SP_FS_STAGE_EFU_INSTRUCTIONS = countable("PERF_SP_FS_STAGE_EFU_INSTRUCTIONS");
101    auto PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS = countable("PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS");
102    auto PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS = countable("PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS");
103    auto PERF_SP_STALL_CYCLES_TP = countable("PERF_SP_STALL_CYCLES_TP");
104    auto PERF_SP_ANY_EU_WORKING_FS_STAGE = countable("PERF_SP_ANY_EU_WORKING_FS_STAGE");
105    auto PERF_SP_ANY_EU_WORKING_VS_STAGE = countable("PERF_SP_ANY_EU_WORKING_VS_STAGE");
106    auto PERF_SP_ANY_EU_WORKING_CS_STAGE = countable("PERF_SP_ANY_EU_WORKING_CS_STAGE");
107 
108    auto PERF_UCHE_STALL_CYCLES_ARBITER = countable("PERF_UCHE_STALL_CYCLES_ARBITER");
109    auto PERF_UCHE_VBIF_READ_BEATS_TP = countable("PERF_UCHE_VBIF_READ_BEATS_TP");
110    auto PERF_UCHE_VBIF_READ_BEATS_VFD = countable("PERF_UCHE_VBIF_READ_BEATS_VFD");
111    auto PERF_UCHE_VBIF_READ_BEATS_SP = countable("PERF_UCHE_VBIF_READ_BEATS_SP");
112    auto PERF_UCHE_READ_REQUESTS_TP = countable("PERF_UCHE_READ_REQUESTS_TP");
113 
114    auto PERF_PC_STALL_CYCLES_VFD = countable("PERF_PC_STALL_CYCLES_VFD");
115    auto PERF_PC_VS_INVOCATIONS = countable("PERF_PC_VS_INVOCATIONS");
116    auto PERF_PC_VERTEX_HITS = countable("PERF_PC_VERTEX_HITS");
117 
118    auto PERF_HLSQ_QUADS = countable("PERF_HLSQ_QUADS"); /* Quads (fragments / 4) produced */
119 
120    auto PERF_CP_NUM_PREEMPTIONS = countable("PERF_CP_NUM_PREEMPTIONS");
121    auto PERF_CP_PREEMPTION_REACTION_DELAY = countable("PERF_CP_PREEMPTION_REACTION_DELAY");
122 
123    /* TODO: resolve() tells there is no PERF_CMPDECMP_VBIF_READ_DATA */
124    // auto PERF_CMPDECMP_VBIF_READ_DATA = countable("PERF_CMPDECMP_VBIF_READ_DATA");
125 
126    /*
127     * And then setup the derived counters that we are exporting to
128     * pps based on the captured countable values.
129     *
130     * We try to expose the same counters as blob:
131     * https://gpuinspector.dev/docs/gpu-counters/qualcomm
132     */
133 
134    counter("GPU Frequency", Counter::Units::Hertz, [=]() {
135          return PERF_CP_ALWAYS_COUNT / time;
136       }
137    );
138 
139    counter("GPU % Utilization", Counter::Units::Percent, [=]() {
140          return percent(PERF_CP_BUSY_CYCLES / time, max_freq);
141       }
142    );
143 
144    counter("TP L1 Cache Misses", Counter::Units::None, [=]() {
145          return PERF_TP_L1_CACHELINE_MISSES / time;
146       }
147    );
148 
149    counter("Shader Core Utilization", Counter::Units::Percent, [=]() {
150          return percent(PERF_SP_BUSY_CYCLES / time, max_freq * info->num_sp_cores);
151       }
152    );
153 
154    /* TODO: verify */
155    counter("(?) % Texture Fetch Stall", Counter::Units::Percent, [=]() {
156          return percent(PERF_SP_STALL_CYCLES_TP / time, max_freq * info->num_sp_cores);
157       }
158    );
159 
160    /* TODO: verify */
161    counter("(?) % Vertex Fetch Stall", Counter::Units::Percent, [=]() {
162          return percent(PERF_PC_STALL_CYCLES_VFD / time, max_freq * info->num_sp_cores);
163       }
164    );
165 
166    counter("L1 Texture Cache Miss Per Pixel", Counter::Units::None, [=]() {
167          return safe_div(PERF_TP_L1_CACHELINE_MISSES, PERF_HLSQ_QUADS * 4);
168       }
169    );
170 
171    counter("% Texture L1 Miss", Counter::Units::Percent, [=]() {
172          return percent(PERF_TP_L1_CACHELINE_MISSES, PERF_TP_L1_CACHELINE_REQUESTS);
173       }
174    );
175 
176    counter("% Texture L2 Miss", Counter::Units::Percent, [=]() {
177          return percent(PERF_UCHE_VBIF_READ_BEATS_TP / 2, PERF_UCHE_READ_REQUESTS_TP);
178       }
179    );
180 
181    /* TODO: verify */
182    counter("(?) % Stalled on System Memory", Counter::Units::Percent, [=]() {
183          return percent(PERF_UCHE_STALL_CYCLES_ARBITER / time, max_freq * info->num_sp_cores);
184       }
185    );
186 
187    counter("Pre-clipped Polygons / Second", Counter::Units::None, [=]() {
188          return PERF_TSE_INPUT_PRIM * (1.f / time);
189       }
190    );
191 
192    counter("% Prims Trivially Rejected", Counter::Units::Percent, [=]() {
193          return percent(PERF_TSE_TRIVAL_REJ_PRIM, PERF_TSE_INPUT_PRIM);
194       }
195    );
196 
197    counter("% Prims Clipped", Counter::Units::Percent, [=]() {
198          return percent(PERF_TSE_CLIPPED_PRIM, PERF_TSE_INPUT_PRIM);
199       }
200    );
201 
202    counter("Average Vertices / Polygon", Counter::Units::None, [=]() {
203          return PERF_PC_VS_INVOCATIONS / PERF_TSE_INPUT_PRIM;
204       }
205    );
206 
207    counter("Reused Vertices / Second", Counter::Units::None, [=]() {
208          return PERF_PC_VERTEX_HITS * (1.f / time);
209       }
210    );
211 
212    counter("Average Polygon Area", Counter::Units::None, [=]() {
213          return safe_div(PERF_HLSQ_QUADS * 4, PERF_TSE_OUTPUT_VISIBLE_PRIM);
214       }
215    );
216 
217    /* TODO: find formula */
218    // counter("% Shaders Busy", Counter::Units::Percent, [=]() {
219    //       return 100.0 * 0;
220    //    }
221    // );
222 
223    counter("Vertices Shaded / Second", Counter::Units::None, [=]() {
224          return PERF_PC_VS_INVOCATIONS * (1.f / time);
225       }
226    );
227 
228    counter("Fragments Shaded / Second", Counter::Units::None, [=]() {
229          return PERF_HLSQ_QUADS * 4 * (1.f / time);
230       }
231    );
232 
233    counter("Vertex Instructions / Second", Counter::Units::None, [=]() {
234          return (PERF_SP_VS_STAGE_FULL_ALU_INSTRUCTIONS +
235                  PERF_SP_VS_STAGE_EFU_INSTRUCTIONS) * (1.f / time);
236       }
237    );
238 
239    counter("Fragment Instructions / Second", Counter::Units::None, [=]() {
240          return (PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS +
241                  PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS / 2 +
242                  PERF_SP_FS_STAGE_EFU_INSTRUCTIONS) * (1.f / time);
243       }
244    );
245 
246    counter("Fragment ALU Instructions / Sec (Full)", Counter::Units::None, [=]() {
247          return PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS * (1.f / time);
248       }
249    );
250 
251    counter("Fragment ALU Instructions / Sec (Half)", Counter::Units::None, [=]() {
252          return PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS * (1.f / time);
253       }
254    );
255 
256    counter("Fragment EFU Instructions / Second", Counter::Units::None, [=]() {
257          return PERF_SP_FS_STAGE_EFU_INSTRUCTIONS * (1.f / time);
258       }
259    );
260 
261    counter("Textures / Vertex", Counter::Units::None, [=]() {
262          return safe_div(PERF_SP_VS_STAGE_TEX_INSTRUCTIONS, PERF_PC_VS_INVOCATIONS);
263       }
264    );
265 
266    counter("Textures / Fragment", Counter::Units::None, [=]() {
267          return safe_div(PERF_TP_OUTPUT_PIXELS, PERF_HLSQ_QUADS * 4);
268       }
269    );
270 
271    counter("ALU / Vertex", Counter::Units::None, [=]() {
272          return safe_div(PERF_SP_VS_STAGE_FULL_ALU_INSTRUCTIONS, PERF_PC_VS_INVOCATIONS);
273       }
274    );
275 
276    counter("EFU / Vertex", Counter::Units::None, [=]() {
277          return safe_div(PERF_SP_VS_STAGE_EFU_INSTRUCTIONS, PERF_PC_VS_INVOCATIONS);
278       }
279    );
280 
281    counter("ALU / Fragment", Counter::Units::None, [=]() {
282          return safe_div(PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS +
283                          PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS / 2, PERF_HLSQ_QUADS);
284       }
285    );
286 
287    counter("EFU / Fragment", Counter::Units::None, [=]() {
288          return safe_div(PERF_SP_FS_STAGE_EFU_INSTRUCTIONS, PERF_HLSQ_QUADS);
289       }
290    );
291 
292    counter("% Time Shading Vertices", Counter::Units::Percent, [=]() {
293          return percent(PERF_SP_ANY_EU_WORKING_VS_STAGE,
294                         (PERF_SP_ANY_EU_WORKING_VS_STAGE +
295                          PERF_SP_ANY_EU_WORKING_FS_STAGE +
296                          PERF_SP_ANY_EU_WORKING_CS_STAGE));
297       }
298    );
299 
300    counter("% Time Shading Fragments", Counter::Units::Percent, [=]() {
301          return percent(PERF_SP_ANY_EU_WORKING_FS_STAGE,
302                         (PERF_SP_ANY_EU_WORKING_VS_STAGE +
303                          PERF_SP_ANY_EU_WORKING_FS_STAGE +
304                          PERF_SP_ANY_EU_WORKING_CS_STAGE));
305       }
306    );
307 
308    counter("% Time Compute", Counter::Units::Percent, [=]() {
309          return percent(PERF_SP_ANY_EU_WORKING_CS_STAGE,
310                         (PERF_SP_ANY_EU_WORKING_VS_STAGE +
311                          PERF_SP_ANY_EU_WORKING_FS_STAGE +
312                          PERF_SP_ANY_EU_WORKING_CS_STAGE));
313       }
314    );
315 
316    counter("% Shader ALU Capacity Utilized", Counter::Units::Percent, [=]() {
317          return percent((PERF_SP_VS_STAGE_FULL_ALU_INSTRUCTIONS +
318                          PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS +
319                          PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS / 2) / 64,
320                         PERF_SP_BUSY_CYCLES);
321       }
322    );
323 
324    counter("% Time ALUs Working", Counter::Units::Percent, [=]() {
325          return percent(PERF_SP_ALU_WORKING_CYCLES / 2, PERF_SP_BUSY_CYCLES);
326       }
327    );
328 
329    counter("% Time EFUs Working", Counter::Units::Percent, [=]() {
330          return percent(PERF_SP_EFU_WORKING_CYCLES / 2, PERF_SP_BUSY_CYCLES);
331       }
332    );
333 
334    counter("% Anisotropic Filtered", Counter::Units::Percent, [=]() {
335          return percent(PERF_TP_OUTPUT_PIXELS_ANISO, PERF_TP_OUTPUT_PIXELS);
336       }
337    );
338 
339    counter("% Linear Filtered", Counter::Units::Percent, [=]() {
340          return percent(PERF_TP_OUTPUT_PIXELS_BILINEAR, PERF_TP_OUTPUT_PIXELS);
341       }
342    );
343 
344    counter("% Nearest Filtered", Counter::Units::Percent, [=]() {
345          return percent(PERF_TP_OUTPUT_PIXELS_POINT, PERF_TP_OUTPUT_PIXELS);
346       }
347    );
348 
349    counter("% Non-Base Level Textures", Counter::Units::Percent, [=]() {
350          return percent(PERF_TP_OUTPUT_PIXELS_ZERO_LOD, PERF_TP_OUTPUT_PIXELS);
351       }
352    );
353 
354    /* Reads from KGSL_PERFCOUNTER_GROUP_VBIF countable=63 */
355    // counter("Read Total (Bytes/sec)", Counter::Units::Byte, [=]() {
356    //       return  * (1.f / time);
357    //    }
358    // );
359 
360    /* Reads from KGSL_PERFCOUNTER_GROUP_VBIF countable=84 */
361    // counter("Write Total (Bytes/sec)", Counter::Units::Byte, [=]() {
362    //       return  * (1.f / time);
363    //    }
364    // );
365 
366    /* Cannot get PERF_CMPDECMP_VBIF_READ_DATA countable */
367    // counter("Texture Memory Read BW (Bytes/Second)", Counter::Units::Byte, [=]() {
368    //       return (PERF_CMPDECMP_VBIF_READ_DATA + PERF_UCHE_VBIF_READ_BEATS_TP) * (1.f / time);
369    //    }
370    // );
371 
372    /* TODO: verify */
373    counter("(?) Vertex Memory Read (Bytes/Second)", Counter::Units::Byte, [=]() {
374          return PERF_UCHE_VBIF_READ_BEATS_VFD * 32 * (1.f / time);
375       }
376    );
377 
378    /* TODO: verify */
379    counter("SP Memory Read (Bytes/Second)", Counter::Units::Byte, [=]() {
380          return PERF_UCHE_VBIF_READ_BEATS_SP * 32 * (1.f / time);
381       }
382    );
383 
384    counter("Avg Bytes / Fragment", Counter::Units::Byte, [=]() {
385          return safe_div(PERF_UCHE_VBIF_READ_BEATS_TP * 32, PERF_HLSQ_QUADS * 4);
386       }
387    );
388 
389    counter("Avg Bytes / Vertex", Counter::Units::Byte, [=]() {
390          return safe_div(PERF_UCHE_VBIF_READ_BEATS_VFD * 32, PERF_PC_VS_INVOCATIONS);
391       }
392    );
393 
394    counter("Preemptions / second", Counter::Units::None, [=]() {
395          return PERF_CP_NUM_PREEMPTIONS * (1.f / time);
396       }
397    );
398 
399    counter("Avg Preemption Delay", Counter::Units::None, [=]() {
400          return PERF_CP_PREEMPTION_REACTION_DELAY * (1.f / time);
401       }
402    );
403 }
404 
405 /**
406  * Generate an submit the cmdstream to configure the counter/countable
407  * muxing
408  */
409 void
configure_counters(bool reset,bool wait)410 FreedrenoDriver::configure_counters(bool reset, bool wait)
411 {
412    struct fd_submit *submit = fd_submit_new(pipe);
413    enum fd_ringbuffer_flags flags =
414       (enum fd_ringbuffer_flags)(FD_RINGBUFFER_PRIMARY | FD_RINGBUFFER_GROWABLE);
415    struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(submit, 0x1000, flags);
416 
417    for (const auto &countable : countables)
418       countable.configure(ring, reset);
419 
420    struct fd_fence *fence = fd_submit_flush(submit, -1, false);
421 
422    fd_fence_flush(fence);
423    fd_fence_del(fence);
424 
425    fd_ringbuffer_del(ring);
426    fd_submit_del(submit);
427 
428    if (wait)
429       fd_pipe_wait(pipe, fence);
430 }
431 
432 /**
433  * Read the current counter values and record the time.
434  */
435 void
collect_countables()436 FreedrenoDriver::collect_countables()
437 {
438    last_dump_ts = perfetto::base::GetBootTimeNs().count();
439 
440    for (const auto &countable : countables)
441       countable.collect();
442 }
443 
444 bool
init_perfcnt()445 FreedrenoDriver::init_perfcnt()
446 {
447    uint64_t val;
448 
449    if (dev)
450       return true;
451 
452    dev = fd_device_new(drm_device.fd);
453    pipe = fd_pipe_new2(dev, FD_PIPE_3D, 0);
454    dev_id = fd_pipe_dev_id(pipe);
455 
456    if (fd_pipe_get_param(pipe, FD_MAX_FREQ, &val)) {
457       PERFETTO_FATAL("Could not get MAX_FREQ");
458       return false;
459    }
460    max_freq = val;
461 
462    if (fd_pipe_get_param(pipe, FD_SUSPEND_COUNT, &val)) {
463       PERFETTO_ILOG("Could not get SUSPEND_COUNT");
464    } else {
465       suspend_count = val;
466       has_suspend_count = true;
467    }
468 
469    fd_pipe_set_param(pipe, FD_SYSPROF, 1);
470 
471    perfcntrs = fd_perfcntrs(fd_pipe_dev_id(pipe), &num_perfcntrs);
472    if (num_perfcntrs == 0) {
473       PERFETTO_FATAL("No hw counters available");
474       return false;
475    }
476 
477    assigned_counters.resize(num_perfcntrs);
478    assigned_counters.assign(assigned_counters.size(), 0);
479 
480    switch (fd_dev_gen(dev_id)) {
481    case 6:
482       setup_a6xx_counters();
483       break;
484    default:
485       PERFETTO_FATAL("Unsupported GPU: a%03u", fd_dev_gpu_id(dev_id));
486       return false;
487    }
488 
489    state.resize(next_countable_id);
490 
491    for (const auto &countable : countables)
492       countable.resolve();
493 
494    info = fd_dev_info_raw(dev_id);
495 
496    io = fd_dt_find_io();
497    if (!io) {
498       PERFETTO_FATAL("Could not map GPU I/O space");
499       return false;
500    }
501 
502    configure_counters(true, true);
503    collect_countables();
504 
505    return true;
506 }
507 
508 void
enable_counter(const uint32_t counter_id)509 FreedrenoDriver::enable_counter(const uint32_t counter_id)
510 {
511    enabled_counters.push_back(counters[counter_id]);
512 }
513 
514 void
enable_all_counters()515 FreedrenoDriver::enable_all_counters()
516 {
517    enabled_counters.reserve(counters.size());
518    for (auto &counter : counters) {
519       enabled_counters.push_back(counter);
520    }
521 }
522 
523 void
enable_perfcnt(const uint64_t)524 FreedrenoDriver::enable_perfcnt(const uint64_t /* sampling_period_ns */)
525 {
526 }
527 
528 bool
dump_perfcnt()529 FreedrenoDriver::dump_perfcnt()
530 {
531    if (has_suspend_count) {
532       uint64_t val;
533 
534       fd_pipe_get_param(pipe, FD_SUSPEND_COUNT, &val);
535 
536       if (suspend_count != val) {
537          PERFETTO_ILOG("Device had suspended!");
538 
539          suspend_count = val;
540 
541          configure_counters(true, true);
542          collect_countables();
543 
544          /* We aren't going to have anything sensible by comparing
545           * current values to values from prior to the suspend, so
546           * just skip this sampling period.
547           */
548          return false;
549       }
550    }
551 
552    auto last_ts = last_dump_ts;
553 
554    /* Capture the timestamp from the *start* of the sampling period: */
555    last_capture_ts = last_dump_ts;
556 
557    collect_countables();
558 
559    auto elapsed_time_ns = last_dump_ts - last_ts;
560 
561    time = (float)elapsed_time_ns / 1000000000.0;
562 
563    /* On older kernels that dont' support querying the suspend-
564     * count, just send configuration cmdstream regularly to keep
565     * the GPU alive and correctly configured for the countables
566     * we want
567     */
568    if (!has_suspend_count) {
569       configure_counters(false, false);
570    }
571 
572    return true;
573 }
574 
next()575 uint64_t FreedrenoDriver::next()
576 {
577    auto ret = last_capture_ts;
578    last_capture_ts = 0;
579    return ret;
580 }
581 
disable_perfcnt()582 void FreedrenoDriver::disable_perfcnt()
583 {
584    /* There isn't really any disable, only reconfiguring which countables
585     * get muxed to which counters
586     */
587 }
588 
589 /*
590  * Countable
591  */
592 
593 FreedrenoDriver::Countable
countable(std::string name)594 FreedrenoDriver::countable(std::string name)
595 {
596    auto countable = Countable(this, name);
597    countables.emplace_back(countable);
598    return countable;
599 }
600 
Countable(FreedrenoDriver * d,std::string name)601 FreedrenoDriver::Countable::Countable(FreedrenoDriver *d, std::string name)
602    : id {d->next_countable_id++}, d {d}, name {name}
603 {
604 }
605 
606 /* Emit register writes on ring to configure counter/countable muxing: */
607 void
configure(struct fd_ringbuffer * ring,bool reset) const608 FreedrenoDriver::Countable::configure(struct fd_ringbuffer *ring, bool reset) const
609 {
610    const struct fd_perfcntr_countable *countable = d->state[id].countable;
611    const struct fd_perfcntr_counter   *counter   = d->state[id].counter;
612 
613    OUT_PKT7(ring, CP_WAIT_FOR_IDLE, 0);
614 
615    if (counter->enable && reset) {
616       OUT_PKT4(ring, counter->enable, 1);
617       OUT_RING(ring, 0);
618    }
619 
620    if (counter->clear && reset) {
621       OUT_PKT4(ring, counter->clear, 1);
622       OUT_RING(ring, 1);
623 
624       OUT_PKT4(ring, counter->clear, 1);
625       OUT_RING(ring, 0);
626    }
627 
628    OUT_PKT4(ring, counter->select_reg, 1);
629    OUT_RING(ring, countable->selector);
630 
631    if (counter->enable && reset) {
632       OUT_PKT4(ring, counter->enable, 1);
633       OUT_RING(ring, 1);
634    }
635 }
636 
637 /* Collect current counter value and calculate delta since last sample: */
638 void
collect() const639 FreedrenoDriver::Countable::collect() const
640 {
641    const struct fd_perfcntr_counter *counter = d->state[id].counter;
642 
643    d->state[id].last_value = d->state[id].value;
644 
645    /* this is true on a5xx and later */
646    assert(counter->counter_reg_lo + 1 == counter->counter_reg_hi);
647    uint64_t *reg = (uint64_t *)((uint32_t *)d->io + counter->counter_reg_lo);
648 
649    d->state[id].value = *reg;
650 }
651 
652 /* Resolve the countable and assign next counter from it's group: */
653 void
resolve() const654 FreedrenoDriver::Countable::resolve() const
655 {
656    for (unsigned i = 0; i < d->num_perfcntrs; i++) {
657       const struct fd_perfcntr_group *g = &d->perfcntrs[i];
658       for (unsigned j = 0; j < g->num_countables; j++) {
659          const struct fd_perfcntr_countable *c = &g->countables[j];
660          if (name == c->name) {
661             d->state[id].countable = c;
662 
663             /* Assign a counter from the same group: */
664             assert(d->assigned_counters[i] < g->num_counters);
665             d->state[id].counter = &g->counters[d->assigned_counters[i]++];
666 
667             std::cout << "Countable: " << name << ", group=" << g->name <<
668                   ", counter=" << d->assigned_counters[i] - 1 << "\n";
669 
670             return;
671          }
672       }
673    }
674    unreachable("no such countable!");
675 }
676 
677 uint64_t
get_value() const678 FreedrenoDriver::Countable::get_value() const
679 {
680    return d->state[id].value - d->state[id].last_value;
681 }
682 
683 /*
684  * DerivedCounter
685  */
686 
DerivedCounter(FreedrenoDriver * d,std::string name,Counter::Units units,std::function<int64_t ()> derive)687 FreedrenoDriver::DerivedCounter::DerivedCounter(FreedrenoDriver *d, std::string name,
688                                                 Counter::Units units,
689                                                 std::function<int64_t()> derive)
690    : Counter(d->next_counter_id++, name, 0)
691 {
692    std::cout << "DerivedCounter: " << name << ", id=" << id << "\n";
693    this->units = units;
694    set_getter([=](const Counter &c, const Driver &d) {
695          return derive();
696       }
697    );
698 }
699 
700 FreedrenoDriver::DerivedCounter
counter(std::string name,Counter::Units units,std::function<int64_t ()> derive)701 FreedrenoDriver::counter(std::string name, Counter::Units units,
702                          std::function<int64_t()> derive)
703 {
704    auto counter = DerivedCounter(this, name, units, derive);
705    counters.emplace_back(counter);
706    return counter;
707 }
708 
709 uint32_t
gpu_clock_id() const710 FreedrenoDriver::gpu_clock_id() const
711 {
712    return perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME;
713 }
714 
715 uint64_t
gpu_timestamp() const716 FreedrenoDriver::gpu_timestamp() const
717 {
718    return perfetto::base::GetBootTimeNs().count();
719 }
720 
721 bool
cpu_gpu_timestamp(uint64_t &,uint64_t &) const722 FreedrenoDriver::cpu_gpu_timestamp(uint64_t &, uint64_t &) const
723 {
724    /* Not supported */
725    return false;
726 }
727 
728 } // namespace pps
729