1 /*
2 * Copyright © 2021 Google, Inc.
3 * SPDX-License-Identifier: MIT
4 */
5
6 #include "fd_pps_driver.h"
7
8 #include <cstring>
9 #include <iostream>
10 #include <perfetto.h>
11
12 #include "common/freedreno_dev_info.h"
13 #include "drm/freedreno_drmif.h"
14 #include "drm/freedreno_ringbuffer.h"
15 #include "perfcntrs/freedreno_dt.h"
16 #include "perfcntrs/freedreno_perfcntr.h"
17
18 #include "pps/pps.h"
19 #include "pps/pps_algorithm.h"
20
21 namespace pps
22 {
23
/* Floating-point a / b, returning 0 instead of dividing when b is zero. */
double
safe_div(uint64_t a, uint64_t b)
{
   return (b != 0) ? static_cast<double>(a) / static_cast<double>(b) : 0.0;
}
32
/* Express a as a percentage of b.
 *
 * Returns 0 when b is zero or when a exceeds b.
 */
float
percent(uint64_t a, uint64_t b)
{
   /* Sometimes we get bogus values but we want for the timeline
    * to look nice without higher than 100% values.
    */
   const bool bogus = (b == 0) || (a > b);
   if (bogus)
      return 0;

   const double ratio = a / static_cast<double>(b);
   return 100.f * ratio;
}
44
45 bool
is_dump_perfcnt_preemptible() const46 FreedrenoDriver::is_dump_perfcnt_preemptible() const
47 {
48 return false;
49 }
50
51 uint64_t
get_min_sampling_period_ns()52 FreedrenoDriver::get_min_sampling_period_ns()
53 {
54 return 100000;
55 }
56
57 /*
58 TODO this sees like it would be largely the same for a5xx as well
59 (ie. same countable names)..
60 */
61 void
setup_a6xx_counters()62 FreedrenoDriver::setup_a6xx_counters()
63 {
64 /* TODO is there a reason to want more than one group? */
65 CounterGroup group = {};
66 group.name = "counters";
67 groups.clear();
68 counters.clear();
69 countables.clear();
70 enabled_counters.clear();
71 groups.emplace_back(std::move(group));
72
73 /*
74 * Create the countables that we'll be using.
75 */
76
77 auto PERF_CP_ALWAYS_COUNT = countable("PERF_CP_ALWAYS_COUNT");
78 auto PERF_CP_BUSY_CYCLES = countable("PERF_CP_BUSY_CYCLES");
79 auto PERF_RB_3D_PIXELS = countable("PERF_RB_3D_PIXELS");
80 auto PERF_TP_L1_CACHELINE_MISSES = countable("PERF_TP_L1_CACHELINE_MISSES");
81 auto PERF_TP_L1_CACHELINE_REQUESTS = countable("PERF_TP_L1_CACHELINE_REQUESTS");
82
83 auto PERF_TP_OUTPUT_PIXELS = countable("PERF_TP_OUTPUT_PIXELS");
84 auto PERF_TP_OUTPUT_PIXELS_ANISO = countable("PERF_TP_OUTPUT_PIXELS_ANISO");
85 auto PERF_TP_OUTPUT_PIXELS_BILINEAR = countable("PERF_TP_OUTPUT_PIXELS_BILINEAR");
86 auto PERF_TP_OUTPUT_PIXELS_POINT = countable("PERF_TP_OUTPUT_PIXELS_POINT");
87 auto PERF_TP_OUTPUT_PIXELS_ZERO_LOD = countable("PERF_TP_OUTPUT_PIXELS_ZERO_LOD");
88
89 auto PERF_TSE_INPUT_PRIM = countable("PERF_TSE_INPUT_PRIM");
90 auto PERF_TSE_CLIPPED_PRIM = countable("PERF_TSE_CLIPPED_PRIM");
91 auto PERF_TSE_TRIVAL_REJ_PRIM = countable("PERF_TSE_TRIVAL_REJ_PRIM");
92 auto PERF_TSE_OUTPUT_VISIBLE_PRIM = countable("PERF_TSE_OUTPUT_VISIBLE_PRIM");
93
94 auto PERF_SP_BUSY_CYCLES = countable("PERF_SP_BUSY_CYCLES");
95 auto PERF_SP_ALU_WORKING_CYCLES = countable("PERF_SP_ALU_WORKING_CYCLES");
96 auto PERF_SP_EFU_WORKING_CYCLES = countable("PERF_SP_EFU_WORKING_CYCLES");
97 auto PERF_SP_VS_STAGE_EFU_INSTRUCTIONS = countable("PERF_SP_VS_STAGE_EFU_INSTRUCTIONS");
98 auto PERF_SP_VS_STAGE_FULL_ALU_INSTRUCTIONS = countable("PERF_SP_VS_STAGE_FULL_ALU_INSTRUCTIONS");
99 auto PERF_SP_VS_STAGE_TEX_INSTRUCTIONS = countable("PERF_SP_VS_STAGE_TEX_INSTRUCTIONS");
100 auto PERF_SP_FS_STAGE_EFU_INSTRUCTIONS = countable("PERF_SP_FS_STAGE_EFU_INSTRUCTIONS");
101 auto PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS = countable("PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS");
102 auto PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS = countable("PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS");
103 auto PERF_SP_STALL_CYCLES_TP = countable("PERF_SP_STALL_CYCLES_TP");
104 auto PERF_SP_ANY_EU_WORKING_FS_STAGE = countable("PERF_SP_ANY_EU_WORKING_FS_STAGE");
105 auto PERF_SP_ANY_EU_WORKING_VS_STAGE = countable("PERF_SP_ANY_EU_WORKING_VS_STAGE");
106 auto PERF_SP_ANY_EU_WORKING_CS_STAGE = countable("PERF_SP_ANY_EU_WORKING_CS_STAGE");
107
108 auto PERF_UCHE_STALL_CYCLES_ARBITER = countable("PERF_UCHE_STALL_CYCLES_ARBITER");
109 auto PERF_UCHE_VBIF_READ_BEATS_TP = countable("PERF_UCHE_VBIF_READ_BEATS_TP");
110 auto PERF_UCHE_VBIF_READ_BEATS_VFD = countable("PERF_UCHE_VBIF_READ_BEATS_VFD");
111 auto PERF_UCHE_VBIF_READ_BEATS_SP = countable("PERF_UCHE_VBIF_READ_BEATS_SP");
112 auto PERF_UCHE_READ_REQUESTS_TP = countable("PERF_UCHE_READ_REQUESTS_TP");
113
114 auto PERF_PC_STALL_CYCLES_VFD = countable("PERF_PC_STALL_CYCLES_VFD");
115 auto PERF_PC_VS_INVOCATIONS = countable("PERF_PC_VS_INVOCATIONS");
116 auto PERF_PC_VERTEX_HITS = countable("PERF_PC_VERTEX_HITS");
117
118 auto PERF_HLSQ_QUADS = countable("PERF_HLSQ_QUADS"); /* Quads (fragments / 4) produced */
119
120 auto PERF_CP_NUM_PREEMPTIONS = countable("PERF_CP_NUM_PREEMPTIONS");
121 auto PERF_CP_PREEMPTION_REACTION_DELAY = countable("PERF_CP_PREEMPTION_REACTION_DELAY");
122
123 /* TODO: resolve() tells there is no PERF_CMPDECMP_VBIF_READ_DATA */
124 // auto PERF_CMPDECMP_VBIF_READ_DATA = countable("PERF_CMPDECMP_VBIF_READ_DATA");
125
126 /*
127 * And then setup the derived counters that we are exporting to
128 * pps based on the captured countable values.
129 *
130 * We try to expose the same counters as blob:
131 * https://gpuinspector.dev/docs/gpu-counters/qualcomm
132 */
133
134 counter("GPU Frequency", Counter::Units::Hertz, [=]() {
135 return PERF_CP_ALWAYS_COUNT / time;
136 }
137 );
138
139 counter("GPU % Utilization", Counter::Units::Percent, [=]() {
140 return percent(PERF_CP_BUSY_CYCLES / time, max_freq);
141 }
142 );
143
144 counter("TP L1 Cache Misses", Counter::Units::None, [=]() {
145 return PERF_TP_L1_CACHELINE_MISSES / time;
146 }
147 );
148
149 counter("Shader Core Utilization", Counter::Units::Percent, [=]() {
150 return percent(PERF_SP_BUSY_CYCLES / time, max_freq * info->num_sp_cores);
151 }
152 );
153
154 /* TODO: verify */
155 counter("(?) % Texture Fetch Stall", Counter::Units::Percent, [=]() {
156 return percent(PERF_SP_STALL_CYCLES_TP / time, max_freq * info->num_sp_cores);
157 }
158 );
159
160 /* TODO: verify */
161 counter("(?) % Vertex Fetch Stall", Counter::Units::Percent, [=]() {
162 return percent(PERF_PC_STALL_CYCLES_VFD / time, max_freq * info->num_sp_cores);
163 }
164 );
165
166 counter("L1 Texture Cache Miss Per Pixel", Counter::Units::None, [=]() {
167 return safe_div(PERF_TP_L1_CACHELINE_MISSES, PERF_HLSQ_QUADS * 4);
168 }
169 );
170
171 counter("% Texture L1 Miss", Counter::Units::Percent, [=]() {
172 return percent(PERF_TP_L1_CACHELINE_MISSES, PERF_TP_L1_CACHELINE_REQUESTS);
173 }
174 );
175
176 counter("% Texture L2 Miss", Counter::Units::Percent, [=]() {
177 return percent(PERF_UCHE_VBIF_READ_BEATS_TP / 2, PERF_UCHE_READ_REQUESTS_TP);
178 }
179 );
180
181 /* TODO: verify */
182 counter("(?) % Stalled on System Memory", Counter::Units::Percent, [=]() {
183 return percent(PERF_UCHE_STALL_CYCLES_ARBITER / time, max_freq * info->num_sp_cores);
184 }
185 );
186
187 counter("Pre-clipped Polygons / Second", Counter::Units::None, [=]() {
188 return PERF_TSE_INPUT_PRIM * (1.f / time);
189 }
190 );
191
192 counter("% Prims Trivially Rejected", Counter::Units::Percent, [=]() {
193 return percent(PERF_TSE_TRIVAL_REJ_PRIM, PERF_TSE_INPUT_PRIM);
194 }
195 );
196
197 counter("% Prims Clipped", Counter::Units::Percent, [=]() {
198 return percent(PERF_TSE_CLIPPED_PRIM, PERF_TSE_INPUT_PRIM);
199 }
200 );
201
202 counter("Average Vertices / Polygon", Counter::Units::None, [=]() {
203 return PERF_PC_VS_INVOCATIONS / PERF_TSE_INPUT_PRIM;
204 }
205 );
206
207 counter("Reused Vertices / Second", Counter::Units::None, [=]() {
208 return PERF_PC_VERTEX_HITS * (1.f / time);
209 }
210 );
211
212 counter("Average Polygon Area", Counter::Units::None, [=]() {
213 return safe_div(PERF_HLSQ_QUADS * 4, PERF_TSE_OUTPUT_VISIBLE_PRIM);
214 }
215 );
216
217 /* TODO: find formula */
218 // counter("% Shaders Busy", Counter::Units::Percent, [=]() {
219 // return 100.0 * 0;
220 // }
221 // );
222
223 counter("Vertices Shaded / Second", Counter::Units::None, [=]() {
224 return PERF_PC_VS_INVOCATIONS * (1.f / time);
225 }
226 );
227
228 counter("Fragments Shaded / Second", Counter::Units::None, [=]() {
229 return PERF_HLSQ_QUADS * 4 * (1.f / time);
230 }
231 );
232
233 counter("Vertex Instructions / Second", Counter::Units::None, [=]() {
234 return (PERF_SP_VS_STAGE_FULL_ALU_INSTRUCTIONS +
235 PERF_SP_VS_STAGE_EFU_INSTRUCTIONS) * (1.f / time);
236 }
237 );
238
239 counter("Fragment Instructions / Second", Counter::Units::None, [=]() {
240 return (PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS +
241 PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS / 2 +
242 PERF_SP_FS_STAGE_EFU_INSTRUCTIONS) * (1.f / time);
243 }
244 );
245
246 counter("Fragment ALU Instructions / Sec (Full)", Counter::Units::None, [=]() {
247 return PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS * (1.f / time);
248 }
249 );
250
251 counter("Fragment ALU Instructions / Sec (Half)", Counter::Units::None, [=]() {
252 return PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS * (1.f / time);
253 }
254 );
255
256 counter("Fragment EFU Instructions / Second", Counter::Units::None, [=]() {
257 return PERF_SP_FS_STAGE_EFU_INSTRUCTIONS * (1.f / time);
258 }
259 );
260
261 counter("Textures / Vertex", Counter::Units::None, [=]() {
262 return safe_div(PERF_SP_VS_STAGE_TEX_INSTRUCTIONS, PERF_PC_VS_INVOCATIONS);
263 }
264 );
265
266 counter("Textures / Fragment", Counter::Units::None, [=]() {
267 return safe_div(PERF_TP_OUTPUT_PIXELS, PERF_HLSQ_QUADS * 4);
268 }
269 );
270
271 counter("ALU / Vertex", Counter::Units::None, [=]() {
272 return safe_div(PERF_SP_VS_STAGE_FULL_ALU_INSTRUCTIONS, PERF_PC_VS_INVOCATIONS);
273 }
274 );
275
276 counter("EFU / Vertex", Counter::Units::None, [=]() {
277 return safe_div(PERF_SP_VS_STAGE_EFU_INSTRUCTIONS, PERF_PC_VS_INVOCATIONS);
278 }
279 );
280
281 counter("ALU / Fragment", Counter::Units::None, [=]() {
282 return safe_div(PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS +
283 PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS / 2, PERF_HLSQ_QUADS);
284 }
285 );
286
287 counter("EFU / Fragment", Counter::Units::None, [=]() {
288 return safe_div(PERF_SP_FS_STAGE_EFU_INSTRUCTIONS, PERF_HLSQ_QUADS);
289 }
290 );
291
292 counter("% Time Shading Vertices", Counter::Units::Percent, [=]() {
293 return percent(PERF_SP_ANY_EU_WORKING_VS_STAGE,
294 (PERF_SP_ANY_EU_WORKING_VS_STAGE +
295 PERF_SP_ANY_EU_WORKING_FS_STAGE +
296 PERF_SP_ANY_EU_WORKING_CS_STAGE));
297 }
298 );
299
300 counter("% Time Shading Fragments", Counter::Units::Percent, [=]() {
301 return percent(PERF_SP_ANY_EU_WORKING_FS_STAGE,
302 (PERF_SP_ANY_EU_WORKING_VS_STAGE +
303 PERF_SP_ANY_EU_WORKING_FS_STAGE +
304 PERF_SP_ANY_EU_WORKING_CS_STAGE));
305 }
306 );
307
308 counter("% Time Compute", Counter::Units::Percent, [=]() {
309 return percent(PERF_SP_ANY_EU_WORKING_CS_STAGE,
310 (PERF_SP_ANY_EU_WORKING_VS_STAGE +
311 PERF_SP_ANY_EU_WORKING_FS_STAGE +
312 PERF_SP_ANY_EU_WORKING_CS_STAGE));
313 }
314 );
315
316 counter("% Shader ALU Capacity Utilized", Counter::Units::Percent, [=]() {
317 return percent((PERF_SP_VS_STAGE_FULL_ALU_INSTRUCTIONS +
318 PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS +
319 PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS / 2) / 64,
320 PERF_SP_BUSY_CYCLES);
321 }
322 );
323
324 counter("% Time ALUs Working", Counter::Units::Percent, [=]() {
325 return percent(PERF_SP_ALU_WORKING_CYCLES / 2, PERF_SP_BUSY_CYCLES);
326 }
327 );
328
329 counter("% Time EFUs Working", Counter::Units::Percent, [=]() {
330 return percent(PERF_SP_EFU_WORKING_CYCLES / 2, PERF_SP_BUSY_CYCLES);
331 }
332 );
333
334 counter("% Anisotropic Filtered", Counter::Units::Percent, [=]() {
335 return percent(PERF_TP_OUTPUT_PIXELS_ANISO, PERF_TP_OUTPUT_PIXELS);
336 }
337 );
338
339 counter("% Linear Filtered", Counter::Units::Percent, [=]() {
340 return percent(PERF_TP_OUTPUT_PIXELS_BILINEAR, PERF_TP_OUTPUT_PIXELS);
341 }
342 );
343
344 counter("% Nearest Filtered", Counter::Units::Percent, [=]() {
345 return percent(PERF_TP_OUTPUT_PIXELS_POINT, PERF_TP_OUTPUT_PIXELS);
346 }
347 );
348
349 counter("% Non-Base Level Textures", Counter::Units::Percent, [=]() {
350 return percent(PERF_TP_OUTPUT_PIXELS_ZERO_LOD, PERF_TP_OUTPUT_PIXELS);
351 }
352 );
353
354 /* Reads from KGSL_PERFCOUNTER_GROUP_VBIF countable=63 */
355 // counter("Read Total (Bytes/sec)", Counter::Units::Byte, [=]() {
356 // return * (1.f / time);
357 // }
358 // );
359
360 /* Reads from KGSL_PERFCOUNTER_GROUP_VBIF countable=84 */
361 // counter("Write Total (Bytes/sec)", Counter::Units::Byte, [=]() {
362 // return * (1.f / time);
363 // }
364 // );
365
366 /* Cannot get PERF_CMPDECMP_VBIF_READ_DATA countable */
367 // counter("Texture Memory Read BW (Bytes/Second)", Counter::Units::Byte, [=]() {
368 // return (PERF_CMPDECMP_VBIF_READ_DATA + PERF_UCHE_VBIF_READ_BEATS_TP) * (1.f / time);
369 // }
370 // );
371
372 /* TODO: verify */
373 counter("(?) Vertex Memory Read (Bytes/Second)", Counter::Units::Byte, [=]() {
374 return PERF_UCHE_VBIF_READ_BEATS_VFD * 32 * (1.f / time);
375 }
376 );
377
378 /* TODO: verify */
379 counter("SP Memory Read (Bytes/Second)", Counter::Units::Byte, [=]() {
380 return PERF_UCHE_VBIF_READ_BEATS_SP * 32 * (1.f / time);
381 }
382 );
383
384 counter("Avg Bytes / Fragment", Counter::Units::Byte, [=]() {
385 return safe_div(PERF_UCHE_VBIF_READ_BEATS_TP * 32, PERF_HLSQ_QUADS * 4);
386 }
387 );
388
389 counter("Avg Bytes / Vertex", Counter::Units::Byte, [=]() {
390 return safe_div(PERF_UCHE_VBIF_READ_BEATS_VFD * 32, PERF_PC_VS_INVOCATIONS);
391 }
392 );
393
394 counter("Preemptions / second", Counter::Units::None, [=]() {
395 return PERF_CP_NUM_PREEMPTIONS * (1.f / time);
396 }
397 );
398
399 counter("Avg Preemption Delay", Counter::Units::None, [=]() {
400 return PERF_CP_PREEMPTION_REACTION_DELAY * (1.f / time);
401 }
402 );
403 }
404
405 /**
406 * Generate an submit the cmdstream to configure the counter/countable
407 * muxing
408 */
409 void
configure_counters(bool reset,bool wait)410 FreedrenoDriver::configure_counters(bool reset, bool wait)
411 {
412 struct fd_submit *submit = fd_submit_new(pipe);
413 enum fd_ringbuffer_flags flags =
414 (enum fd_ringbuffer_flags)(FD_RINGBUFFER_PRIMARY | FD_RINGBUFFER_GROWABLE);
415 struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(submit, 0x1000, flags);
416
417 for (const auto &countable : countables)
418 countable.configure(ring, reset);
419
420 struct fd_fence *fence = fd_submit_flush(submit, -1, false);
421
422 fd_fence_flush(fence);
423 fd_fence_del(fence);
424
425 fd_ringbuffer_del(ring);
426 fd_submit_del(submit);
427
428 if (wait)
429 fd_pipe_wait(pipe, fence);
430 }
431
432 /**
433 * Read the current counter values and record the time.
434 */
435 void
collect_countables()436 FreedrenoDriver::collect_countables()
437 {
438 last_dump_ts = perfetto::base::GetBootTimeNs().count();
439
440 for (const auto &countable : countables)
441 countable.collect();
442 }
443
444 bool
init_perfcnt()445 FreedrenoDriver::init_perfcnt()
446 {
447 uint64_t val;
448
449 if (dev)
450 return true;
451
452 dev = fd_device_new(drm_device.fd);
453 pipe = fd_pipe_new2(dev, FD_PIPE_3D, 0);
454 dev_id = fd_pipe_dev_id(pipe);
455
456 if (fd_pipe_get_param(pipe, FD_MAX_FREQ, &val)) {
457 PERFETTO_FATAL("Could not get MAX_FREQ");
458 return false;
459 }
460 max_freq = val;
461
462 if (fd_pipe_get_param(pipe, FD_SUSPEND_COUNT, &val)) {
463 PERFETTO_ILOG("Could not get SUSPEND_COUNT");
464 } else {
465 suspend_count = val;
466 has_suspend_count = true;
467 }
468
469 fd_pipe_set_param(pipe, FD_SYSPROF, 1);
470
471 perfcntrs = fd_perfcntrs(fd_pipe_dev_id(pipe), &num_perfcntrs);
472 if (num_perfcntrs == 0) {
473 PERFETTO_FATAL("No hw counters available");
474 return false;
475 }
476
477 assigned_counters.resize(num_perfcntrs);
478 assigned_counters.assign(assigned_counters.size(), 0);
479
480 switch (fd_dev_gen(dev_id)) {
481 case 6:
482 setup_a6xx_counters();
483 break;
484 default:
485 PERFETTO_FATAL("Unsupported GPU: a%03u", fd_dev_gpu_id(dev_id));
486 return false;
487 }
488
489 state.resize(next_countable_id);
490
491 for (const auto &countable : countables)
492 countable.resolve();
493
494 info = fd_dev_info_raw(dev_id);
495
496 io = fd_dt_find_io();
497 if (!io) {
498 PERFETTO_FATAL("Could not map GPU I/O space");
499 return false;
500 }
501
502 configure_counters(true, true);
503 collect_countables();
504
505 return true;
506 }
507
508 void
enable_counter(const uint32_t counter_id)509 FreedrenoDriver::enable_counter(const uint32_t counter_id)
510 {
511 enabled_counters.push_back(counters[counter_id]);
512 }
513
514 void
enable_all_counters()515 FreedrenoDriver::enable_all_counters()
516 {
517 enabled_counters.reserve(counters.size());
518 for (auto &counter : counters) {
519 enabled_counters.push_back(counter);
520 }
521 }
522
523 void
enable_perfcnt(const uint64_t)524 FreedrenoDriver::enable_perfcnt(const uint64_t /* sampling_period_ns */)
525 {
526 }
527
528 bool
dump_perfcnt()529 FreedrenoDriver::dump_perfcnt()
530 {
531 if (has_suspend_count) {
532 uint64_t val;
533
534 fd_pipe_get_param(pipe, FD_SUSPEND_COUNT, &val);
535
536 if (suspend_count != val) {
537 PERFETTO_ILOG("Device had suspended!");
538
539 suspend_count = val;
540
541 configure_counters(true, true);
542 collect_countables();
543
544 /* We aren't going to have anything sensible by comparing
545 * current values to values from prior to the suspend, so
546 * just skip this sampling period.
547 */
548 return false;
549 }
550 }
551
552 auto last_ts = last_dump_ts;
553
554 /* Capture the timestamp from the *start* of the sampling period: */
555 last_capture_ts = last_dump_ts;
556
557 collect_countables();
558
559 auto elapsed_time_ns = last_dump_ts - last_ts;
560
561 time = (float)elapsed_time_ns / 1000000000.0;
562
563 /* On older kernels that dont' support querying the suspend-
564 * count, just send configuration cmdstream regularly to keep
565 * the GPU alive and correctly configured for the countables
566 * we want
567 */
568 if (!has_suspend_count) {
569 configure_counters(false, false);
570 }
571
572 return true;
573 }
574
next()575 uint64_t FreedrenoDriver::next()
576 {
577 auto ret = last_capture_ts;
578 last_capture_ts = 0;
579 return ret;
580 }
581
disable_perfcnt()582 void FreedrenoDriver::disable_perfcnt()
583 {
584 /* There isn't really any disable, only reconfiguring which countables
585 * get muxed to which counters
586 */
587 }
588
589 /*
590 * Countable
591 */
592
593 FreedrenoDriver::Countable
countable(std::string name)594 FreedrenoDriver::countable(std::string name)
595 {
596 auto countable = Countable(this, name);
597 countables.emplace_back(countable);
598 return countable;
599 }
600
Countable(FreedrenoDriver * d,std::string name)601 FreedrenoDriver::Countable::Countable(FreedrenoDriver *d, std::string name)
602 : id {d->next_countable_id++}, d {d}, name {name}
603 {
604 }
605
606 /* Emit register writes on ring to configure counter/countable muxing: */
607 void
configure(struct fd_ringbuffer * ring,bool reset) const608 FreedrenoDriver::Countable::configure(struct fd_ringbuffer *ring, bool reset) const
609 {
610 const struct fd_perfcntr_countable *countable = d->state[id].countable;
611 const struct fd_perfcntr_counter *counter = d->state[id].counter;
612
613 OUT_PKT7(ring, CP_WAIT_FOR_IDLE, 0);
614
615 if (counter->enable && reset) {
616 OUT_PKT4(ring, counter->enable, 1);
617 OUT_RING(ring, 0);
618 }
619
620 if (counter->clear && reset) {
621 OUT_PKT4(ring, counter->clear, 1);
622 OUT_RING(ring, 1);
623
624 OUT_PKT4(ring, counter->clear, 1);
625 OUT_RING(ring, 0);
626 }
627
628 OUT_PKT4(ring, counter->select_reg, 1);
629 OUT_RING(ring, countable->selector);
630
631 if (counter->enable && reset) {
632 OUT_PKT4(ring, counter->enable, 1);
633 OUT_RING(ring, 1);
634 }
635 }
636
637 /* Collect current counter value and calculate delta since last sample: */
638 void
collect() const639 FreedrenoDriver::Countable::collect() const
640 {
641 const struct fd_perfcntr_counter *counter = d->state[id].counter;
642
643 d->state[id].last_value = d->state[id].value;
644
645 /* this is true on a5xx and later */
646 assert(counter->counter_reg_lo + 1 == counter->counter_reg_hi);
647 uint64_t *reg = (uint64_t *)((uint32_t *)d->io + counter->counter_reg_lo);
648
649 d->state[id].value = *reg;
650 }
651
652 /* Resolve the countable and assign next counter from it's group: */
653 void
resolve() const654 FreedrenoDriver::Countable::resolve() const
655 {
656 for (unsigned i = 0; i < d->num_perfcntrs; i++) {
657 const struct fd_perfcntr_group *g = &d->perfcntrs[i];
658 for (unsigned j = 0; j < g->num_countables; j++) {
659 const struct fd_perfcntr_countable *c = &g->countables[j];
660 if (name == c->name) {
661 d->state[id].countable = c;
662
663 /* Assign a counter from the same group: */
664 assert(d->assigned_counters[i] < g->num_counters);
665 d->state[id].counter = &g->counters[d->assigned_counters[i]++];
666
667 std::cout << "Countable: " << name << ", group=" << g->name <<
668 ", counter=" << d->assigned_counters[i] - 1 << "\n";
669
670 return;
671 }
672 }
673 }
674 unreachable("no such countable!");
675 }
676
677 uint64_t
get_value() const678 FreedrenoDriver::Countable::get_value() const
679 {
680 return d->state[id].value - d->state[id].last_value;
681 }
682
683 /*
684 * DerivedCounter
685 */
686
DerivedCounter(FreedrenoDriver * d,std::string name,Counter::Units units,std::function<int64_t ()> derive)687 FreedrenoDriver::DerivedCounter::DerivedCounter(FreedrenoDriver *d, std::string name,
688 Counter::Units units,
689 std::function<int64_t()> derive)
690 : Counter(d->next_counter_id++, name, 0)
691 {
692 std::cout << "DerivedCounter: " << name << ", id=" << id << "\n";
693 this->units = units;
694 set_getter([=](const Counter &c, const Driver &d) {
695 return derive();
696 }
697 );
698 }
699
700 FreedrenoDriver::DerivedCounter
counter(std::string name,Counter::Units units,std::function<int64_t ()> derive)701 FreedrenoDriver::counter(std::string name, Counter::Units units,
702 std::function<int64_t()> derive)
703 {
704 auto counter = DerivedCounter(this, name, units, derive);
705 counters.emplace_back(counter);
706 return counter;
707 }
708
709 uint32_t
gpu_clock_id() const710 FreedrenoDriver::gpu_clock_id() const
711 {
712 return perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME;
713 }
714
715 uint64_t
gpu_timestamp() const716 FreedrenoDriver::gpu_timestamp() const
717 {
718 return perfetto::base::GetBootTimeNs().count();
719 }
720
721 bool
cpu_gpu_timestamp(uint64_t &,uint64_t &) const722 FreedrenoDriver::cpu_gpu_timestamp(uint64_t &, uint64_t &) const
723 {
724 /* Not supported */
725 return false;
726 }
727
728 } // namespace pps
729