xref: /aosp_15_r20/external/mesa3d/src/freedreno/perfcntrs/fdperf.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2016 Rob Clark <[email protected]>
3  * All Rights Reserved.
4  * SPDX-License-Identifier: MIT
5  */
6 
7 #include <assert.h>
8 #include <curses.h>
9 #include <err.h>
10 #include <inttypes.h>
11 #include <libconfig.h>
12 #include <locale.h>
13 #include <stdint.h>
14 #include <stdio.h>
15 #include <stdlib.h>
16 #include <string.h>
17 #include <time.h>
18 #include <unistd.h>
19 #include <xf86drm.h>
20 
21 #include "drm/freedreno_drmif.h"
22 #include "drm/freedreno_ringbuffer.h"
23 
24 #include "util/os_file.h"
25 
26 #include "freedreno_dt.h"
27 #include "freedreno_perfcntr.h"
28 
/* Capacity of the per-group counter arrays in struct counter_group. */
#define MAX_CNTR_PER_GROUP 24
/* Default sampling/redraw interval, in milliseconds. */
#define REFRESH_MS         500

/* Command-line configurable settings (see parse_options()). */
static struct {
   int refresh_ms; /* sampling interval in milliseconds (-r) */
   bool dump;      /* print one sample and exit instead of running the UI (-d) */
} options = {
   .dump = false,
   .refresh_ms = REFRESH_MS,
};
39 
40 /* NOTE first counter group should always be CP, since we unconditionally
41  * use CP counter to measure the gpu freq.
42  */
43 
44 struct counter_group {
45    const struct fd_perfcntr_group *group;
46 
47    struct {
48       const struct fd_perfcntr_counter *counter;
49       uint16_t select_val;
50       bool is_gpufreq_counter;
51    } counter[MAX_CNTR_PER_GROUP];
52 
53    /* name of currently selected counters (for UI): */
54    const char *label[MAX_CNTR_PER_GROUP];
55 
56    uint64_t value[MAX_CNTR_PER_GROUP];
57    uint64_t value_delta[MAX_CNTR_PER_GROUP];
58 
59    uint64_t sample_time[MAX_CNTR_PER_GROUP];
60    uint64_t sample_time_delta[MAX_CNTR_PER_GROUP];
61 };
62 
/* Global device state, filled in by find_device() and main(). */
static struct {
   /* mapping used to read counter registers directly */
   void *io;
   /* GPU frequency range as reported by find_device() */
   uint32_t min_freq;
   uint32_t max_freq;
   /* per-generation table of counters: */
   unsigned ngroups;
   struct counter_group *groups;
   /* drm device (for writing select regs via ring): */
   struct fd_device *dev;
   struct fd_pipe *pipe;
   const struct fd_dev_id *dev_id;
   /* pending submit and its ring; both NULL when nothing is queued */
   struct fd_submit *submit;
   struct fd_ringbuffer *ring;
} dev;

/* Defined further down, but referenced by earlier code. */
static void config_save(void);
static void config_restore(void);
static void restore_counter_groups(void);
81 
82 /*
83  * helpers
84  */
85 
/**
 * Return the current CLOCK_MONOTONIC time in microseconds.
 *
 * The seconds field is widened to 64 bits before being scaled, so the
 * multiplication cannot overflow on targets where time_t is 32 bits wide.
 * The clock_gettime() result is not checked; ts is zero-initialized so a
 * failed call yields 0 instead of an indeterminate value.
 */
static uint64_t
gettime_us(void)
{
   struct timespec ts = {0};
   clock_gettime(CLOCK_MONOTONIC, &ts);
   return ((uint64_t)ts.tv_sec * 1000000u) + ((uint64_t)ts.tv_nsec / 1000u);
}
93 
/**
 * Sleep for `us` microseconds, measured against CLOCK_MONOTONIC.
 *
 * The clock_nanosleep() result is ignored, so the sleep may end early if
 * the call is interrupted.
 */
static void
sleep_us(uint32_t us)
{
   struct timespec duration = {0};

   duration.tv_sec = us / 1000000;
   duration.tv_nsec = (us % 1000000) * 1000;

   /* flags == 0 requests a relative (not absolute) sleep */
   clock_nanosleep(CLOCK_MONOTONIC, 0, &duration, NULL);
}
103 
/**
 * Distance from an earlier reading `a` to a later reading `b` of a
 * free-running 64-bit value.
 *
 * Unsigned subtraction is performed modulo 2^64, so a single rollover
 * between the two readings is handled without a special case: for
 * example delta(UINT64_MAX, 0) is 1.
 */
static uint64_t
delta(uint64_t a, uint64_t b)
{
   return b - a;
}
113 
114 static void
find_device(void)115 find_device(void)
116 {
117    int ret;
118 
119    dev.dev = fd_device_open();
120    if (!dev.dev)
121       err(1, "could not open drm device");
122 
123    dev.pipe = fd_pipe_new(dev.dev, FD_PIPE_3D);
124 
125    dev.dev_id = fd_pipe_dev_id(dev.pipe);
126    if (!fd_dev_info_raw(dev.dev_id))
127       err(1, "unknown device");
128 
129    printf("device: %s\n", fd_dev_name(dev.dev_id));
130 
131    /* try MAX_FREQ first as that will work regardless of old dt
132     * dt bindings vs upstream bindings:
133     */
134    uint64_t val;
135    ret = fd_pipe_get_param(dev.pipe, FD_MAX_FREQ, &val);
136    if (ret) {
137       printf("falling back to parsing DT bindings for freq\n");
138       if (!fd_dt_find_freqs(&dev.min_freq, &dev.max_freq))
139          err(1, "could not find GPU freqs");
140    } else {
141       dev.min_freq = 0;
142       dev.max_freq = val;
143    }
144 
145    printf("min_freq=%u, max_freq=%u\n", dev.min_freq, dev.max_freq);
146 
147    dev.io = fd_dt_find_io();
148    if (!dev.io) {
149       err(1, "could not map device");
150    }
151 
152    fd_pipe_set_param(dev.pipe, FD_SYSPROF, 1);
153 }
154 
155 /*
156  * perf-monitor
157  */
158 
159 static void
flush_ring(void)160 flush_ring(void)
161 {
162    if (!dev.submit)
163       return;
164 
165    struct fd_fence *fence = fd_submit_flush(dev.submit, -1, false);
166 
167    if (!fence)
168       errx(1, "submit failed");
169 
170    fd_fence_flush(fence);
171    fd_fence_del(fence);
172    fd_ringbuffer_del(dev.ring);
173    fd_submit_del(dev.submit);
174 
175    dev.ring = NULL;
176    dev.submit = NULL;
177 }
178 
179 static void
select_counter(struct counter_group * group,int ctr,int countable_val)180 select_counter(struct counter_group *group, int ctr, int countable_val)
181 {
182    assert(ctr < group->group->num_counters);
183 
184    unsigned countable_idx = UINT32_MAX;
185    for (unsigned i = 0; i < group->group->num_countables; i++) {
186       if (countable_val != group->group->countables[i].selector)
187          continue;
188 
189       countable_idx = i;
190       break;
191    }
192 
193    if (countable_idx >= group->group->num_countables)
194       return;
195 
196    group->label[ctr] = group->group->countables[countable_idx].name;
197    group->counter[ctr].select_val = countable_val;
198 
199    if (!dev.submit) {
200       dev.submit = fd_submit_new(dev.pipe);
201       dev.ring = fd_submit_new_ringbuffer(
202          dev.submit, 0x1000, FD_RINGBUFFER_PRIMARY | FD_RINGBUFFER_GROWABLE);
203    }
204 
205    /* bashing select register directly while gpu is active will end
206     * in tears.. so we need to write it via the ring:
207     *
208     * TODO it would help startup time, if gpu is loaded, to batch
209     * all the initial writes and do a single flush.. although that
210     * makes things more complicated for capturing inital sample value
211     */
212    struct fd_ringbuffer *ring = dev.ring;
213    switch (fd_dev_gen(dev.dev_id)) {
214    case 2:
215    case 3:
216    case 4:
217       OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
218       OUT_RING(ring, 0x00000000);
219 
220       if (group->group->counters[ctr].enable) {
221          OUT_PKT0(ring, group->group->counters[ctr].enable, 1);
222          OUT_RING(ring, 0);
223       }
224 
225       if (group->group->counters[ctr].clear) {
226          OUT_PKT0(ring, group->group->counters[ctr].clear, 1);
227          OUT_RING(ring, 1);
228 
229          OUT_PKT0(ring, group->group->counters[ctr].clear, 1);
230          OUT_RING(ring, 0);
231       }
232 
233       OUT_PKT0(ring, group->group->counters[ctr].select_reg, 1);
234       OUT_RING(ring, countable_val);
235 
236       if (group->group->counters[ctr].enable) {
237          OUT_PKT0(ring, group->group->counters[ctr].enable, 1);
238          OUT_RING(ring, 1);
239       }
240 
241       break;
242    case 5:
243    case 6:
244    case 7:
245       OUT_PKT7(ring, CP_WAIT_FOR_IDLE, 0);
246 
247       if (group->group->counters[ctr].enable) {
248          OUT_PKT4(ring, group->group->counters[ctr].enable, 1);
249          OUT_RING(ring, 0);
250       }
251 
252       if (group->group->counters[ctr].clear) {
253          OUT_PKT4(ring, group->group->counters[ctr].clear, 1);
254          OUT_RING(ring, 1);
255 
256          OUT_PKT4(ring, group->group->counters[ctr].clear, 1);
257          OUT_RING(ring, 0);
258       }
259 
260       OUT_PKT4(ring, group->group->counters[ctr].select_reg, 1);
261       OUT_RING(ring, countable_val);
262 
263       if (group->group->counters[ctr].enable) {
264          OUT_PKT4(ring, group->group->counters[ctr].enable, 1);
265          OUT_RING(ring, 1);
266       }
267 
268       break;
269    }
270 }
271 
load_counter_value(struct counter_group * group,int ctr)272 static uint64_t load_counter_value(struct counter_group *group, int ctr)
273 {
274    /* We can read the counter register value as an uint64_t, as long as the
275     * lo/hi addresses are neighboring and the lo address is 8-byte-aligned.
276     * This currently holds for all counters exposed in perfcounter groups.
277     */
278    const struct fd_perfcntr_counter *counter = group->counter[ctr].counter;
279    assert(counter->counter_reg_lo + 1 == counter->counter_reg_hi);
280    assert(!((counter->counter_reg_lo * 4) % 8));
281    return *((uint64_t *) (dev.io + counter->counter_reg_lo * 4));
282 }
283 
284 static void
resample_counter(struct counter_group * group,int ctr,uint64_t sample_time)285 resample_counter(struct counter_group *group, int ctr, uint64_t sample_time)
286 {
287    uint64_t previous_value = group->value[ctr];
288    group->value[ctr] = load_counter_value(group, ctr);
289    group->value_delta[ctr] = delta(previous_value, group->value[ctr]);
290 
291    uint64_t previous_sample_time = group->sample_time[ctr];
292    group->sample_time[ctr] = sample_time;
293    group->sample_time_delta[ctr] = delta(previous_sample_time, sample_time);
294 }
295 
296 /* sample all the counters: */
297 static void
resample(void)298 resample(void)
299 {
300    static uint64_t last_time;
301    uint64_t current_time = gettime_us();
302 
303    if ((current_time - last_time) < (options.refresh_ms * 1000 / 2))
304       return;
305 
306    last_time = current_time;
307 
308    for (unsigned i = 0; i < dev.ngroups; i++) {
309       struct counter_group *group = &dev.groups[i];
310       for (unsigned j = 0; j < group->group->num_counters; j++) {
311          resample_counter(group, j, current_time);
312       }
313    }
314 }
315 
316 /*
317  * The UI
318  */
319 
/* curses color-pair ids, registered with init_pair() in main_ui(): */
#define COLOR_GROUP_HEADER 1
#define COLOR_FOOTER       2
#define COLOR_INVERSE      3

/* window width and height, refreshed on every redraw() */
static int w;
static int h;
/* width of the label column: longest countable name plus one */
static int ctr_width;
/* number of selectable rows (group headers included) */
static int max_rows;
/* logical row of the highlighted counter; row 0 is a group header */
static int current_cntr = 1;
327 
328 static void
redraw_footer(WINDOW * win)329 redraw_footer(WINDOW *win)
330 {
331    char footer[128];
332    int n = snprintf(footer, sizeof(footer), " fdperf: %s (%.2fMHz..%.2fMHz)",
333                     fd_dev_name(dev.dev_id), ((float)dev.min_freq) / 1000000.0,
334                     ((float)dev.max_freq) / 1000000.0);
335 
336    wmove(win, h - 1, 0);
337    wattron(win, COLOR_PAIR(COLOR_FOOTER));
338    waddstr(win, footer);
339    whline(win, ' ', w - n);
340    wattroff(win, COLOR_PAIR(COLOR_FOOTER));
341 }
342 
343 static void
redraw_group_header(WINDOW * win,int row,const char * name)344 redraw_group_header(WINDOW *win, int row, const char *name)
345 {
346    wmove(win, row, 0);
347    wattron(win, A_BOLD);
348    wattron(win, COLOR_PAIR(COLOR_GROUP_HEADER));
349    waddstr(win, name);
350    whline(win, ' ', w - strlen(name));
351    wattroff(win, COLOR_PAIR(COLOR_GROUP_HEADER));
352    wattroff(win, A_BOLD);
353 }
354 
355 static void
redraw_counter_label(WINDOW * win,int row,const char * name,bool selected)356 redraw_counter_label(WINDOW *win, int row, const char *name, bool selected)
357 {
358    int n = strlen(name);
359    assert(n <= ctr_width);
360    wmove(win, row, 0);
361    whline(win, ' ', ctr_width - n);
362    wmove(win, row, ctr_width - n);
363    if (selected)
364       wattron(win, COLOR_PAIR(COLOR_INVERSE));
365    waddstr(win, name);
366    if (selected)
367       wattroff(win, COLOR_PAIR(COLOR_INVERSE));
368    waddstr(win, ": ");
369 }
370 
371 static void
redraw_counter_value_cycles(WINDOW * win,float val)372 redraw_counter_value_cycles(WINDOW *win, float val)
373 {
374    char str[32];
375    int x = getcurx(win);
376    int valwidth = w - x;
377    int barwidth, n;
378 
379    /* convert to fraction of max freq: */
380    val = val / (float)dev.max_freq;
381 
382    /* figure out percentage-bar width: */
383    barwidth = (int)(val * valwidth);
384 
385    /* sometimes things go over 100%.. idk why, could be
386     * things running faster than base clock, or counter
387     * summing up cycles in multiple cores?
388     */
389    barwidth = MIN2(barwidth, valwidth - 1);
390 
391    n = snprintf(str, sizeof(str), "%.2f%%", 100.0 * val);
392    wattron(win, COLOR_PAIR(COLOR_INVERSE));
393    waddnstr(win, str, barwidth);
394    if (barwidth > n) {
395       whline(win, ' ', barwidth - n);
396       wmove(win, getcury(win), x + barwidth);
397    }
398    wattroff(win, COLOR_PAIR(COLOR_INVERSE));
399    if (barwidth < n)
400       waddstr(win, str + barwidth);
401    whline(win, ' ', w - getcurx(win));
402 }
403 
404 static void
redraw_counter_value(WINDOW * win,int row,struct counter_group * group,int ctr)405 redraw_counter_value(WINDOW *win, int row, struct counter_group *group, int ctr)
406 {
407    char str[32];
408    int n = snprintf(str, sizeof(str), "%" PRIu64 " ", group->value_delta[ctr]);
409 
410    whline(win, ' ', 24 - n);
411    wmove(win, row, getcurx(win) + 24 - n);
412    waddstr(win, str);
413 
414    /* quick hack, if the label has "CYCLE" in the name, it is
415     * probably a cycle counter ;-)
416     * Perhaps add more info in rnndb schema to know how to
417     * treat individual counters (ie. which are cycles, and
418     * for those we want to present as a percentage do we
419     * need to scale the result.. ie. is it running at some
420     * multiple or divisor of core clk, etc)
421     *
422     * TODO it would be much more clever to get this from xml
423     * Also.. in some cases I think we want to know how many
424     * units the counter is counting for, ie. if a320 has 2x
425     * shader as a306 we might need to scale the result..
426     */
427    if (strstr(group->label[ctr], "CYCLE") ||
428        strstr(group->label[ctr], "BUSY") || strstr(group->label[ctr], "IDLE")) {
429       float cycles_val = (float) group->value_delta[ctr] * 1000000.0 /
430                          (float) group->sample_time_delta[ctr];
431       redraw_counter_value_cycles(win, cycles_val);
432    } else {
433       whline(win, ' ', w - getcurx(win));
434    }
435 }
436 
437 static void
redraw_counter(WINDOW * win,int row,struct counter_group * group,int ctr,bool selected)438 redraw_counter(WINDOW *win, int row, struct counter_group *group, int ctr,
439                bool selected)
440 {
441    redraw_counter_label(win, row, group->label[ctr], selected);
442    redraw_counter_value(win, row, group, ctr);
443 }
444 
445 static void
redraw_gpufreq_counter(WINDOW * win,int row)446 redraw_gpufreq_counter(WINDOW *win, int row)
447 {
448    redraw_counter_label(win, row, "Freq (MHz)", false);
449 
450    struct counter_group *group = &dev.groups[0];
451    float freq_val = (float) group->value_delta[0] / (float) group->sample_time_delta[0];
452 
453    char str[32];
454    snprintf(str, sizeof(str), "%.2f", freq_val);
455 
456    waddstr(win, str);
457    whline(win, ' ', w - getcurx(win));
458 }
459 
460 static void
redraw(WINDOW * win)461 redraw(WINDOW *win)
462 {
463    static int scroll = 0;
464    int max, row = 0;
465 
466    w = getmaxx(win);
467    h = getmaxy(win);
468 
469    max = h - 3;
470 
471    if ((current_cntr - scroll) > (max - 1)) {
472       scroll = current_cntr - (max - 1);
473    } else if ((current_cntr - 1) < scroll) {
474       scroll = current_cntr - 1;
475    }
476 
477    for (unsigned i = 0; i < dev.ngroups; i++) {
478       struct counter_group *group = &dev.groups[i];
479       unsigned j = 0;
480 
481       if (group->counter[0].is_gpufreq_counter)
482          j++;
483 
484       if (j < group->group->num_counters) {
485          if ((scroll <= row) && ((row - scroll) < max))
486             redraw_group_header(win, row - scroll, group->group->name);
487          row++;
488       }
489 
490       for (; j < group->group->num_counters; j++) {
491          if ((scroll <= row) && ((row - scroll) < max))
492             redraw_counter(win, row - scroll, group, j, row == current_cntr);
493          row++;
494       }
495    }
496 
497    /* convert back to physical (unscrolled) offset: */
498    row = max;
499 
500    redraw_group_header(win, row, "Status");
501    row++;
502 
503    /* Draw GPU freq row: */
504    redraw_gpufreq_counter(win, row);
505    row++;
506 
507    redraw_footer(win);
508 
509    refresh();
510 }
511 
512 static struct counter_group *
current_counter(int * ctr)513 current_counter(int *ctr)
514 {
515    int n = 0;
516 
517    for (unsigned i = 0; i < dev.ngroups; i++) {
518       struct counter_group *group = &dev.groups[i];
519       unsigned j = 0;
520 
521       if (group->counter[0].is_gpufreq_counter)
522          j++;
523 
524       /* account for group header: */
525       if (j < group->group->num_counters) {
526          /* cannot select group header.. return null to indicate this
527           * main_ui():
528           */
529          if (n == current_cntr)
530             return NULL;
531          n++;
532       }
533 
534       for (; j < group->group->num_counters; j++) {
535          if (n == current_cntr) {
536             if (ctr)
537                *ctr = j;
538             return group;
539          }
540          n++;
541       }
542    }
543 
544    assert(0);
545    return NULL;
546 }
547 
548 static void
counter_dialog(void)549 counter_dialog(void)
550 {
551    WINDOW *dialog;
552    struct counter_group *group;
553    int cnt = 0, current = 0, scroll;
554 
555    /* figure out dialog size: */
556    int dh = h / 2;
557    int dw = ctr_width + 2;
558 
559    group = current_counter(&cnt);
560 
561    /* find currently selected idx (note there can be discontinuities
562     * so the selected value does not map 1:1 to current idx)
563     */
564    uint32_t selected = group->counter[cnt].select_val;
565    for (int i = 0; i < group->group->num_countables; i++) {
566       if (group->group->countables[i].selector == selected) {
567          current = i;
568          break;
569       }
570    }
571 
572    /* scrolling offset, if dialog is too small for all the choices: */
573    scroll = 0;
574 
575    dialog = newwin(dh, dw, (h - dh) / 2, (w - dw) / 2);
576    box(dialog, 0, 0);
577    wrefresh(dialog);
578    keypad(dialog, true);
579 
580    while (true) {
581       int max = MIN2(dh - 2, group->group->num_countables);
582       int selector = -1;
583 
584       if ((current - scroll) >= (dh - 3)) {
585          scroll = current - (dh - 3);
586       } else if (current < scroll) {
587          scroll = current;
588       }
589 
590       for (int i = 0; i < max; i++) {
591          int n = scroll + i;
592          wmove(dialog, i + 1, 1);
593          if (n == current) {
594             assert(n < group->group->num_countables);
595             selector = group->group->countables[n].selector;
596             wattron(dialog, COLOR_PAIR(COLOR_INVERSE));
597          }
598          if (n < group->group->num_countables)
599             waddstr(dialog, group->group->countables[n].name);
600          whline(dialog, ' ', dw - getcurx(dialog) - 1);
601          if (n == current)
602             wattroff(dialog, COLOR_PAIR(COLOR_INVERSE));
603       }
604 
605       assert(selector >= 0);
606 
607       switch (wgetch(dialog)) {
608       case KEY_UP:
609          current = MAX2(0, current - 1);
610          break;
611       case KEY_DOWN:
612          current = MIN2(group->group->num_countables - 1, current + 1);
613          break;
614       case KEY_LEFT:
615       case KEY_ENTER:
616          /* select new sampler */
617          select_counter(group, cnt, selector);
618          flush_ring();
619          config_save();
620          goto out;
621       case 'q':
622          goto out;
623       default:
624          /* ignore */
625          break;
626       }
627 
628       resample();
629    }
630 
631 out:
632    wborder(dialog, ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ');
633    delwin(dialog);
634 }
635 
636 static void
scroll_cntr(int amount)637 scroll_cntr(int amount)
638 {
639    if (amount < 0) {
640       current_cntr = MAX2(1, current_cntr + amount);
641       if (current_counter(NULL) == NULL) {
642          current_cntr = MAX2(1, current_cntr - 1);
643       }
644    } else {
645       current_cntr = MIN2(max_rows - 1, current_cntr + amount);
646       if (current_counter(NULL) == NULL)
647          current_cntr = MIN2(max_rows - 1, current_cntr + 1);
648    }
649 }
650 
651 static void
main_ui(void)652 main_ui(void)
653 {
654    WINDOW *mainwin;
655    uint64_t last_time = gettime_us();
656 
657    /* Run an initial sample to set up baseline counter values. */
658    resample();
659 
660    /* curses setup: */
661    mainwin = initscr();
662    if (!mainwin)
663       goto out;
664 
665    cbreak();
666    wtimeout(mainwin, options.refresh_ms);
667    noecho();
668    keypad(mainwin, true);
669    curs_set(0);
670    start_color();
671    init_pair(COLOR_GROUP_HEADER, COLOR_WHITE, COLOR_GREEN);
672    init_pair(COLOR_FOOTER, COLOR_WHITE, COLOR_BLUE);
673    init_pair(COLOR_INVERSE, COLOR_BLACK, COLOR_WHITE);
674 
675    while (true) {
676       switch (wgetch(mainwin)) {
677       case KEY_UP:
678          scroll_cntr(-1);
679          break;
680       case KEY_DOWN:
681          scroll_cntr(+1);
682          break;
683       case KEY_NPAGE: /* page-down */
684          /* TODO figure out # of rows visible? */
685          scroll_cntr(+15);
686          break;
687       case KEY_PPAGE: /* page-up */
688          /* TODO figure out # of rows visible? */
689          scroll_cntr(-15);
690          break;
691       case KEY_RIGHT:
692          counter_dialog();
693          break;
694       case 'q':
695          goto out;
696          break;
697       default:
698          /* ignore */
699          break;
700       }
701       resample();
702       redraw(mainwin);
703 
704       /* restore the counters every 0.5s in case the GPU has suspended,
705        * in which case the current selected countables will have reset:
706        */
707       uint64_t t = gettime_us();
708       if (delta(last_time, t) > 500000) {
709          restore_counter_groups();
710          flush_ring();
711          last_time = t;
712       }
713    }
714 
715    /* restore settings.. maybe we need an atexit()??*/
716 out:
717    delwin(mainwin);
718    endwin();
719    refresh();
720 }
721 
722 static void
dump_counters(void)723 dump_counters(void)
724 {
725    resample();
726    sleep_us(options.refresh_ms * 1000);
727    resample();
728 
729    for (unsigned i = 0; i < dev.ngroups; i++) {
730       const struct counter_group *group = &dev.groups[i];
731       for (unsigned j = 0; j < group->group->num_counters; j++) {
732          const char *label = group->label[j];
733          float val = (float) group->value_delta[j] * 1000000.0 /
734                      (float) group->sample_time_delta[j];
735 
736          int n = printf("%s: ", label) - 2;
737          while (n++ < ctr_width)
738             fputc(' ', stdout);
739 
740          n = printf("%" PRIu64, group->value_delta[j]);
741          while (n++ < 24)
742             fputc(' ', stdout);
743 
744          if (strstr(label, "CYCLE") ||
745              strstr(label, "BUSY") ||
746              strstr(label, "IDLE")) {
747             val = val / dev.max_freq * 100.0f;
748             printf(" %.2f%%\n", val);
749          } else {
750             printf("\n");
751          }
752       }
753    }
754 }
755 
756 static void
restore_counter_groups(void)757 restore_counter_groups(void)
758 {
759    for (unsigned i = 0; i < dev.ngroups; i++) {
760       struct counter_group *group = &dev.groups[i];
761 
762       for (unsigned j = 0; j < group->group->num_counters; j++) {
763          /* This should also write the CP_ALWAYS_COUNT selectable value into
764           * the reserved CP counter we use for GPU frequency measurement,
765           * avoiding someone else writing a different value there.
766           */
767          select_counter(group, j, group->counter[j].select_val);
768       }
769    }
770 }
771 
772 static void
setup_counter_groups(const struct fd_perfcntr_group * groups)773 setup_counter_groups(const struct fd_perfcntr_group *groups)
774 {
775    for (unsigned i = 0; i < dev.ngroups; i++) {
776       struct counter_group *group = &dev.groups[i];
777 
778       group->group = &groups[i];
779 
780       max_rows += group->group->num_counters + 1;
781 
782       /* We reserve the first counter of the CP group (first in the list) for
783        * measuring GPU frequency that's displayed in the footer.
784        */
785       if (i == 0) {
786          /* We won't be displaying the private counter alongside others. We
787           * also won't be displaying the group header if we're taking over
788           * the only counter (e.g. on a2xx).
789           */
790          max_rows--;
791          if (groups[0].num_counters < 2)
792             max_rows--;
793 
794          /* Enforce the CP_ALWAYS_COUNT countable for this counter. */
795          unsigned always_count_index = UINT32_MAX;
796          for (unsigned i = 0; i < groups[0].num_countables; ++i) {
797             if (strcmp(groups[0].countables[i].name, "PERF_CP_ALWAYS_COUNT"))
798                continue;
799 
800             always_count_index = i;
801             break;
802          }
803 
804          if (always_count_index < groups[0].num_countables) {
805             group->counter[0].select_val = groups[0].countables[always_count_index].selector;
806             group->counter[0].is_gpufreq_counter = true;
807          }
808       }
809 
810       for (unsigned j = 0; j < group->group->num_counters; j++) {
811          group->counter[j].counter = &group->group->counters[j];
812 
813          if (!group->counter[j].is_gpufreq_counter)
814             group->counter[j].select_val = j;
815       }
816 
817       for (unsigned j = 0; j < group->group->num_countables; j++) {
818          ctr_width =
819             MAX2(ctr_width, strlen(group->group->countables[j].name) + 1);
820       }
821    }
822 }
823 
824 /*
825  * configuration / persistence
826  */
827 
828 static config_t cfg;
829 static config_setting_t *setting;
830 
831 static void
config_save(void)832 config_save(void)
833 {
834    for (unsigned i = 0; i < dev.ngroups; i++) {
835       struct counter_group *group = &dev.groups[i];
836       config_setting_t *sect =
837          config_setting_get_member(setting, group->group->name);
838 
839       for (unsigned j = 0; j < group->group->num_counters; j++) {
840          /* Don't save the GPU frequency measurement counter. */
841          if (group->counter[j].is_gpufreq_counter)
842             continue;
843 
844          char name[] = "counter0000";
845          sprintf(name, "counter%d", j);
846          config_setting_t *s = config_setting_lookup(sect, name);
847          config_setting_set_int(s, group->counter[j].select_val);
848       }
849    }
850 
851    config_write_file(&cfg, "fdperf.cfg");
852 }
853 
854 static void
config_restore(void)855 config_restore(void)
856 {
857    config_init(&cfg);
858 
859    /* Read the file. If there is an error, report it and exit. */
860    if (!config_read_file(&cfg, "fdperf.cfg")) {
861       warn("could not restore settings");
862    }
863 
864    config_setting_t *root = config_root_setting(&cfg);
865 
866    /* per device settings: */
867    char device_name[64];
868    snprintf(device_name, sizeof(device_name), "%s", fd_dev_name(dev.dev_id));
869    setting = config_setting_get_member(root, device_name);
870    if (!setting)
871       setting = config_setting_add(root, device_name, CONFIG_TYPE_GROUP);
872    if (!setting)
873       return;
874 
875    for (unsigned i = 0; i < dev.ngroups; i++) {
876       struct counter_group *group = &dev.groups[i];
877       config_setting_t *sect =
878          config_setting_get_member(setting, group->group->name);
879 
880       if (!sect) {
881          sect =
882             config_setting_add(setting, group->group->name, CONFIG_TYPE_GROUP);
883       }
884 
885       for (unsigned j = 0; j < group->group->num_counters; j++) {
886          /* Don't restore the GPU frequency measurement counter. */
887          if (group->counter[j].is_gpufreq_counter)
888             continue;
889 
890          char name[] = "counter0000";
891          sprintf(name, "counter%d", j);
892          config_setting_t *s = config_setting_lookup(sect, name);
893          if (!s) {
894             config_setting_add(sect, name, CONFIG_TYPE_INT);
895             continue;
896          }
897          select_counter(group, j, config_setting_get_int(s));
898       }
899    }
900 }
901 
/**
 * Print the command-line help to stderr and terminate with exit status 2.
 * `argv0` is the program name shown in the usage line.
 */
static void
print_usage(const char *argv0)
{
   static const char usage_fmt[] =
      "Usage: %s [OPTION]...\n"
      "\n"
      "  -r <N>     refresh every N milliseconds\n"
      "  -d         dump counters and exit\n"
      "  -h         show this message\n";

   fprintf(stderr, usage_fmt, argv0);
   exit(2);
}
914 
915 static void
parse_options(int argc,char ** argv)916 parse_options(int argc, char **argv)
917 {
918    int c;
919 
920    while ((c = getopt(argc, argv, "r:d")) != -1) {
921       switch (c) {
922       case 'r':
923          options.refresh_ms = atoi(optarg);
924          break;
925       case 'd':
926          options.dump = true;
927          break;
928       default:
929          print_usage(argv[0]);
930          break;
931       }
932    }
933 }
934 
935 /*
936  * main
937  */
938 
939 int
main(int argc,char ** argv)940 main(int argc, char **argv)
941 {
942    parse_options(argc, argv);
943 
944    find_device();
945 
946    const struct fd_perfcntr_group *groups;
947    groups = fd_perfcntrs(dev.dev_id, &dev.ngroups);
948    if (!groups) {
949       errx(1, "no perfcntr support");
950    }
951 
952    dev.groups = calloc(dev.ngroups, sizeof(struct counter_group));
953 
954    setlocale(LC_NUMERIC, "en_US.UTF-8");
955 
956    setup_counter_groups(groups);
957    restore_counter_groups();
958    config_restore();
959    flush_ring();
960 
961    if (options.dump)
962       dump_counters();
963    else
964       main_ui();
965 
966    return 0;
967 }
968