1 /*
2 * Copyright © 2016 Rob Clark <[email protected]>
3 * All Rights Reserved.
4 * SPDX-License-Identifier: MIT
5 */
6
7 #include <assert.h>
8 #include <curses.h>
9 #include <err.h>
10 #include <inttypes.h>
11 #include <libconfig.h>
12 #include <locale.h>
13 #include <stdint.h>
14 #include <stdio.h>
15 #include <stdlib.h>
16 #include <string.h>
17 #include <time.h>
18 #include <unistd.h>
19 #include <xf86drm.h>
20
21 #include "drm/freedreno_drmif.h"
22 #include "drm/freedreno_ringbuffer.h"
23
24 #include "util/os_file.h"
25
26 #include "freedreno_dt.h"
27 #include "freedreno_perfcntr.h"
28
/* Capacity of the fixed per-group counter arrays. */
#define MAX_CNTR_PER_GROUP 24
/* Default refresh interval, in milliseconds. */
#define REFRESH_MS 500

/* Settings controlled from the command line (see parse_options()). */
static struct {
   int refresh_ms; /* refresh interval in milliseconds (-r) */
   bool dump;      /* dump the counters once instead of running the UI (-d) */
} options = {
   .dump = false,
   .refresh_ms = REFRESH_MS,
};
39
40 /* NOTE first counter group should always be CP, since we unconditionally
41 * use CP counter to measure the gpu freq.
42 */
43
44 struct counter_group {
45 const struct fd_perfcntr_group *group;
46
47 struct {
48 const struct fd_perfcntr_counter *counter;
49 uint16_t select_val;
50 bool is_gpufreq_counter;
51 } counter[MAX_CNTR_PER_GROUP];
52
53 /* name of currently selected counters (for UI): */
54 const char *label[MAX_CNTR_PER_GROUP];
55
56 uint64_t value[MAX_CNTR_PER_GROUP];
57 uint64_t value_delta[MAX_CNTR_PER_GROUP];
58
59 uint64_t sample_time[MAX_CNTR_PER_GROUP];
60 uint64_t sample_time_delta[MAX_CNTR_PER_GROUP];
61 };
62
/* Global device state, filled in by find_device() and main(). */
static struct {
   /* mapped register space that counter values are read from: */
   void *io;

   /* GPU frequency range: */
   uint32_t min_freq;
   uint32_t max_freq;

   /* per-generation table of counters: */
   unsigned ngroups;
   struct counter_group *groups;

   /* drm device (for writing select regs via ring): */
   struct fd_device *dev;
   struct fd_pipe *pipe;
   const struct fd_dev_id *dev_id;

   /* pending submit and its ring; both NULL when nothing is queued: */
   struct fd_submit *submit;
   struct fd_ringbuffer *ring;
} dev;
77
/* Forward declarations for functions defined further down in this file. */
static void restore_counter_groups(void);
static void config_restore(void);
static void config_save(void);
81
82 /*
83 * helpers
84 */
85
/*
 * Return the current CLOCK_MONOTONIC time in microseconds.
 *
 * The seconds field is widened to 64 bits before scaling, so the
 * multiplication cannot overflow on targets where time_t is 32 bits wide.
 */
static uint64_t
gettime_us(void)
{
   struct timespec ts = {0};

   clock_gettime(CLOCK_MONOTONIC, &ts);

   return ((uint64_t)ts.tv_sec * 1000000u) + ((uint64_t)ts.tv_nsec / 1000u);
}
93
/* Sleep for `us` microseconds, measured against CLOCK_MONOTONIC. */
static void
sleep_us(uint32_t us)
{
   struct timespec request;

   /* split into whole seconds plus the sub-second remainder in ns: */
   request.tv_sec = us / 1000000;
   request.tv_nsec = (us % 1000000) * 1000;

   clock_nanosleep(CLOCK_MONOTONIC, 0, &request, NULL);
}
103
/*
 * Return how far a 64-bit value advanced from `a` (older) to `b` (newer).
 *
 * Unsigned subtraction is modulo 2^64, so a rollover between the two
 * readings (a > b) is handled without a special case, and without the
 * off-by-one that "UINT64_MAX - a + b" would introduce.
 */
static uint64_t
delta(uint64_t a, uint64_t b)
{
   return b - a;
}
113
114 static void
find_device(void)115 find_device(void)
116 {
117 int ret;
118
119 dev.dev = fd_device_open();
120 if (!dev.dev)
121 err(1, "could not open drm device");
122
123 dev.pipe = fd_pipe_new(dev.dev, FD_PIPE_3D);
124
125 dev.dev_id = fd_pipe_dev_id(dev.pipe);
126 if (!fd_dev_info_raw(dev.dev_id))
127 err(1, "unknown device");
128
129 printf("device: %s\n", fd_dev_name(dev.dev_id));
130
131 /* try MAX_FREQ first as that will work regardless of old dt
132 * dt bindings vs upstream bindings:
133 */
134 uint64_t val;
135 ret = fd_pipe_get_param(dev.pipe, FD_MAX_FREQ, &val);
136 if (ret) {
137 printf("falling back to parsing DT bindings for freq\n");
138 if (!fd_dt_find_freqs(&dev.min_freq, &dev.max_freq))
139 err(1, "could not find GPU freqs");
140 } else {
141 dev.min_freq = 0;
142 dev.max_freq = val;
143 }
144
145 printf("min_freq=%u, max_freq=%u\n", dev.min_freq, dev.max_freq);
146
147 dev.io = fd_dt_find_io();
148 if (!dev.io) {
149 err(1, "could not map device");
150 }
151
152 fd_pipe_set_param(dev.pipe, FD_SYSPROF, 1);
153 }
154
155 /*
156 * perf-monitor
157 */
158
159 static void
flush_ring(void)160 flush_ring(void)
161 {
162 if (!dev.submit)
163 return;
164
165 struct fd_fence *fence = fd_submit_flush(dev.submit, -1, false);
166
167 if (!fence)
168 errx(1, "submit failed");
169
170 fd_fence_flush(fence);
171 fd_fence_del(fence);
172 fd_ringbuffer_del(dev.ring);
173 fd_submit_del(dev.submit);
174
175 dev.ring = NULL;
176 dev.submit = NULL;
177 }
178
179 static void
select_counter(struct counter_group * group,int ctr,int countable_val)180 select_counter(struct counter_group *group, int ctr, int countable_val)
181 {
182 assert(ctr < group->group->num_counters);
183
184 unsigned countable_idx = UINT32_MAX;
185 for (unsigned i = 0; i < group->group->num_countables; i++) {
186 if (countable_val != group->group->countables[i].selector)
187 continue;
188
189 countable_idx = i;
190 break;
191 }
192
193 if (countable_idx >= group->group->num_countables)
194 return;
195
196 group->label[ctr] = group->group->countables[countable_idx].name;
197 group->counter[ctr].select_val = countable_val;
198
199 if (!dev.submit) {
200 dev.submit = fd_submit_new(dev.pipe);
201 dev.ring = fd_submit_new_ringbuffer(
202 dev.submit, 0x1000, FD_RINGBUFFER_PRIMARY | FD_RINGBUFFER_GROWABLE);
203 }
204
205 /* bashing select register directly while gpu is active will end
206 * in tears.. so we need to write it via the ring:
207 *
208 * TODO it would help startup time, if gpu is loaded, to batch
209 * all the initial writes and do a single flush.. although that
210 * makes things more complicated for capturing inital sample value
211 */
212 struct fd_ringbuffer *ring = dev.ring;
213 switch (fd_dev_gen(dev.dev_id)) {
214 case 2:
215 case 3:
216 case 4:
217 OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
218 OUT_RING(ring, 0x00000000);
219
220 if (group->group->counters[ctr].enable) {
221 OUT_PKT0(ring, group->group->counters[ctr].enable, 1);
222 OUT_RING(ring, 0);
223 }
224
225 if (group->group->counters[ctr].clear) {
226 OUT_PKT0(ring, group->group->counters[ctr].clear, 1);
227 OUT_RING(ring, 1);
228
229 OUT_PKT0(ring, group->group->counters[ctr].clear, 1);
230 OUT_RING(ring, 0);
231 }
232
233 OUT_PKT0(ring, group->group->counters[ctr].select_reg, 1);
234 OUT_RING(ring, countable_val);
235
236 if (group->group->counters[ctr].enable) {
237 OUT_PKT0(ring, group->group->counters[ctr].enable, 1);
238 OUT_RING(ring, 1);
239 }
240
241 break;
242 case 5:
243 case 6:
244 case 7:
245 OUT_PKT7(ring, CP_WAIT_FOR_IDLE, 0);
246
247 if (group->group->counters[ctr].enable) {
248 OUT_PKT4(ring, group->group->counters[ctr].enable, 1);
249 OUT_RING(ring, 0);
250 }
251
252 if (group->group->counters[ctr].clear) {
253 OUT_PKT4(ring, group->group->counters[ctr].clear, 1);
254 OUT_RING(ring, 1);
255
256 OUT_PKT4(ring, group->group->counters[ctr].clear, 1);
257 OUT_RING(ring, 0);
258 }
259
260 OUT_PKT4(ring, group->group->counters[ctr].select_reg, 1);
261 OUT_RING(ring, countable_val);
262
263 if (group->group->counters[ctr].enable) {
264 OUT_PKT4(ring, group->group->counters[ctr].enable, 1);
265 OUT_RING(ring, 1);
266 }
267
268 break;
269 }
270 }
271
load_counter_value(struct counter_group * group,int ctr)272 static uint64_t load_counter_value(struct counter_group *group, int ctr)
273 {
274 /* We can read the counter register value as an uint64_t, as long as the
275 * lo/hi addresses are neighboring and the lo address is 8-byte-aligned.
276 * This currently holds for all counters exposed in perfcounter groups.
277 */
278 const struct fd_perfcntr_counter *counter = group->counter[ctr].counter;
279 assert(counter->counter_reg_lo + 1 == counter->counter_reg_hi);
280 assert(!((counter->counter_reg_lo * 4) % 8));
281 return *((uint64_t *) (dev.io + counter->counter_reg_lo * 4));
282 }
283
284 static void
resample_counter(struct counter_group * group,int ctr,uint64_t sample_time)285 resample_counter(struct counter_group *group, int ctr, uint64_t sample_time)
286 {
287 uint64_t previous_value = group->value[ctr];
288 group->value[ctr] = load_counter_value(group, ctr);
289 group->value_delta[ctr] = delta(previous_value, group->value[ctr]);
290
291 uint64_t previous_sample_time = group->sample_time[ctr];
292 group->sample_time[ctr] = sample_time;
293 group->sample_time_delta[ctr] = delta(previous_sample_time, sample_time);
294 }
295
296 /* sample all the counters: */
297 static void
resample(void)298 resample(void)
299 {
300 static uint64_t last_time;
301 uint64_t current_time = gettime_us();
302
303 if ((current_time - last_time) < (options.refresh_ms * 1000 / 2))
304 return;
305
306 last_time = current_time;
307
308 for (unsigned i = 0; i < dev.ngroups; i++) {
309 struct counter_group *group = &dev.groups[i];
310 for (unsigned j = 0; j < group->group->num_counters; j++) {
311 resample_counter(group, j, current_time);
312 }
313 }
314 }
315
316 /*
317 * The UI
318 */
319
/* curses color-pair ids (set up with init_pair() in main_ui()): */
#define COLOR_GROUP_HEADER 1
#define COLOR_FOOTER 2
#define COLOR_INVERSE 3

/* window size, refreshed on every redraw(): */
static int w;
static int h;

/* width of the counter label column: */
static int ctr_width;

/* number of selectable rows, and the currently selected row: */
static int max_rows;
static int current_cntr = 1;
327
328 static void
redraw_footer(WINDOW * win)329 redraw_footer(WINDOW *win)
330 {
331 char footer[128];
332 int n = snprintf(footer, sizeof(footer), " fdperf: %s (%.2fMHz..%.2fMHz)",
333 fd_dev_name(dev.dev_id), ((float)dev.min_freq) / 1000000.0,
334 ((float)dev.max_freq) / 1000000.0);
335
336 wmove(win, h - 1, 0);
337 wattron(win, COLOR_PAIR(COLOR_FOOTER));
338 waddstr(win, footer);
339 whline(win, ' ', w - n);
340 wattroff(win, COLOR_PAIR(COLOR_FOOTER));
341 }
342
343 static void
redraw_group_header(WINDOW * win,int row,const char * name)344 redraw_group_header(WINDOW *win, int row, const char *name)
345 {
346 wmove(win, row, 0);
347 wattron(win, A_BOLD);
348 wattron(win, COLOR_PAIR(COLOR_GROUP_HEADER));
349 waddstr(win, name);
350 whline(win, ' ', w - strlen(name));
351 wattroff(win, COLOR_PAIR(COLOR_GROUP_HEADER));
352 wattroff(win, A_BOLD);
353 }
354
355 static void
redraw_counter_label(WINDOW * win,int row,const char * name,bool selected)356 redraw_counter_label(WINDOW *win, int row, const char *name, bool selected)
357 {
358 int n = strlen(name);
359 assert(n <= ctr_width);
360 wmove(win, row, 0);
361 whline(win, ' ', ctr_width - n);
362 wmove(win, row, ctr_width - n);
363 if (selected)
364 wattron(win, COLOR_PAIR(COLOR_INVERSE));
365 waddstr(win, name);
366 if (selected)
367 wattroff(win, COLOR_PAIR(COLOR_INVERSE));
368 waddstr(win, ": ");
369 }
370
371 static void
redraw_counter_value_cycles(WINDOW * win,float val)372 redraw_counter_value_cycles(WINDOW *win, float val)
373 {
374 char str[32];
375 int x = getcurx(win);
376 int valwidth = w - x;
377 int barwidth, n;
378
379 /* convert to fraction of max freq: */
380 val = val / (float)dev.max_freq;
381
382 /* figure out percentage-bar width: */
383 barwidth = (int)(val * valwidth);
384
385 /* sometimes things go over 100%.. idk why, could be
386 * things running faster than base clock, or counter
387 * summing up cycles in multiple cores?
388 */
389 barwidth = MIN2(barwidth, valwidth - 1);
390
391 n = snprintf(str, sizeof(str), "%.2f%%", 100.0 * val);
392 wattron(win, COLOR_PAIR(COLOR_INVERSE));
393 waddnstr(win, str, barwidth);
394 if (barwidth > n) {
395 whline(win, ' ', barwidth - n);
396 wmove(win, getcury(win), x + barwidth);
397 }
398 wattroff(win, COLOR_PAIR(COLOR_INVERSE));
399 if (barwidth < n)
400 waddstr(win, str + barwidth);
401 whline(win, ' ', w - getcurx(win));
402 }
403
404 static void
redraw_counter_value(WINDOW * win,int row,struct counter_group * group,int ctr)405 redraw_counter_value(WINDOW *win, int row, struct counter_group *group, int ctr)
406 {
407 char str[32];
408 int n = snprintf(str, sizeof(str), "%" PRIu64 " ", group->value_delta[ctr]);
409
410 whline(win, ' ', 24 - n);
411 wmove(win, row, getcurx(win) + 24 - n);
412 waddstr(win, str);
413
414 /* quick hack, if the label has "CYCLE" in the name, it is
415 * probably a cycle counter ;-)
416 * Perhaps add more info in rnndb schema to know how to
417 * treat individual counters (ie. which are cycles, and
418 * for those we want to present as a percentage do we
419 * need to scale the result.. ie. is it running at some
420 * multiple or divisor of core clk, etc)
421 *
422 * TODO it would be much more clever to get this from xml
423 * Also.. in some cases I think we want to know how many
424 * units the counter is counting for, ie. if a320 has 2x
425 * shader as a306 we might need to scale the result..
426 */
427 if (strstr(group->label[ctr], "CYCLE") ||
428 strstr(group->label[ctr], "BUSY") || strstr(group->label[ctr], "IDLE")) {
429 float cycles_val = (float) group->value_delta[ctr] * 1000000.0 /
430 (float) group->sample_time_delta[ctr];
431 redraw_counter_value_cycles(win, cycles_val);
432 } else {
433 whline(win, ' ', w - getcurx(win));
434 }
435 }
436
437 static void
redraw_counter(WINDOW * win,int row,struct counter_group * group,int ctr,bool selected)438 redraw_counter(WINDOW *win, int row, struct counter_group *group, int ctr,
439 bool selected)
440 {
441 redraw_counter_label(win, row, group->label[ctr], selected);
442 redraw_counter_value(win, row, group, ctr);
443 }
444
445 static void
redraw_gpufreq_counter(WINDOW * win,int row)446 redraw_gpufreq_counter(WINDOW *win, int row)
447 {
448 redraw_counter_label(win, row, "Freq (MHz)", false);
449
450 struct counter_group *group = &dev.groups[0];
451 float freq_val = (float) group->value_delta[0] / (float) group->sample_time_delta[0];
452
453 char str[32];
454 snprintf(str, sizeof(str), "%.2f", freq_val);
455
456 waddstr(win, str);
457 whline(win, ' ', w - getcurx(win));
458 }
459
460 static void
redraw(WINDOW * win)461 redraw(WINDOW *win)
462 {
463 static int scroll = 0;
464 int max, row = 0;
465
466 w = getmaxx(win);
467 h = getmaxy(win);
468
469 max = h - 3;
470
471 if ((current_cntr - scroll) > (max - 1)) {
472 scroll = current_cntr - (max - 1);
473 } else if ((current_cntr - 1) < scroll) {
474 scroll = current_cntr - 1;
475 }
476
477 for (unsigned i = 0; i < dev.ngroups; i++) {
478 struct counter_group *group = &dev.groups[i];
479 unsigned j = 0;
480
481 if (group->counter[0].is_gpufreq_counter)
482 j++;
483
484 if (j < group->group->num_counters) {
485 if ((scroll <= row) && ((row - scroll) < max))
486 redraw_group_header(win, row - scroll, group->group->name);
487 row++;
488 }
489
490 for (; j < group->group->num_counters; j++) {
491 if ((scroll <= row) && ((row - scroll) < max))
492 redraw_counter(win, row - scroll, group, j, row == current_cntr);
493 row++;
494 }
495 }
496
497 /* convert back to physical (unscrolled) offset: */
498 row = max;
499
500 redraw_group_header(win, row, "Status");
501 row++;
502
503 /* Draw GPU freq row: */
504 redraw_gpufreq_counter(win, row);
505 row++;
506
507 redraw_footer(win);
508
509 refresh();
510 }
511
512 static struct counter_group *
current_counter(int * ctr)513 current_counter(int *ctr)
514 {
515 int n = 0;
516
517 for (unsigned i = 0; i < dev.ngroups; i++) {
518 struct counter_group *group = &dev.groups[i];
519 unsigned j = 0;
520
521 if (group->counter[0].is_gpufreq_counter)
522 j++;
523
524 /* account for group header: */
525 if (j < group->group->num_counters) {
526 /* cannot select group header.. return null to indicate this
527 * main_ui():
528 */
529 if (n == current_cntr)
530 return NULL;
531 n++;
532 }
533
534 for (; j < group->group->num_counters; j++) {
535 if (n == current_cntr) {
536 if (ctr)
537 *ctr = j;
538 return group;
539 }
540 n++;
541 }
542 }
543
544 assert(0);
545 return NULL;
546 }
547
548 static void
counter_dialog(void)549 counter_dialog(void)
550 {
551 WINDOW *dialog;
552 struct counter_group *group;
553 int cnt = 0, current = 0, scroll;
554
555 /* figure out dialog size: */
556 int dh = h / 2;
557 int dw = ctr_width + 2;
558
559 group = current_counter(&cnt);
560
561 /* find currently selected idx (note there can be discontinuities
562 * so the selected value does not map 1:1 to current idx)
563 */
564 uint32_t selected = group->counter[cnt].select_val;
565 for (int i = 0; i < group->group->num_countables; i++) {
566 if (group->group->countables[i].selector == selected) {
567 current = i;
568 break;
569 }
570 }
571
572 /* scrolling offset, if dialog is too small for all the choices: */
573 scroll = 0;
574
575 dialog = newwin(dh, dw, (h - dh) / 2, (w - dw) / 2);
576 box(dialog, 0, 0);
577 wrefresh(dialog);
578 keypad(dialog, true);
579
580 while (true) {
581 int max = MIN2(dh - 2, group->group->num_countables);
582 int selector = -1;
583
584 if ((current - scroll) >= (dh - 3)) {
585 scroll = current - (dh - 3);
586 } else if (current < scroll) {
587 scroll = current;
588 }
589
590 for (int i = 0; i < max; i++) {
591 int n = scroll + i;
592 wmove(dialog, i + 1, 1);
593 if (n == current) {
594 assert(n < group->group->num_countables);
595 selector = group->group->countables[n].selector;
596 wattron(dialog, COLOR_PAIR(COLOR_INVERSE));
597 }
598 if (n < group->group->num_countables)
599 waddstr(dialog, group->group->countables[n].name);
600 whline(dialog, ' ', dw - getcurx(dialog) - 1);
601 if (n == current)
602 wattroff(dialog, COLOR_PAIR(COLOR_INVERSE));
603 }
604
605 assert(selector >= 0);
606
607 switch (wgetch(dialog)) {
608 case KEY_UP:
609 current = MAX2(0, current - 1);
610 break;
611 case KEY_DOWN:
612 current = MIN2(group->group->num_countables - 1, current + 1);
613 break;
614 case KEY_LEFT:
615 case KEY_ENTER:
616 /* select new sampler */
617 select_counter(group, cnt, selector);
618 flush_ring();
619 config_save();
620 goto out;
621 case 'q':
622 goto out;
623 default:
624 /* ignore */
625 break;
626 }
627
628 resample();
629 }
630
631 out:
632 wborder(dialog, ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ');
633 delwin(dialog);
634 }
635
636 static void
scroll_cntr(int amount)637 scroll_cntr(int amount)
638 {
639 if (amount < 0) {
640 current_cntr = MAX2(1, current_cntr + amount);
641 if (current_counter(NULL) == NULL) {
642 current_cntr = MAX2(1, current_cntr - 1);
643 }
644 } else {
645 current_cntr = MIN2(max_rows - 1, current_cntr + amount);
646 if (current_counter(NULL) == NULL)
647 current_cntr = MIN2(max_rows - 1, current_cntr + 1);
648 }
649 }
650
651 static void
main_ui(void)652 main_ui(void)
653 {
654 WINDOW *mainwin;
655 uint64_t last_time = gettime_us();
656
657 /* Run an initial sample to set up baseline counter values. */
658 resample();
659
660 /* curses setup: */
661 mainwin = initscr();
662 if (!mainwin)
663 goto out;
664
665 cbreak();
666 wtimeout(mainwin, options.refresh_ms);
667 noecho();
668 keypad(mainwin, true);
669 curs_set(0);
670 start_color();
671 init_pair(COLOR_GROUP_HEADER, COLOR_WHITE, COLOR_GREEN);
672 init_pair(COLOR_FOOTER, COLOR_WHITE, COLOR_BLUE);
673 init_pair(COLOR_INVERSE, COLOR_BLACK, COLOR_WHITE);
674
675 while (true) {
676 switch (wgetch(mainwin)) {
677 case KEY_UP:
678 scroll_cntr(-1);
679 break;
680 case KEY_DOWN:
681 scroll_cntr(+1);
682 break;
683 case KEY_NPAGE: /* page-down */
684 /* TODO figure out # of rows visible? */
685 scroll_cntr(+15);
686 break;
687 case KEY_PPAGE: /* page-up */
688 /* TODO figure out # of rows visible? */
689 scroll_cntr(-15);
690 break;
691 case KEY_RIGHT:
692 counter_dialog();
693 break;
694 case 'q':
695 goto out;
696 break;
697 default:
698 /* ignore */
699 break;
700 }
701 resample();
702 redraw(mainwin);
703
704 /* restore the counters every 0.5s in case the GPU has suspended,
705 * in which case the current selected countables will have reset:
706 */
707 uint64_t t = gettime_us();
708 if (delta(last_time, t) > 500000) {
709 restore_counter_groups();
710 flush_ring();
711 last_time = t;
712 }
713 }
714
715 /* restore settings.. maybe we need an atexit()??*/
716 out:
717 delwin(mainwin);
718 endwin();
719 refresh();
720 }
721
722 static void
dump_counters(void)723 dump_counters(void)
724 {
725 resample();
726 sleep_us(options.refresh_ms * 1000);
727 resample();
728
729 for (unsigned i = 0; i < dev.ngroups; i++) {
730 const struct counter_group *group = &dev.groups[i];
731 for (unsigned j = 0; j < group->group->num_counters; j++) {
732 const char *label = group->label[j];
733 float val = (float) group->value_delta[j] * 1000000.0 /
734 (float) group->sample_time_delta[j];
735
736 int n = printf("%s: ", label) - 2;
737 while (n++ < ctr_width)
738 fputc(' ', stdout);
739
740 n = printf("%" PRIu64, group->value_delta[j]);
741 while (n++ < 24)
742 fputc(' ', stdout);
743
744 if (strstr(label, "CYCLE") ||
745 strstr(label, "BUSY") ||
746 strstr(label, "IDLE")) {
747 val = val / dev.max_freq * 100.0f;
748 printf(" %.2f%%\n", val);
749 } else {
750 printf("\n");
751 }
752 }
753 }
754 }
755
756 static void
restore_counter_groups(void)757 restore_counter_groups(void)
758 {
759 for (unsigned i = 0; i < dev.ngroups; i++) {
760 struct counter_group *group = &dev.groups[i];
761
762 for (unsigned j = 0; j < group->group->num_counters; j++) {
763 /* This should also write the CP_ALWAYS_COUNT selectable value into
764 * the reserved CP counter we use for GPU frequency measurement,
765 * avoiding someone else writing a different value there.
766 */
767 select_counter(group, j, group->counter[j].select_val);
768 }
769 }
770 }
771
772 static void
setup_counter_groups(const struct fd_perfcntr_group * groups)773 setup_counter_groups(const struct fd_perfcntr_group *groups)
774 {
775 for (unsigned i = 0; i < dev.ngroups; i++) {
776 struct counter_group *group = &dev.groups[i];
777
778 group->group = &groups[i];
779
780 max_rows += group->group->num_counters + 1;
781
782 /* We reserve the first counter of the CP group (first in the list) for
783 * measuring GPU frequency that's displayed in the footer.
784 */
785 if (i == 0) {
786 /* We won't be displaying the private counter alongside others. We
787 * also won't be displaying the group header if we're taking over
788 * the only counter (e.g. on a2xx).
789 */
790 max_rows--;
791 if (groups[0].num_counters < 2)
792 max_rows--;
793
794 /* Enforce the CP_ALWAYS_COUNT countable for this counter. */
795 unsigned always_count_index = UINT32_MAX;
796 for (unsigned i = 0; i < groups[0].num_countables; ++i) {
797 if (strcmp(groups[0].countables[i].name, "PERF_CP_ALWAYS_COUNT"))
798 continue;
799
800 always_count_index = i;
801 break;
802 }
803
804 if (always_count_index < groups[0].num_countables) {
805 group->counter[0].select_val = groups[0].countables[always_count_index].selector;
806 group->counter[0].is_gpufreq_counter = true;
807 }
808 }
809
810 for (unsigned j = 0; j < group->group->num_counters; j++) {
811 group->counter[j].counter = &group->group->counters[j];
812
813 if (!group->counter[j].is_gpufreq_counter)
814 group->counter[j].select_val = j;
815 }
816
817 for (unsigned j = 0; j < group->group->num_countables; j++) {
818 ctr_width =
819 MAX2(ctr_width, strlen(group->group->countables[j].name) + 1);
820 }
821 }
822 }
823
824 /*
825 * configuration / persistence
826 */
827
828 static config_t cfg;
829 static config_setting_t *setting;
830
831 static void
config_save(void)832 config_save(void)
833 {
834 for (unsigned i = 0; i < dev.ngroups; i++) {
835 struct counter_group *group = &dev.groups[i];
836 config_setting_t *sect =
837 config_setting_get_member(setting, group->group->name);
838
839 for (unsigned j = 0; j < group->group->num_counters; j++) {
840 /* Don't save the GPU frequency measurement counter. */
841 if (group->counter[j].is_gpufreq_counter)
842 continue;
843
844 char name[] = "counter0000";
845 sprintf(name, "counter%d", j);
846 config_setting_t *s = config_setting_lookup(sect, name);
847 config_setting_set_int(s, group->counter[j].select_val);
848 }
849 }
850
851 config_write_file(&cfg, "fdperf.cfg");
852 }
853
854 static void
config_restore(void)855 config_restore(void)
856 {
857 config_init(&cfg);
858
859 /* Read the file. If there is an error, report it and exit. */
860 if (!config_read_file(&cfg, "fdperf.cfg")) {
861 warn("could not restore settings");
862 }
863
864 config_setting_t *root = config_root_setting(&cfg);
865
866 /* per device settings: */
867 char device_name[64];
868 snprintf(device_name, sizeof(device_name), "%s", fd_dev_name(dev.dev_id));
869 setting = config_setting_get_member(root, device_name);
870 if (!setting)
871 setting = config_setting_add(root, device_name, CONFIG_TYPE_GROUP);
872 if (!setting)
873 return;
874
875 for (unsigned i = 0; i < dev.ngroups; i++) {
876 struct counter_group *group = &dev.groups[i];
877 config_setting_t *sect =
878 config_setting_get_member(setting, group->group->name);
879
880 if (!sect) {
881 sect =
882 config_setting_add(setting, group->group->name, CONFIG_TYPE_GROUP);
883 }
884
885 for (unsigned j = 0; j < group->group->num_counters; j++) {
886 /* Don't restore the GPU frequency measurement counter. */
887 if (group->counter[j].is_gpufreq_counter)
888 continue;
889
890 char name[] = "counter0000";
891 sprintf(name, "counter%d", j);
892 config_setting_t *s = config_setting_lookup(sect, name);
893 if (!s) {
894 config_setting_add(sect, name, CONFIG_TYPE_INT);
895 continue;
896 }
897 select_counter(group, j, config_setting_get_int(s));
898 }
899 }
900 }
901
/*
 * Print the command line help to stderr and exit with status 2.
 * `argv0` is the program name shown in the usage line.
 */
static void
print_usage(const char *argv0)
{
   const int exit_status = 2;

   fprintf(stderr,
           "Usage: %s [OPTION]...\n"
           "\n"
           "  -r <N>     refresh every N milliseconds\n"
           "  -d         dump counters and exit\n"
           "  -h         show this message\n",
           argv0);

   exit(exit_status);
}
914
915 static void
parse_options(int argc,char ** argv)916 parse_options(int argc, char **argv)
917 {
918 int c;
919
920 while ((c = getopt(argc, argv, "r:d")) != -1) {
921 switch (c) {
922 case 'r':
923 options.refresh_ms = atoi(optarg);
924 break;
925 case 'd':
926 options.dump = true;
927 break;
928 default:
929 print_usage(argv[0]);
930 break;
931 }
932 }
933 }
934
935 /*
936 * main
937 */
938
939 int
main(int argc,char ** argv)940 main(int argc, char **argv)
941 {
942 parse_options(argc, argv);
943
944 find_device();
945
946 const struct fd_perfcntr_group *groups;
947 groups = fd_perfcntrs(dev.dev_id, &dev.ngroups);
948 if (!groups) {
949 errx(1, "no perfcntr support");
950 }
951
952 dev.groups = calloc(dev.ngroups, sizeof(struct counter_group));
953
954 setlocale(LC_NUMERIC, "en_US.UTF-8");
955
956 setup_counter_groups(groups);
957 restore_counter_groups();
958 config_restore();
959 flush_ring();
960
961 if (options.dump)
962 dump_counters();
963 else
964 main_ui();
965
966 return 0;
967 }
968