/*
 * Copyright 2015 Advanced Micro Devices, Inc.
 *
 * SPDX-License-Identifier: MIT
 */
6
7 #ifndef AC_PERFCOUNTER_H
8 #define AC_PERFCOUNTER_H
9
10 #include <stdbool.h>
11
12 #include "sid.h"
13
14 #include "ac_gpu_info.h"
15
/* Max counters per HW block */
#define AC_QUERY_MAX_COUNTERS 16

/* Bit 31 flag.
 * NOTE(review): no user of this flag is visible in this header; presumably
 * it marks a selector as windowed by shaders - confirm against the callers.
 */
#define AC_PC_SHADERS_WINDOWING (1u << 31)
20
/* Capability flags of a perfcounter block.  Each constant is a distinct bit
 * so that several can be OR'ed together; they are tested against the
 * `flags` member of struct ac_pc_block_base.
 */
enum ac_pc_block_flags
{
   /* This block is part of the shader engine */
   AC_PC_BLOCK_SE = (1 << 0),

   /* Expose per-instance groups instead of summing all instances (within
    * an SE). */
   AC_PC_BLOCK_INSTANCE_GROUPS = (1 << 1),

   /* Expose per-SE groups instead of summing instances across SEs. */
   AC_PC_BLOCK_SE_GROUPS = (1 << 2),

   /* Shader block */
   AC_PC_BLOCK_SHADER = (1 << 3),

   /* Non-shader block with perfcounters windowed by shaders. */
   AC_PC_BLOCK_SHADER_WINDOWED = (1 << 4),
};
39
/* Identifiers of the GPU hardware blocks that can expose performance
 * counters.  The numeric values are spelled out explicitly and must not be
 * renumbered.  GE1 is an alias of GE, and NUM_GPU_BLOCK is one past the last
 * identifier.
 */
enum ac_pc_gpu_block {
   CPF = 0x0,
   IA = 0x1,
   VGT = 0x2,
   PA_SU = 0x3,
   PA_SC = 0x4,
   SPI = 0x5,
   SQ = 0x6,
   SX = 0x7,
   TA = 0x8,
   TD = 0x9,
   TCP = 0xA,
   TCC = 0xB,
   TCA = 0xC,
   DB = 0xD,
   CB = 0xE,
   GDS = 0xF,
   SRBM = 0x10,
   GRBM = 0x11,
   GRBMSE = 0x12,
   RLC = 0x13,
   DMA = 0x14,
   MC = 0x15,
   CPG = 0x16,
   CPC = 0x17,
   WD = 0x18,
   TCS = 0x19,
   ATC = 0x1A,
   ATCL2 = 0x1B,
   MCVML2 = 0x1C,
   EA = 0x1D,
   RPB = 0x1E,
   RMI = 0x1F,
   UMCCH = 0x20,
   GE = 0x21,
   GE1 = GE, /* alias: same value as GE */
   GL1A = 0x22,
   GL1C = 0x23,
   GL1CG = 0x24,
   GL2A = 0x25,
   GL2C = 0x26,
   CHA = 0x27,
   CHC = 0x28,
   CHCG = 0x29,
   GUS = 0x2A,
   GCR = 0x2B,
   PA_PH = 0x2C,
   UTCL1 = 0x2D,
   GEDIST = 0x2E,
   GESE = 0x2F,
   DF = 0x30,
   SQ_WGP = 0x31, /* GFX11+ */
   NUM_GPU_BLOCK, /* count of identifiers; keep last */
};
94
95 struct ac_pc_block_base {
96 enum ac_pc_gpu_block gpu_block;
97 const char *name;
98 unsigned num_counters;
99 unsigned flags;
100
101 unsigned select_or;
102 unsigned *select0;
103 unsigned counter0_lo;
104 unsigned *counters;
105
106 /* SPM */
107 unsigned num_spm_counters;
108 unsigned num_spm_wires;
109 unsigned *select1;
110 unsigned spm_block_select;
111 };
112
/* Pairs a block description with two counts.
 *
 * NOTE(review): the struct name suggests one such descriptor exists per GFX
 * generation, and the member names suggest "number of selectors" and
 * "number of instances"; neither is established by this header - confirm
 * against the descriptor tables.
 */
struct ac_pc_block_gfxdescr {
   struct ac_pc_block_base *b; /* shared block description */
   unsigned selectors;
   unsigned instances;
};
118
/* Runtime state of one perfcounter block.
 *
 * `b` leads to the block description (b->b is the struct ac_pc_block_base
 * whose flags the per-SE / per-instance helpers test), and `num_instances`
 * is compared against 1 when deciding on per-instance groups.
 *
 * NOTE(review): the name tables look like flat character buffers addressed
 * as `names + i * stride`; that layout is not shown in this header - confirm
 * against ac_init_block_names().
 */
struct ac_pc_block {
   const struct ac_pc_block_gfxdescr *b;
   unsigned num_instances;
   unsigned num_global_instances;

   unsigned num_groups;
   char *group_names;
   unsigned group_name_stride;

   char *selector_names;
   unsigned selector_name_stride;
};
131
/* Top-level perfcounter state: a set of blocks plus two grouping options. */
struct ac_perfcounters {
   unsigned num_groups;
   unsigned num_blocks;
   struct ac_pc_block *blocks; /* presumably num_blocks entries - TODO confirm */

   /* When set, blocks flagged AC_PC_BLOCK_SE get per-SE groups
    * (see ac_pc_block_has_per_se_groups). */
   bool separate_se;
   /* When set, blocks with more than one instance get per-instance groups
    * (see ac_pc_block_has_per_instance_groups). */
   bool separate_instance;
};
140
/* The order is chosen to be compatible with GPUPerfStudio's hardcoding of
 * performance counter group IDs.
 *
 * Name suffix for each shader type; entry 0 is the empty suffix.  The
 * entries are listed in the same order as ac_pc_shader_type_bits, so the two
 * tables must be kept in sync.
 */
static const char *const ac_pc_shader_type_suffixes[] = {
   "", "_ES", "_GS", "_VS", "_PS", "_LS", "_HS", "_CS",
};
146
147 static const unsigned ac_pc_shader_type_bits[] = {
148 0x7f,
149 S_036780_ES_EN(1),
150 S_036780_GS_EN(1),
151 S_036780_VS_EN(1),
152 S_036780_PS_EN(1),
153 S_036780_LS_EN(1),
154 S_036780_HS_EN(1),
155 S_036780_CS_EN(1),
156 };
157
158 static inline bool
ac_pc_block_has_per_se_groups(const struct ac_perfcounters * pc,const struct ac_pc_block * block)159 ac_pc_block_has_per_se_groups(const struct ac_perfcounters *pc,
160 const struct ac_pc_block *block)
161 {
162 return block->b->b->flags & AC_PC_BLOCK_SE_GROUPS ||
163 (block->b->b->flags & AC_PC_BLOCK_SE && pc->separate_se);
164 }
165
166 static inline bool
ac_pc_block_has_per_instance_groups(const struct ac_perfcounters * pc,const struct ac_pc_block * block)167 ac_pc_block_has_per_instance_groups(const struct ac_perfcounters *pc,
168 const struct ac_pc_block *block)
169 {
170 return block->b->b->flags & AC_PC_BLOCK_INSTANCE_GROUPS ||
171 (block->num_instances > 1 && pc->separate_instance);
172 }
173
/* Find the block that a counter index belongs to.
 *
 * NOTE(review): the definition is not in this header.  Presumably `index` is
 * a global counter index, `*base_gid` and `*sub_index` are outputs, and the
 * result is NULL when nothing matches - confirm against the implementation.
 */
struct ac_pc_block *ac_lookup_counter(const struct ac_perfcounters *pc,
                                      unsigned index, unsigned *base_gid,
                                      unsigned *sub_index);
/* Find the block that a group index belongs to.
 *
 * NOTE(review): the definition is not in this header.  `index` is passed by
 * non-const pointer, so it is presumably rewritten to a block-relative
 * value, with NULL returned on no match - confirm against the
 * implementation.
 */
struct ac_pc_block *ac_lookup_group(const struct ac_perfcounters *pc,
                                    unsigned *index);
179
/* Return the block of `pc` that corresponds to `gpu_block`.
 *
 * NOTE(review): presumably NULL when `pc` has no such block - confirm
 * against the implementation.
 */
struct ac_pc_block *ac_pc_get_block(const struct ac_perfcounters *pc,
                                    enum ac_pc_gpu_block gpu_block);
182
/* Initialise the name tables of `block`.
 *
 * NOTE(review): the definition is not in this header.  Presumably this fills
 * block->group_names / block->selector_names (and their strides) and returns
 * false on failure - confirm against the implementation.
 */
bool ac_init_block_names(const struct radeon_info *info,
                         const struct ac_perfcounters *pc,
                         struct ac_pc_block *block);
186
/* Initialise `pc` for the GPU described by `info`.
 *
 * NOTE(review): the definition is not in this header.  Presumably
 * `separate_se` / `separate_instance` are stored in the same-named members
 * of `pc`, false is returned on failure, and the result must be released
 * with ac_destroy_perfcounters() - confirm against the implementation.
 */
bool ac_init_perfcounters(const struct radeon_info *info,
                          bool separate_se,
                          bool separate_instance,
                          struct ac_perfcounters *pc);
/* Tear down `pc`.
 *
 * NOTE(review): presumably releases what ac_init_perfcounters() set up;
 * whether a NULL `pc` is accepted is not visible here - confirm against the
 * implementation.
 */
void ac_destroy_perfcounters(struct ac_perfcounters *pc);
192
193 #endif
194