/* xref: /aosp_15_r20/external/mesa3d/src/amd/common/ac_perfcounter.h
 * (revision 6104692788411f58d303aa86923a9ff6ecaded22)
 */
/*
 * Copyright 2015 Advanced Micro Devices, Inc.
 *
 * SPDX-License-Identifier: MIT
 */
6 
7 #ifndef AC_PERFCOUNTER_H
8 #define AC_PERFCOUNTER_H
9 
10 #include <stdbool.h>
11 
12 #include "sid.h"
13 
14 #include "ac_gpu_info.h"
15 
/* Max counters per HW block */
#define AC_QUERY_MAX_COUNTERS 16

/* Bit 31, kept unsigned so the shift does not touch the sign bit of an int.
 * NOTE(review): presumably a flag OR'ed into a selector value to request
 * shader-windowed counting -- confirm against the users of this macro.
 */
#define AC_PC_SHADERS_WINDOWING (1u << 31)
20 
/* Bit flags describing a performance-counter block. They are tested against
 * the `flags` member of struct ac_pc_block_base, so the values may be OR'ed
 * together.
 */
enum ac_pc_block_flags
{
   /* This block is part of the shader engine */
   AC_PC_BLOCK_SE = (1 << 0),

   /* Expose per-instance groups instead of summing all instances (within
    * an SE). */
   AC_PC_BLOCK_INSTANCE_GROUPS = (1 << 1),

   /* Expose per-SE groups instead of summing instances across SEs. */
   AC_PC_BLOCK_SE_GROUPS = (1 << 2),

   /* Shader block */
   AC_PC_BLOCK_SHADER = (1 << 3),

   /* Non-shader block with perfcounters windowed by shaders. */
   AC_PC_BLOCK_SHADER_WINDOWED = (1 << 4),
};
39 
/* Identifiers of the GPU hardware blocks known to the perfcounter code.
 *
 * Every enumerator has an explicit value so the numbering stays fixed when
 * entries are added. GE1 is an alias of GE, and NUM_GPU_BLOCK is one past
 * the highest explicit value, so it can be used as the count of identifiers.
 */
enum ac_pc_gpu_block {
   CPF     = 0x0,
   IA      = 0x1,
   VGT     = 0x2,
   PA_SU   = 0x3,
   PA_SC   = 0x4,
   SPI     = 0x5,
   SQ      = 0x6,
   SX      = 0x7,
   TA      = 0x8,
   TD      = 0x9,
   TCP     = 0xA,
   TCC     = 0xB,
   TCA     = 0xC,
   DB      = 0xD,
   CB      = 0xE,
   GDS     = 0xF,
   SRBM    = 0x10,
   GRBM    = 0x11,
   GRBMSE  = 0x12,
   RLC     = 0x13,
   DMA     = 0x14,
   MC      = 0x15,
   CPG     = 0x16,
   CPC     = 0x17,
   WD      = 0x18,
   TCS     = 0x19,
   ATC     = 0x1A,
   ATCL2   = 0x1B,
   MCVML2  = 0x1C,
   EA      = 0x1D,
   RPB     = 0x1E,
   RMI     = 0x1F,
   UMCCH   = 0x20,
   GE      = 0x21,
   GE1     = GE, /* alias: same value as GE */
   GL1A    = 0x22,
   GL1C    = 0x23,
   GL1CG   = 0x24,
   GL2A    = 0x25,
   GL2C    = 0x26,
   CHA     = 0x27,
   CHC     = 0x28,
   CHCG    = 0x29,
   GUS     = 0x2A,
   GCR     = 0x2B,
   PA_PH   = 0x2C,
   UTCL1   = 0x2D,
   GEDIST  = 0x2E,
   GESE    = 0x2F,
   DF      = 0x30,
   SQ_WGP  = 0x31, /* GFX11+ */
   NUM_GPU_BLOCK, /* sentinel: follows SQ_WGP, i.e. 0x32 */
};
94 
95 struct ac_pc_block_base {
96    enum ac_pc_gpu_block gpu_block;
97    const char *name;
98    unsigned num_counters;
99    unsigned flags;
100 
101    unsigned select_or;
102    unsigned *select0;
103    unsigned counter0_lo;
104    unsigned *counters;
105 
106    /* SPM */
107    unsigned num_spm_counters;
108    unsigned num_spm_wires;
109    unsigned *select1;
110    unsigned spm_block_select;
111 };
112 
/* Pairs a block's base description with two further counts.
 *
 * NOTE(review): presumably one such descriptor exists per block per GPU
 * generation ("gfx") -- confirm against the tables that define them.
 */
struct ac_pc_block_gfxdescr {
   struct ac_pc_block_base *b; /* base description of the block */
   unsigned selectors;
   unsigned instances;
};
118 
/* Runtime state of one perfcounter block: a pointer to its constant
 * descriptor plus instance/group counts and name storage.
 */
struct ac_pc_block {
   const struct ac_pc_block_gfxdescr *b; /* descriptor; b->b is the base description */
   unsigned num_instances;
   unsigned num_global_instances;

   unsigned num_groups;
   /* NOTE(review): presumably num_groups names stored back to back, each
    * group_name_stride bytes apart -- confirm in ac_init_block_names(). */
   char *group_names;
   unsigned group_name_stride;

   /* NOTE(review): presumably laid out like group_names, one entry per
    * selector -- confirm in ac_init_block_names(). */
   char *selector_names;
   unsigned selector_name_stride;
};
131 
/* Top-level perfcounter state: the array of blocks plus the two
 * "separate" switches consulted by ac_pc_block_has_per_se_groups() and
 * ac_pc_block_has_per_instance_groups().
 */
struct ac_perfcounters {
   unsigned num_groups;
   unsigned num_blocks;
   struct ac_pc_block *blocks; /* presumably num_blocks entries -- TODO confirm */

   bool separate_se;       /* request per-SE groups for blocks flagged AC_PC_BLOCK_SE */
   bool separate_instance; /* request per-instance groups for multi-instance blocks */
};
140 
/* Name suffixes, one per shader type; the first entry is empty.
 *
 * The order is chosen to be compatible with GPUPerfStudio's hardcoding of
 * performance counter group IDs, and it matches the order of
 * ac_pc_shader_type_bits[].
 */
static const char *const ac_pc_shader_type_suffixes[] = {"",    "_ES", "_GS", "_VS",
                                                         "_PS", "_LS", "_HS", "_CS"};
146 
147 static const unsigned ac_pc_shader_type_bits[] = {
148    0x7f,
149    S_036780_ES_EN(1),
150    S_036780_GS_EN(1),
151    S_036780_VS_EN(1),
152    S_036780_PS_EN(1),
153    S_036780_LS_EN(1),
154    S_036780_HS_EN(1),
155    S_036780_CS_EN(1),
156 };
157 
158 static inline bool
ac_pc_block_has_per_se_groups(const struct ac_perfcounters * pc,const struct ac_pc_block * block)159 ac_pc_block_has_per_se_groups(const struct ac_perfcounters *pc,
160                               const struct ac_pc_block *block)
161 {
162    return block->b->b->flags & AC_PC_BLOCK_SE_GROUPS ||
163           (block->b->b->flags & AC_PC_BLOCK_SE && pc->separate_se);
164 }
165 
166 static inline bool
ac_pc_block_has_per_instance_groups(const struct ac_perfcounters * pc,const struct ac_pc_block * block)167 ac_pc_block_has_per_instance_groups(const struct ac_perfcounters *pc,
168                                     const struct ac_pc_block *block)
169 {
170    return block->b->b->flags & AC_PC_BLOCK_INSTANCE_GROUPS ||
171           (block->num_instances > 1 && pc->separate_instance);
172 }
173 
/* Out-of-line perfcounter API. The definitions are not part of this header,
 * so the notes below state only what the signatures show; anything marked
 * NOTE(review) must be confirmed against the implementation.
 */

/* Look up the block for counter `index`.
 * NOTE(review): base_gid and sub_index are non-const pointers, presumably
 * outputs; the failure return is presumably NULL -- confirm. */
struct ac_pc_block *ac_lookup_counter(const struct ac_perfcounters *pc,
                                      unsigned index, unsigned *base_gid,
                                      unsigned *sub_index);

/* Look up the block for group `*index`.
 * NOTE(review): index is a non-const pointer, so it is presumably updated
 * by the call -- confirm. */
struct ac_pc_block *ac_lookup_group(const struct ac_perfcounters *pc,
                                    unsigned *index);

/* Return the block in `pc` matching `gpu_block`.
 * NOTE(review): presumably NULL when no such block exists -- confirm. */
struct ac_pc_block *ac_pc_get_block(const struct ac_perfcounters *pc,
                                    enum ac_pc_gpu_block gpu_block);

/* Initialize the name storage of `block`; `block` is the only non-const
 * argument.
 * NOTE(review): presumably fills group_names/selector_names and returns
 * false on allocation failure -- confirm. */
bool ac_init_block_names(const struct radeon_info *info,
                         const struct ac_perfcounters *pc,
                         struct ac_pc_block *block);

/* Initialize `pc` for the GPU described by `info`.
 * NOTE(review): separate_se / separate_instance presumably end up in the
 * members of the same name; false presumably signals failure -- confirm. */
bool ac_init_perfcounters(const struct radeon_info *info,
                          bool separate_se,
                          bool separate_instance,
                          struct ac_perfcounters *pc);

/* Counterpart of ac_init_perfcounters().
 * NOTE(review): presumably releases what initialization allocated --
 * confirm. */
void ac_destroy_perfcounters(struct ac_perfcounters *pc);
192 
193 #endif
194