1*61046927SAndroid Build Coastguard Worker /*
2*61046927SAndroid Build Coastguard Worker * Copyright © 2021 Valve Corporation
3*61046927SAndroid Build Coastguard Worker *
4*61046927SAndroid Build Coastguard Worker * SPDX-License-Identifier: MIT
5*61046927SAndroid Build Coastguard Worker */
6*61046927SAndroid Build Coastguard Worker
7*61046927SAndroid Build Coastguard Worker #include <inttypes.h>
8*61046927SAndroid Build Coastguard Worker
9*61046927SAndroid Build Coastguard Worker #include "ac_perfcounter.h"
10*61046927SAndroid Build Coastguard Worker #include "amdgfxregs.h"
11*61046927SAndroid Build Coastguard Worker #include "radv_cs.h"
12*61046927SAndroid Build Coastguard Worker #include "radv_entrypoints.h"
13*61046927SAndroid Build Coastguard Worker #include "radv_perfcounter.h"
14*61046927SAndroid Build Coastguard Worker #include "radv_sqtt.h"
15*61046927SAndroid Build Coastguard Worker #include "sid.h"
16*61046927SAndroid Build Coastguard Worker
17*61046927SAndroid Build Coastguard Worker void
radv_perfcounter_emit_shaders(struct radv_device * device,struct radeon_cmdbuf * cs,unsigned shaders)18*61046927SAndroid Build Coastguard Worker radv_perfcounter_emit_shaders(struct radv_device *device, struct radeon_cmdbuf *cs, unsigned shaders)
19*61046927SAndroid Build Coastguard Worker {
20*61046927SAndroid Build Coastguard Worker const struct radv_physical_device *pdev = radv_device_physical(device);
21*61046927SAndroid Build Coastguard Worker
22*61046927SAndroid Build Coastguard Worker if (pdev->info.gfx_level >= GFX10) {
23*61046927SAndroid Build Coastguard Worker radeon_set_uconfig_reg(cs, R_036780_SQ_PERFCOUNTER_CTRL, shaders & 0x7f);
24*61046927SAndroid Build Coastguard Worker if (pdev->info.gfx_level >= GFX11)
25*61046927SAndroid Build Coastguard Worker radeon_set_uconfig_reg(cs, R_036760_SQG_PERFCOUNTER_CTRL, shaders & 0x7f);
26*61046927SAndroid Build Coastguard Worker } else {
27*61046927SAndroid Build Coastguard Worker radeon_set_uconfig_reg_seq(cs, R_036780_SQ_PERFCOUNTER_CTRL, 2);
28*61046927SAndroid Build Coastguard Worker radeon_emit(cs, shaders & 0x7f);
29*61046927SAndroid Build Coastguard Worker radeon_emit(cs, 0xffffffff);
30*61046927SAndroid Build Coastguard Worker }
31*61046927SAndroid Build Coastguard Worker }
32*61046927SAndroid Build Coastguard Worker
33*61046927SAndroid Build Coastguard Worker static void
radv_emit_windowed_counters(struct radv_device * device,struct radeon_cmdbuf * cs,int family,bool enable)34*61046927SAndroid Build Coastguard Worker radv_emit_windowed_counters(struct radv_device *device, struct radeon_cmdbuf *cs, int family, bool enable)
35*61046927SAndroid Build Coastguard Worker {
36*61046927SAndroid Build Coastguard Worker if (family == RADV_QUEUE_GENERAL) {
37*61046927SAndroid Build Coastguard Worker radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
38*61046927SAndroid Build Coastguard Worker radeon_emit(cs, EVENT_TYPE(enable ? V_028A90_PERFCOUNTER_START : V_028A90_PERFCOUNTER_STOP) | EVENT_INDEX(0));
39*61046927SAndroid Build Coastguard Worker }
40*61046927SAndroid Build Coastguard Worker
41*61046927SAndroid Build Coastguard Worker radeon_set_sh_reg(cs, R_00B82C_COMPUTE_PERFCOUNT_ENABLE, S_00B82C_PERFCOUNT_ENABLE(enable));
42*61046927SAndroid Build Coastguard Worker }
43*61046927SAndroid Build Coastguard Worker
44*61046927SAndroid Build Coastguard Worker void
radv_perfcounter_emit_spm_reset(struct radeon_cmdbuf * cs)45*61046927SAndroid Build Coastguard Worker radv_perfcounter_emit_spm_reset(struct radeon_cmdbuf *cs)
46*61046927SAndroid Build Coastguard Worker {
47*61046927SAndroid Build Coastguard Worker radeon_set_uconfig_reg(cs, R_036020_CP_PERFMON_CNTL,
48*61046927SAndroid Build Coastguard Worker S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET) |
49*61046927SAndroid Build Coastguard Worker S_036020_SPM_PERFMON_STATE(V_036020_STRM_PERFMON_STATE_DISABLE_AND_RESET));
50*61046927SAndroid Build Coastguard Worker }
51*61046927SAndroid Build Coastguard Worker
52*61046927SAndroid Build Coastguard Worker void
radv_perfcounter_emit_spm_start(struct radv_device * device,struct radeon_cmdbuf * cs,int family)53*61046927SAndroid Build Coastguard Worker radv_perfcounter_emit_spm_start(struct radv_device *device, struct radeon_cmdbuf *cs, int family)
54*61046927SAndroid Build Coastguard Worker {
55*61046927SAndroid Build Coastguard Worker /* Start SPM counters. */
56*61046927SAndroid Build Coastguard Worker radeon_set_uconfig_reg(cs, R_036020_CP_PERFMON_CNTL,
57*61046927SAndroid Build Coastguard Worker S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET) |
58*61046927SAndroid Build Coastguard Worker S_036020_SPM_PERFMON_STATE(V_036020_STRM_PERFMON_STATE_START_COUNTING));
59*61046927SAndroid Build Coastguard Worker
60*61046927SAndroid Build Coastguard Worker radv_emit_windowed_counters(device, cs, family, true);
61*61046927SAndroid Build Coastguard Worker }
62*61046927SAndroid Build Coastguard Worker
63*61046927SAndroid Build Coastguard Worker void
radv_perfcounter_emit_spm_stop(struct radv_device * device,struct radeon_cmdbuf * cs,int family)64*61046927SAndroid Build Coastguard Worker radv_perfcounter_emit_spm_stop(struct radv_device *device, struct radeon_cmdbuf *cs, int family)
65*61046927SAndroid Build Coastguard Worker {
66*61046927SAndroid Build Coastguard Worker const struct radv_physical_device *pdev = radv_device_physical(device);
67*61046927SAndroid Build Coastguard Worker
68*61046927SAndroid Build Coastguard Worker radv_emit_windowed_counters(device, cs, family, false);
69*61046927SAndroid Build Coastguard Worker
70*61046927SAndroid Build Coastguard Worker /* Stop SPM counters. */
71*61046927SAndroid Build Coastguard Worker radeon_set_uconfig_reg(cs, R_036020_CP_PERFMON_CNTL,
72*61046927SAndroid Build Coastguard Worker S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET) |
73*61046927SAndroid Build Coastguard Worker S_036020_SPM_PERFMON_STATE(pdev->info.never_stop_sq_perf_counters
74*61046927SAndroid Build Coastguard Worker ? V_036020_STRM_PERFMON_STATE_START_COUNTING
75*61046927SAndroid Build Coastguard Worker : V_036020_STRM_PERFMON_STATE_STOP_COUNTING));
76*61046927SAndroid Build Coastguard Worker }
77*61046927SAndroid Build Coastguard Worker
/* How the raw register values listed in radv_perfcounter_impl::regs are
 * combined into the value of one exposed counter. The evaluation code is not
 * part of this section; the notes below are derived from how the ops are used
 * in the descriptor table.
 */
enum radv_perfcounter_op {
   RADV_PC_OP_SUM,
   RADV_PC_OP_MAX,
   /* Used with (counter, counter, constant) -- presumably reg0 / reg1 / reg2; TODO confirm. */
   RADV_PC_OP_RATIO_DIVSCALE,
   RADV_PC_OP_REVERSE_RATIO, /* (reg1 - reg0) / reg1 */
   /* Used with four (counter, constant weight) pairs. */
   RADV_PC_OP_SUM_WEIGHTED_4,
};
85*61046927SAndroid Build Coastguard Worker
/* Encoding of one 32-bit entry in radv_perfcounter_impl::regs:
 *   bits  0..15  counter selector within the block
 *   bits 16..30  GPU block
 *   bit  31      set when the entry is a constant rather than a counter
 * S_* macros build a field, G_* macros extract it.
 */
#define S_REG_SEL(x) ((x)&0xFFFF)
#define G_REG_SEL(x) ((x)&0xFFFF)
#define S_REG_BLOCK(x) ((x) << 16)
#define G_REG_BLOCK(x) (((x) >> 16) & 0x7FFF)

/* Encoding of a per-register offset entry:
 *   bits  0..15  byte offset
 *   bits 16..30  number of instances
 * The constant flag shares bit 31 with the encoding above.
 */
#define S_REG_OFFSET(x) ((x)&0xFFFF)
#define G_REG_OFFSET(x) ((x)&0xFFFF)
#define S_REG_INSTANCES(x) ((x) << 16)
#define G_REG_INSTANCES(x) (((x) >> 16) & 0x7FFF)
#define S_REG_CONSTANT(x) ((x) << 31)
#define G_REG_CONSTANT(x) ((x) >> 31)
97*61046927SAndroid Build Coastguard Worker
/* Recipe for computing one exposed counter: an operation plus up to eight
 * operands, each either an encoded hardware counter (CTR) or a constant
 * (CONSTANT). Unused trailing slots are zero.
 */
struct radv_perfcounter_impl {
   enum radv_perfcounter_op op;
   uint32_t regs[8];
};
102*61046927SAndroid Build Coastguard Worker
/* Only append to this list; never insert into the middle or remove entries
 * (renaming is fine).
 *
 * The invariant is that a value identifies a counter by its meaning: the same
 * value may be shared by counters that are implemented differently on
 * different GPUs, but it must be unique within a single GPU.
 */
enum radv_perfcounter_uuid {
   RADV_PC_UUID_GPU_CYCLES,
   RADV_PC_UUID_SHADER_WAVES,
   RADV_PC_UUID_SHADER_INSTRUCTIONS,
   RADV_PC_UUID_SHADER_INSTRUCTIONS_VALU,
   RADV_PC_UUID_SHADER_INSTRUCTIONS_SALU,
   RADV_PC_UUID_SHADER_INSTRUCTIONS_VMEM_LOAD,
   RADV_PC_UUID_SHADER_INSTRUCTIONS_SMEM_LOAD,
   RADV_PC_UUID_SHADER_INSTRUCTIONS_VMEM_STORE,
   RADV_PC_UUID_SHADER_INSTRUCTIONS_LDS,
   RADV_PC_UUID_SHADER_INSTRUCTIONS_GDS,
   RADV_PC_UUID_SHADER_VALU_BUSY,
   RADV_PC_UUID_SHADER_SALU_BUSY,
   RADV_PC_UUID_VRAM_READ_SIZE,
   RADV_PC_UUID_VRAM_WRITE_SIZE,
   RADV_PC_UUID_L0_CACHE_HIT_RATIO,
   RADV_PC_UUID_L1_CACHE_HIT_RATIO,
   RADV_PC_UUID_L2_CACHE_HIT_RATIO,
};
128*61046927SAndroid Build Coastguard Worker
/* Full description of one exposed performance counter: how to compute it
 * (impl), its Vulkan unit, the human-readable strings, and its stable
 * identifier (uuid).
 */
struct radv_perfcounter_desc {
   struct radv_perfcounter_impl impl;

   VkPerformanceCounterUnitKHR unit;

   char name[VK_MAX_DESCRIPTION_SIZE];
   char category[VK_MAX_DESCRIPTION_SIZE];
   char description[VK_MAX_DESCRIPTION_SIZE];
   enum radv_perfcounter_uuid uuid;
};
139*61046927SAndroid Build Coastguard Worker
/* Build a radv_perfcounter_desc compound literal. arg_unit and arg_uuid are
 * bare suffixes that get pasted into VK_PERFORMANCE_COUNTER_UNIT_<unit>_KHR
 * and RADV_PC_UUID_<uuid>; the variadic arguments fill impl.regs.
 */
#define PC_DESC(arg_op, arg_unit, arg_name, arg_category, arg_description, arg_uuid, ...)                              \
   (struct radv_perfcounter_desc)                                                                                      \
   {                                                                                                                   \
      .impl = {.op = arg_op, .regs = {__VA_ARGS__}}, .unit = VK_PERFORMANCE_COUNTER_UNIT_##arg_unit##_KHR,             \
      .name = arg_name, .category = arg_category, .description = arg_description, .uuid = RADV_PC_UUID_##arg_uuid      \
   }

/* Append one descriptor. Relies on `descs` (array, may be NULL) and `count`
 * (pointer to the running index) being in scope at the expansion site. With
 * descs == NULL nothing is stored and only *count is advanced.
 */
#define ADD_PC(op, unit, name, category, description, uuid, ...)                                                       \
   do {                                                                                                                \
      if (descs) {                                                                                                     \
         descs[*count] = PC_DESC((op), unit, name, category, description, uuid, __VA_ARGS__);                          \
      }                                                                                                                \
      ++*count;                                                                                                        \
   } while (0)
/* Encode a hardware counter: GPU block in the upper half, selector in the lower half. */
#define CTR(block, ctr) (S_REG_BLOCK(block) | S_REG_SEL(ctr))
/* Encode a constant operand: bit 31 marks the entry as a constant, v is OR'ed in. */
#define CONSTANT(v) (S_REG_CONSTANT(1) | (uint32_t)(v))
156*61046927SAndroid Build Coastguard Worker
/* Encoded hardware counter selectors (block + selector index), grouped by GPU
 * block. Names with a _GFX10/_GFX101/_GFX103 suffix are selected per
 * generation by the descriptor table; the GL2C selectors have separate
 * GFX101 and GFX103 values.
 */
enum { GRBM_PERF_SEL_GUI_ACTIVE = CTR(GRBM, 2) };

enum { CPF_PERF_SEL_CPF_STAT_BUSY_GFX10 = CTR(CPF, 0x18) };

enum {
   GL1C_PERF_SEL_REQ = CTR(GL1C, 0xe),
   GL1C_PERF_SEL_REQ_MISS = CTR(GL1C, 0x12),
};

enum {
   GL2C_PERF_SEL_REQ = CTR(GL2C, 0x3),

   GL2C_PERF_SEL_MISS_GFX101 = CTR(GL2C, 0x23),
   GL2C_PERF_SEL_MC_WRREQ_GFX101 = CTR(GL2C, 0x4b),
   GL2C_PERF_SEL_EA_WRREQ_64B_GFX101 = CTR(GL2C, 0x4c),
   GL2C_PERF_SEL_EA_RDREQ_32B_GFX101 = CTR(GL2C, 0x59),
   GL2C_PERF_SEL_EA_RDREQ_64B_GFX101 = CTR(GL2C, 0x5a),
   GL2C_PERF_SEL_EA_RDREQ_96B_GFX101 = CTR(GL2C, 0x5b),
   GL2C_PERF_SEL_EA_RDREQ_128B_GFX101 = CTR(GL2C, 0x5c),

   GL2C_PERF_SEL_MISS_GFX103 = CTR(GL2C, 0x2b),
   GL2C_PERF_SEL_MC_WRREQ_GFX103 = CTR(GL2C, 0x53),
   GL2C_PERF_SEL_EA_WRREQ_64B_GFX103 = CTR(GL2C, 0x55),
   GL2C_PERF_SEL_EA_RDREQ_32B_GFX103 = CTR(GL2C, 0x63),
   GL2C_PERF_SEL_EA_RDREQ_64B_GFX103 = CTR(GL2C, 0x64),
   GL2C_PERF_SEL_EA_RDREQ_96B_GFX103 = CTR(GL2C, 0x65),
   GL2C_PERF_SEL_EA_RDREQ_128B_GFX103 = CTR(GL2C, 0x66),
};

enum {
   SQ_PERF_SEL_WAVES = CTR(SQ, 0x4),
   SQ_PERF_SEL_INSTS_ALL_GFX10 = CTR(SQ, 0x31),
   SQ_PERF_SEL_INSTS_GDS_GFX10 = CTR(SQ, 0x37),
   SQ_PERF_SEL_INSTS_LDS_GFX10 = CTR(SQ, 0x3b),
   SQ_PERF_SEL_INSTS_SALU_GFX10 = CTR(SQ, 0x3c),
   SQ_PERF_SEL_INSTS_SMEM_GFX10 = CTR(SQ, 0x3d),
   SQ_PERF_SEL_INSTS_VALU_GFX10 = CTR(SQ, 0x40),
   SQ_PERF_SEL_INSTS_TEX_LOAD_GFX10 = CTR(SQ, 0x45),
   SQ_PERF_SEL_INSTS_TEX_STORE_GFX10 = CTR(SQ, 0x46),
   SQ_PERF_SEL_INST_CYCLES_VALU_GFX10 = CTR(SQ, 0x75),
};

enum {
   TCP_PERF_SEL_REQ_GFX10 = CTR(TCP, 0x9),
   TCP_PERF_SEL_REQ_MISS_GFX10 = CTR(TCP, 0x12),
};
203*61046927SAndroid Build Coastguard Worker
/* Constant operands derived from the device info. Both expand to an
 * expression that reads `pdev`, which must be in scope at the expansion site.
 */
#define CTR_NUM_SIMD CONSTANT(pdev->info.num_simd_per_compute_unit * pdev->info.num_cu)
#define CTR_NUM_CUS CONSTANT(pdev->info.num_cu)
206*61046927SAndroid Build Coastguard Worker
/**
 * Enumerate the performance counters exposed for this physical device.
 *
 * \param pdev   physical device; gfx_level and CU/SIMD counts are read from it.
 * \param count  reset to 0 on entry, holds the number of counters on return.
 * \param descs  output array, or NULL to only count. When non-NULL it must
 *               have room for every counter (call once with NULL to size it).
 *
 * The order of the ADD_PC() calls determines each counter's index in the
 * resulting array, so entries must not be reordered.
 */
static void
radv_query_perfcounter_descs(struct radv_physical_device *pdev, uint32_t *count, struct radv_perfcounter_desc *descs)
{
   *count = 0;

   ADD_PC(RADV_PC_OP_MAX, CYCLES, "GPU active cycles", "GRBM", "cycles the GPU is active processing a command buffer.",
          GPU_CYCLES, GRBM_PERF_SEL_GUI_ACTIVE);

   /* Plain instruction / wave counts, one hardware counter each. */
   ADD_PC(RADV_PC_OP_SUM, GENERIC, "Waves", "Shaders", "Number of waves executed", SHADER_WAVES, SQ_PERF_SEL_WAVES);
   ADD_PC(RADV_PC_OP_SUM, GENERIC, "Instructions", "Shaders", "Number of Instructions executed", SHADER_INSTRUCTIONS,
          SQ_PERF_SEL_INSTS_ALL_GFX10);
   ADD_PC(RADV_PC_OP_SUM, GENERIC, "VALU Instructions", "Shaders", "Number of VALU Instructions executed",
          SHADER_INSTRUCTIONS_VALU, SQ_PERF_SEL_INSTS_VALU_GFX10);
   ADD_PC(RADV_PC_OP_SUM, GENERIC, "SALU Instructions", "Shaders", "Number of SALU Instructions executed",
          SHADER_INSTRUCTIONS_SALU, SQ_PERF_SEL_INSTS_SALU_GFX10);
   ADD_PC(RADV_PC_OP_SUM, GENERIC, "VMEM Load Instructions", "Shaders", "Number of VMEM load instructions executed",
          SHADER_INSTRUCTIONS_VMEM_LOAD, SQ_PERF_SEL_INSTS_TEX_LOAD_GFX10);
   ADD_PC(RADV_PC_OP_SUM, GENERIC, "SMEM Load Instructions", "Shaders", "Number of SMEM load instructions executed",
          SHADER_INSTRUCTIONS_SMEM_LOAD, SQ_PERF_SEL_INSTS_SMEM_GFX10);
   ADD_PC(RADV_PC_OP_SUM, GENERIC, "VMEM Store Instructions", "Shaders", "Number of VMEM store instructions executed",
          SHADER_INSTRUCTIONS_VMEM_STORE, SQ_PERF_SEL_INSTS_TEX_STORE_GFX10);
   ADD_PC(RADV_PC_OP_SUM, GENERIC, "LDS Instructions", "Shaders", "Number of LDS Instructions executed",
          SHADER_INSTRUCTIONS_LDS, SQ_PERF_SEL_INSTS_LDS_GFX10);
   ADD_PC(RADV_PC_OP_SUM, GENERIC, "GDS Instructions", "Shaders", "Number of GDS Instructions executed",
          SHADER_INSTRUCTIONS_GDS, SQ_PERF_SEL_INSTS_GDS_GFX10);

   /* Utilization: busy counter, CPF busy counter, and a unit-count constant. */
   ADD_PC(RADV_PC_OP_RATIO_DIVSCALE, PERCENTAGE, "VALU Busy", "Shader Utilization",
          "Percentage of time the VALU units are busy", SHADER_VALU_BUSY, SQ_PERF_SEL_INST_CYCLES_VALU_GFX10,
          CPF_PERF_SEL_CPF_STAT_BUSY_GFX10, CTR_NUM_SIMD);
   ADD_PC(RADV_PC_OP_RATIO_DIVSCALE, PERCENTAGE, "SALU Busy", "Shader Utilization",
          "Percentage of time the SALU units are busy", SHADER_SALU_BUSY, SQ_PERF_SEL_INSTS_SALU_GFX10,
          CPF_PERF_SEL_CPF_STAT_BUSY_GFX10, CTR_NUM_CUS);

   /* VRAM traffic: (request counter, bytes-per-request) pairs. The GL2C
    * selector values differ between GFX10.3+ and earlier GFX10.
    */
   if (pdev->info.gfx_level >= GFX10_3) {
      ADD_PC(RADV_PC_OP_SUM_WEIGHTED_4, BYTES, "VRAM read size", "Memory", "Number of bytes read from VRAM",
             VRAM_READ_SIZE, GL2C_PERF_SEL_EA_RDREQ_32B_GFX103, CONSTANT(32), GL2C_PERF_SEL_EA_RDREQ_64B_GFX103,
             CONSTANT(64), GL2C_PERF_SEL_EA_RDREQ_96B_GFX103, CONSTANT(96), GL2C_PERF_SEL_EA_RDREQ_128B_GFX103,
             CONSTANT(128));
      ADD_PC(RADV_PC_OP_SUM_WEIGHTED_4, BYTES, "VRAM write size", "Memory", "Number of bytes written to VRAM",
             VRAM_WRITE_SIZE, GL2C_PERF_SEL_MC_WRREQ_GFX103, CONSTANT(32), GL2C_PERF_SEL_EA_WRREQ_64B_GFX103,
             CONSTANT(64), CONSTANT(0), CONSTANT(0), CONSTANT(0), CONSTANT(0));
   } else {
      ADD_PC(RADV_PC_OP_SUM_WEIGHTED_4, BYTES, "VRAM read size", "Memory", "Number of bytes read from VRAM",
             VRAM_READ_SIZE, GL2C_PERF_SEL_EA_RDREQ_32B_GFX101, CONSTANT(32), GL2C_PERF_SEL_EA_RDREQ_64B_GFX101,
             CONSTANT(64), GL2C_PERF_SEL_EA_RDREQ_96B_GFX101, CONSTANT(96), GL2C_PERF_SEL_EA_RDREQ_128B_GFX101,
             CONSTANT(128));
      /* NOTE(review): the 64B write counter is weighted 32 here but 64 in the
       * GFX10.3 branch above -- confirm whether this difference is intended.
       */
      ADD_PC(RADV_PC_OP_SUM_WEIGHTED_4, BYTES, "VRAM write size", "Memory", "Number of bytes written to VRAM",
             VRAM_WRITE_SIZE, GL2C_PERF_SEL_MC_WRREQ_GFX101, CONSTANT(32), GL2C_PERF_SEL_EA_WRREQ_64B_GFX101,
             CONSTANT(32), CONSTANT(0), CONSTANT(0), CONSTANT(0), CONSTANT(0));
   }

   /* Cache hit ratios: (miss counter, request counter).
    * NOTE(review): these ratios are tagged with the BYTES unit -- confirm
    * whether PERCENTAGE was intended.
    */
   ADD_PC(RADV_PC_OP_REVERSE_RATIO, BYTES, "L0 cache hit ratio", "Memory", "Hit ratio of L0 cache", L0_CACHE_HIT_RATIO,
          TCP_PERF_SEL_REQ_MISS_GFX10, TCP_PERF_SEL_REQ_GFX10);
   ADD_PC(RADV_PC_OP_REVERSE_RATIO, BYTES, "L1 cache hit ratio", "Memory", "Hit ratio of L1 cache", L1_CACHE_HIT_RATIO,
          GL1C_PERF_SEL_REQ_MISS, GL1C_PERF_SEL_REQ);
   if (pdev->info.gfx_level >= GFX10_3) {
      ADD_PC(RADV_PC_OP_REVERSE_RATIO, BYTES, "L2 cache hit ratio", "Memory", "Hit ratio of L2 cache",
             L2_CACHE_HIT_RATIO, GL2C_PERF_SEL_MISS_GFX103, GL2C_PERF_SEL_REQ);
   } else {
      ADD_PC(RADV_PC_OP_REVERSE_RATIO, BYTES, "L2 cache hit ratio", "Memory", "Hit ratio of L2 cache",
             L2_CACHE_HIT_RATIO, GL2C_PERF_SEL_MISS_GFX101, GL2C_PERF_SEL_REQ);
   }
}
270*61046927SAndroid Build Coastguard Worker
271*61046927SAndroid Build Coastguard Worker static bool
radv_init_perfcounter_descs(struct radv_physical_device * pdev)272*61046927SAndroid Build Coastguard Worker radv_init_perfcounter_descs(struct radv_physical_device *pdev)
273*61046927SAndroid Build Coastguard Worker {
274*61046927SAndroid Build Coastguard Worker if (pdev->perfcounters)
275*61046927SAndroid Build Coastguard Worker return true;
276*61046927SAndroid Build Coastguard Worker
277*61046927SAndroid Build Coastguard Worker uint32_t count;
278*61046927SAndroid Build Coastguard Worker radv_query_perfcounter_descs(pdev, &count, NULL);
279*61046927SAndroid Build Coastguard Worker
280*61046927SAndroid Build Coastguard Worker struct radv_perfcounter_desc *descs = malloc(sizeof(*descs) * count);
281*61046927SAndroid Build Coastguard Worker if (!descs)
282*61046927SAndroid Build Coastguard Worker return false;
283*61046927SAndroid Build Coastguard Worker
284*61046927SAndroid Build Coastguard Worker radv_query_perfcounter_descs(pdev, &count, descs);
285*61046927SAndroid Build Coastguard Worker pdev->num_perfcounters = count;
286*61046927SAndroid Build Coastguard Worker pdev->perfcounters = descs;
287*61046927SAndroid Build Coastguard Worker
288*61046927SAndroid Build Coastguard Worker return true;
289*61046927SAndroid Build Coastguard Worker }
290*61046927SAndroid Build Coastguard Worker
/* qsort() comparator ordering uint32_t values ascending.
 * Returns -1, 0 or 1; never subtracts the operands, so it cannot overflow.
 */
static int
cmp_uint32_t(const void *a, const void *b)
{
   const uint32_t lhs = *(const uint32_t *)a;
   const uint32_t rhs = *(const uint32_t *)b;

   return (lhs > rhs) - (lhs < rhs);
}
299*61046927SAndroid Build Coastguard Worker
300*61046927SAndroid Build Coastguard Worker static VkResult
radv_get_counter_registers(const struct radv_physical_device * pdev,uint32_t num_indices,const uint32_t * indices,unsigned * out_num_regs,uint32_t ** out_regs)301*61046927SAndroid Build Coastguard Worker radv_get_counter_registers(const struct radv_physical_device *pdev, uint32_t num_indices, const uint32_t *indices,
302*61046927SAndroid Build Coastguard Worker unsigned *out_num_regs, uint32_t **out_regs)
303*61046927SAndroid Build Coastguard Worker {
304*61046927SAndroid Build Coastguard Worker ASSERTED uint32_t num_counters = pdev->num_perfcounters;
305*61046927SAndroid Build Coastguard Worker const struct radv_perfcounter_desc *descs = pdev->perfcounters;
306*61046927SAndroid Build Coastguard Worker
307*61046927SAndroid Build Coastguard Worker unsigned full_reg_cnt = num_indices * ARRAY_SIZE(descs->impl.regs);
308*61046927SAndroid Build Coastguard Worker uint32_t *regs = malloc(full_reg_cnt * sizeof(uint32_t));
309*61046927SAndroid Build Coastguard Worker if (!regs)
310*61046927SAndroid Build Coastguard Worker return VK_ERROR_OUT_OF_HOST_MEMORY;
311*61046927SAndroid Build Coastguard Worker
312*61046927SAndroid Build Coastguard Worker unsigned reg_cnt = 0;
313*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < num_indices; ++i) {
314*61046927SAndroid Build Coastguard Worker uint32_t index = indices[i];
315*61046927SAndroid Build Coastguard Worker assert(index < num_counters);
316*61046927SAndroid Build Coastguard Worker for (unsigned j = 0; j < ARRAY_SIZE(descs[index].impl.regs) && descs[index].impl.regs[j]; ++j) {
317*61046927SAndroid Build Coastguard Worker if (!G_REG_CONSTANT(descs[index].impl.regs[j]))
318*61046927SAndroid Build Coastguard Worker regs[reg_cnt++] = descs[index].impl.regs[j];
319*61046927SAndroid Build Coastguard Worker }
320*61046927SAndroid Build Coastguard Worker }
321*61046927SAndroid Build Coastguard Worker
322*61046927SAndroid Build Coastguard Worker qsort(regs, reg_cnt, sizeof(uint32_t), cmp_uint32_t);
323*61046927SAndroid Build Coastguard Worker
324*61046927SAndroid Build Coastguard Worker unsigned deduped_reg_cnt = 0;
325*61046927SAndroid Build Coastguard Worker for (unsigned i = 1; i < reg_cnt; ++i) {
326*61046927SAndroid Build Coastguard Worker if (regs[i] != regs[deduped_reg_cnt])
327*61046927SAndroid Build Coastguard Worker regs[++deduped_reg_cnt] = regs[i];
328*61046927SAndroid Build Coastguard Worker }
329*61046927SAndroid Build Coastguard Worker ++deduped_reg_cnt;
330*61046927SAndroid Build Coastguard Worker
331*61046927SAndroid Build Coastguard Worker *out_num_regs = deduped_reg_cnt;
332*61046927SAndroid Build Coastguard Worker *out_regs = regs;
333*61046927SAndroid Build Coastguard Worker return VK_SUCCESS;
334*61046927SAndroid Build Coastguard Worker }
335*61046927SAndroid Build Coastguard Worker
336*61046927SAndroid Build Coastguard Worker static unsigned
radv_pc_get_num_instances(const struct radv_physical_device * pdev,struct ac_pc_block * ac_block)337*61046927SAndroid Build Coastguard Worker radv_pc_get_num_instances(const struct radv_physical_device *pdev, struct ac_pc_block *ac_block)
338*61046927SAndroid Build Coastguard Worker {
339*61046927SAndroid Build Coastguard Worker return ac_block->num_instances * ((ac_block->b->b->flags & AC_PC_BLOCK_SE) ? pdev->info.max_se : 1);
340*61046927SAndroid Build Coastguard Worker }
341*61046927SAndroid Build Coastguard Worker
342*61046927SAndroid Build Coastguard Worker static unsigned
radv_get_num_counter_passes(const struct radv_physical_device * pdev,unsigned num_regs,const uint32_t * regs)343*61046927SAndroid Build Coastguard Worker radv_get_num_counter_passes(const struct radv_physical_device *pdev, unsigned num_regs, const uint32_t *regs)
344*61046927SAndroid Build Coastguard Worker {
345*61046927SAndroid Build Coastguard Worker enum ac_pc_gpu_block prev_block = NUM_GPU_BLOCK;
346*61046927SAndroid Build Coastguard Worker unsigned block_reg_count = 0;
347*61046927SAndroid Build Coastguard Worker struct ac_pc_block *ac_block = NULL;
348*61046927SAndroid Build Coastguard Worker unsigned passes_needed = 1;
349*61046927SAndroid Build Coastguard Worker
350*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < num_regs; ++i) {
351*61046927SAndroid Build Coastguard Worker enum ac_pc_gpu_block block = G_REG_BLOCK(regs[i]);
352*61046927SAndroid Build Coastguard Worker
353*61046927SAndroid Build Coastguard Worker if (block != prev_block) {
354*61046927SAndroid Build Coastguard Worker block_reg_count = 0;
355*61046927SAndroid Build Coastguard Worker prev_block = block;
356*61046927SAndroid Build Coastguard Worker ac_block = ac_pc_get_block(&pdev->ac_perfcounters, block);
357*61046927SAndroid Build Coastguard Worker }
358*61046927SAndroid Build Coastguard Worker
359*61046927SAndroid Build Coastguard Worker ++block_reg_count;
360*61046927SAndroid Build Coastguard Worker
361*61046927SAndroid Build Coastguard Worker passes_needed = MAX2(passes_needed, DIV_ROUND_UP(block_reg_count, ac_block->b->b->num_counters));
362*61046927SAndroid Build Coastguard Worker }
363*61046927SAndroid Build Coastguard Worker
364*61046927SAndroid Build Coastguard Worker return passes_needed;
365*61046927SAndroid Build Coastguard Worker }
366*61046927SAndroid Build Coastguard Worker
367*61046927SAndroid Build Coastguard Worker void
radv_pc_deinit_query_pool(struct radv_pc_query_pool * pool)368*61046927SAndroid Build Coastguard Worker radv_pc_deinit_query_pool(struct radv_pc_query_pool *pool)
369*61046927SAndroid Build Coastguard Worker {
370*61046927SAndroid Build Coastguard Worker free(pool->counters);
371*61046927SAndroid Build Coastguard Worker free(pool->pc_regs);
372*61046927SAndroid Build Coastguard Worker }
373*61046927SAndroid Build Coastguard Worker
374*61046927SAndroid Build Coastguard Worker VkResult
radv_pc_init_query_pool(struct radv_physical_device * pdev,const VkQueryPoolCreateInfo * pCreateInfo,struct radv_pc_query_pool * pool)375*61046927SAndroid Build Coastguard Worker radv_pc_init_query_pool(struct radv_physical_device *pdev, const VkQueryPoolCreateInfo *pCreateInfo,
376*61046927SAndroid Build Coastguard Worker struct radv_pc_query_pool *pool)
377*61046927SAndroid Build Coastguard Worker {
378*61046927SAndroid Build Coastguard Worker const VkQueryPoolPerformanceCreateInfoKHR *perf_info =
379*61046927SAndroid Build Coastguard Worker vk_find_struct_const(pCreateInfo->pNext, QUERY_POOL_PERFORMANCE_CREATE_INFO_KHR);
380*61046927SAndroid Build Coastguard Worker VkResult result;
381*61046927SAndroid Build Coastguard Worker
382*61046927SAndroid Build Coastguard Worker if (!radv_init_perfcounter_descs(pdev))
383*61046927SAndroid Build Coastguard Worker return VK_ERROR_OUT_OF_HOST_MEMORY;
384*61046927SAndroid Build Coastguard Worker
385*61046927SAndroid Build Coastguard Worker result = radv_get_counter_registers(pdev, perf_info->counterIndexCount, perf_info->pCounterIndices,
386*61046927SAndroid Build Coastguard Worker &pool->num_pc_regs, &pool->pc_regs);
387*61046927SAndroid Build Coastguard Worker if (result != VK_SUCCESS)
388*61046927SAndroid Build Coastguard Worker return result;
389*61046927SAndroid Build Coastguard Worker
390*61046927SAndroid Build Coastguard Worker pool->num_passes = radv_get_num_counter_passes(pdev, pool->num_pc_regs, pool->pc_regs);
391*61046927SAndroid Build Coastguard Worker
392*61046927SAndroid Build Coastguard Worker uint32_t *pc_reg_offsets = malloc(pool->num_pc_regs * sizeof(uint32_t));
393*61046927SAndroid Build Coastguard Worker if (!pc_reg_offsets)
394*61046927SAndroid Build Coastguard Worker return VK_ERROR_OUT_OF_HOST_MEMORY;
395*61046927SAndroid Build Coastguard Worker
396*61046927SAndroid Build Coastguard Worker unsigned offset = 0;
397*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < pool->num_pc_regs; ++i) {
398*61046927SAndroid Build Coastguard Worker enum ac_pc_gpu_block block = pool->pc_regs[i] >> 16;
399*61046927SAndroid Build Coastguard Worker struct ac_pc_block *ac_block = ac_pc_get_block(&pdev->ac_perfcounters, block);
400*61046927SAndroid Build Coastguard Worker unsigned num_instances = radv_pc_get_num_instances(pdev, ac_block);
401*61046927SAndroid Build Coastguard Worker
402*61046927SAndroid Build Coastguard Worker pc_reg_offsets[i] = S_REG_OFFSET(offset) | S_REG_INSTANCES(num_instances);
403*61046927SAndroid Build Coastguard Worker offset += sizeof(uint64_t) * 2 * num_instances;
404*61046927SAndroid Build Coastguard Worker }
405*61046927SAndroid Build Coastguard Worker
406*61046927SAndroid Build Coastguard Worker /* allow an uint32_t per pass to signal completion. */
407*61046927SAndroid Build Coastguard Worker pool->b.stride = offset + 8 * pool->num_passes;
408*61046927SAndroid Build Coastguard Worker
409*61046927SAndroid Build Coastguard Worker pool->num_counters = perf_info->counterIndexCount;
410*61046927SAndroid Build Coastguard Worker pool->counters = malloc(pool->num_counters * sizeof(struct radv_perfcounter_impl));
411*61046927SAndroid Build Coastguard Worker if (!pool->counters) {
412*61046927SAndroid Build Coastguard Worker free(pc_reg_offsets);
413*61046927SAndroid Build Coastguard Worker return VK_ERROR_OUT_OF_HOST_MEMORY;
414*61046927SAndroid Build Coastguard Worker }
415*61046927SAndroid Build Coastguard Worker
416*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < pool->num_counters; ++i) {
417*61046927SAndroid Build Coastguard Worker pool->counters[i] = pdev->perfcounters[perf_info->pCounterIndices[i]].impl;
418*61046927SAndroid Build Coastguard Worker
419*61046927SAndroid Build Coastguard Worker for (unsigned j = 0; j < ARRAY_SIZE(pool->counters[i].regs); ++j) {
420*61046927SAndroid Build Coastguard Worker uint32_t reg = pool->counters[i].regs[j];
421*61046927SAndroid Build Coastguard Worker if (!reg || G_REG_CONSTANT(reg))
422*61046927SAndroid Build Coastguard Worker continue;
423*61046927SAndroid Build Coastguard Worker
424*61046927SAndroid Build Coastguard Worker unsigned k;
425*61046927SAndroid Build Coastguard Worker for (k = 0; k < pool->num_pc_regs; ++k)
426*61046927SAndroid Build Coastguard Worker if (pool->pc_regs[k] == reg)
427*61046927SAndroid Build Coastguard Worker break;
428*61046927SAndroid Build Coastguard Worker pool->counters[i].regs[j] = pc_reg_offsets[k];
429*61046927SAndroid Build Coastguard Worker }
430*61046927SAndroid Build Coastguard Worker }
431*61046927SAndroid Build Coastguard Worker
432*61046927SAndroid Build Coastguard Worker free(pc_reg_offsets);
433*61046927SAndroid Build Coastguard Worker return VK_SUCCESS;
434*61046927SAndroid Build Coastguard Worker }
435*61046927SAndroid Build Coastguard Worker
436*61046927SAndroid Build Coastguard Worker static void
radv_emit_instance(struct radv_cmd_buffer * cmd_buffer,int se,int instance)437*61046927SAndroid Build Coastguard Worker radv_emit_instance(struct radv_cmd_buffer *cmd_buffer, int se, int instance)
438*61046927SAndroid Build Coastguard Worker {
439*61046927SAndroid Build Coastguard Worker struct radeon_cmdbuf *cs = cmd_buffer->cs;
440*61046927SAndroid Build Coastguard Worker unsigned value = S_030800_SH_BROADCAST_WRITES(1);
441*61046927SAndroid Build Coastguard Worker
442*61046927SAndroid Build Coastguard Worker if (se >= 0) {
443*61046927SAndroid Build Coastguard Worker value |= S_030800_SE_INDEX(se);
444*61046927SAndroid Build Coastguard Worker } else {
445*61046927SAndroid Build Coastguard Worker value |= S_030800_SE_BROADCAST_WRITES(1);
446*61046927SAndroid Build Coastguard Worker }
447*61046927SAndroid Build Coastguard Worker
448*61046927SAndroid Build Coastguard Worker if (instance >= 0) {
449*61046927SAndroid Build Coastguard Worker value |= S_030800_INSTANCE_INDEX(instance);
450*61046927SAndroid Build Coastguard Worker } else {
451*61046927SAndroid Build Coastguard Worker value |= S_030800_INSTANCE_BROADCAST_WRITES(1);
452*61046927SAndroid Build Coastguard Worker }
453*61046927SAndroid Build Coastguard Worker
454*61046927SAndroid Build Coastguard Worker radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX, value);
455*61046927SAndroid Build Coastguard Worker }
456*61046927SAndroid Build Coastguard Worker
457*61046927SAndroid Build Coastguard Worker static void
radv_emit_select(struct radv_cmd_buffer * cmd_buffer,struct ac_pc_block * block,unsigned count,unsigned * selectors)458*61046927SAndroid Build Coastguard Worker radv_emit_select(struct radv_cmd_buffer *cmd_buffer, struct ac_pc_block *block, unsigned count, unsigned *selectors)
459*61046927SAndroid Build Coastguard Worker {
460*61046927SAndroid Build Coastguard Worker struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
461*61046927SAndroid Build Coastguard Worker const struct radv_physical_device *pdev = radv_device_physical(device);
462*61046927SAndroid Build Coastguard Worker const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
463*61046927SAndroid Build Coastguard Worker const enum radv_queue_family qf = cmd_buffer->qf;
464*61046927SAndroid Build Coastguard Worker struct ac_pc_block_base *regs = block->b->b;
465*61046927SAndroid Build Coastguard Worker struct radeon_cmdbuf *cs = cmd_buffer->cs;
466*61046927SAndroid Build Coastguard Worker unsigned idx;
467*61046927SAndroid Build Coastguard Worker
468*61046927SAndroid Build Coastguard Worker assert(count <= regs->num_counters);
469*61046927SAndroid Build Coastguard Worker
470*61046927SAndroid Build Coastguard Worker /* Fake counters. */
471*61046927SAndroid Build Coastguard Worker if (!regs->select0)
472*61046927SAndroid Build Coastguard Worker return;
473*61046927SAndroid Build Coastguard Worker
474*61046927SAndroid Build Coastguard Worker for (idx = 0; idx < count; ++idx) {
475*61046927SAndroid Build Coastguard Worker radeon_set_uconfig_perfctr_reg(gfx_level, qf, cs, regs->select0[idx],
476*61046927SAndroid Build Coastguard Worker G_REG_SEL(selectors[idx]) | regs->select_or);
477*61046927SAndroid Build Coastguard Worker }
478*61046927SAndroid Build Coastguard Worker
479*61046927SAndroid Build Coastguard Worker for (idx = 0; idx < regs->num_spm_counters; idx++) {
480*61046927SAndroid Build Coastguard Worker radeon_set_uconfig_reg_seq(cs, regs->select1[idx], 1);
481*61046927SAndroid Build Coastguard Worker radeon_emit(cs, 0);
482*61046927SAndroid Build Coastguard Worker }
483*61046927SAndroid Build Coastguard Worker }
484*61046927SAndroid Build Coastguard Worker
485*61046927SAndroid Build Coastguard Worker static void
radv_pc_emit_block_instance_read(struct radv_cmd_buffer * cmd_buffer,struct ac_pc_block * block,unsigned count,uint64_t va)486*61046927SAndroid Build Coastguard Worker radv_pc_emit_block_instance_read(struct radv_cmd_buffer *cmd_buffer, struct ac_pc_block *block, unsigned count,
487*61046927SAndroid Build Coastguard Worker uint64_t va)
488*61046927SAndroid Build Coastguard Worker {
489*61046927SAndroid Build Coastguard Worker struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
490*61046927SAndroid Build Coastguard Worker const struct radv_physical_device *pdev = radv_device_physical(device);
491*61046927SAndroid Build Coastguard Worker struct ac_pc_block_base *regs = block->b->b;
492*61046927SAndroid Build Coastguard Worker struct radeon_cmdbuf *cs = cmd_buffer->cs;
493*61046927SAndroid Build Coastguard Worker unsigned reg = regs->counter0_lo;
494*61046927SAndroid Build Coastguard Worker unsigned reg_delta = 8;
495*61046927SAndroid Build Coastguard Worker
496*61046927SAndroid Build Coastguard Worker assert(regs->select0);
497*61046927SAndroid Build Coastguard Worker for (unsigned idx = 0; idx < count; ++idx) {
498*61046927SAndroid Build Coastguard Worker if (regs->counters)
499*61046927SAndroid Build Coastguard Worker reg = regs->counters[idx];
500*61046927SAndroid Build Coastguard Worker
501*61046927SAndroid Build Coastguard Worker radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
502*61046927SAndroid Build Coastguard Worker radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_PERF) | COPY_DATA_DST_SEL(COPY_DATA_TC_L2) | COPY_DATA_WR_CONFIRM |
503*61046927SAndroid Build Coastguard Worker COPY_DATA_COUNT_SEL); /* 64 bits */
504*61046927SAndroid Build Coastguard Worker radeon_emit(cs, reg >> 2);
505*61046927SAndroid Build Coastguard Worker radeon_emit(cs, 0); /* unused */
506*61046927SAndroid Build Coastguard Worker radeon_emit(cs, va);
507*61046927SAndroid Build Coastguard Worker radeon_emit(cs, va >> 32);
508*61046927SAndroid Build Coastguard Worker
509*61046927SAndroid Build Coastguard Worker va += sizeof(uint64_t) * 2 * radv_pc_get_num_instances(pdev, block);
510*61046927SAndroid Build Coastguard Worker reg += reg_delta;
511*61046927SAndroid Build Coastguard Worker }
512*61046927SAndroid Build Coastguard Worker }
513*61046927SAndroid Build Coastguard Worker
514*61046927SAndroid Build Coastguard Worker static void
radv_pc_sample_block(struct radv_cmd_buffer * cmd_buffer,struct ac_pc_block * block,unsigned count,uint64_t va)515*61046927SAndroid Build Coastguard Worker radv_pc_sample_block(struct radv_cmd_buffer *cmd_buffer, struct ac_pc_block *block, unsigned count, uint64_t va)
516*61046927SAndroid Build Coastguard Worker {
517*61046927SAndroid Build Coastguard Worker struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
518*61046927SAndroid Build Coastguard Worker const struct radv_physical_device *pdev = radv_device_physical(device);
519*61046927SAndroid Build Coastguard Worker unsigned se_end = 1;
520*61046927SAndroid Build Coastguard Worker if (block->b->b->flags & AC_PC_BLOCK_SE)
521*61046927SAndroid Build Coastguard Worker se_end = pdev->info.max_se;
522*61046927SAndroid Build Coastguard Worker
523*61046927SAndroid Build Coastguard Worker for (unsigned se = 0; se < se_end; ++se) {
524*61046927SAndroid Build Coastguard Worker for (unsigned instance = 0; instance < block->num_instances; ++instance) {
525*61046927SAndroid Build Coastguard Worker radv_emit_instance(cmd_buffer, se, instance);
526*61046927SAndroid Build Coastguard Worker radv_pc_emit_block_instance_read(cmd_buffer, block, count, va);
527*61046927SAndroid Build Coastguard Worker va += sizeof(uint64_t) * 2;
528*61046927SAndroid Build Coastguard Worker }
529*61046927SAndroid Build Coastguard Worker }
530*61046927SAndroid Build Coastguard Worker }
531*61046927SAndroid Build Coastguard Worker
532*61046927SAndroid Build Coastguard Worker static void
radv_pc_wait_idle(struct radv_cmd_buffer * cmd_buffer)533*61046927SAndroid Build Coastguard Worker radv_pc_wait_idle(struct radv_cmd_buffer *cmd_buffer)
534*61046927SAndroid Build Coastguard Worker {
535*61046927SAndroid Build Coastguard Worker struct radeon_cmdbuf *cs = cmd_buffer->cs;
536*61046927SAndroid Build Coastguard Worker
537*61046927SAndroid Build Coastguard Worker radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
538*61046927SAndroid Build Coastguard Worker radeon_emit(cs, EVENT_TYPE(V_028A90_CS_PARTIAL_FLUSH | EVENT_INDEX(4)));
539*61046927SAndroid Build Coastguard Worker
540*61046927SAndroid Build Coastguard Worker radeon_emit(cs, PKT3(PKT3_ACQUIRE_MEM, 6, 0));
541*61046927SAndroid Build Coastguard Worker radeon_emit(cs, 0); /* CP_COHER_CNTL */
542*61046927SAndroid Build Coastguard Worker radeon_emit(cs, 0xffffffff); /* CP_COHER_SIZE */
543*61046927SAndroid Build Coastguard Worker radeon_emit(cs, 0xffffff); /* CP_COHER_SIZE_HI */
544*61046927SAndroid Build Coastguard Worker radeon_emit(cs, 0); /* CP_COHER_BASE */
545*61046927SAndroid Build Coastguard Worker radeon_emit(cs, 0); /* CP_COHER_BASE_HI */
546*61046927SAndroid Build Coastguard Worker radeon_emit(cs, 0x0000000A); /* POLL_INTERVAL */
547*61046927SAndroid Build Coastguard Worker radeon_emit(cs, 0); /* GCR_CNTL */
548*61046927SAndroid Build Coastguard Worker
549*61046927SAndroid Build Coastguard Worker radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
550*61046927SAndroid Build Coastguard Worker radeon_emit(cs, 0);
551*61046927SAndroid Build Coastguard Worker }
552*61046927SAndroid Build Coastguard Worker
553*61046927SAndroid Build Coastguard Worker static void
radv_pc_stop_and_sample(struct radv_cmd_buffer * cmd_buffer,struct radv_pc_query_pool * pool,uint64_t va,bool end)554*61046927SAndroid Build Coastguard Worker radv_pc_stop_and_sample(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_pool *pool, uint64_t va, bool end)
555*61046927SAndroid Build Coastguard Worker {
556*61046927SAndroid Build Coastguard Worker struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
557*61046927SAndroid Build Coastguard Worker const struct radv_physical_device *pdev = radv_device_physical(device);
558*61046927SAndroid Build Coastguard Worker struct radeon_cmdbuf *cs = cmd_buffer->cs;
559*61046927SAndroid Build Coastguard Worker
560*61046927SAndroid Build Coastguard Worker radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
561*61046927SAndroid Build Coastguard Worker radeon_emit(cs, EVENT_TYPE(V_028A90_PERFCOUNTER_SAMPLE) | EVENT_INDEX(0));
562*61046927SAndroid Build Coastguard Worker
563*61046927SAndroid Build Coastguard Worker radv_pc_wait_idle(cmd_buffer);
564*61046927SAndroid Build Coastguard Worker
565*61046927SAndroid Build Coastguard Worker radv_emit_instance(cmd_buffer, -1, -1);
566*61046927SAndroid Build Coastguard Worker radv_emit_windowed_counters(device, cs, cmd_buffer->qf, false);
567*61046927SAndroid Build Coastguard Worker
568*61046927SAndroid Build Coastguard Worker radeon_set_uconfig_reg(
569*61046927SAndroid Build Coastguard Worker cs, R_036020_CP_PERFMON_CNTL,
570*61046927SAndroid Build Coastguard Worker S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_STOP_COUNTING) | S_036020_PERFMON_SAMPLE_ENABLE(1));
571*61046927SAndroid Build Coastguard Worker
572*61046927SAndroid Build Coastguard Worker for (unsigned pass = 0; pass < pool->num_passes; ++pass) {
573*61046927SAndroid Build Coastguard Worker uint64_t pred_va = radv_buffer_get_va(device->perf_counter_bo) + PERF_CTR_BO_PASS_OFFSET + 8 * pass;
574*61046927SAndroid Build Coastguard Worker uint64_t reg_va = va + (end ? 8 : 0);
575*61046927SAndroid Build Coastguard Worker
576*61046927SAndroid Build Coastguard Worker radeon_emit(cs, PKT3(PKT3_COND_EXEC, 3, 0));
577*61046927SAndroid Build Coastguard Worker radeon_emit(cs, pred_va);
578*61046927SAndroid Build Coastguard Worker radeon_emit(cs, pred_va >> 32);
579*61046927SAndroid Build Coastguard Worker radeon_emit(cs, 0); /* Cache policy */
580*61046927SAndroid Build Coastguard Worker
581*61046927SAndroid Build Coastguard Worker uint32_t *skip_dwords = cs->buf + cs->cdw;
582*61046927SAndroid Build Coastguard Worker radeon_emit(cs, 0);
583*61046927SAndroid Build Coastguard Worker
584*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < pool->num_pc_regs;) {
585*61046927SAndroid Build Coastguard Worker enum ac_pc_gpu_block block = G_REG_BLOCK(pool->pc_regs[i]);
586*61046927SAndroid Build Coastguard Worker struct ac_pc_block *ac_block = ac_pc_get_block(&pdev->ac_perfcounters, block);
587*61046927SAndroid Build Coastguard Worker unsigned offset = ac_block->num_instances * pass;
588*61046927SAndroid Build Coastguard Worker unsigned num_instances = radv_pc_get_num_instances(pdev, ac_block);
589*61046927SAndroid Build Coastguard Worker
590*61046927SAndroid Build Coastguard Worker unsigned cnt = 1;
591*61046927SAndroid Build Coastguard Worker while (cnt < pool->num_pc_regs - i && block == G_REG_BLOCK(pool->pc_regs[i + cnt]))
592*61046927SAndroid Build Coastguard Worker ++cnt;
593*61046927SAndroid Build Coastguard Worker
594*61046927SAndroid Build Coastguard Worker if (offset < cnt) {
595*61046927SAndroid Build Coastguard Worker unsigned pass_reg_cnt = MIN2(cnt - offset, ac_block->b->b->num_counters);
596*61046927SAndroid Build Coastguard Worker radv_pc_sample_block(cmd_buffer, ac_block, pass_reg_cnt,
597*61046927SAndroid Build Coastguard Worker reg_va + offset * num_instances * sizeof(uint64_t));
598*61046927SAndroid Build Coastguard Worker }
599*61046927SAndroid Build Coastguard Worker
600*61046927SAndroid Build Coastguard Worker i += cnt;
601*61046927SAndroid Build Coastguard Worker reg_va += num_instances * sizeof(uint64_t) * 2 * cnt;
602*61046927SAndroid Build Coastguard Worker }
603*61046927SAndroid Build Coastguard Worker
604*61046927SAndroid Build Coastguard Worker if (end) {
605*61046927SAndroid Build Coastguard Worker uint64_t signal_va = va + pool->b.stride - 8 - 8 * pass;
606*61046927SAndroid Build Coastguard Worker radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
607*61046927SAndroid Build Coastguard Worker radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_ME));
608*61046927SAndroid Build Coastguard Worker radeon_emit(cs, signal_va);
609*61046927SAndroid Build Coastguard Worker radeon_emit(cs, signal_va >> 32);
610*61046927SAndroid Build Coastguard Worker radeon_emit(cs, 1); /* value */
611*61046927SAndroid Build Coastguard Worker }
612*61046927SAndroid Build Coastguard Worker
613*61046927SAndroid Build Coastguard Worker *skip_dwords = cs->buf + cs->cdw - skip_dwords - 1;
614*61046927SAndroid Build Coastguard Worker }
615*61046927SAndroid Build Coastguard Worker
616*61046927SAndroid Build Coastguard Worker radv_emit_instance(cmd_buffer, -1, -1);
617*61046927SAndroid Build Coastguard Worker }
618*61046927SAndroid Build Coastguard Worker
619*61046927SAndroid Build Coastguard Worker void
radv_pc_begin_query(struct radv_cmd_buffer * cmd_buffer,struct radv_pc_query_pool * pool,uint64_t va)620*61046927SAndroid Build Coastguard Worker radv_pc_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_pool *pool, uint64_t va)
621*61046927SAndroid Build Coastguard Worker {
622*61046927SAndroid Build Coastguard Worker struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
623*61046927SAndroid Build Coastguard Worker struct radeon_cmdbuf *cs = cmd_buffer->cs;
624*61046927SAndroid Build Coastguard Worker const struct radv_physical_device *pdev = radv_device_physical(device);
625*61046927SAndroid Build Coastguard Worker ASSERTED unsigned cdw_max;
626*61046927SAndroid Build Coastguard Worker
627*61046927SAndroid Build Coastguard Worker cmd_buffer->state.uses_perf_counters = true;
628*61046927SAndroid Build Coastguard Worker
629*61046927SAndroid Build Coastguard Worker cdw_max = radeon_check_space(device->ws, cs,
630*61046927SAndroid Build Coastguard Worker 256 + /* Random one time stuff */
631*61046927SAndroid Build Coastguard Worker 10 * pool->num_passes + /* COND_EXECs */
632*61046927SAndroid Build Coastguard Worker pool->b.stride / 8 * (5 + 8));
633*61046927SAndroid Build Coastguard Worker
634*61046927SAndroid Build Coastguard Worker radv_cs_add_buffer(device->ws, cmd_buffer->cs, pool->b.bo);
635*61046927SAndroid Build Coastguard Worker radv_cs_add_buffer(device->ws, cmd_buffer->cs, device->perf_counter_bo);
636*61046927SAndroid Build Coastguard Worker
637*61046927SAndroid Build Coastguard Worker uint64_t perf_ctr_va = radv_buffer_get_va(device->perf_counter_bo) + PERF_CTR_BO_FENCE_OFFSET;
638*61046927SAndroid Build Coastguard Worker radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
639*61046927SAndroid Build Coastguard Worker radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_ME));
640*61046927SAndroid Build Coastguard Worker radeon_emit(cs, perf_ctr_va);
641*61046927SAndroid Build Coastguard Worker radeon_emit(cs, perf_ctr_va >> 32);
642*61046927SAndroid Build Coastguard Worker radeon_emit(cs, 0); /* value */
643*61046927SAndroid Build Coastguard Worker
644*61046927SAndroid Build Coastguard Worker radv_pc_wait_idle(cmd_buffer);
645*61046927SAndroid Build Coastguard Worker
646*61046927SAndroid Build Coastguard Worker radeon_set_uconfig_reg(cs, R_036020_CP_PERFMON_CNTL,
647*61046927SAndroid Build Coastguard Worker S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET));
648*61046927SAndroid Build Coastguard Worker
649*61046927SAndroid Build Coastguard Worker radv_emit_inhibit_clockgating(device, cs, true);
650*61046927SAndroid Build Coastguard Worker radv_emit_spi_config_cntl(device, cs, true);
651*61046927SAndroid Build Coastguard Worker radv_perfcounter_emit_shaders(device, cs, 0x7f);
652*61046927SAndroid Build Coastguard Worker
653*61046927SAndroid Build Coastguard Worker for (unsigned pass = 0; pass < pool->num_passes; ++pass) {
654*61046927SAndroid Build Coastguard Worker uint64_t pred_va = radv_buffer_get_va(device->perf_counter_bo) + PERF_CTR_BO_PASS_OFFSET + 8 * pass;
655*61046927SAndroid Build Coastguard Worker
656*61046927SAndroid Build Coastguard Worker radeon_emit(cs, PKT3(PKT3_COND_EXEC, 3, 0));
657*61046927SAndroid Build Coastguard Worker radeon_emit(cs, pred_va);
658*61046927SAndroid Build Coastguard Worker radeon_emit(cs, pred_va >> 32);
659*61046927SAndroid Build Coastguard Worker radeon_emit(cs, 0); /* Cache policy */
660*61046927SAndroid Build Coastguard Worker
661*61046927SAndroid Build Coastguard Worker uint32_t *skip_dwords = cs->buf + cs->cdw;
662*61046927SAndroid Build Coastguard Worker radeon_emit(cs, 0);
663*61046927SAndroid Build Coastguard Worker
664*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < pool->num_pc_regs;) {
665*61046927SAndroid Build Coastguard Worker enum ac_pc_gpu_block block = G_REG_BLOCK(pool->pc_regs[i]);
666*61046927SAndroid Build Coastguard Worker struct ac_pc_block *ac_block = ac_pc_get_block(&pdev->ac_perfcounters, block);
667*61046927SAndroid Build Coastguard Worker unsigned offset = ac_block->num_instances * pass;
668*61046927SAndroid Build Coastguard Worker
669*61046927SAndroid Build Coastguard Worker unsigned cnt = 1;
670*61046927SAndroid Build Coastguard Worker while (cnt < pool->num_pc_regs - i && block == G_REG_BLOCK(pool->pc_regs[i + cnt]))
671*61046927SAndroid Build Coastguard Worker ++cnt;
672*61046927SAndroid Build Coastguard Worker
673*61046927SAndroid Build Coastguard Worker if (offset < cnt) {
674*61046927SAndroid Build Coastguard Worker unsigned pass_reg_cnt = MIN2(cnt - offset, ac_block->b->b->num_counters);
675*61046927SAndroid Build Coastguard Worker radv_emit_select(cmd_buffer, ac_block, pass_reg_cnt, pool->pc_regs + i + offset);
676*61046927SAndroid Build Coastguard Worker }
677*61046927SAndroid Build Coastguard Worker
678*61046927SAndroid Build Coastguard Worker i += cnt;
679*61046927SAndroid Build Coastguard Worker }
680*61046927SAndroid Build Coastguard Worker
681*61046927SAndroid Build Coastguard Worker *skip_dwords = cs->buf + cs->cdw - skip_dwords - 1;
682*61046927SAndroid Build Coastguard Worker }
683*61046927SAndroid Build Coastguard Worker
684*61046927SAndroid Build Coastguard Worker radv_emit_instance(cmd_buffer, -1, -1);
685*61046927SAndroid Build Coastguard Worker
686*61046927SAndroid Build Coastguard Worker /* The following sequence actually starts the perfcounters. */
687*61046927SAndroid Build Coastguard Worker
688*61046927SAndroid Build Coastguard Worker radv_pc_stop_and_sample(cmd_buffer, pool, va, false);
689*61046927SAndroid Build Coastguard Worker
690*61046927SAndroid Build Coastguard Worker radeon_set_uconfig_reg(cs, R_036020_CP_PERFMON_CNTL,
691*61046927SAndroid Build Coastguard Worker S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_START_COUNTING));
692*61046927SAndroid Build Coastguard Worker
693*61046927SAndroid Build Coastguard Worker radv_emit_windowed_counters(device, cs, cmd_buffer->qf, true);
694*61046927SAndroid Build Coastguard Worker
695*61046927SAndroid Build Coastguard Worker assert(cmd_buffer->cs->cdw <= cdw_max);
696*61046927SAndroid Build Coastguard Worker }
697*61046927SAndroid Build Coastguard Worker
698*61046927SAndroid Build Coastguard Worker void
radv_pc_end_query(struct radv_cmd_buffer * cmd_buffer,struct radv_pc_query_pool * pool,uint64_t va)699*61046927SAndroid Build Coastguard Worker radv_pc_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_pool *pool, uint64_t va)
700*61046927SAndroid Build Coastguard Worker {
701*61046927SAndroid Build Coastguard Worker struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
702*61046927SAndroid Build Coastguard Worker const struct radv_physical_device *pdev = radv_device_physical(device);
703*61046927SAndroid Build Coastguard Worker struct radeon_cmdbuf *cs = cmd_buffer->cs;
704*61046927SAndroid Build Coastguard Worker ASSERTED unsigned cdw_max;
705*61046927SAndroid Build Coastguard Worker
706*61046927SAndroid Build Coastguard Worker cdw_max = radeon_check_space(device->ws, cs,
707*61046927SAndroid Build Coastguard Worker 256 + /* Reserved for things that don't scale with passes/counters */
708*61046927SAndroid Build Coastguard Worker 5 * pool->num_passes + /* COND_EXECs */
709*61046927SAndroid Build Coastguard Worker pool->b.stride / 8 * 8);
710*61046927SAndroid Build Coastguard Worker
711*61046927SAndroid Build Coastguard Worker radv_cs_add_buffer(device->ws, cmd_buffer->cs, pool->b.bo);
712*61046927SAndroid Build Coastguard Worker radv_cs_add_buffer(device->ws, cmd_buffer->cs, device->perf_counter_bo);
713*61046927SAndroid Build Coastguard Worker
714*61046927SAndroid Build Coastguard Worker uint64_t perf_ctr_va = radv_buffer_get_va(device->perf_counter_bo) + PERF_CTR_BO_FENCE_OFFSET;
715*61046927SAndroid Build Coastguard Worker radv_cs_emit_write_event_eop(cs, pdev->info.gfx_level, cmd_buffer->qf, V_028A90_BOTTOM_OF_PIPE_TS, 0,
716*61046927SAndroid Build Coastguard Worker EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT, perf_ctr_va, 1, cmd_buffer->gfx9_fence_va);
717*61046927SAndroid Build Coastguard Worker radv_cp_wait_mem(cs, cmd_buffer->qf, WAIT_REG_MEM_EQUAL, perf_ctr_va, 1, 0xffffffff);
718*61046927SAndroid Build Coastguard Worker
719*61046927SAndroid Build Coastguard Worker radv_pc_wait_idle(cmd_buffer);
720*61046927SAndroid Build Coastguard Worker radv_pc_stop_and_sample(cmd_buffer, pool, va, true);
721*61046927SAndroid Build Coastguard Worker
722*61046927SAndroid Build Coastguard Worker radeon_set_uconfig_reg(cs, R_036020_CP_PERFMON_CNTL,
723*61046927SAndroid Build Coastguard Worker S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET));
724*61046927SAndroid Build Coastguard Worker radv_emit_spi_config_cntl(device, cs, false);
725*61046927SAndroid Build Coastguard Worker radv_emit_inhibit_clockgating(device, cs, false);
726*61046927SAndroid Build Coastguard Worker
727*61046927SAndroid Build Coastguard Worker assert(cmd_buffer->cs->cdw <= cdw_max);
728*61046927SAndroid Build Coastguard Worker }
729*61046927SAndroid Build Coastguard Worker
/**
 * Sum a counter over all of its instances.
 *
 * A constant descriptor yields its low 31 bits. Otherwise `reg` encodes a byte offset into
 * `data` and an instance count; every instance contributes the difference between the second
 * and the first uint64_t of its sample pair.
 */
static uint64_t
radv_pc_sum_reg(uint32_t reg, const uint64_t *data)
{
   if (G_REG_CONSTANT(reg))
      return reg & 0x7fffffffu;

   const unsigned num_instances = G_REG_INSTANCES(reg);
   const unsigned first = G_REG_OFFSET(reg) / 8;
   uint64_t total = 0;

   for (unsigned inst = 0; inst < num_instances; inst++) {
      const uint64_t *pair = &data[first + 2 * inst];

      total += pair[1] - pair[0];
   }

   return total;
}
746*61046927SAndroid Build Coastguard Worker
/**
 * Maximum of a counter over all of its instances.
 *
 * A constant descriptor yields its low 31 bits. Otherwise the result is the largest second
 * uint64_t of the per-instance sample pairs (the first value of each pair is not subtracted
 * here), or 0 when there are no instances.
 */
static uint64_t
radv_pc_max_reg(uint32_t reg, const uint64_t *data)
{
   if (G_REG_CONSTANT(reg))
      return reg & 0x7fffffffu;

   const unsigned num_instances = G_REG_INSTANCES(reg);
   const unsigned first = G_REG_OFFSET(reg) / 8;
   uint64_t peak = 0;

   for (unsigned inst = 0; inst < num_instances; inst++) {
      const uint64_t end_value = data[first + 2 * inst + 1];

      if (end_value > peak)
         peak = end_value;
   }

   return peak;
}
763*61046927SAndroid Build Coastguard Worker
764*61046927SAndroid Build Coastguard Worker static union VkPerformanceCounterResultKHR
radv_pc_get_result(const struct radv_perfcounter_impl * impl,const uint64_t * data)765*61046927SAndroid Build Coastguard Worker radv_pc_get_result(const struct radv_perfcounter_impl *impl, const uint64_t *data)
766*61046927SAndroid Build Coastguard Worker {
767*61046927SAndroid Build Coastguard Worker union VkPerformanceCounterResultKHR result;
768*61046927SAndroid Build Coastguard Worker
769*61046927SAndroid Build Coastguard Worker switch (impl->op) {
770*61046927SAndroid Build Coastguard Worker case RADV_PC_OP_MAX:
771*61046927SAndroid Build Coastguard Worker result.float64 = radv_pc_max_reg(impl->regs[0], data);
772*61046927SAndroid Build Coastguard Worker break;
773*61046927SAndroid Build Coastguard Worker case RADV_PC_OP_SUM:
774*61046927SAndroid Build Coastguard Worker result.float64 = radv_pc_sum_reg(impl->regs[0], data);
775*61046927SAndroid Build Coastguard Worker break;
776*61046927SAndroid Build Coastguard Worker case RADV_PC_OP_RATIO_DIVSCALE:
777*61046927SAndroid Build Coastguard Worker result.float64 = radv_pc_sum_reg(impl->regs[0], data) / (double)radv_pc_sum_reg(impl->regs[1], data) /
778*61046927SAndroid Build Coastguard Worker radv_pc_sum_reg(impl->regs[2], data) * 100.0;
779*61046927SAndroid Build Coastguard Worker break;
780*61046927SAndroid Build Coastguard Worker case RADV_PC_OP_REVERSE_RATIO: {
781*61046927SAndroid Build Coastguard Worker double tmp = radv_pc_sum_reg(impl->regs[1], data);
782*61046927SAndroid Build Coastguard Worker result.float64 = (tmp - radv_pc_sum_reg(impl->regs[0], data)) / tmp * 100.0;
783*61046927SAndroid Build Coastguard Worker break;
784*61046927SAndroid Build Coastguard Worker }
785*61046927SAndroid Build Coastguard Worker case RADV_PC_OP_SUM_WEIGHTED_4:
786*61046927SAndroid Build Coastguard Worker result.float64 = 0.0;
787*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < 4; ++i)
788*61046927SAndroid Build Coastguard Worker result.float64 += radv_pc_sum_reg(impl->regs[2 * i], data) * radv_pc_sum_reg(impl->regs[2 * i + 1], data);
789*61046927SAndroid Build Coastguard Worker break;
790*61046927SAndroid Build Coastguard Worker default:
791*61046927SAndroid Build Coastguard Worker unreachable("unhandled performance counter operation");
792*61046927SAndroid Build Coastguard Worker }
793*61046927SAndroid Build Coastguard Worker return result;
794*61046927SAndroid Build Coastguard Worker }
795*61046927SAndroid Build Coastguard Worker
796*61046927SAndroid Build Coastguard Worker void
radv_pc_get_results(const struct radv_pc_query_pool * pc_pool,const uint64_t * data,void * out)797*61046927SAndroid Build Coastguard Worker radv_pc_get_results(const struct radv_pc_query_pool *pc_pool, const uint64_t *data, void *out)
798*61046927SAndroid Build Coastguard Worker {
799*61046927SAndroid Build Coastguard Worker union VkPerformanceCounterResultKHR *pc_result = out;
800*61046927SAndroid Build Coastguard Worker
801*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < pc_pool->num_counters; ++i) {
802*61046927SAndroid Build Coastguard Worker pc_result[i] = radv_pc_get_result(pc_pool->counters + i, data);
803*61046927SAndroid Build Coastguard Worker }
804*61046927SAndroid Build Coastguard Worker }
805*61046927SAndroid Build Coastguard Worker
806*61046927SAndroid Build Coastguard Worker VKAPI_ATTR VkResult VKAPI_CALL
radv_EnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(VkPhysicalDevice physicalDevice,uint32_t queueFamilyIndex,uint32_t * pCounterCount,VkPerformanceCounterKHR * pCounters,VkPerformanceCounterDescriptionKHR * pCounterDescriptions)807*61046927SAndroid Build Coastguard Worker radv_EnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(
808*61046927SAndroid Build Coastguard Worker VkPhysicalDevice physicalDevice, uint32_t queueFamilyIndex, uint32_t *pCounterCount,
809*61046927SAndroid Build Coastguard Worker VkPerformanceCounterKHR *pCounters, VkPerformanceCounterDescriptionKHR *pCounterDescriptions)
810*61046927SAndroid Build Coastguard Worker {
811*61046927SAndroid Build Coastguard Worker VK_FROM_HANDLE(radv_physical_device, pdev, physicalDevice);
812*61046927SAndroid Build Coastguard Worker
813*61046927SAndroid Build Coastguard Worker if (vk_queue_to_radv(pdev, queueFamilyIndex) != RADV_QUEUE_GENERAL) {
814*61046927SAndroid Build Coastguard Worker *pCounterCount = 0;
815*61046927SAndroid Build Coastguard Worker return VK_SUCCESS;
816*61046927SAndroid Build Coastguard Worker }
817*61046927SAndroid Build Coastguard Worker
818*61046927SAndroid Build Coastguard Worker if (!radv_init_perfcounter_descs(pdev))
819*61046927SAndroid Build Coastguard Worker return VK_ERROR_OUT_OF_HOST_MEMORY;
820*61046927SAndroid Build Coastguard Worker
821*61046927SAndroid Build Coastguard Worker uint32_t counter_cnt = pdev->num_perfcounters;
822*61046927SAndroid Build Coastguard Worker const struct radv_perfcounter_desc *descs = pdev->perfcounters;
823*61046927SAndroid Build Coastguard Worker
824*61046927SAndroid Build Coastguard Worker if (!pCounters && !pCounterDescriptions) {
825*61046927SAndroid Build Coastguard Worker *pCounterCount = counter_cnt;
826*61046927SAndroid Build Coastguard Worker return VK_SUCCESS;
827*61046927SAndroid Build Coastguard Worker }
828*61046927SAndroid Build Coastguard Worker
829*61046927SAndroid Build Coastguard Worker VkResult result = counter_cnt > *pCounterCount ? VK_INCOMPLETE : VK_SUCCESS;
830*61046927SAndroid Build Coastguard Worker counter_cnt = MIN2(counter_cnt, *pCounterCount);
831*61046927SAndroid Build Coastguard Worker *pCounterCount = counter_cnt;
832*61046927SAndroid Build Coastguard Worker
833*61046927SAndroid Build Coastguard Worker for (uint32_t i = 0; i < counter_cnt; ++i) {
834*61046927SAndroid Build Coastguard Worker if (pCounters) {
835*61046927SAndroid Build Coastguard Worker pCounters[i].sType = VK_STRUCTURE_TYPE_PERFORMANCE_COUNTER_KHR;
836*61046927SAndroid Build Coastguard Worker pCounters[i].unit = descs[i].unit;
837*61046927SAndroid Build Coastguard Worker pCounters[i].scope = VK_PERFORMANCE_COUNTER_SCOPE_COMMAND_KHR;
838*61046927SAndroid Build Coastguard Worker pCounters[i].storage = VK_PERFORMANCE_COUNTER_STORAGE_FLOAT64_KHR;
839*61046927SAndroid Build Coastguard Worker
840*61046927SAndroid Build Coastguard Worker memset(&pCounters[i].uuid, 0, sizeof(pCounters[i].uuid));
841*61046927SAndroid Build Coastguard Worker strcpy((char *)&pCounters[i].uuid, "RADV");
842*61046927SAndroid Build Coastguard Worker
843*61046927SAndroid Build Coastguard Worker const uint32_t uuid = descs[i].uuid;
844*61046927SAndroid Build Coastguard Worker memcpy(&pCounters[i].uuid[12], &uuid, sizeof(uuid));
845*61046927SAndroid Build Coastguard Worker }
846*61046927SAndroid Build Coastguard Worker
847*61046927SAndroid Build Coastguard Worker if (pCounterDescriptions) {
848*61046927SAndroid Build Coastguard Worker pCounterDescriptions[i].sType = VK_STRUCTURE_TYPE_PERFORMANCE_COUNTER_DESCRIPTION_KHR;
849*61046927SAndroid Build Coastguard Worker pCounterDescriptions[i].flags = VK_PERFORMANCE_COUNTER_DESCRIPTION_CONCURRENTLY_IMPACTED_BIT_KHR;
850*61046927SAndroid Build Coastguard Worker strcpy(pCounterDescriptions[i].name, descs[i].name);
851*61046927SAndroid Build Coastguard Worker strcpy(pCounterDescriptions[i].category, descs[i].category);
852*61046927SAndroid Build Coastguard Worker strcpy(pCounterDescriptions[i].description, descs[i].description);
853*61046927SAndroid Build Coastguard Worker }
854*61046927SAndroid Build Coastguard Worker }
855*61046927SAndroid Build Coastguard Worker return result;
856*61046927SAndroid Build Coastguard Worker }
857*61046927SAndroid Build Coastguard Worker
858*61046927SAndroid Build Coastguard Worker VKAPI_ATTR void VKAPI_CALL
radv_GetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR(VkPhysicalDevice physicalDevice,const VkQueryPoolPerformanceCreateInfoKHR * pPerformanceQueryCreateInfo,uint32_t * pNumPasses)859*61046927SAndroid Build Coastguard Worker radv_GetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR(
860*61046927SAndroid Build Coastguard Worker VkPhysicalDevice physicalDevice, const VkQueryPoolPerformanceCreateInfoKHR *pPerformanceQueryCreateInfo,
861*61046927SAndroid Build Coastguard Worker uint32_t *pNumPasses)
862*61046927SAndroid Build Coastguard Worker {
863*61046927SAndroid Build Coastguard Worker VK_FROM_HANDLE(radv_physical_device, pdev, physicalDevice);
864*61046927SAndroid Build Coastguard Worker
865*61046927SAndroid Build Coastguard Worker if (pPerformanceQueryCreateInfo->counterIndexCount == 0) {
866*61046927SAndroid Build Coastguard Worker *pNumPasses = 0;
867*61046927SAndroid Build Coastguard Worker return;
868*61046927SAndroid Build Coastguard Worker }
869*61046927SAndroid Build Coastguard Worker
870*61046927SAndroid Build Coastguard Worker if (!radv_init_perfcounter_descs(pdev)) {
871*61046927SAndroid Build Coastguard Worker /* Can't return an error, so log */
872*61046927SAndroid Build Coastguard Worker fprintf(stderr, "radv: Failed to init perf counters\n");
873*61046927SAndroid Build Coastguard Worker *pNumPasses = 1;
874*61046927SAndroid Build Coastguard Worker return;
875*61046927SAndroid Build Coastguard Worker }
876*61046927SAndroid Build Coastguard Worker
877*61046927SAndroid Build Coastguard Worker assert(vk_queue_to_radv(pdev, pPerformanceQueryCreateInfo->queueFamilyIndex) == RADV_QUEUE_GENERAL);
878*61046927SAndroid Build Coastguard Worker
879*61046927SAndroid Build Coastguard Worker unsigned num_regs = 0;
880*61046927SAndroid Build Coastguard Worker uint32_t *regs = NULL;
881*61046927SAndroid Build Coastguard Worker VkResult result = radv_get_counter_registers(pdev, pPerformanceQueryCreateInfo->counterIndexCount,
882*61046927SAndroid Build Coastguard Worker pPerformanceQueryCreateInfo->pCounterIndices, &num_regs, ®s);
883*61046927SAndroid Build Coastguard Worker if (result != VK_SUCCESS) {
884*61046927SAndroid Build Coastguard Worker /* Can't return an error, so log */
885*61046927SAndroid Build Coastguard Worker fprintf(stderr, "radv: Failed to allocate memory for perf counters\n");
886*61046927SAndroid Build Coastguard Worker }
887*61046927SAndroid Build Coastguard Worker
888*61046927SAndroid Build Coastguard Worker *pNumPasses = radv_get_num_counter_passes(pdev, num_regs, regs);
889*61046927SAndroid Build Coastguard Worker free(regs);
890*61046927SAndroid Build Coastguard Worker }
891