xref: /aosp_15_r20/external/mesa3d/src/amd/common/ac_sqtt.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1*61046927SAndroid Build Coastguard Worker /*
2*61046927SAndroid Build Coastguard Worker  * Copyright 2020 Advanced Micro Devices, Inc.
3*61046927SAndroid Build Coastguard Worker  * Copyright 2020 Valve Corporation
4*61046927SAndroid Build Coastguard Worker  *
5*61046927SAndroid Build Coastguard Worker  * SPDX-License-Identifier: MIT
6*61046927SAndroid Build Coastguard Worker  */
7*61046927SAndroid Build Coastguard Worker 
8*61046927SAndroid Build Coastguard Worker #include "ac_pm4.h"
9*61046927SAndroid Build Coastguard Worker #include "ac_sqtt.h"
10*61046927SAndroid Build Coastguard Worker 
11*61046927SAndroid Build Coastguard Worker #include "sid.h"
12*61046927SAndroid Build Coastguard Worker #include "ac_gpu_info.h"
13*61046927SAndroid Build Coastguard Worker #include "util/u_math.h"
14*61046927SAndroid Build Coastguard Worker #include "util/os_time.h"
15*61046927SAndroid Build Coastguard Worker 
16*61046927SAndroid Build Coastguard Worker #include "sid.h"
17*61046927SAndroid Build Coastguard Worker 
18*61046927SAndroid Build Coastguard Worker uint64_t
ac_sqtt_get_info_offset(unsigned se)19*61046927SAndroid Build Coastguard Worker ac_sqtt_get_info_offset(unsigned se)
20*61046927SAndroid Build Coastguard Worker {
21*61046927SAndroid Build Coastguard Worker    return sizeof(struct ac_sqtt_data_info) * se;
22*61046927SAndroid Build Coastguard Worker }
23*61046927SAndroid Build Coastguard Worker 
24*61046927SAndroid Build Coastguard Worker uint64_t
ac_sqtt_get_data_offset(const struct radeon_info * rad_info,const struct ac_sqtt * data,unsigned se)25*61046927SAndroid Build Coastguard Worker ac_sqtt_get_data_offset(const struct radeon_info *rad_info, const struct ac_sqtt *data, unsigned se)
26*61046927SAndroid Build Coastguard Worker {
27*61046927SAndroid Build Coastguard Worker    unsigned max_se = rad_info->max_se;
28*61046927SAndroid Build Coastguard Worker    uint64_t data_offset;
29*61046927SAndroid Build Coastguard Worker 
30*61046927SAndroid Build Coastguard Worker    data_offset = align64(sizeof(struct ac_sqtt_data_info) * max_se, 1 << SQTT_BUFFER_ALIGN_SHIFT);
31*61046927SAndroid Build Coastguard Worker    data_offset += data->buffer_size * se;
32*61046927SAndroid Build Coastguard Worker 
33*61046927SAndroid Build Coastguard Worker    return data_offset;
34*61046927SAndroid Build Coastguard Worker }
35*61046927SAndroid Build Coastguard Worker 
/* Address of shader engine `se`'s info record, given the base address `va`. */
static uint64_t
ac_sqtt_get_info_va(uint64_t va, unsigned se)
{
   const uint64_t info_offset = ac_sqtt_get_info_offset(se);

   return va + info_offset;
}
41*61046927SAndroid Build Coastguard Worker 
42*61046927SAndroid Build Coastguard Worker static uint64_t
ac_sqtt_get_data_va(const struct radeon_info * rad_info,const struct ac_sqtt * data,unsigned se)43*61046927SAndroid Build Coastguard Worker ac_sqtt_get_data_va(const struct radeon_info *rad_info, const struct ac_sqtt *data,
44*61046927SAndroid Build Coastguard Worker                     unsigned se)
45*61046927SAndroid Build Coastguard Worker {
46*61046927SAndroid Build Coastguard Worker    return data->buffer_va + ac_sqtt_get_data_offset(rad_info, data, se);
47*61046927SAndroid Build Coastguard Worker }
48*61046927SAndroid Build Coastguard Worker 
49*61046927SAndroid Build Coastguard Worker void
ac_sqtt_init(struct ac_sqtt * data)50*61046927SAndroid Build Coastguard Worker ac_sqtt_init(struct ac_sqtt *data)
51*61046927SAndroid Build Coastguard Worker {
52*61046927SAndroid Build Coastguard Worker    list_inithead(&data->rgp_pso_correlation.record);
53*61046927SAndroid Build Coastguard Worker    simple_mtx_init(&data->rgp_pso_correlation.lock, mtx_plain);
54*61046927SAndroid Build Coastguard Worker 
55*61046927SAndroid Build Coastguard Worker    list_inithead(&data->rgp_loader_events.record);
56*61046927SAndroid Build Coastguard Worker    simple_mtx_init(&data->rgp_loader_events.lock, mtx_plain);
57*61046927SAndroid Build Coastguard Worker 
58*61046927SAndroid Build Coastguard Worker    list_inithead(&data->rgp_code_object.record);
59*61046927SAndroid Build Coastguard Worker    simple_mtx_init(&data->rgp_code_object.lock, mtx_plain);
60*61046927SAndroid Build Coastguard Worker 
61*61046927SAndroid Build Coastguard Worker    list_inithead(&data->rgp_clock_calibration.record);
62*61046927SAndroid Build Coastguard Worker    simple_mtx_init(&data->rgp_clock_calibration.lock, mtx_plain);
63*61046927SAndroid Build Coastguard Worker 
64*61046927SAndroid Build Coastguard Worker    list_inithead(&data->rgp_queue_info.record);
65*61046927SAndroid Build Coastguard Worker    simple_mtx_init(&data->rgp_queue_info.lock, mtx_plain);
66*61046927SAndroid Build Coastguard Worker 
67*61046927SAndroid Build Coastguard Worker    list_inithead(&data->rgp_queue_event.record);
68*61046927SAndroid Build Coastguard Worker    simple_mtx_init(&data->rgp_queue_event.lock, mtx_plain);
69*61046927SAndroid Build Coastguard Worker }
70*61046927SAndroid Build Coastguard Worker 
71*61046927SAndroid Build Coastguard Worker void
ac_sqtt_finish(struct ac_sqtt * data)72*61046927SAndroid Build Coastguard Worker ac_sqtt_finish(struct ac_sqtt *data)
73*61046927SAndroid Build Coastguard Worker {
74*61046927SAndroid Build Coastguard Worker    assert(data->rgp_pso_correlation.record_count == 0);
75*61046927SAndroid Build Coastguard Worker    simple_mtx_destroy(&data->rgp_pso_correlation.lock);
76*61046927SAndroid Build Coastguard Worker 
77*61046927SAndroid Build Coastguard Worker    assert(data->rgp_loader_events.record_count == 0);
78*61046927SAndroid Build Coastguard Worker    simple_mtx_destroy(&data->rgp_loader_events.lock);
79*61046927SAndroid Build Coastguard Worker 
80*61046927SAndroid Build Coastguard Worker    assert(data->rgp_code_object.record_count == 0);
81*61046927SAndroid Build Coastguard Worker    simple_mtx_destroy(&data->rgp_code_object.lock);
82*61046927SAndroid Build Coastguard Worker 
83*61046927SAndroid Build Coastguard Worker    assert(data->rgp_clock_calibration.record_count == 0);
84*61046927SAndroid Build Coastguard Worker    simple_mtx_destroy(&data->rgp_clock_calibration.lock);
85*61046927SAndroid Build Coastguard Worker 
86*61046927SAndroid Build Coastguard Worker    assert(data->rgp_queue_info.record_count == 0);
87*61046927SAndroid Build Coastguard Worker    simple_mtx_destroy(&data->rgp_queue_info.lock);
88*61046927SAndroid Build Coastguard Worker 
89*61046927SAndroid Build Coastguard Worker    assert(data->rgp_queue_event.record_count == 0);
90*61046927SAndroid Build Coastguard Worker    simple_mtx_destroy(&data->rgp_queue_event.lock);
91*61046927SAndroid Build Coastguard Worker }
92*61046927SAndroid Build Coastguard Worker 
93*61046927SAndroid Build Coastguard Worker bool
ac_is_sqtt_complete(const struct radeon_info * rad_info,const struct ac_sqtt * data,const struct ac_sqtt_data_info * info)94*61046927SAndroid Build Coastguard Worker ac_is_sqtt_complete(const struct radeon_info *rad_info, const struct ac_sqtt *data,
95*61046927SAndroid Build Coastguard Worker                     const struct ac_sqtt_data_info *info)
96*61046927SAndroid Build Coastguard Worker {
97*61046927SAndroid Build Coastguard Worker    if (rad_info->gfx_level >= GFX10) {
98*61046927SAndroid Build Coastguard Worker       /* GFX10 doesn't have THREAD_TRACE_CNTR but it reports the number of
99*61046927SAndroid Build Coastguard Worker        * dropped bytes per SE via THREAD_TRACE_DROPPED_CNTR. Though, this
100*61046927SAndroid Build Coastguard Worker        * doesn't seem reliable because it might still report non-zero even if
101*61046927SAndroid Build Coastguard Worker        * the SQTT buffer isn't full.
102*61046927SAndroid Build Coastguard Worker        *
103*61046927SAndroid Build Coastguard Worker        * The solution here is to compare the number of bytes written by the hw
104*61046927SAndroid Build Coastguard Worker        * (in units of 32 bytes) to the SQTT buffer size. If it's equal, that
105*61046927SAndroid Build Coastguard Worker        * means that the buffer is full and should be resized.
106*61046927SAndroid Build Coastguard Worker        */
107*61046927SAndroid Build Coastguard Worker       return !(info->cur_offset * 32 == data->buffer_size - 32);
108*61046927SAndroid Build Coastguard Worker    }
109*61046927SAndroid Build Coastguard Worker 
110*61046927SAndroid Build Coastguard Worker    /* Otherwise, compare the current thread trace offset with the number
111*61046927SAndroid Build Coastguard Worker     * of written bytes.
112*61046927SAndroid Build Coastguard Worker     */
113*61046927SAndroid Build Coastguard Worker    return info->cur_offset == info->gfx9_write_counter;
114*61046927SAndroid Build Coastguard Worker }
115*61046927SAndroid Build Coastguard Worker 
116*61046927SAndroid Build Coastguard Worker uint32_t
ac_get_expected_buffer_size(struct radeon_info * rad_info,const struct ac_sqtt_data_info * info)117*61046927SAndroid Build Coastguard Worker ac_get_expected_buffer_size(struct radeon_info *rad_info, const struct ac_sqtt_data_info *info)
118*61046927SAndroid Build Coastguard Worker {
119*61046927SAndroid Build Coastguard Worker    if (rad_info->gfx_level >= GFX10) {
120*61046927SAndroid Build Coastguard Worker       uint32_t dropped_cntr_per_se = info->gfx10_dropped_cntr / rad_info->max_se;
121*61046927SAndroid Build Coastguard Worker       return ((info->cur_offset * 32) + dropped_cntr_per_se) / 1024;
122*61046927SAndroid Build Coastguard Worker    }
123*61046927SAndroid Build Coastguard Worker 
124*61046927SAndroid Build Coastguard Worker    return (info->gfx9_write_counter * 32) / 1024;
125*61046927SAndroid Build Coastguard Worker }
126*61046927SAndroid Build Coastguard Worker 
127*61046927SAndroid Build Coastguard Worker bool
ac_sqtt_add_pso_correlation(struct ac_sqtt * sqtt,uint64_t pipeline_hash,uint64_t api_hash)128*61046927SAndroid Build Coastguard Worker ac_sqtt_add_pso_correlation(struct ac_sqtt *sqtt, uint64_t pipeline_hash, uint64_t api_hash)
129*61046927SAndroid Build Coastguard Worker {
130*61046927SAndroid Build Coastguard Worker    struct rgp_pso_correlation *pso_correlation = &sqtt->rgp_pso_correlation;
131*61046927SAndroid Build Coastguard Worker    struct rgp_pso_correlation_record *record;
132*61046927SAndroid Build Coastguard Worker 
133*61046927SAndroid Build Coastguard Worker    record = malloc(sizeof(struct rgp_pso_correlation_record));
134*61046927SAndroid Build Coastguard Worker    if (!record)
135*61046927SAndroid Build Coastguard Worker       return false;
136*61046927SAndroid Build Coastguard Worker 
137*61046927SAndroid Build Coastguard Worker    record->api_pso_hash = api_hash;
138*61046927SAndroid Build Coastguard Worker    record->pipeline_hash[0] = pipeline_hash;
139*61046927SAndroid Build Coastguard Worker    record->pipeline_hash[1] = pipeline_hash;
140*61046927SAndroid Build Coastguard Worker    memset(record->api_level_obj_name, 0, sizeof(record->api_level_obj_name));
141*61046927SAndroid Build Coastguard Worker 
142*61046927SAndroid Build Coastguard Worker    simple_mtx_lock(&pso_correlation->lock);
143*61046927SAndroid Build Coastguard Worker    list_addtail(&record->list, &pso_correlation->record);
144*61046927SAndroid Build Coastguard Worker    pso_correlation->record_count++;
145*61046927SAndroid Build Coastguard Worker    simple_mtx_unlock(&pso_correlation->lock);
146*61046927SAndroid Build Coastguard Worker 
147*61046927SAndroid Build Coastguard Worker    return true;
148*61046927SAndroid Build Coastguard Worker }
149*61046927SAndroid Build Coastguard Worker 
150*61046927SAndroid Build Coastguard Worker bool
ac_sqtt_add_code_object_loader_event(struct ac_sqtt * sqtt,uint64_t pipeline_hash,uint64_t base_address)151*61046927SAndroid Build Coastguard Worker ac_sqtt_add_code_object_loader_event(struct ac_sqtt *sqtt, uint64_t pipeline_hash,
152*61046927SAndroid Build Coastguard Worker                                      uint64_t base_address)
153*61046927SAndroid Build Coastguard Worker {
154*61046927SAndroid Build Coastguard Worker    struct rgp_loader_events *loader_events = &sqtt->rgp_loader_events;
155*61046927SAndroid Build Coastguard Worker    struct rgp_loader_events_record *record;
156*61046927SAndroid Build Coastguard Worker 
157*61046927SAndroid Build Coastguard Worker    record = malloc(sizeof(struct rgp_loader_events_record));
158*61046927SAndroid Build Coastguard Worker    if (!record)
159*61046927SAndroid Build Coastguard Worker       return false;
160*61046927SAndroid Build Coastguard Worker 
161*61046927SAndroid Build Coastguard Worker    record->loader_event_type = RGP_LOAD_TO_GPU_MEMORY;
162*61046927SAndroid Build Coastguard Worker    record->reserved = 0;
163*61046927SAndroid Build Coastguard Worker    record->base_address = base_address & 0xffffffffffff;
164*61046927SAndroid Build Coastguard Worker    record->code_object_hash[0] = pipeline_hash;
165*61046927SAndroid Build Coastguard Worker    record->code_object_hash[1] = pipeline_hash;
166*61046927SAndroid Build Coastguard Worker    record->time_stamp = os_time_get_nano();
167*61046927SAndroid Build Coastguard Worker 
168*61046927SAndroid Build Coastguard Worker    simple_mtx_lock(&loader_events->lock);
169*61046927SAndroid Build Coastguard Worker    list_addtail(&record->list, &loader_events->record);
170*61046927SAndroid Build Coastguard Worker    loader_events->record_count++;
171*61046927SAndroid Build Coastguard Worker    simple_mtx_unlock(&loader_events->lock);
172*61046927SAndroid Build Coastguard Worker 
173*61046927SAndroid Build Coastguard Worker    return true;
174*61046927SAndroid Build Coastguard Worker }
175*61046927SAndroid Build Coastguard Worker 
176*61046927SAndroid Build Coastguard Worker bool
ac_sqtt_add_clock_calibration(struct ac_sqtt * sqtt,uint64_t cpu_timestamp,uint64_t gpu_timestamp)177*61046927SAndroid Build Coastguard Worker ac_sqtt_add_clock_calibration(struct ac_sqtt *sqtt, uint64_t cpu_timestamp, uint64_t gpu_timestamp)
178*61046927SAndroid Build Coastguard Worker {
179*61046927SAndroid Build Coastguard Worker    struct rgp_clock_calibration *clock_calibration = &sqtt->rgp_clock_calibration;
180*61046927SAndroid Build Coastguard Worker    struct rgp_clock_calibration_record *record;
181*61046927SAndroid Build Coastguard Worker 
182*61046927SAndroid Build Coastguard Worker    record = malloc(sizeof(struct rgp_clock_calibration_record));
183*61046927SAndroid Build Coastguard Worker    if (!record)
184*61046927SAndroid Build Coastguard Worker       return false;
185*61046927SAndroid Build Coastguard Worker 
186*61046927SAndroid Build Coastguard Worker    record->cpu_timestamp = cpu_timestamp;
187*61046927SAndroid Build Coastguard Worker    record->gpu_timestamp = gpu_timestamp;
188*61046927SAndroid Build Coastguard Worker 
189*61046927SAndroid Build Coastguard Worker    simple_mtx_lock(&clock_calibration->lock);
190*61046927SAndroid Build Coastguard Worker    list_addtail(&record->list, &clock_calibration->record);
191*61046927SAndroid Build Coastguard Worker    clock_calibration->record_count++;
192*61046927SAndroid Build Coastguard Worker    simple_mtx_unlock(&clock_calibration->lock);
193*61046927SAndroid Build Coastguard Worker 
194*61046927SAndroid Build Coastguard Worker    return true;
195*61046927SAndroid Build Coastguard Worker }
196*61046927SAndroid Build Coastguard Worker 
197*61046927SAndroid Build Coastguard Worker /* See https://gitlab.freedesktop.org/mesa/mesa/-/issues/5260
198*61046927SAndroid Build Coastguard Worker  * On some HW SQTT can hang if we're not in one of the profiling pstates. */
199*61046927SAndroid Build Coastguard Worker bool
ac_check_profile_state(const struct radeon_info * info)200*61046927SAndroid Build Coastguard Worker ac_check_profile_state(const struct radeon_info *info)
201*61046927SAndroid Build Coastguard Worker {
202*61046927SAndroid Build Coastguard Worker    char path[128];
203*61046927SAndroid Build Coastguard Worker    char data[128];
204*61046927SAndroid Build Coastguard Worker    int n;
205*61046927SAndroid Build Coastguard Worker 
206*61046927SAndroid Build Coastguard Worker    if (!info->pci.valid)
207*61046927SAndroid Build Coastguard Worker       return false; /* Unknown but optimistic. */
208*61046927SAndroid Build Coastguard Worker 
209*61046927SAndroid Build Coastguard Worker    snprintf(path, sizeof(path),
210*61046927SAndroid Build Coastguard Worker             "/sys/bus/pci/devices/%04x:%02x:%02x.%x/power_dpm_force_performance_level",
211*61046927SAndroid Build Coastguard Worker             info->pci.domain, info->pci.bus, info->pci.dev, info->pci.func);
212*61046927SAndroid Build Coastguard Worker 
213*61046927SAndroid Build Coastguard Worker    FILE *f = fopen(path, "r");
214*61046927SAndroid Build Coastguard Worker    if (!f)
215*61046927SAndroid Build Coastguard Worker       return false; /* Unknown but optimistic. */
216*61046927SAndroid Build Coastguard Worker    n = fread(data, 1, sizeof(data) - 1, f);
217*61046927SAndroid Build Coastguard Worker    fclose(f);
218*61046927SAndroid Build Coastguard Worker    data[n] = 0;
219*61046927SAndroid Build Coastguard Worker    return strstr(data, "profile") == NULL;
220*61046927SAndroid Build Coastguard Worker }
221*61046927SAndroid Build Coastguard Worker 
222*61046927SAndroid Build Coastguard Worker union rgp_sqtt_marker_cb_id
ac_sqtt_get_next_cmdbuf_id(struct ac_sqtt * data,enum amd_ip_type ip_type)223*61046927SAndroid Build Coastguard Worker ac_sqtt_get_next_cmdbuf_id(struct ac_sqtt *data, enum amd_ip_type ip_type)
224*61046927SAndroid Build Coastguard Worker {
225*61046927SAndroid Build Coastguard Worker    union rgp_sqtt_marker_cb_id cb_id = {0};
226*61046927SAndroid Build Coastguard Worker 
227*61046927SAndroid Build Coastguard Worker    cb_id.global_cb_id.cb_index =
228*61046927SAndroid Build Coastguard Worker       p_atomic_inc_return(&data->cmdbuf_ids_per_queue[ip_type]);
229*61046927SAndroid Build Coastguard Worker 
230*61046927SAndroid Build Coastguard Worker    return cb_id;
231*61046927SAndroid Build Coastguard Worker }
232*61046927SAndroid Build Coastguard Worker 
233*61046927SAndroid Build Coastguard Worker static bool
ac_sqtt_se_is_disabled(const struct radeon_info * info,unsigned se)234*61046927SAndroid Build Coastguard Worker ac_sqtt_se_is_disabled(const struct radeon_info *info, unsigned se)
235*61046927SAndroid Build Coastguard Worker {
236*61046927SAndroid Build Coastguard Worker    /* No active CU on the SE means it is disabled. */
237*61046927SAndroid Build Coastguard Worker    return info->cu_mask[se][0] == 0;
238*61046927SAndroid Build Coastguard Worker }
239*61046927SAndroid Build Coastguard Worker 
240*61046927SAndroid Build Coastguard Worker static uint32_t
ac_sqtt_get_active_cu(const struct radeon_info * info,unsigned se)241*61046927SAndroid Build Coastguard Worker ac_sqtt_get_active_cu(const struct radeon_info *info, unsigned se)
242*61046927SAndroid Build Coastguard Worker {
243*61046927SAndroid Build Coastguard Worker    uint32_t cu_index;
244*61046927SAndroid Build Coastguard Worker 
245*61046927SAndroid Build Coastguard Worker    if (info->gfx_level >= GFX11) {
246*61046927SAndroid Build Coastguard Worker       /* GFX11 seems to operate on the last active CU. */
247*61046927SAndroid Build Coastguard Worker       cu_index = util_last_bit(info->cu_mask[se][0]) - 1;
248*61046927SAndroid Build Coastguard Worker    } else {
249*61046927SAndroid Build Coastguard Worker       /* Default to the first active CU. */
250*61046927SAndroid Build Coastguard Worker       cu_index = ffs(info->cu_mask[se][0]);
251*61046927SAndroid Build Coastguard Worker    }
252*61046927SAndroid Build Coastguard Worker 
253*61046927SAndroid Build Coastguard Worker    return cu_index;
254*61046927SAndroid Build Coastguard Worker }
255*61046927SAndroid Build Coastguard Worker 
256*61046927SAndroid Build Coastguard Worker bool
ac_sqtt_get_trace(struct ac_sqtt * data,const struct radeon_info * info,struct ac_sqtt_trace * sqtt_trace)257*61046927SAndroid Build Coastguard Worker ac_sqtt_get_trace(struct ac_sqtt *data, const struct radeon_info *info,
258*61046927SAndroid Build Coastguard Worker                   struct ac_sqtt_trace *sqtt_trace)
259*61046927SAndroid Build Coastguard Worker {
260*61046927SAndroid Build Coastguard Worker    unsigned max_se = info->max_se;
261*61046927SAndroid Build Coastguard Worker    void *ptr = data->ptr;
262*61046927SAndroid Build Coastguard Worker 
263*61046927SAndroid Build Coastguard Worker    memset(sqtt_trace, 0, sizeof(*sqtt_trace));
264*61046927SAndroid Build Coastguard Worker 
265*61046927SAndroid Build Coastguard Worker    for (unsigned se = 0; se < max_se; se++) {
266*61046927SAndroid Build Coastguard Worker       uint64_t info_offset = ac_sqtt_get_info_offset(se);
267*61046927SAndroid Build Coastguard Worker       uint64_t data_offset = ac_sqtt_get_data_offset(info, data, se);
268*61046927SAndroid Build Coastguard Worker       void *info_ptr = (uint8_t *)ptr + info_offset;
269*61046927SAndroid Build Coastguard Worker       void *data_ptr = (uint8_t *)ptr + data_offset;
270*61046927SAndroid Build Coastguard Worker       struct ac_sqtt_data_info *trace_info = (struct ac_sqtt_data_info *)info_ptr;
271*61046927SAndroid Build Coastguard Worker       struct ac_sqtt_data_se data_se = {0};
272*61046927SAndroid Build Coastguard Worker       int active_cu = ac_sqtt_get_active_cu(info, se);
273*61046927SAndroid Build Coastguard Worker 
274*61046927SAndroid Build Coastguard Worker       if (ac_sqtt_se_is_disabled(info, se))
275*61046927SAndroid Build Coastguard Worker          continue;
276*61046927SAndroid Build Coastguard Worker 
277*61046927SAndroid Build Coastguard Worker       if (!ac_is_sqtt_complete(info, data, trace_info))
278*61046927SAndroid Build Coastguard Worker          return false;
279*61046927SAndroid Build Coastguard Worker 
280*61046927SAndroid Build Coastguard Worker       data_se.data_ptr = data_ptr;
281*61046927SAndroid Build Coastguard Worker       data_se.info = *trace_info;
282*61046927SAndroid Build Coastguard Worker       data_se.shader_engine = se;
283*61046927SAndroid Build Coastguard Worker 
284*61046927SAndroid Build Coastguard Worker       /* RGP seems to expect units of WGP on GFX10+. */
285*61046927SAndroid Build Coastguard Worker       data_se.compute_unit = info->gfx_level >= GFX10 ? (active_cu / 2) : active_cu;
286*61046927SAndroid Build Coastguard Worker 
287*61046927SAndroid Build Coastguard Worker       sqtt_trace->traces[sqtt_trace->num_traces] = data_se;
288*61046927SAndroid Build Coastguard Worker       sqtt_trace->num_traces++;
289*61046927SAndroid Build Coastguard Worker    }
290*61046927SAndroid Build Coastguard Worker 
291*61046927SAndroid Build Coastguard Worker    sqtt_trace->rgp_code_object = &data->rgp_code_object;
292*61046927SAndroid Build Coastguard Worker    sqtt_trace->rgp_loader_events = &data->rgp_loader_events;
293*61046927SAndroid Build Coastguard Worker    sqtt_trace->rgp_pso_correlation = &data->rgp_pso_correlation;
294*61046927SAndroid Build Coastguard Worker    sqtt_trace->rgp_queue_info = &data->rgp_queue_info;
295*61046927SAndroid Build Coastguard Worker    sqtt_trace->rgp_queue_event = &data->rgp_queue_event;
296*61046927SAndroid Build Coastguard Worker    sqtt_trace->rgp_clock_calibration = &data->rgp_clock_calibration;
297*61046927SAndroid Build Coastguard Worker 
298*61046927SAndroid Build Coastguard Worker    return true;
299*61046927SAndroid Build Coastguard Worker }
300*61046927SAndroid Build Coastguard Worker 
301*61046927SAndroid Build Coastguard Worker uint32_t
ac_sqtt_get_ctrl(const struct radeon_info * info,bool enable)302*61046927SAndroid Build Coastguard Worker ac_sqtt_get_ctrl(const struct radeon_info *info, bool enable)
303*61046927SAndroid Build Coastguard Worker {
304*61046927SAndroid Build Coastguard Worker 
305*61046927SAndroid Build Coastguard Worker    uint32_t ctrl;
306*61046927SAndroid Build Coastguard Worker 
307*61046927SAndroid Build Coastguard Worker    if (info->gfx_level >= GFX11) {
308*61046927SAndroid Build Coastguard Worker       ctrl = S_0367B0_MODE(enable) | S_0367B0_HIWATER(5) |
309*61046927SAndroid Build Coastguard Worker              S_0367B0_UTIL_TIMER_GFX11(1) | S_0367B0_RT_FREQ(2) | /* 4096 clk */
310*61046927SAndroid Build Coastguard Worker              S_0367B0_DRAW_EVENT_EN(1) | S_0367B0_SPI_STALL_EN(1) |
311*61046927SAndroid Build Coastguard Worker              S_0367B0_SQ_STALL_EN(1) | S_0367B0_REG_AT_HWM(2);
312*61046927SAndroid Build Coastguard Worker    } else {
313*61046927SAndroid Build Coastguard Worker       assert(info->gfx_level >= GFX10);
314*61046927SAndroid Build Coastguard Worker 
315*61046927SAndroid Build Coastguard Worker       ctrl = S_008D1C_MODE(enable) | S_008D1C_HIWATER(5) | S_008D1C_UTIL_TIMER(1) |
316*61046927SAndroid Build Coastguard Worker              S_008D1C_RT_FREQ(2) | /* 4096 clk */ S_008D1C_DRAW_EVENT_EN(1) |
317*61046927SAndroid Build Coastguard Worker              S_008D1C_REG_STALL_EN(1) | S_008D1C_SPI_STALL_EN(1) |
318*61046927SAndroid Build Coastguard Worker              S_008D1C_SQ_STALL_EN(1) | S_008D1C_REG_DROP_ON_STALL(0);
319*61046927SAndroid Build Coastguard Worker 
320*61046927SAndroid Build Coastguard Worker       if (info->gfx_level == GFX10_3)
321*61046927SAndroid Build Coastguard Worker          ctrl |= S_008D1C_LOWATER_OFFSET(4);
322*61046927SAndroid Build Coastguard Worker 
323*61046927SAndroid Build Coastguard Worker       if (info->has_sqtt_auto_flush_mode_bug)
324*61046927SAndroid Build Coastguard Worker          ctrl |= S_008D1C_AUTO_FLUSH_MODE(1);
325*61046927SAndroid Build Coastguard Worker    }
326*61046927SAndroid Build Coastguard Worker 
327*61046927SAndroid Build Coastguard Worker    return ctrl;
328*61046927SAndroid Build Coastguard Worker }
329*61046927SAndroid Build Coastguard Worker 
330*61046927SAndroid Build Coastguard Worker uint32_t
ac_sqtt_get_shader_mask(const struct radeon_info * info)331*61046927SAndroid Build Coastguard Worker ac_sqtt_get_shader_mask(const struct radeon_info *info)
332*61046927SAndroid Build Coastguard Worker {
333*61046927SAndroid Build Coastguard Worker    unsigned shader_mask = 0x7f; /* all shader stages */
334*61046927SAndroid Build Coastguard Worker 
335*61046927SAndroid Build Coastguard Worker    if (info->gfx_level >= GFX11) {
336*61046927SAndroid Build Coastguard Worker       /* Disable unsupported hw shader stages */
337*61046927SAndroid Build Coastguard Worker       shader_mask &= ~(0x02 /* VS */ | 0x08 /* ES */ | 0x20 /* LS */);
338*61046927SAndroid Build Coastguard Worker    }
339*61046927SAndroid Build Coastguard Worker 
340*61046927SAndroid Build Coastguard Worker    return shader_mask;
341*61046927SAndroid Build Coastguard Worker }
342*61046927SAndroid Build Coastguard Worker 
343*61046927SAndroid Build Coastguard Worker void
ac_sqtt_emit_start(const struct radeon_info * info,struct ac_pm4_state * pm4,const struct ac_sqtt * sqtt,bool is_compute_queue)344*61046927SAndroid Build Coastguard Worker ac_sqtt_emit_start(const struct radeon_info *info, struct ac_pm4_state *pm4,
345*61046927SAndroid Build Coastguard Worker                    const struct ac_sqtt *sqtt, bool is_compute_queue)
346*61046927SAndroid Build Coastguard Worker {
347*61046927SAndroid Build Coastguard Worker    const uint32_t shifted_size = sqtt->buffer_size >> SQTT_BUFFER_ALIGN_SHIFT;
348*61046927SAndroid Build Coastguard Worker    const unsigned shader_mask = ac_sqtt_get_shader_mask(info);
349*61046927SAndroid Build Coastguard Worker    const unsigned max_se = info->max_se;
350*61046927SAndroid Build Coastguard Worker 
351*61046927SAndroid Build Coastguard Worker    for (unsigned se = 0; se < max_se; se++) {
352*61046927SAndroid Build Coastguard Worker       uint64_t data_va = ac_sqtt_get_data_va(info, sqtt, se);
353*61046927SAndroid Build Coastguard Worker       uint64_t shifted_va = data_va >> SQTT_BUFFER_ALIGN_SHIFT;
354*61046927SAndroid Build Coastguard Worker       int active_cu = ac_sqtt_get_active_cu(info, se);
355*61046927SAndroid Build Coastguard Worker 
356*61046927SAndroid Build Coastguard Worker       if (ac_sqtt_se_is_disabled(info, se))
357*61046927SAndroid Build Coastguard Worker          continue;
358*61046927SAndroid Build Coastguard Worker 
359*61046927SAndroid Build Coastguard Worker       /* Target SEx and SH0. */
360*61046927SAndroid Build Coastguard Worker       ac_pm4_set_reg(pm4, R_030800_GRBM_GFX_INDEX, S_030800_SE_INDEX(se) |
361*61046927SAndroid Build Coastguard Worker                      S_030800_SH_INDEX(0) | S_030800_INSTANCE_BROADCAST_WRITES(1));
362*61046927SAndroid Build Coastguard Worker 
363*61046927SAndroid Build Coastguard Worker       if (info->gfx_level >= GFX11) {
364*61046927SAndroid Build Coastguard Worker          /* Order seems important for the following 2 registers. */
365*61046927SAndroid Build Coastguard Worker          ac_pm4_set_reg(pm4, R_0367A4_SQ_THREAD_TRACE_BUF0_SIZE,
366*61046927SAndroid Build Coastguard Worker                         S_0367A4_SIZE(shifted_size) | S_0367A4_BASE_HI(shifted_va >> 32));
367*61046927SAndroid Build Coastguard Worker 
368*61046927SAndroid Build Coastguard Worker          ac_pm4_set_reg(pm4, R_0367A0_SQ_THREAD_TRACE_BUF0_BASE, shifted_va);
369*61046927SAndroid Build Coastguard Worker 
370*61046927SAndroid Build Coastguard Worker          ac_pm4_set_reg(pm4, R_0367B4_SQ_THREAD_TRACE_MASK,
371*61046927SAndroid Build Coastguard Worker                         S_0367B4_WTYPE_INCLUDE(shader_mask) | S_0367B4_SA_SEL(0) |
372*61046927SAndroid Build Coastguard Worker                         S_0367B4_WGP_SEL(active_cu / 2) | S_0367B4_SIMD_SEL(0));
373*61046927SAndroid Build Coastguard Worker 
374*61046927SAndroid Build Coastguard Worker          uint32_t sqtt_token_mask = S_0367B8_REG_INCLUDE(V_0367B8_REG_INCLUDE_SQDEC | V_0367B8_REG_INCLUDE_SHDEC |
375*61046927SAndroid Build Coastguard Worker                                                          V_0367B8_REG_INCLUDE_GFXUDEC | V_0367B8_REG_INCLUDE_COMP |
376*61046927SAndroid Build Coastguard Worker                                                          V_0367B8_REG_INCLUDE_CONTEXT | V_0367B8_REG_INCLUDE_CONFIG);
377*61046927SAndroid Build Coastguard Worker 
378*61046927SAndroid Build Coastguard Worker          /* Performance counters with SQTT are considered deprecated. */
379*61046927SAndroid Build Coastguard Worker          uint32_t token_exclude = V_0367B8_TOKEN_EXCLUDE_PERF;
380*61046927SAndroid Build Coastguard Worker 
381*61046927SAndroid Build Coastguard Worker          if (!sqtt->instruction_timing_enabled) {
382*61046927SAndroid Build Coastguard Worker             /* Reduce SQTT traffic when instruction timing isn't enabled. */
383*61046927SAndroid Build Coastguard Worker             token_exclude |= V_0367B8_TOKEN_EXCLUDE_VMEMEXEC | V_0367B8_TOKEN_EXCLUDE_ALUEXEC |
384*61046927SAndroid Build Coastguard Worker                              V_0367B8_TOKEN_EXCLUDE_VALUINST | V_0367B8_TOKEN_EXCLUDE_IMMEDIATE |
385*61046927SAndroid Build Coastguard Worker                              V_0367B8_TOKEN_EXCLUDE_INST;
386*61046927SAndroid Build Coastguard Worker          }
387*61046927SAndroid Build Coastguard Worker          sqtt_token_mask |= S_0367B8_TOKEN_EXCLUDE_GFX11(token_exclude) | S_0367B8_BOP_EVENTS_TOKEN_INCLUDE_GFX11(1);
388*61046927SAndroid Build Coastguard Worker 
389*61046927SAndroid Build Coastguard Worker          ac_pm4_set_reg(pm4, R_0367B8_SQ_THREAD_TRACE_TOKEN_MASK, sqtt_token_mask);
390*61046927SAndroid Build Coastguard Worker 
391*61046927SAndroid Build Coastguard Worker          /* Should be emitted last (it enables thread traces). */
392*61046927SAndroid Build Coastguard Worker          ac_pm4_set_reg(pm4, R_0367B0_SQ_THREAD_TRACE_CTRL, ac_sqtt_get_ctrl(info, true));
393*61046927SAndroid Build Coastguard Worker       } else if (info->gfx_level >= GFX10) {
394*61046927SAndroid Build Coastguard Worker          /* Order seems important for the following 2 registers. */
395*61046927SAndroid Build Coastguard Worker          ac_pm4_set_reg(pm4, R_008D04_SQ_THREAD_TRACE_BUF0_SIZE,
396*61046927SAndroid Build Coastguard Worker                         S_008D04_SIZE(shifted_size) | S_008D04_BASE_HI(shifted_va >> 32));
397*61046927SAndroid Build Coastguard Worker 
398*61046927SAndroid Build Coastguard Worker          ac_pm4_set_reg(pm4, R_008D00_SQ_THREAD_TRACE_BUF0_BASE, shifted_va);
399*61046927SAndroid Build Coastguard Worker 
400*61046927SAndroid Build Coastguard Worker          ac_pm4_set_reg(pm4, R_008D14_SQ_THREAD_TRACE_MASK,
401*61046927SAndroid Build Coastguard Worker                         S_008D14_WTYPE_INCLUDE(shader_mask) | S_008D14_SA_SEL(0) |
402*61046927SAndroid Build Coastguard Worker                         S_008D14_WGP_SEL(active_cu / 2) | S_008D14_SIMD_SEL(0));
403*61046927SAndroid Build Coastguard Worker 
404*61046927SAndroid Build Coastguard Worker          uint32_t sqtt_token_mask = S_008D18_REG_INCLUDE(V_008D18_REG_INCLUDE_SQDEC | V_008D18_REG_INCLUDE_SHDEC |
405*61046927SAndroid Build Coastguard Worker                                                          V_008D18_REG_INCLUDE_GFXUDEC | V_008D18_REG_INCLUDE_COMP |
406*61046927SAndroid Build Coastguard Worker                                                          V_008D18_REG_INCLUDE_CONTEXT | V_008D18_REG_INCLUDE_CONFIG);
407*61046927SAndroid Build Coastguard Worker 
408*61046927SAndroid Build Coastguard Worker          /* Performance counters with SQTT are considered deprecated. */
409*61046927SAndroid Build Coastguard Worker          uint32_t token_exclude = V_008D18_TOKEN_EXCLUDE_PERF;
410*61046927SAndroid Build Coastguard Worker 
411*61046927SAndroid Build Coastguard Worker          if (!sqtt->instruction_timing_enabled) {
412*61046927SAndroid Build Coastguard Worker             /* Reduce SQTT traffic when instruction timing isn't enabled. */
413*61046927SAndroid Build Coastguard Worker             token_exclude |= V_008D18_TOKEN_EXCLUDE_VMEMEXEC | V_008D18_TOKEN_EXCLUDE_ALUEXEC |
414*61046927SAndroid Build Coastguard Worker                              V_008D18_TOKEN_EXCLUDE_VALUINST | V_008D18_TOKEN_EXCLUDE_IMMEDIATE |
415*61046927SAndroid Build Coastguard Worker                              V_008D18_TOKEN_EXCLUDE_INST;
416*61046927SAndroid Build Coastguard Worker          }
417*61046927SAndroid Build Coastguard Worker          sqtt_token_mask |=
418*61046927SAndroid Build Coastguard Worker             S_008D18_TOKEN_EXCLUDE(token_exclude) | S_008D18_BOP_EVENTS_TOKEN_INCLUDE(info->gfx_level == GFX10_3);
419*61046927SAndroid Build Coastguard Worker 
420*61046927SAndroid Build Coastguard Worker          ac_pm4_set_reg(pm4, R_008D18_SQ_THREAD_TRACE_TOKEN_MASK, sqtt_token_mask);
421*61046927SAndroid Build Coastguard Worker 
422*61046927SAndroid Build Coastguard Worker          /* Should be emitted last (it enables thread traces). */
423*61046927SAndroid Build Coastguard Worker          ac_pm4_set_reg(pm4, R_008D1C_SQ_THREAD_TRACE_CTRL, ac_sqtt_get_ctrl(info, true));
424*61046927SAndroid Build Coastguard Worker       } else {
425*61046927SAndroid Build Coastguard Worker          /* Order seems important for the following 4 registers. */
426*61046927SAndroid Build Coastguard Worker          ac_pm4_set_reg(pm4, R_030CDC_SQ_THREAD_TRACE_BASE2, S_030CDC_ADDR_HI(shifted_va >> 32));
427*61046927SAndroid Build Coastguard Worker 
428*61046927SAndroid Build Coastguard Worker          ac_pm4_set_reg(pm4, R_030CC0_SQ_THREAD_TRACE_BASE, shifted_va);
429*61046927SAndroid Build Coastguard Worker 
430*61046927SAndroid Build Coastguard Worker          ac_pm4_set_reg(pm4, R_030CC4_SQ_THREAD_TRACE_SIZE, S_030CC4_SIZE(shifted_size));
431*61046927SAndroid Build Coastguard Worker 
432*61046927SAndroid Build Coastguard Worker          ac_pm4_set_reg(pm4, R_030CD4_SQ_THREAD_TRACE_CTRL, S_030CD4_RESET_BUFFER(1));
433*61046927SAndroid Build Coastguard Worker 
434*61046927SAndroid Build Coastguard Worker          uint32_t sqtt_mask = S_030CC8_CU_SEL(active_cu) | S_030CC8_SH_SEL(0) | S_030CC8_SIMD_EN(0xf) |
435*61046927SAndroid Build Coastguard Worker                               S_030CC8_VM_ID_MASK(0) | S_030CC8_REG_STALL_EN(1) | S_030CC8_SPI_STALL_EN(1) |
436*61046927SAndroid Build Coastguard Worker                               S_030CC8_SQ_STALL_EN(1);
437*61046927SAndroid Build Coastguard Worker 
438*61046927SAndroid Build Coastguard Worker          if (info->gfx_level < GFX9) {
439*61046927SAndroid Build Coastguard Worker             sqtt_mask |= S_030CC8_RANDOM_SEED(0xffff);
440*61046927SAndroid Build Coastguard Worker          }
441*61046927SAndroid Build Coastguard Worker 
442*61046927SAndroid Build Coastguard Worker          ac_pm4_set_reg(pm4, R_030CC8_SQ_THREAD_TRACE_MASK, sqtt_mask);
443*61046927SAndroid Build Coastguard Worker 
444*61046927SAndroid Build Coastguard Worker          /* Trace all tokens and registers. */
445*61046927SAndroid Build Coastguard Worker          ac_pm4_set_reg(pm4, R_030CCC_SQ_THREAD_TRACE_TOKEN_MASK,
446*61046927SAndroid Build Coastguard Worker                         S_030CCC_TOKEN_MASK(0xbfff) | S_030CCC_REG_MASK(0xff) | S_030CCC_REG_DROP_ON_STALL(0));
447*61046927SAndroid Build Coastguard Worker 
448*61046927SAndroid Build Coastguard Worker          /* Enable SQTT perf counters for all CUs. */
449*61046927SAndroid Build Coastguard Worker          ac_pm4_set_reg(pm4, R_030CD0_SQ_THREAD_TRACE_PERF_MASK,
450*61046927SAndroid Build Coastguard Worker                         S_030CD0_SH0_MASK(0xffff) | S_030CD0_SH1_MASK(0xffff));
451*61046927SAndroid Build Coastguard Worker 
452*61046927SAndroid Build Coastguard Worker          ac_pm4_set_reg(pm4, R_030CE0_SQ_THREAD_TRACE_TOKEN_MASK2, 0xffffffff);
453*61046927SAndroid Build Coastguard Worker 
454*61046927SAndroid Build Coastguard Worker          ac_pm4_set_reg(pm4, R_030CEC_SQ_THREAD_TRACE_HIWATER, S_030CEC_HIWATER(4));
455*61046927SAndroid Build Coastguard Worker 
456*61046927SAndroid Build Coastguard Worker          if (info->gfx_level == GFX9) {
457*61046927SAndroid Build Coastguard Worker             /* Reset thread trace status errors. */
458*61046927SAndroid Build Coastguard Worker             ac_pm4_set_reg(pm4, R_030CE8_SQ_THREAD_TRACE_STATUS, S_030CE8_UTC_ERROR(0));
459*61046927SAndroid Build Coastguard Worker          }
460*61046927SAndroid Build Coastguard Worker 
461*61046927SAndroid Build Coastguard Worker          /* Enable the thread trace mode. */
462*61046927SAndroid Build Coastguard Worker          uint32_t sqtt_mode = S_030CD8_MASK_PS(1) | S_030CD8_MASK_VS(1) | S_030CD8_MASK_GS(1) | S_030CD8_MASK_ES(1) |
463*61046927SAndroid Build Coastguard Worker                               S_030CD8_MASK_HS(1) | S_030CD8_MASK_LS(1) | S_030CD8_MASK_CS(1) |
464*61046927SAndroid Build Coastguard Worker                               S_030CD8_AUTOFLUSH_EN(1) | /* periodically flush SQTT data to memory */
465*61046927SAndroid Build Coastguard Worker                               S_030CD8_MODE(1);
466*61046927SAndroid Build Coastguard Worker 
467*61046927SAndroid Build Coastguard Worker          if (info->gfx_level == GFX9) {
468*61046927SAndroid Build Coastguard Worker             /* Count SQTT traffic in TCC perf counters. */
469*61046927SAndroid Build Coastguard Worker             sqtt_mode |= S_030CD8_TC_PERF_EN(1);
470*61046927SAndroid Build Coastguard Worker          }
471*61046927SAndroid Build Coastguard Worker 
472*61046927SAndroid Build Coastguard Worker          ac_pm4_set_reg(pm4, R_030CD8_SQ_THREAD_TRACE_MODE, sqtt_mode);
473*61046927SAndroid Build Coastguard Worker       }
474*61046927SAndroid Build Coastguard Worker    }
475*61046927SAndroid Build Coastguard Worker 
476*61046927SAndroid Build Coastguard Worker    /* Restore global broadcasting. */
477*61046927SAndroid Build Coastguard Worker    ac_pm4_set_reg(pm4, R_030800_GRBM_GFX_INDEX,  S_030800_SE_BROADCAST_WRITES(1) |
478*61046927SAndroid Build Coastguard Worker                   S_030800_SH_BROADCAST_WRITES(1) | S_030800_INSTANCE_BROADCAST_WRITES(1));
479*61046927SAndroid Build Coastguard Worker 
480*61046927SAndroid Build Coastguard Worker    /* Start the thread trace with a different event based on the queue. */
481*61046927SAndroid Build Coastguard Worker    if (is_compute_queue) {
482*61046927SAndroid Build Coastguard Worker       ac_pm4_set_reg(pm4, R_00B878_COMPUTE_THREAD_TRACE_ENABLE, S_00B878_THREAD_TRACE_ENABLE(1));
483*61046927SAndroid Build Coastguard Worker    } else {
484*61046927SAndroid Build Coastguard Worker       ac_pm4_cmd_add(pm4, PKT3(PKT3_EVENT_WRITE, 0, 0));
485*61046927SAndroid Build Coastguard Worker       ac_pm4_cmd_add(pm4, EVENT_TYPE(V_028A90_THREAD_TRACE_START) | EVENT_INDEX(0));
486*61046927SAndroid Build Coastguard Worker    }
487*61046927SAndroid Build Coastguard Worker 
488*61046927SAndroid Build Coastguard Worker }
489*61046927SAndroid Build Coastguard Worker 
490*61046927SAndroid Build Coastguard Worker static const uint32_t gfx8_sqtt_info_regs[] = {
491*61046927SAndroid Build Coastguard Worker    R_030CE4_SQ_THREAD_TRACE_WPTR,
492*61046927SAndroid Build Coastguard Worker    R_030CE8_SQ_THREAD_TRACE_STATUS,
493*61046927SAndroid Build Coastguard Worker    R_008E40_SQ_THREAD_TRACE_CNTR,
494*61046927SAndroid Build Coastguard Worker };
495*61046927SAndroid Build Coastguard Worker 
496*61046927SAndroid Build Coastguard Worker static const uint32_t gfx9_sqtt_info_regs[] = {
497*61046927SAndroid Build Coastguard Worker    R_030CE4_SQ_THREAD_TRACE_WPTR,
498*61046927SAndroid Build Coastguard Worker    R_030CE8_SQ_THREAD_TRACE_STATUS,
499*61046927SAndroid Build Coastguard Worker    R_030CF0_SQ_THREAD_TRACE_CNTR,
500*61046927SAndroid Build Coastguard Worker };
501*61046927SAndroid Build Coastguard Worker 
502*61046927SAndroid Build Coastguard Worker static const uint32_t gfx10_sqtt_info_regs[] = {
503*61046927SAndroid Build Coastguard Worker    R_008D10_SQ_THREAD_TRACE_WPTR,
504*61046927SAndroid Build Coastguard Worker    R_008D20_SQ_THREAD_TRACE_STATUS,
505*61046927SAndroid Build Coastguard Worker    R_008D24_SQ_THREAD_TRACE_DROPPED_CNTR,
506*61046927SAndroid Build Coastguard Worker };
507*61046927SAndroid Build Coastguard Worker 
508*61046927SAndroid Build Coastguard Worker static const uint32_t gfx11_sqtt_info_regs[] = {
509*61046927SAndroid Build Coastguard Worker    R_0367BC_SQ_THREAD_TRACE_WPTR,
510*61046927SAndroid Build Coastguard Worker    R_0367D0_SQ_THREAD_TRACE_STATUS,
511*61046927SAndroid Build Coastguard Worker    R_0367E8_SQ_THREAD_TRACE_DROPPED_CNTR,
512*61046927SAndroid Build Coastguard Worker };
513*61046927SAndroid Build Coastguard Worker 
514*61046927SAndroid Build Coastguard Worker static void
ac_sqtt_copy_info_regs(const struct radeon_info * info,struct ac_pm4_state * pm4,const struct ac_sqtt * sqtt,uint32_t se_index)515*61046927SAndroid Build Coastguard Worker ac_sqtt_copy_info_regs(const struct radeon_info *info, struct ac_pm4_state *pm4,
516*61046927SAndroid Build Coastguard Worker                        const struct ac_sqtt *sqtt, uint32_t se_index)
517*61046927SAndroid Build Coastguard Worker {
518*61046927SAndroid Build Coastguard Worker    const uint32_t *sqtt_info_regs = NULL;
519*61046927SAndroid Build Coastguard Worker 
520*61046927SAndroid Build Coastguard Worker    if (info->gfx_level >= GFX11) {
521*61046927SAndroid Build Coastguard Worker       sqtt_info_regs = gfx11_sqtt_info_regs;
522*61046927SAndroid Build Coastguard Worker    } else if (info->gfx_level >= GFX10) {
523*61046927SAndroid Build Coastguard Worker       sqtt_info_regs = gfx10_sqtt_info_regs;
524*61046927SAndroid Build Coastguard Worker    } else if (info->gfx_level == GFX9) {
525*61046927SAndroid Build Coastguard Worker       sqtt_info_regs = gfx9_sqtt_info_regs;
526*61046927SAndroid Build Coastguard Worker    } else {
527*61046927SAndroid Build Coastguard Worker       assert(info->gfx_level == GFX8);
528*61046927SAndroid Build Coastguard Worker       sqtt_info_regs = gfx8_sqtt_info_regs;
529*61046927SAndroid Build Coastguard Worker    }
530*61046927SAndroid Build Coastguard Worker 
531*61046927SAndroid Build Coastguard Worker    /* Get the VA where the info struct is stored for this SE. */
532*61046927SAndroid Build Coastguard Worker    uint64_t info_va = ac_sqtt_get_info_va(sqtt->buffer_va, se_index);
533*61046927SAndroid Build Coastguard Worker 
534*61046927SAndroid Build Coastguard Worker    /* Copy back the info struct one DWORD at a time. */
535*61046927SAndroid Build Coastguard Worker    for (unsigned i = 0; i < 3; i++) {
536*61046927SAndroid Build Coastguard Worker       ac_pm4_cmd_add(pm4, PKT3(PKT3_COPY_DATA, 4, 0));
537*61046927SAndroid Build Coastguard Worker       ac_pm4_cmd_add(pm4, COPY_DATA_SRC_SEL(COPY_DATA_PERF) | COPY_DATA_DST_SEL(COPY_DATA_TC_L2) | COPY_DATA_WR_CONFIRM);
538*61046927SAndroid Build Coastguard Worker       ac_pm4_cmd_add(pm4, sqtt_info_regs[i] >> 2);
539*61046927SAndroid Build Coastguard Worker       ac_pm4_cmd_add(pm4, 0); /* unused */
540*61046927SAndroid Build Coastguard Worker       ac_pm4_cmd_add(pm4, (info_va + i * 4));
541*61046927SAndroid Build Coastguard Worker       ac_pm4_cmd_add(pm4, (info_va + i * 4) >> 32);
542*61046927SAndroid Build Coastguard Worker    }
543*61046927SAndroid Build Coastguard Worker 
544*61046927SAndroid Build Coastguard Worker    if (info->gfx_level == GFX11) {
545*61046927SAndroid Build Coastguard Worker       /* On GFX11, SQ_THREAD_TRACE_WPTR is incremented from the "initial WPTR address" instead of 0.
546*61046927SAndroid Build Coastguard Worker        * To get the number of bytes (in units of 32 bytes) written by SQTT, the workaround is to
547*61046927SAndroid Build Coastguard Worker        * subtract SQ_THREAD_TRACE_WPTR from the "initial WPTR address" as follow:
548*61046927SAndroid Build Coastguard Worker        *
549*61046927SAndroid Build Coastguard Worker        * 1) get the current buffer base address for this SE
550*61046927SAndroid Build Coastguard Worker        * 2) shift right by 5 bits because SQ_THREAD_TRACE_WPTR is 32-byte aligned
551*61046927SAndroid Build Coastguard Worker        * 3) mask off the higher 3 bits because WPTR.OFFSET is 29 bits
552*61046927SAndroid Build Coastguard Worker        */
553*61046927SAndroid Build Coastguard Worker       uint64_t data_va = ac_sqtt_get_data_va(info, sqtt, se_index);
554*61046927SAndroid Build Coastguard Worker       uint64_t shifted_data_va = (data_va >> 5);
555*61046927SAndroid Build Coastguard Worker       uint32_t init_wptr_value = shifted_data_va & 0x1fffffff;
556*61046927SAndroid Build Coastguard Worker 
557*61046927SAndroid Build Coastguard Worker       ac_pm4_cmd_add(pm4, PKT3(PKT3_ATOMIC_MEM, 7, 0));
558*61046927SAndroid Build Coastguard Worker       ac_pm4_cmd_add(pm4, ATOMIC_OP(TC_OP_ATOMIC_SUB_32));
559*61046927SAndroid Build Coastguard Worker       ac_pm4_cmd_add(pm4, info_va);         /* addr lo */
560*61046927SAndroid Build Coastguard Worker       ac_pm4_cmd_add(pm4, info_va >> 32);   /* addr hi */
561*61046927SAndroid Build Coastguard Worker       ac_pm4_cmd_add(pm4, init_wptr_value); /* data lo */
562*61046927SAndroid Build Coastguard Worker       ac_pm4_cmd_add(pm4, 0);               /* data hi */
563*61046927SAndroid Build Coastguard Worker       ac_pm4_cmd_add(pm4, 0);               /* compare data lo */
564*61046927SAndroid Build Coastguard Worker       ac_pm4_cmd_add(pm4, 0);               /* compare data hi */
565*61046927SAndroid Build Coastguard Worker       ac_pm4_cmd_add(pm4, 0);               /* loop interval */
566*61046927SAndroid Build Coastguard Worker    }
567*61046927SAndroid Build Coastguard Worker }
568*61046927SAndroid Build Coastguard Worker 
569*61046927SAndroid Build Coastguard Worker void
ac_sqtt_emit_stop(const struct radeon_info * info,struct ac_pm4_state * pm4,bool is_compute_queue)570*61046927SAndroid Build Coastguard Worker ac_sqtt_emit_stop(const struct radeon_info *info, struct ac_pm4_state *pm4,
571*61046927SAndroid Build Coastguard Worker                   bool is_compute_queue)
572*61046927SAndroid Build Coastguard Worker {
573*61046927SAndroid Build Coastguard Worker    /* Stop the thread trace with a different event based on the queue. */
574*61046927SAndroid Build Coastguard Worker    if (is_compute_queue) {
575*61046927SAndroid Build Coastguard Worker       ac_pm4_set_reg(pm4, R_00B878_COMPUTE_THREAD_TRACE_ENABLE, S_00B878_THREAD_TRACE_ENABLE(0));
576*61046927SAndroid Build Coastguard Worker    } else {
577*61046927SAndroid Build Coastguard Worker       ac_pm4_cmd_add(pm4, PKT3(PKT3_EVENT_WRITE, 0, 0));
578*61046927SAndroid Build Coastguard Worker       ac_pm4_cmd_add(pm4, EVENT_TYPE(V_028A90_THREAD_TRACE_STOP) | EVENT_INDEX(0));
579*61046927SAndroid Build Coastguard Worker    }
580*61046927SAndroid Build Coastguard Worker 
581*61046927SAndroid Build Coastguard Worker    ac_pm4_cmd_add(pm4, PKT3(PKT3_EVENT_WRITE, 0, 0));
582*61046927SAndroid Build Coastguard Worker    ac_pm4_cmd_add(pm4, EVENT_TYPE(V_028A90_THREAD_TRACE_FINISH) | EVENT_INDEX(0));
583*61046927SAndroid Build Coastguard Worker }
584*61046927SAndroid Build Coastguard Worker 
585*61046927SAndroid Build Coastguard Worker void
ac_sqtt_emit_wait(const struct radeon_info * info,struct ac_pm4_state * pm4,const struct ac_sqtt * sqtt,bool is_compute_queue)586*61046927SAndroid Build Coastguard Worker ac_sqtt_emit_wait(const struct radeon_info *info, struct ac_pm4_state *pm4,
587*61046927SAndroid Build Coastguard Worker                   const struct ac_sqtt *sqtt, bool is_compute_queue)
588*61046927SAndroid Build Coastguard Worker {
589*61046927SAndroid Build Coastguard Worker    const unsigned max_se = info->max_se;
590*61046927SAndroid Build Coastguard Worker 
591*61046927SAndroid Build Coastguard Worker    for (unsigned se = 0; se < max_se; se++) {
592*61046927SAndroid Build Coastguard Worker       if (ac_sqtt_se_is_disabled(info, se))
593*61046927SAndroid Build Coastguard Worker          continue;
594*61046927SAndroid Build Coastguard Worker 
595*61046927SAndroid Build Coastguard Worker       /* Target SEi and SH0. */
596*61046927SAndroid Build Coastguard Worker       ac_pm4_set_reg(pm4, R_030800_GRBM_GFX_INDEX, S_030800_SE_INDEX(se) |
597*61046927SAndroid Build Coastguard Worker                      S_030800_SH_INDEX(0) | S_030800_INSTANCE_BROADCAST_WRITES(1));
598*61046927SAndroid Build Coastguard Worker 
599*61046927SAndroid Build Coastguard Worker       if (info->gfx_level >= GFX11) {
600*61046927SAndroid Build Coastguard Worker          /* Make sure to wait for the trace buffer. */
601*61046927SAndroid Build Coastguard Worker          ac_pm4_cmd_add(pm4, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
602*61046927SAndroid Build Coastguard Worker          ac_pm4_cmd_add(pm4, WAIT_REG_MEM_NOT_EQUAL); /* wait until the register is equal to the reference value */
603*61046927SAndroid Build Coastguard Worker          ac_pm4_cmd_add(pm4, R_0367D0_SQ_THREAD_TRACE_STATUS >> 2); /* register */
604*61046927SAndroid Build Coastguard Worker          ac_pm4_cmd_add(pm4, 0);
605*61046927SAndroid Build Coastguard Worker          ac_pm4_cmd_add(pm4, 0); /* reference value */
606*61046927SAndroid Build Coastguard Worker          ac_pm4_cmd_add(pm4, ~C_0367D0_FINISH_DONE);
607*61046927SAndroid Build Coastguard Worker          ac_pm4_cmd_add(pm4, 4); /* poll interval */
608*61046927SAndroid Build Coastguard Worker 
609*61046927SAndroid Build Coastguard Worker          /* Disable the thread trace mode. */
610*61046927SAndroid Build Coastguard Worker          ac_pm4_set_reg(pm4, R_0367B0_SQ_THREAD_TRACE_CTRL, ac_sqtt_get_ctrl(info, false));
611*61046927SAndroid Build Coastguard Worker 
612*61046927SAndroid Build Coastguard Worker          /* Wait for thread trace completion. */
613*61046927SAndroid Build Coastguard Worker          ac_pm4_cmd_add(pm4, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
614*61046927SAndroid Build Coastguard Worker          ac_pm4_cmd_add(pm4, WAIT_REG_MEM_EQUAL); /* wait until the register is equal to the reference value */
615*61046927SAndroid Build Coastguard Worker          ac_pm4_cmd_add(pm4, R_0367D0_SQ_THREAD_TRACE_STATUS >> 2); /* register */
616*61046927SAndroid Build Coastguard Worker          ac_pm4_cmd_add(pm4, 0);
617*61046927SAndroid Build Coastguard Worker          ac_pm4_cmd_add(pm4, 0);              /* reference value */
618*61046927SAndroid Build Coastguard Worker          ac_pm4_cmd_add(pm4, ~C_0367D0_BUSY); /* mask */
619*61046927SAndroid Build Coastguard Worker          ac_pm4_cmd_add(pm4, 4);              /* poll interval */
620*61046927SAndroid Build Coastguard Worker       } else if (info->gfx_level >= GFX10) {
621*61046927SAndroid Build Coastguard Worker          if (!info->has_sqtt_rb_harvest_bug) {
622*61046927SAndroid Build Coastguard Worker             /* Make sure to wait for the trace buffer. */
623*61046927SAndroid Build Coastguard Worker             ac_pm4_cmd_add(pm4, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
624*61046927SAndroid Build Coastguard Worker             ac_pm4_cmd_add(pm4, WAIT_REG_MEM_NOT_EQUAL); /* wait until the register is equal to the reference value */
625*61046927SAndroid Build Coastguard Worker             ac_pm4_cmd_add(pm4, R_008D20_SQ_THREAD_TRACE_STATUS >> 2); /* register */
626*61046927SAndroid Build Coastguard Worker             ac_pm4_cmd_add(pm4, 0);
627*61046927SAndroid Build Coastguard Worker             ac_pm4_cmd_add(pm4, 0); /* reference value */
628*61046927SAndroid Build Coastguard Worker             ac_pm4_cmd_add(pm4, ~C_008D20_FINISH_DONE);
629*61046927SAndroid Build Coastguard Worker             ac_pm4_cmd_add(pm4, 4); /* poll interval */
630*61046927SAndroid Build Coastguard Worker          }
631*61046927SAndroid Build Coastguard Worker 
632*61046927SAndroid Build Coastguard Worker          /* Disable the thread trace mode. */
633*61046927SAndroid Build Coastguard Worker          ac_pm4_set_reg(pm4, R_008D1C_SQ_THREAD_TRACE_CTRL, ac_sqtt_get_ctrl(info, false));
634*61046927SAndroid Build Coastguard Worker 
635*61046927SAndroid Build Coastguard Worker          /* Wait for thread trace completion. */
636*61046927SAndroid Build Coastguard Worker          ac_pm4_cmd_add(pm4, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
637*61046927SAndroid Build Coastguard Worker          ac_pm4_cmd_add(pm4, WAIT_REG_MEM_EQUAL); /* wait until the register is equal to the reference value */
638*61046927SAndroid Build Coastguard Worker          ac_pm4_cmd_add(pm4, R_008D20_SQ_THREAD_TRACE_STATUS >> 2); /* register */
639*61046927SAndroid Build Coastguard Worker          ac_pm4_cmd_add(pm4, 0);
640*61046927SAndroid Build Coastguard Worker          ac_pm4_cmd_add(pm4, 0);              /* reference value */
641*61046927SAndroid Build Coastguard Worker          ac_pm4_cmd_add(pm4, ~C_008D20_BUSY); /* mask */
642*61046927SAndroid Build Coastguard Worker          ac_pm4_cmd_add(pm4, 4);              /* poll interval */
643*61046927SAndroid Build Coastguard Worker       } else {
644*61046927SAndroid Build Coastguard Worker          /* Disable the thread trace mode. */
645*61046927SAndroid Build Coastguard Worker          ac_pm4_set_reg(pm4, R_030CD8_SQ_THREAD_TRACE_MODE, S_030CD8_MODE(0));
646*61046927SAndroid Build Coastguard Worker 
647*61046927SAndroid Build Coastguard Worker          /* Wait for thread trace completion. */
648*61046927SAndroid Build Coastguard Worker          ac_pm4_cmd_add(pm4, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
649*61046927SAndroid Build Coastguard Worker          ac_pm4_cmd_add(pm4, WAIT_REG_MEM_EQUAL); /* wait until the register is equal to the reference value */
650*61046927SAndroid Build Coastguard Worker          ac_pm4_cmd_add(pm4, R_030CE8_SQ_THREAD_TRACE_STATUS >> 2); /* register */
651*61046927SAndroid Build Coastguard Worker          ac_pm4_cmd_add(pm4, 0);
652*61046927SAndroid Build Coastguard Worker          ac_pm4_cmd_add(pm4, 0);              /* reference value */
653*61046927SAndroid Build Coastguard Worker          ac_pm4_cmd_add(pm4, ~C_030CE8_BUSY); /* mask */
654*61046927SAndroid Build Coastguard Worker          ac_pm4_cmd_add(pm4, 4);              /* poll interval */
655*61046927SAndroid Build Coastguard Worker       }
656*61046927SAndroid Build Coastguard Worker 
657*61046927SAndroid Build Coastguard Worker       ac_sqtt_copy_info_regs(info, pm4, sqtt, se);
658*61046927SAndroid Build Coastguard Worker    }
659*61046927SAndroid Build Coastguard Worker 
660*61046927SAndroid Build Coastguard Worker    /* Restore global broadcasting. */
661*61046927SAndroid Build Coastguard Worker    ac_pm4_set_reg(pm4, R_030800_GRBM_GFX_INDEX, S_030800_SE_BROADCAST_WRITES(1) |
662*61046927SAndroid Build Coastguard Worker                   S_030800_SH_BROADCAST_WRITES(1) | S_030800_INSTANCE_BROADCAST_WRITES(1));
663*61046927SAndroid Build Coastguard Worker }
664