xref: /aosp_15_r20/external/mesa3d/src/amd/common/ac_sqtt.h (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright 2020 Advanced Micro Devices, Inc.
3  * Copyright 2020 Valve Corporation
4  *
5  * SPDX-License-Identifier: MIT
6  */
7 
8 #ifndef AC_SQTT_H
9 #define AC_SQTT_H
10 
11 #include <stdint.h>
12 #include <stdbool.h>
13 
14 #include <assert.h>
15 #include "ac_pm4.h"
16 #include "ac_rgp.h"
17 #include "amd_family.h"
18 
19 struct radeon_cmdbuf;
20 struct radeon_info;
21 
22 /**
23  * SQ Thread tracing is a tracing mechanism that allows taking a detailed look
24  * at what the shader cores are doing.
25  *
26  * Among the things recorded are:
27  *  - draws/dispatches + state
28  *  - when each wave starts and stops.
29  *  - for one SIMD per SE all instructions executed on that SIMD.
30  *
31  * The hardware stores all these as events in a buffer, no manual barrier
32  * around each command needed. The primary user of this is RGP.
33  */
34 struct ac_sqtt { /* SQTT state: start/stop command streams, trace buffer, and RGP records held by value. */
35    struct radeon_cmdbuf *start_cs[2]; /* NOTE(review): meaning of the two slots is not visible here (presumably one per queue type) -- confirm. */
36    struct radeon_cmdbuf *stop_cs[2]; /* Same array length as start_cs. */
37    /* struct radeon_winsys_bo or struct pb_buffer */
38    void *bo;
39    uint64_t buffer_va; /* presumably the GPU virtual address of the trace buffer -- TODO confirm */
40    void *ptr; /* presumably a CPU mapping of the trace buffer -- TODO confirm */
41    uint32_t buffer_size;
42    int start_frame;
43    char *trigger_file;
44    bool instruction_timing_enabled;
45 
46    uint32_t cmdbuf_ids_per_queue[AMD_NUM_IP_TYPES]; /* One 32-bit slot per AMD IP type. */
47 
48    struct rgp_code_object rgp_code_object; /* The rgp_* members are embedded by value; struct ac_sqtt_trace refers to the same record types through const pointers. */
49    struct rgp_loader_events rgp_loader_events;
50    struct rgp_pso_correlation rgp_pso_correlation;
51 
52    struct rgp_queue_info rgp_queue_info;
53    struct rgp_queue_event rgp_queue_event;
54 
55    struct rgp_clock_calibration rgp_clock_calibration;
56 
57    struct hash_table_u64 *pipeline_bos; /* Opaque here: struct hash_table_u64 is not defined in this header. */
58 };
59 
60 #define SQTT_BUFFER_ALIGN_SHIFT 12 /* A shift amount: 1 << 12 == 4096. NOTE(review): presumably a byte alignment of the SQTT buffer -- confirm at the use sites. */
61 
62 struct ac_sqtt_data_info { /* Three 32-bit words describing one trace; the last word has two alternative names. */
63    uint32_t cur_offset;
64    uint32_t trace_status;
65    union { /* Anonymous union: both members alias the same 32-bit word. */
66       uint32_t gfx9_write_counter; /* NOTE(review): names suggest the meaning differs between gfx9 and gfx10 hardware -- confirm. */
67       uint32_t gfx10_dropped_cntr;
68    };
69 };
70 
71 struct ac_sqtt_data_se { /* One trace entry: its info words, a pointer to its data, and the shader engine / compute unit numbers. */
72    struct ac_sqtt_data_info info;
73    void *data_ptr; /* Untyped pointer; NOTE(review): ownership and lifetime are not visible in this header. */
74    uint32_t shader_engine;
75    uint32_t compute_unit;
76 };
77 
78 #define SQTT_MAX_TRACES 6 /* Capacity of the traces[] array in struct ac_sqtt_trace. */
79 
80 struct ac_sqtt_trace { /* Read-only pointers to the RGP record types plus a fixed-capacity array of per-SE trace entries. */
81    const struct rgp_code_object *rgp_code_object;
82    const struct rgp_loader_events *rgp_loader_events;
83    const struct rgp_pso_correlation *rgp_pso_correlation;
84    const struct rgp_queue_info *rgp_queue_info;
85    const struct rgp_queue_event *rgp_queue_event;
86    const struct rgp_clock_calibration *rgp_clock_calibration;
87 
88    uint32_t num_traces; /* presumably the number of valid entries in traces[] -- TODO confirm */
89    struct ac_sqtt_data_se traces[SQTT_MAX_TRACES];
90 };
91 
92 uint64_t ac_sqtt_get_info_offset(unsigned se); /* Depends only on the index 'se'. NOTE(review): presumably a byte offset inside the SQTT buffer -- confirm. */
93 
94 uint64_t ac_sqtt_get_data_offset(const struct radeon_info *rad_info, const struct ac_sqtt *sqtt,
95                                  unsigned se); /* Unlike the info offset, this one also takes the GPU info and the SQTT state. */
96 
97 void ac_sqtt_init(struct ac_sqtt *data); /* presumably paired with ac_sqtt_finish() -- confirm in the implementation */
98 
99 void ac_sqtt_finish(struct ac_sqtt *data);
100 
101 bool ac_is_sqtt_complete(const struct radeon_info *rad_info, const struct ac_sqtt *sqtt,
102                          const struct ac_sqtt_data_info *info); /* All three inputs are read-only (const). */
103 
104 uint32_t ac_get_expected_buffer_size(struct radeon_info *rad_info, /* NOTE(review): the only prototype in this header taking a non-const radeon_info; adding const would require changing the definition too. */
105                                      const struct ac_sqtt_data_info *info);
106 
107 /**
108  * Identifiers for RGP SQ thread-tracing markers (Table 1)
109  */
110 enum rgp_sqtt_marker_identifier
111 { /* All values lie in 0x0-0xF, so they fit the 4-bit 'identifier' bit-field of the rgp_sqtt_marker_* structs in this header. */
112    RGP_SQTT_MARKER_IDENTIFIER_EVENT = 0x0,
113    RGP_SQTT_MARKER_IDENTIFIER_CB_START = 0x1,
114    RGP_SQTT_MARKER_IDENTIFIER_CB_END = 0x2,
115    RGP_SQTT_MARKER_IDENTIFIER_BARRIER_START = 0x3,
116    RGP_SQTT_MARKER_IDENTIFIER_BARRIER_END = 0x4,
117    RGP_SQTT_MARKER_IDENTIFIER_USER_EVENT = 0x5,
118    RGP_SQTT_MARKER_IDENTIFIER_GENERAL_API = 0x6,
119    RGP_SQTT_MARKER_IDENTIFIER_SYNC = 0x7,
120    RGP_SQTT_MARKER_IDENTIFIER_PRESENT = 0x8,
121    RGP_SQTT_MARKER_IDENTIFIER_LAYOUT_TRANSITION = 0x9,
122    RGP_SQTT_MARKER_IDENTIFIER_RENDER_PASS = 0xA,
123    RGP_SQTT_MARKER_IDENTIFIER_RESERVED2 = 0xB,
124    RGP_SQTT_MARKER_IDENTIFIER_BIND_PIPELINE = 0xC,
125    RGP_SQTT_MARKER_IDENTIFIER_RESERVED4 = 0xD,
126    RGP_SQTT_MARKER_IDENTIFIER_RESERVED5 = 0xE,
127    RGP_SQTT_MARKER_IDENTIFIER_RESERVED6 = 0xF
128 };
129 
130 /**
131  * Command buffer IDs used in RGP SQ thread-tracing markers (only 20 bits).
132  */
133 union rgp_sqtt_marker_cb_id {
134    struct { /* 1 + 7 + 12 = 20 payload bits, followed by 12 reserved bits. */
135       uint32_t per_frame : 1; /* Must be 1, frame-based command buffer ID. */
136       uint32_t frame_index : 7;
137       uint32_t cb_index : 12; /* Command buffer index within the frame. */
138       uint32_t reserved : 12;
139    } per_frame_cb_id;
140 
141    struct { /* 1 + 19 = 20 payload bits, followed by 12 reserved bits. */
142       uint32_t per_frame : 1; /* Must be 0, global command buffer ID. */
143       uint32_t cb_index : 19; /* Global command buffer index. */
144       uint32_t reserved : 12;
145    } global_cb_id;
146 
147    uint32_t all; /* Whole 32-bit view shared by both layouts. */
148 };
149 
150 /**
151  * RGP SQ thread-tracing marker for the start of a command buffer. (Table 2)
152  */
153 struct rgp_sqtt_marker_cb_start {
154    union {
155       struct { /* 4 + 3 + 20 + 5 = 32 bits, overlaid on dword01. */
156          uint32_t identifier : 4;
157          uint32_t ext_dwords : 3;
158          uint32_t cb_id : 20; /* Same width as the 20 payload bits of union rgp_sqtt_marker_cb_id. */
159          uint32_t queue : 5;
160       };
161       uint32_t dword01;
162    };
163    union { /* Each remaining union gives one dword a descriptive name plus a positional dwordNN alias. */
164       uint32_t device_id_low;
165       uint32_t dword02;
166    };
167    union {
168       uint32_t device_id_high;
169       uint32_t dword03;
170    };
171    union {
172       uint32_t queue_flags;
173       uint32_t dword04;
174    };
175 };
176 
177 static_assert(sizeof(struct rgp_sqtt_marker_cb_start) == 16, /* four dwords */
178               "rgp_sqtt_marker_cb_start doesn't match RGP spec");
179 
180 /**
181  * Same first three dwords as rgp_sqtt_marker_cb_start, with 'queue' replaced by reserved bits and no queue_flags dword.
182  * RGP SQ thread-tracing marker for the end of a command buffer. (Table 3)
183  */
184 struct rgp_sqtt_marker_cb_end {
185    union {
186       struct { /* 4 + 3 + 20 + 5 = 32 bits, overlaid on dword01. */
187          uint32_t identifier : 4;
188          uint32_t ext_dwords : 3;
189          uint32_t cb_id : 20;
190          uint32_t reserved : 5;
191       };
192       uint32_t dword01;
193    };
194    union {
195       uint32_t device_id_low;
196       uint32_t dword02;
197    };
198    union {
199       uint32_t device_id_high;
200       uint32_t dword03;
201    };
202 };
203 
204 static_assert(sizeof(struct rgp_sqtt_marker_cb_end) == 12, /* three dwords */
205               "rgp_sqtt_marker_cb_end doesn't match RGP spec");
206 
207 /**
208  * API types used in RGP SQ thread-tracing markers for the "General API"
209  * packet.
210  */
211 enum rgp_sqtt_marker_general_api_type
212 { /* Values 0-49 (with 46 unassigned) fit the 20-bit api_type field of struct rgp_sqtt_marker_general_api. */
213    ApiCmdBindPipeline = 0,
214    ApiCmdBindDescriptorSets = 1,
215    ApiCmdBindIndexBuffer = 2,
216    ApiCmdBindVertexBuffers = 3,
217    ApiCmdDraw = 4,
218    ApiCmdDrawIndexed = 5,
219    ApiCmdDrawIndirect = 6,
220    ApiCmdDrawIndexedIndirect = 7,
221    ApiCmdDrawIndirectCountAMD = 8,
222    ApiCmdDrawIndexedIndirectCountAMD = 9,
223    ApiCmdDispatch = 10,
224    ApiCmdDispatchIndirect = 11,
225    ApiCmdCopyBuffer = 12,
226    ApiCmdCopyImage = 13,
227    ApiCmdBlitImage = 14,
228    ApiCmdCopyBufferToImage = 15,
229    ApiCmdCopyImageToBuffer = 16,
230    ApiCmdUpdateBuffer = 17,
231    ApiCmdFillBuffer = 18,
232    ApiCmdClearColorImage = 19,
233    ApiCmdClearDepthStencilImage = 20,
234    ApiCmdClearAttachments = 21,
235    ApiCmdResolveImage = 22,
236    ApiCmdWaitEvents = 23,
237    ApiCmdPipelineBarrier = 24,
238    ApiCmdBeginQuery = 25,
239    ApiCmdEndQuery = 26,
240    ApiCmdResetQueryPool = 27,
241    ApiCmdWriteTimestamp = 28,
242    ApiCmdCopyQueryPoolResults = 29,
243    ApiCmdPushConstants = 30,
244    ApiCmdBeginRenderPass = 31,
245    ApiCmdNextSubpass = 32,
246    ApiCmdEndRenderPass = 33,
247    ApiCmdExecuteCommands = 34,
248    ApiCmdSetViewport = 35,
249    ApiCmdSetScissor = 36,
250    ApiCmdSetLineWidth = 37,
251    ApiCmdSetDepthBias = 38,
252    ApiCmdSetBlendConstants = 39,
253    ApiCmdSetDepthBounds = 40,
254    ApiCmdSetStencilCompareMask = 41,
255    ApiCmdSetStencilWriteMask = 42,
256    ApiCmdSetStencilReference = 43,
257    ApiCmdDrawIndirectCount = 44,
258    ApiCmdDrawIndexedIndirectCount = 45,
259    /* gap: value 46 is not assigned */
260    ApiCmdDrawMeshTasksEXT = 47,
261    ApiCmdDrawMeshTasksIndirectCountEXT = 48,
262    ApiCmdDrawMeshTasksIndirectEXT = 49,
263 
264    ApiRayTracingSeparateCompiled = 0x800000, /* Bit 23: too wide for the 20-bit api_type of rgp_sqtt_marker_general_api, but fits the 24-bit api_type of rgp_sqtt_marker_event. NOTE(review): presumably OR-ed in as a flag -- confirm at the use sites. */
265    ApiInvalid = 0xffffffff /* NOTE(review): not representable as int; ISO C before C23 requires enumerators to fit in int, so this relies on compiler support. */
266 };
267 
268 /**
269  * RGP SQ thread-tracing marker for a "General API" instrumentation packet.
270  */
271 struct rgp_sqtt_marker_general_api {
272    union {
273       struct { /* 4 + 3 + 20 + 1 + 4 = 32 bits, overlaid on dword01. */
274          uint32_t identifier : 4;
275          uint32_t ext_dwords : 3;
276          uint32_t api_type : 20; /* presumably holds an enum rgp_sqtt_marker_general_api_type value -- TODO confirm */
277          uint32_t is_end : 1;
278          uint32_t reserved : 4;
279       };
280       uint32_t dword01;
281    };
282 };
283 
284 static_assert(sizeof(struct rgp_sqtt_marker_general_api) == 4, /* one dword */
285               "rgp_sqtt_marker_general_api doesn't match RGP spec");
286 
287 /**
288  * API types used in RGP SQ thread-tracing markers (Table 16).
289  */
290 enum rgp_sqtt_marker_event_type
291 { /* Numbering is independent of enum rgp_sqtt_marker_general_api_type (e.g. EventCmdDraw is 0 while ApiCmdDraw is 4). */
292    EventCmdDraw = 0,
293    EventCmdDrawIndexed = 1,
294    EventCmdDrawIndirect = 2,
295    EventCmdDrawIndexedIndirect = 3,
296    EventCmdDrawIndirectCountAMD = 4,
297    EventCmdDrawIndexedIndirectCountAMD = 5,
298    EventCmdDispatch = 6,
299    EventCmdDispatchIndirect = 7,
300    EventCmdCopyBuffer = 8,
301    EventCmdCopyImage = 9,
302    EventCmdBlitImage = 10,
303    EventCmdCopyBufferToImage = 11,
304    EventCmdCopyImageToBuffer = 12,
305    EventCmdUpdateBuffer = 13,
306    EventCmdFillBuffer = 14,
307    EventCmdClearColorImage = 15,
308    EventCmdClearDepthStencilImage = 16,
309    EventCmdClearAttachments = 17,
310    EventCmdResolveImage = 18,
311    EventCmdWaitEvents = 19,
312    EventCmdPipelineBarrier = 20,
313    EventCmdResetQueryPool = 21,
314    EventCmdCopyQueryPoolResults = 22,
315    EventRenderPassColorClear = 23,
316    EventRenderPassDepthStencilClear = 24,
317    EventRenderPassResolve = 25,
318    EventInternalUnknown = 26,
319    EventCmdDrawIndirectCount = 27,
320    EventCmdDrawIndexedIndirectCount = 28,
321    /* gap: value 29 is not assigned */
322    EventCmdTraceRaysKHR = 30,
323    EventCmdTraceRaysIndirectKHR = 31,
324    EventCmdBuildAccelerationStructuresKHR = 32,
325    EventCmdBuildAccelerationStructuresIndirectKHR = 33,
326    EventCmdCopyAccelerationStructureKHR = 34,
327    EventCmdCopyAccelerationStructureToMemoryKHR = 35,
328    EventCmdCopyMemoryToAccelerationStructureKHR = 36,
329    /* gap: values 37-40 are not assigned */
330    EventCmdDrawMeshTasksEXT = 41,
331    EventCmdDrawMeshTasksIndirectCountEXT = 42,
332    EventCmdDrawMeshTasksIndirectEXT = 43,
333    EventUnknown = 0x7fff, /* Fits the 24-bit api_type field of struct rgp_sqtt_marker_event. */
334    EventInvalid = 0xffffffff /* Does not fit the 24-bit api_type field; NOTE(review): also not representable as int (see ISO C enumerator rules before C23). */
335 };
336 
337 /**
338  * "Event (Per-draw/dispatch)" RGP SQ thread-tracing marker. (Table 4)
339  */
340 struct rgp_sqtt_marker_event {
341    union {
342       struct { /* 4 + 3 + 24 + 1 = 32 bits, overlaid on dword01. */
343          uint32_t identifier : 4;
344          uint32_t ext_dwords : 3;
345          uint32_t api_type : 24; /* 24 bits here, versus 20 bits in rgp_sqtt_marker_general_api. */
346          uint32_t has_thread_dims : 1; /* presumably set when the marker is the rgp_sqtt_marker_event_with_dims variant -- TODO confirm */
347       };
348       uint32_t dword01;
349    };
350    union {
351       struct { /* 20 + 4 + 4 + 4 = 32 bits, overlaid on dword02. */
352          uint32_t cb_id : 20;
353          uint32_t vertex_offset_reg_idx : 4;
354          uint32_t instance_offset_reg_idx : 4;
355          uint32_t draw_index_reg_idx : 4;
356       };
357       uint32_t dword02;
358    };
359    union {
360       uint32_t cmd_id;
361       uint32_t dword03;
362    };
363 };
364 
365 static_assert(sizeof(struct rgp_sqtt_marker_event) == 12, /* three dwords */
366               "rgp_sqtt_marker_event doesn't match RGP spec");
367 
368 /**
369  * Per-dispatch specific marker where workgroup dims are included.
370  */
371 struct rgp_sqtt_marker_event_with_dims {
372    struct rgp_sqtt_marker_event event; /* The 12-byte base marker comes first. */
373    uint32_t thread_x; /* Three extra dwords follow the base marker: x, y, z. */
374    uint32_t thread_y;
375    uint32_t thread_z;
376 };
377 
378 static_assert(sizeof(struct rgp_sqtt_marker_event_with_dims) == 24, /* 12-byte event + 3 dwords */
379               "rgp_sqtt_marker_event_with_dims doesn't match RGP spec");
380 
381 /**
382  * "Barrier Start" RGP SQTT instrumentation marker (Table 5)
383  */
384 struct rgp_sqtt_marker_barrier_start {
385    union {
386       struct { /* 4 + 3 + 20 + 5 = 32 bits, overlaid on dword01. */
387          uint32_t identifier : 4;
388          uint32_t ext_dwords : 3;
389          uint32_t cb_id : 20;
390          uint32_t reserved : 5;
391       };
392       uint32_t dword01;
393    };
394    union {
395       struct { /* 31 + 1 = 32 bits, overlaid on dword02. */
396          uint32_t driver_reason : 31;
397          uint32_t internal : 1;
398       };
399       uint32_t dword02;
400    };
401 };
402 
403 static_assert(sizeof(struct rgp_sqtt_marker_barrier_start) == 8, /* two dwords */
404               "rgp_sqtt_marker_barrier_start doesn't match RGP spec");
405 
406 /**
407  * "Barrier End" RGP SQTT instrumentation marker (Table 6)
408  */
409 struct rgp_sqtt_marker_barrier_end {
410    union {
411       struct { /* 4 + 3 + 20 header bits, then five 1-bit flags = 32 bits, overlaid on dword01. */
412          uint32_t identifier : 4;
413          uint32_t ext_dwords : 3;
414          uint32_t cb_id : 20;
415          uint32_t wait_on_eop_ts : 1;
416          uint32_t vs_partial_flush : 1;
417          uint32_t ps_partial_flush : 1;
418          uint32_t cs_partial_flush : 1;
419          uint32_t pfp_sync_me : 1;
420       };
421       uint32_t dword01;
422    };
423    union {
424       struct { /* Ten 1-bit flags, a 16-bit count, five more 1-bit flags and 1 reserved bit = 32 bits, overlaid on dword02. */
425          uint32_t sync_cp_dma : 1;
426          uint32_t inval_tcp : 1;
427          uint32_t inval_sqI : 1;
428          uint32_t inval_sqK : 1;
429          uint32_t flush_tcc : 1;
430          uint32_t inval_tcc : 1;
431          uint32_t flush_cb : 1;
432          uint32_t inval_cb : 1;
433          uint32_t flush_db : 1;
434          uint32_t inval_db : 1;
435          uint32_t num_layout_transitions : 16; /* The only multi-bit field in this dword; maximum value 65535. */
436          uint32_t inval_gl1 : 1;
437          uint32_t wait_on_ts : 1;
438          uint32_t eop_ts_bottom_of_pipe : 1;
439          uint32_t eos_ts_ps_done : 1;
440          uint32_t eos_ts_cs_done : 1;
441          uint32_t reserved : 1;
442       };
443       uint32_t dword02;
444    };
445 };
446 
447 static_assert(sizeof(struct rgp_sqtt_marker_barrier_end) == 8, /* two dwords */
448               "rgp_sqtt_marker_barrier_end doesn't match RGP spec");
449 
450 /**
451  * "Layout Transition" RGP SQTT instrumentation marker (Table 7)
452  */
453 struct rgp_sqtt_marker_layout_transition {
454    union {
455       struct { /* 4 + 3 header bits, eight 1-bit flags, 17 reserved bits = 32 bits, overlaid on dword01. */
456          uint32_t identifier : 4;
457          uint32_t ext_dwords : 3;
458          uint32_t depth_stencil_expand : 1;
459          uint32_t htile_hiz_range_expand : 1;
460          uint32_t depth_stencil_resummarize : 1;
461          uint32_t dcc_decompress : 1;
462          uint32_t fmask_decompress : 1;
463          uint32_t fast_clear_eliminate : 1;
464          uint32_t fmask_color_expand : 1;
465          uint32_t init_mask_ram : 1;
466          uint32_t reserved1 : 17;
467       };
468       uint32_t dword01;
469    };
470    union {
471       struct { /* The whole second dword is reserved. */
472          uint32_t reserved2 : 32;
473       };
474       uint32_t dword02;
475    };
476 };
477 
478 static_assert(sizeof(struct rgp_sqtt_marker_layout_transition) == 8, /* two dwords */
479               "rgp_sqtt_marker_layout_transition doesn't match RGP spec");
480 
481 
482 /**
483  * "User Event" RGP SQTT instrumentation marker (Table 8)
484  */
485 struct rgp_sqtt_marker_user_event {
486    union {
487       struct { /* 4 + 8 + 8 + 12 = 32 bits, overlaid on dword01; unlike the other markers there is no ext_dwords field. */
488          uint32_t identifier : 4;
489          uint32_t reserved0 : 8;
490          uint32_t data_type : 8; /* presumably an enum rgp_sqtt_marker_user_event_type value -- TODO confirm */
491          uint32_t reserved1 : 12;
492       };
493       uint32_t dword01;
494    };
495 };
496 struct rgp_sqtt_marker_user_event_with_length { /* Base marker followed by one extra dword; NOTE(review): this variant has no size static_assert. */
497    struct rgp_sqtt_marker_user_event user_event;
498    uint32_t length; /* NOTE(review): unit and what it measures are not visible in this header. */
499 };
500 
501 static_assert(sizeof(struct rgp_sqtt_marker_user_event) == 4, /* one dword */
502               "rgp_sqtt_marker_user_event doesn't match RGP spec");
503 
504 enum rgp_sqtt_marker_user_event_type
505 { /* Only the first value is explicit; the rest follow implicitly as 1, 2, 3. */
506    UserEventTrigger = 0,
507    UserEventPop,
508    UserEventPush,
509    UserEventObjectName,
510 };
511 
512 /**
513  * "Pipeline bind" RGP SQTT instrumentation marker (Table 12)
514  */
515 struct rgp_sqtt_marker_pipeline_bind {
516    union {
517       struct { /* 4 + 3 + 1 + 20 + 4 = 32 bits, overlaid on dword01. */
518          uint32_t identifier : 4;
519          uint32_t ext_dwords : 3;
520          uint32_t bind_point : 1; /* NOTE(review): the meaning of 0 and 1 is not defined in this header. */
521          uint32_t cb_id : 20; /* Starts at bit 8 here, one bit later than in the other markers, because bind_point precedes it. */
522          uint32_t reserved : 4;
523       };
524       uint32_t dword01;
525    };
526    union {
527       uint32_t api_pso_hash[2]; /* Two dwords, aliased element-by-element with dword02 and dword03. */
528       struct {
529          uint32_t dword02;
530          uint32_t dword03;
531       };
532    };
533 };
534 
535 static_assert(sizeof(struct rgp_sqtt_marker_pipeline_bind) == 12, /* three dwords */
536               "rgp_sqtt_marker_pipeline_bind doesn't match RGP spec");
537 
538 bool ac_sqtt_add_pso_correlation(struct ac_sqtt *sqtt, uint64_t pipeline_hash, uint64_t api_hash); /* The three ac_sqtt_add_* functions take a mutable ac_sqtt and return bool; NOTE(review): presumably true on success -- confirm. */
539 
540 bool ac_sqtt_add_code_object_loader_event(struct ac_sqtt *sqtt, uint64_t pipeline_hash,
541                                           uint64_t base_address);
542 
543 bool ac_sqtt_add_clock_calibration(struct ac_sqtt *sqtt, uint64_t cpu_timestamp,
544                                    uint64_t gpu_timestamp);
545 
546 bool ac_check_profile_state(const struct radeon_info *info);
547 
548 union rgp_sqtt_marker_cb_id ac_sqtt_get_next_cmdbuf_id(struct ac_sqtt *sqtt,
549                                                        enum amd_ip_type ip_type); /* Returns the ID union by value; sqtt is non-const. NOTE(review): presumably advances cmdbuf_ids_per_queue[ip_type] -- confirm. */
550 
551 bool ac_sqtt_get_trace(struct ac_sqtt *sqtt, const struct radeon_info *info,
552                        struct ac_sqtt_trace *sqtt_trace); /* sqtt_trace is the only non-const output-capable struct argument besides sqtt. */
553 
554 uint32_t ac_sqtt_get_ctrl(const struct radeon_info *info, bool enable);
555 
556 uint32_t ac_sqtt_get_shader_mask(const struct radeon_info *info);
557 
558 void ac_sqtt_emit_start(const struct radeon_info *info, struct ac_pm4_state *pm4,
559                         const struct ac_sqtt *sqtt, bool is_compute_queue); /* The ac_sqtt_emit_* functions take a mutable ac_pm4_state and read-only info/sqtt. */
560 
561 void ac_sqtt_emit_stop(const struct radeon_info *info, struct ac_pm4_state *pm4,
562                        bool is_compute_queue); /* Unlike emit_start/emit_wait, this one takes no ac_sqtt argument. */
563 
564 void ac_sqtt_emit_wait(const struct radeon_info *info, struct ac_pm4_state *pm4,
565                        const struct ac_sqtt *sqtt, bool is_compute_queue);
566 
567 #endif
568