1 /* 2 * Copyright 2020 Advanced Micro Devices, Inc. 3 * Copyright 2020 Valve Corporation 4 * 5 * SPDX-License-Identifier: MIT 6 */ 7 8 #ifndef AC_SQTT_H 9 #define AC_SQTT_H 10 11 #include <stdint.h> 12 #include <stdbool.h> 13 14 #include <assert.h> 15 #include "ac_pm4.h" 16 #include "ac_rgp.h" 17 #include "amd_family.h" 18 19 struct radeon_cmdbuf; 20 struct radeon_info; 21 22 /** 23 * SQ Thread tracing is a tracing mechanism that allows taking a detailed look 24 * at what the shader cores are doing. 25 * 26 * Among the things recorded are: 27 * - draws/dispatches + state 28 * - when each wave starts and stops. 29 * - for one SIMD per SE all instructions executed on that SIMD. 30 * 31 * The hardware stores all these as events in a buffer, no manual barrier 32 * around each command needed. The primary user of this is RGP. 33 */ 34 struct ac_sqtt { 35 struct radeon_cmdbuf *start_cs[2]; 36 struct radeon_cmdbuf *stop_cs[2]; 37 /* struct radeon_winsys_bo or struct pb_buffer */ 38 void *bo; 39 uint64_t buffer_va; 40 void *ptr; 41 uint32_t buffer_size; 42 int start_frame; 43 char *trigger_file; 44 bool instruction_timing_enabled; 45 46 uint32_t cmdbuf_ids_per_queue[AMD_NUM_IP_TYPES]; 47 48 struct rgp_code_object rgp_code_object; 49 struct rgp_loader_events rgp_loader_events; 50 struct rgp_pso_correlation rgp_pso_correlation; 51 52 struct rgp_queue_info rgp_queue_info; 53 struct rgp_queue_event rgp_queue_event; 54 55 struct rgp_clock_calibration rgp_clock_calibration; 56 57 struct hash_table_u64 *pipeline_bos; 58 }; 59 60 #define SQTT_BUFFER_ALIGN_SHIFT 12 61 62 struct ac_sqtt_data_info { 63 uint32_t cur_offset; 64 uint32_t trace_status; 65 union { 66 uint32_t gfx9_write_counter; 67 uint32_t gfx10_dropped_cntr; 68 }; 69 }; 70 71 struct ac_sqtt_data_se { 72 struct ac_sqtt_data_info info; 73 void *data_ptr; 74 uint32_t shader_engine; 75 uint32_t compute_unit; 76 }; 77 78 #define SQTT_MAX_TRACES 6 79 80 struct ac_sqtt_trace { 81 const struct rgp_code_object *rgp_code_object; 82 const struct rgp_loader_events *rgp_loader_events; 83 const struct rgp_pso_correlation *rgp_pso_correlation; 84 const struct rgp_queue_info *rgp_queue_info; 85 const struct rgp_queue_event *rgp_queue_event; 86 const struct rgp_clock_calibration *rgp_clock_calibration; 87 88 uint32_t num_traces; 89 struct ac_sqtt_data_se traces[SQTT_MAX_TRACES]; 90 }; 91 92 uint64_t ac_sqtt_get_info_offset(unsigned se); 93 94 uint64_t ac_sqtt_get_data_offset(const struct radeon_info *rad_info, const struct ac_sqtt *sqtt, 95 unsigned se); 96 97 void ac_sqtt_init(struct ac_sqtt *data); 98 99 void ac_sqtt_finish(struct ac_sqtt *data); 100 101 bool ac_is_sqtt_complete(const struct radeon_info *rad_info, const struct ac_sqtt *sqtt, 102 const struct ac_sqtt_data_info *info); 103 104 uint32_t ac_get_expected_buffer_size(struct radeon_info *rad_info, 105 const struct ac_sqtt_data_info *info); 106 107 /** 108 * Identifiers for RGP SQ thread-tracing markers (Table 1) 109 */ 110 enum rgp_sqtt_marker_identifier 111 { 112 RGP_SQTT_MARKER_IDENTIFIER_EVENT = 0x0, 113 RGP_SQTT_MARKER_IDENTIFIER_CB_START = 0x1, 114 RGP_SQTT_MARKER_IDENTIFIER_CB_END = 0x2, 115 RGP_SQTT_MARKER_IDENTIFIER_BARRIER_START = 0x3, 116 RGP_SQTT_MARKER_IDENTIFIER_BARRIER_END = 0x4, 117 RGP_SQTT_MARKER_IDENTIFIER_USER_EVENT = 0x5, 118 RGP_SQTT_MARKER_IDENTIFIER_GENERAL_API = 0x6, 119 RGP_SQTT_MARKER_IDENTIFIER_SYNC = 0x7, 120 RGP_SQTT_MARKER_IDENTIFIER_PRESENT = 0x8, 121 RGP_SQTT_MARKER_IDENTIFIER_LAYOUT_TRANSITION = 0x9, 122 RGP_SQTT_MARKER_IDENTIFIER_RENDER_PASS = 0xA, 123 RGP_SQTT_MARKER_IDENTIFIER_RESERVED2 = 0xB, 124 RGP_SQTT_MARKER_IDENTIFIER_BIND_PIPELINE = 0xC, 125 RGP_SQTT_MARKER_IDENTIFIER_RESERVED4 = 0xD, 126 RGP_SQTT_MARKER_IDENTIFIER_RESERVED5 = 0xE, 127 RGP_SQTT_MARKER_IDENTIFIER_RESERVED6 = 0xF 128 }; 129 130 /** 131 * Command buffer IDs used in RGP SQ thread-tracing markers (only 20 bits). 132 */ 133 union rgp_sqtt_marker_cb_id { 134 struct { 135 uint32_t per_frame : 1; /* Must be 1, frame-based command buffer ID. */ 136 uint32_t frame_index : 7; 137 uint32_t cb_index : 12; /* Command buffer index within the frame. */ 138 uint32_t reserved : 12; 139 } per_frame_cb_id; 140 141 struct { 142 uint32_t per_frame : 1; /* Must be 0, global command buffer ID. */ 143 uint32_t cb_index : 19; /* Global command buffer index. */ 144 uint32_t reserved : 12; 145 } global_cb_id; 146 147 uint32_t all; 148 }; 149 150 /** 151 * RGP SQ thread-tracing marker for the start of a command buffer. (Table 2) 152 */ 153 struct rgp_sqtt_marker_cb_start { 154 union { 155 struct { 156 uint32_t identifier : 4; 157 uint32_t ext_dwords : 3; 158 uint32_t cb_id : 20; 159 uint32_t queue : 5; 160 }; 161 uint32_t dword01; 162 }; 163 union { 164 uint32_t device_id_low; 165 uint32_t dword02; 166 }; 167 union { 168 uint32_t device_id_high; 169 uint32_t dword03; 170 }; 171 union { 172 uint32_t queue_flags; 173 uint32_t dword04; 174 }; 175 }; 176 177 static_assert(sizeof(struct rgp_sqtt_marker_cb_start) == 16, 178 "rgp_sqtt_marker_cb_start doesn't match RGP spec"); 179 180 /** 181 * 182 * RGP SQ thread-tracing marker for the end of a command buffer. (Table 3) 183 */ 184 struct rgp_sqtt_marker_cb_end { 185 union { 186 struct { 187 uint32_t identifier : 4; 188 uint32_t ext_dwords : 3; 189 uint32_t cb_id : 20; 190 uint32_t reserved : 5; 191 }; 192 uint32_t dword01; 193 }; 194 union { 195 uint32_t device_id_low; 196 uint32_t dword02; 197 }; 198 union { 199 uint32_t device_id_high; 200 uint32_t dword03; 201 }; 202 }; 203 204 static_assert(sizeof(struct rgp_sqtt_marker_cb_end) == 12, 205 "rgp_sqtt_marker_cb_end doesn't match RGP spec"); 206 207 /** 208 * API types used in RGP SQ thread-tracing markers for the "General API" 209 * packet. 210 */ 211 enum rgp_sqtt_marker_general_api_type 212 { 213 ApiCmdBindPipeline = 0, 214 ApiCmdBindDescriptorSets = 1, 215 ApiCmdBindIndexBuffer = 2, 216 ApiCmdBindVertexBuffers = 3, 217 ApiCmdDraw = 4, 218 ApiCmdDrawIndexed = 5, 219 ApiCmdDrawIndirect = 6, 220 ApiCmdDrawIndexedIndirect = 7, 221 ApiCmdDrawIndirectCountAMD = 8, 222 ApiCmdDrawIndexedIndirectCountAMD = 9, 223 ApiCmdDispatch = 10, 224 ApiCmdDispatchIndirect = 11, 225 ApiCmdCopyBuffer = 12, 226 ApiCmdCopyImage = 13, 227 ApiCmdBlitImage = 14, 228 ApiCmdCopyBufferToImage = 15, 229 ApiCmdCopyImageToBuffer = 16, 230 ApiCmdUpdateBuffer = 17, 231 ApiCmdFillBuffer = 18, 232 ApiCmdClearColorImage = 19, 233 ApiCmdClearDepthStencilImage = 20, 234 ApiCmdClearAttachments = 21, 235 ApiCmdResolveImage = 22, 236 ApiCmdWaitEvents = 23, 237 ApiCmdPipelineBarrier = 24, 238 ApiCmdBeginQuery = 25, 239 ApiCmdEndQuery = 26, 240 ApiCmdResetQueryPool = 27, 241 ApiCmdWriteTimestamp = 28, 242 ApiCmdCopyQueryPoolResults = 29, 243 ApiCmdPushConstants = 30, 244 ApiCmdBeginRenderPass = 31, 245 ApiCmdNextSubpass = 32, 246 ApiCmdEndRenderPass = 33, 247 ApiCmdExecuteCommands = 34, 248 ApiCmdSetViewport = 35, 249 ApiCmdSetScissor = 36, 250 ApiCmdSetLineWidth = 37, 251 ApiCmdSetDepthBias = 38, 252 ApiCmdSetBlendConstants = 39, 253 ApiCmdSetDepthBounds = 40, 254 ApiCmdSetStencilCompareMask = 41, 255 ApiCmdSetStencilWriteMask = 42, 256 ApiCmdSetStencilReference = 43, 257 ApiCmdDrawIndirectCount = 44, 258 ApiCmdDrawIndexedIndirectCount = 45, 259 /* gap */ 260 ApiCmdDrawMeshTasksEXT = 47, 261 ApiCmdDrawMeshTasksIndirectCountEXT = 48, 262 ApiCmdDrawMeshTasksIndirectEXT = 49, 263 264 ApiRayTracingSeparateCompiled = 0x800000, 265 ApiInvalid = 0xffffffff 266 }; 267 268 /** 269 * RGP SQ thread-tracing marker for a "General API" instrumentation packet. 270 */ 271 struct rgp_sqtt_marker_general_api { 272 union { 273 struct { 274 uint32_t identifier : 4; 275 uint32_t ext_dwords : 3; 276 uint32_t api_type : 20; 277 uint32_t is_end : 1; 278 uint32_t reserved : 4; 279 }; 280 uint32_t dword01; 281 }; 282 }; 283 284 static_assert(sizeof(struct rgp_sqtt_marker_general_api) == 4, 285 "rgp_sqtt_marker_general_api doesn't match RGP spec"); 286 287 /** 288 * API types used in RGP SQ thread-tracing markers (Table 16). 289 */ 290 enum rgp_sqtt_marker_event_type 291 { 292 EventCmdDraw = 0, 293 EventCmdDrawIndexed = 1, 294 EventCmdDrawIndirect = 2, 295 EventCmdDrawIndexedIndirect = 3, 296 EventCmdDrawIndirectCountAMD = 4, 297 EventCmdDrawIndexedIndirectCountAMD = 5, 298 EventCmdDispatch = 6, 299 EventCmdDispatchIndirect = 7, 300 EventCmdCopyBuffer = 8, 301 EventCmdCopyImage = 9, 302 EventCmdBlitImage = 10, 303 EventCmdCopyBufferToImage = 11, 304 EventCmdCopyImageToBuffer = 12, 305 EventCmdUpdateBuffer = 13, 306 EventCmdFillBuffer = 14, 307 EventCmdClearColorImage = 15, 308 EventCmdClearDepthStencilImage = 16, 309 EventCmdClearAttachments = 17, 310 EventCmdResolveImage = 18, 311 EventCmdWaitEvents = 19, 312 EventCmdPipelineBarrier = 20, 313 EventCmdResetQueryPool = 21, 314 EventCmdCopyQueryPoolResults = 22, 315 EventRenderPassColorClear = 23, 316 EventRenderPassDepthStencilClear = 24, 317 EventRenderPassResolve = 25, 318 EventInternalUnknown = 26, 319 EventCmdDrawIndirectCount = 27, 320 EventCmdDrawIndexedIndirectCount = 28, 321 /* gap */ 322 EventCmdTraceRaysKHR = 30, 323 EventCmdTraceRaysIndirectKHR = 31, 324 EventCmdBuildAccelerationStructuresKHR = 32, 325 EventCmdBuildAccelerationStructuresIndirectKHR = 33, 326 EventCmdCopyAccelerationStructureKHR = 34, 327 EventCmdCopyAccelerationStructureToMemoryKHR = 35, 328 EventCmdCopyMemoryToAccelerationStructureKHR = 36, 329 /* gap */ 330 EventCmdDrawMeshTasksEXT = 41, 331 EventCmdDrawMeshTasksIndirectCountEXT = 42, 332 EventCmdDrawMeshTasksIndirectEXT = 43, 333 EventUnknown = 0x7fff, 334 EventInvalid = 0xffffffff 335 }; 336 337 /** 338 * "Event (Per-draw/dispatch)" RGP SQ thread-tracing marker. (Table 4) 339 */ 340 struct rgp_sqtt_marker_event { 341 union { 342 struct { 343 uint32_t identifier : 4; 344 uint32_t ext_dwords : 3; 345 uint32_t api_type : 24; 346 uint32_t has_thread_dims : 1; 347 }; 348 uint32_t dword01; 349 }; 350 union { 351 struct { 352 uint32_t cb_id : 20; 353 uint32_t vertex_offset_reg_idx : 4; 354 uint32_t instance_offset_reg_idx : 4; 355 uint32_t draw_index_reg_idx : 4; 356 }; 357 uint32_t dword02; 358 }; 359 union { 360 uint32_t cmd_id; 361 uint32_t dword03; 362 }; 363 }; 364 365 static_assert(sizeof(struct rgp_sqtt_marker_event) == 12, 366 "rgp_sqtt_marker_event doesn't match RGP spec"); 367 368 /** 369 * Per-dispatch specific marker where workgroup dims are included. 370 */ 371 struct rgp_sqtt_marker_event_with_dims { 372 struct rgp_sqtt_marker_event event; 373 uint32_t thread_x; 374 uint32_t thread_y; 375 uint32_t thread_z; 376 }; 377 378 static_assert(sizeof(struct rgp_sqtt_marker_event_with_dims) == 24, 379 "rgp_sqtt_marker_event_with_dims doesn't match RGP spec"); 380 381 /** 382 * "Barrier Start" RGP SQTT instrumentation marker (Table 5) 383 */ 384 struct rgp_sqtt_marker_barrier_start { 385 union { 386 struct { 387 uint32_t identifier : 4; 388 uint32_t ext_dwords : 3; 389 uint32_t cb_id : 20; 390 uint32_t reserved : 5; 391 }; 392 uint32_t dword01; 393 }; 394 union { 395 struct { 396 uint32_t driver_reason : 31; 397 uint32_t internal : 1; 398 }; 399 uint32_t dword02; 400 }; 401 }; 402 403 static_assert(sizeof(struct rgp_sqtt_marker_barrier_start) == 8, 404 "rgp_sqtt_marker_barrier_start doesn't match RGP spec"); 405 406 /** 407 * "Barrier End" RGP SQTT instrumentation marker (Table 6) 408 */ 409 struct rgp_sqtt_marker_barrier_end { 410 union { 411 struct { 412 uint32_t identifier : 4; 413 uint32_t ext_dwords : 3; 414 uint32_t cb_id : 20; 415 uint32_t wait_on_eop_ts : 1; 416 uint32_t vs_partial_flush : 1; 417 uint32_t ps_partial_flush : 1; 418 uint32_t cs_partial_flush : 1; 419 uint32_t pfp_sync_me : 1; 420 }; 421 uint32_t dword01; 422 }; 423 union { 424 struct { 425 uint32_t sync_cp_dma : 1; 426 uint32_t inval_tcp : 1; 427 uint32_t inval_sqI : 1; 428 uint32_t inval_sqK : 1; 429 uint32_t flush_tcc : 1; 430 uint32_t inval_tcc : 1; 431 uint32_t flush_cb : 1; 432 uint32_t inval_cb : 1; 433 uint32_t flush_db : 1; 434 uint32_t inval_db : 1; 435 uint32_t num_layout_transitions : 16; 436 uint32_t inval_gl1 : 1; 437 uint32_t wait_on_ts : 1; 438 uint32_t eop_ts_bottom_of_pipe : 1; 439 uint32_t eos_ts_ps_done : 1; 440 uint32_t eos_ts_cs_done : 1; 441 uint32_t reserved : 1; 442 }; 443 uint32_t dword02; 444 }; 445 }; 446 447 static_assert(sizeof(struct rgp_sqtt_marker_barrier_end) == 8, 448 "rgp_sqtt_marker_barrier_end doesn't match RGP spec"); 449 450 /** 451 * "Layout Transition" RGP SQTT instrumentation marker (Table 7) 452 */ 453 struct rgp_sqtt_marker_layout_transition { 454 union { 455 struct { 456 uint32_t identifier : 4; 457 uint32_t ext_dwords : 3; 458 uint32_t depth_stencil_expand : 1; 459 uint32_t htile_hiz_range_expand : 1; 460 uint32_t depth_stencil_resummarize : 1; 461 uint32_t dcc_decompress : 1; 462 uint32_t fmask_decompress : 1; 463 uint32_t fast_clear_eliminate : 1; 464 uint32_t fmask_color_expand : 1; 465 uint32_t init_mask_ram : 1; 466 uint32_t reserved1 : 17; 467 }; 468 uint32_t dword01; 469 }; 470 union { 471 struct { 472 uint32_t reserved2 : 32; 473 }; 474 uint32_t dword02; 475 }; 476 }; 477 478 static_assert(sizeof(struct rgp_sqtt_marker_layout_transition) == 8, 479 "rgp_sqtt_marker_layout_transition doesn't match RGP spec"); 480 481 482 /** 483 * "User Event" RGP SQTT instrumentation marker (Table 8) 484 */ 485 struct rgp_sqtt_marker_user_event { 486 union { 487 struct { 488 uint32_t identifier : 4; 489 uint32_t reserved0 : 8; 490 uint32_t data_type : 8; 491 uint32_t reserved1 : 12; 492 }; 493 uint32_t dword01; 494 }; 495 }; 496 struct rgp_sqtt_marker_user_event_with_length { 497 struct rgp_sqtt_marker_user_event user_event; 498 uint32_t length; 499 }; 500 501 static_assert(sizeof(struct rgp_sqtt_marker_user_event) == 4, 502 "rgp_sqtt_marker_user_event doesn't match RGP spec"); 503 504 enum rgp_sqtt_marker_user_event_type 505 { 506 UserEventTrigger = 0, 507 UserEventPop, 508 UserEventPush, 509 UserEventObjectName, 510 }; 511 512 /** 513 * "Pipeline bind" RGP SQTT instrumentation marker (Table 12) 514 */ 515 struct rgp_sqtt_marker_pipeline_bind { 516 union { 517 struct { 518 uint32_t identifier : 4; 519 uint32_t ext_dwords : 3; 520 uint32_t bind_point : 1; 521 uint32_t cb_id : 20; 522 uint32_t reserved : 4; 523 }; 524 uint32_t dword01; 525 }; 526 union { 527 uint32_t api_pso_hash[2]; 528 struct { 529 uint32_t dword02; 530 uint32_t dword03; 531 }; 532 }; 533 }; 534 535 static_assert(sizeof(struct rgp_sqtt_marker_pipeline_bind) == 12, 536 "rgp_sqtt_marker_pipeline_bind doesn't match RGP spec"); 537 538 bool ac_sqtt_add_pso_correlation(struct ac_sqtt *sqtt, uint64_t pipeline_hash, uint64_t api_hash); 539 540 bool ac_sqtt_add_code_object_loader_event(struct ac_sqtt *sqtt, uint64_t pipeline_hash, 541 uint64_t base_address); 542 543 bool ac_sqtt_add_clock_calibration(struct ac_sqtt *sqtt, uint64_t cpu_timestamp, 544 uint64_t gpu_timestamp); 545 546 bool ac_check_profile_state(const struct radeon_info *info); 547 548 union rgp_sqtt_marker_cb_id ac_sqtt_get_next_cmdbuf_id(struct ac_sqtt *sqtt, 549 enum amd_ip_type ip_type); 550 551 bool ac_sqtt_get_trace(struct ac_sqtt *sqtt, const struct radeon_info *info, 552 struct ac_sqtt_trace *sqtt_trace); 553 554 uint32_t ac_sqtt_get_ctrl(const struct radeon_info *info, bool enable); 555 556 uint32_t ac_sqtt_get_shader_mask(const struct radeon_info *info); 557 558 void ac_sqtt_emit_start(const struct radeon_info *info, struct ac_pm4_state *pm4, 559 const struct ac_sqtt *sqtt, bool is_compute_queue); 560 561 void ac_sqtt_emit_stop(const struct radeon_info *info, struct ac_pm4_state *pm4, 562 bool is_compute_queue); 563 564 void ac_sqtt_emit_wait(const struct radeon_info *info, struct ac_pm4_state *pm4, 565 const struct ac_sqtt *sqtt, bool is_compute_queue); 566 567 #endif 568