1 /* 2 * Copyright 2015 Advanced Micro Devices, Inc. 3 * Authors: 4 * Nicolai Hähnle <[email protected]> 5 * SPDX-License-Identifier: MIT 6 */ 7 8 #ifndef R600_QUERY_H 9 #define R600_QUERY_H 10 11 #include "util/u_threaded_context.h" 12 13 struct pipe_context; 14 struct pipe_query; 15 struct pipe_resource; 16 17 struct r600_common_context; 18 struct r600_common_screen; 19 struct r600_query; 20 struct r600_query_hw; 21 struct r600_resource; 22 23 enum { 24 R600_QUERY_DRAW_CALLS = PIPE_QUERY_DRIVER_SPECIFIC, 25 R600_QUERY_DECOMPRESS_CALLS, 26 R600_QUERY_MRT_DRAW_CALLS, 27 R600_QUERY_PRIM_RESTART_CALLS, 28 R600_QUERY_SPILL_DRAW_CALLS, 29 R600_QUERY_COMPUTE_CALLS, 30 R600_QUERY_SPILL_COMPUTE_CALLS, 31 R600_QUERY_DMA_CALLS, 32 R600_QUERY_CP_DMA_CALLS, 33 R600_QUERY_NUM_VS_FLUSHES, 34 R600_QUERY_NUM_PS_FLUSHES, 35 R600_QUERY_NUM_CS_FLUSHES, 36 R600_QUERY_NUM_CB_CACHE_FLUSHES, 37 R600_QUERY_NUM_DB_CACHE_FLUSHES, 38 R600_QUERY_NUM_RESIDENT_HANDLES, 39 R600_QUERY_TC_OFFLOADED_SLOTS, 40 R600_QUERY_TC_DIRECT_SLOTS, 41 R600_QUERY_TC_NUM_SYNCS, 42 R600_QUERY_CS_THREAD_BUSY, 43 R600_QUERY_GALLIUM_THREAD_BUSY, 44 R600_QUERY_REQUESTED_VRAM, 45 R600_QUERY_REQUESTED_GTT, 46 R600_QUERY_MAPPED_VRAM, 47 R600_QUERY_MAPPED_GTT, 48 R600_QUERY_BUFFER_WAIT_TIME, 49 R600_QUERY_NUM_MAPPED_BUFFERS, 50 R600_QUERY_NUM_GFX_IBS, 51 R600_QUERY_NUM_SDMA_IBS, 52 R600_QUERY_GFX_BO_LIST_SIZE, 53 R600_QUERY_NUM_BYTES_MOVED, 54 R600_QUERY_NUM_EVICTIONS, 55 R600_QUERY_NUM_VRAM_CPU_PAGE_FAULTS, 56 R600_QUERY_VRAM_USAGE, 57 R600_QUERY_VRAM_VIS_USAGE, 58 R600_QUERY_GTT_USAGE, 59 R600_QUERY_GPU_TEMPERATURE, 60 R600_QUERY_CURRENT_GPU_SCLK, 61 R600_QUERY_CURRENT_GPU_MCLK, 62 R600_QUERY_GPU_LOAD, 63 R600_QUERY_GPU_SHADERS_BUSY, 64 R600_QUERY_GPU_TA_BUSY, 65 R600_QUERY_GPU_GDS_BUSY, 66 R600_QUERY_GPU_VGT_BUSY, 67 R600_QUERY_GPU_IA_BUSY, 68 R600_QUERY_GPU_SX_BUSY, 69 R600_QUERY_GPU_WD_BUSY, 70 R600_QUERY_GPU_BCI_BUSY, 71 R600_QUERY_GPU_SC_BUSY, 72 R600_QUERY_GPU_PA_BUSY, 73 R600_QUERY_GPU_DB_BUSY, 74 R600_QUERY_GPU_CP_BUSY, 75 R600_QUERY_GPU_CB_BUSY, 76 R600_QUERY_GPU_SDMA_BUSY, 77 R600_QUERY_GPU_PFP_BUSY, 78 R600_QUERY_GPU_MEQ_BUSY, 79 R600_QUERY_GPU_ME_BUSY, 80 R600_QUERY_GPU_SURF_SYNC_BUSY, 81 R600_QUERY_GPU_CP_DMA_BUSY, 82 R600_QUERY_GPU_SCRATCH_RAM_BUSY, 83 R600_QUERY_NUM_COMPILATIONS, 84 R600_QUERY_NUM_SHADERS_CREATED, 85 R600_QUERY_NUM_SHADER_CACHE_HITS, 86 R600_QUERY_GPIN_ASIC_ID, 87 R600_QUERY_GPIN_NUM_SIMD, 88 R600_QUERY_GPIN_NUM_RB, 89 R600_QUERY_GPIN_NUM_SPI, 90 R600_QUERY_GPIN_NUM_SE, 91 92 R600_QUERY_FIRST_PERFCOUNTER = PIPE_QUERY_DRIVER_SPECIFIC + 100, 93 }; 94 95 enum { 96 R600_QUERY_GROUP_GPIN = 0, 97 R600_NUM_SW_QUERY_GROUPS 98 }; 99 100 struct r600_query_ops { 101 void (*destroy)(struct r600_common_screen *, struct r600_query *); 102 bool (*begin)(struct r600_common_context *, struct r600_query *); 103 bool (*end)(struct r600_common_context *, struct r600_query *); 104 bool (*get_result)(struct r600_common_context *, 105 struct r600_query *, bool wait, 106 union pipe_query_result *result); 107 void (*get_result_resource)(struct r600_common_context *, 108 struct r600_query *, 109 enum pipe_query_flags flags, 110 enum pipe_query_value_type result_type, 111 int index, 112 struct pipe_resource *resource, 113 unsigned offset); 114 }; 115 116 struct r600_query { 117 struct threaded_query b; 118 struct r600_query_ops *ops; 119 120 /* The type of query */ 121 unsigned type; 122 }; 123 124 enum { 125 R600_QUERY_HW_FLAG_NO_START = (1 << 0), 126 /* gap */ 127 /* whether begin_query doesn't clear the result */ 128 R600_QUERY_HW_FLAG_BEGIN_RESUMES = (1 << 2), 129 }; 130 131 struct r600_query_hw_ops { 132 bool (*prepare_buffer)(struct r600_common_screen *, 133 struct r600_query_hw *, 134 struct r600_resource *); 135 void (*emit_start)(struct r600_common_context *, 136 struct r600_query_hw *, 137 struct r600_resource *buffer, uint64_t va); 138 void (*emit_stop)(struct r600_common_context *, 139 struct r600_query_hw *, 140 struct r600_resource *buffer, uint64_t va); 141 void (*clear_result)(struct r600_query_hw *, union pipe_query_result *); 142 void (*add_result)(struct r600_common_screen *screen, 143 struct r600_query_hw *, void *buffer, 144 union pipe_query_result *result); 145 }; 146 147 struct r600_query_buffer { 148 /* The buffer where query results are stored. */ 149 struct r600_resource *buf; 150 /* Offset of the next free result after current query data */ 151 unsigned results_end; 152 /* If a query buffer is full, a new buffer is created and the old one 153 * is put in here. When we calculate the result, we sum up the samples 154 * from all buffers. */ 155 struct r600_query_buffer *previous; 156 }; 157 158 struct r600_query_hw { 159 struct r600_query b; 160 struct r600_query_hw_ops *ops; 161 unsigned flags; 162 163 /* The query buffer and how many results are in it. */ 164 struct r600_query_buffer buffer; 165 /* Size of the result in memory for both begin_query and end_query, 166 * this can be one or two numbers, or it could even be a size of a structure. */ 167 unsigned result_size; 168 /* The number of dwords for begin_query or end_query. */ 169 unsigned num_cs_dw_begin; 170 unsigned num_cs_dw_end; 171 /* Linked list of queries */ 172 struct list_head list; 173 /* For transform feedback: which stream the query is for */ 174 unsigned stream; 175 }; 176 177 bool r600_query_hw_init(struct r600_common_screen *rscreen, 178 struct r600_query_hw *query); 179 void r600_query_hw_destroy(struct r600_common_screen *rscreen, 180 struct r600_query *rquery); 181 bool r600_query_hw_begin(struct r600_common_context *rctx, 182 struct r600_query *rquery); 183 bool r600_query_hw_end(struct r600_common_context *rctx, 184 struct r600_query *rquery); 185 bool r600_query_hw_get_result(struct r600_common_context *rctx, 186 struct r600_query *rquery, 187 bool wait, 188 union pipe_query_result *result); 189 190 /* Performance counters */ 191 enum { 192 /* This block is part of the shader engine */ 193 R600_PC_BLOCK_SE = (1 << 0), 194 195 /* Expose per-instance groups instead of summing all instances (within 196 * an SE). */ 197 R600_PC_BLOCK_INSTANCE_GROUPS = (1 << 1), 198 199 /* Expose per-SE groups instead of summing instances across SEs. */ 200 R600_PC_BLOCK_SE_GROUPS = (1 << 2), 201 202 /* Shader block */ 203 R600_PC_BLOCK_SHADER = (1 << 3), 204 205 /* Non-shader block with perfcounters windowed by shaders. */ 206 R600_PC_BLOCK_SHADER_WINDOWED = (1 << 4), 207 }; 208 209 /* Describes a hardware block with performance counters. Multiple instances of 210 * each block, possibly per-SE, may exist on the chip. Depending on the block 211 * and on the user's configuration, we either 212 * (a) expose every instance as a performance counter group, 213 * (b) expose a single performance counter group that reports the sum over all 214 * instances, or 215 * (c) expose one performance counter group per instance, but summed over all 216 * shader engines. 217 */ 218 struct r600_perfcounter_block { 219 const char *basename; 220 unsigned flags; 221 unsigned num_counters; 222 unsigned num_selectors; 223 unsigned num_instances; 224 225 unsigned num_groups; 226 char *group_names; 227 unsigned group_name_stride; 228 229 char *selector_names; 230 unsigned selector_name_stride; 231 232 void *data; 233 }; 234 235 struct r600_perfcounters { 236 unsigned num_groups; 237 unsigned num_blocks; 238 struct r600_perfcounter_block *blocks; 239 240 unsigned num_start_cs_dwords; 241 unsigned num_stop_cs_dwords; 242 unsigned num_instance_cs_dwords; 243 unsigned num_shaders_cs_dwords; 244 245 unsigned num_shader_types; 246 const char * const *shader_type_suffixes; 247 const unsigned *shader_type_bits; 248 249 void (*get_size)(struct r600_perfcounter_block *, 250 unsigned count, unsigned *selectors, 251 unsigned *num_select_dw, unsigned *num_read_dw); 252 253 void (*emit_instance)(struct r600_common_context *, 254 int se, int instance); 255 void (*emit_shaders)(struct r600_common_context *, unsigned shaders); 256 void (*emit_select)(struct r600_common_context *, 257 struct r600_perfcounter_block *, 258 unsigned count, unsigned *selectors); 259 void (*emit_start)(struct r600_common_context *, 260 struct r600_resource *buffer, uint64_t va); 261 void (*emit_stop)(struct r600_common_context *, 262 struct r600_resource *buffer, uint64_t va); 263 void (*emit_read)(struct r600_common_context *, 264 struct r600_perfcounter_block *, 265 unsigned count, unsigned *selectors, 266 struct r600_resource *buffer, uint64_t va); 267 268 void (*cleanup)(struct r600_common_screen *); 269 270 bool separate_se; 271 bool separate_instance; 272 }; 273 274 struct pipe_query *r600_create_batch_query(struct pipe_context *ctx, 275 unsigned num_queries, 276 unsigned *query_types); 277 278 int r600_get_perfcounter_info(struct r600_common_screen *, 279 unsigned index, 280 struct pipe_driver_query_info *info); 281 int r600_get_perfcounter_group_info(struct r600_common_screen *, 282 unsigned index, 283 struct pipe_driver_query_group_info *info); 284 285 bool r600_perfcounters_init(struct r600_perfcounters *, unsigned num_blocks); 286 void r600_perfcounters_add_block(struct r600_common_screen *, 287 struct r600_perfcounters *, 288 const char *name, unsigned flags, 289 unsigned counters, unsigned selectors, 290 unsigned instances, void *data); 291 void r600_perfcounters_do_destroy(struct r600_perfcounters *); 292 void r600_query_hw_reset_buffers(struct r600_common_context *rctx, 293 struct r600_query_hw *query); 294 295 struct r600_qbo_state { 296 void *saved_compute; 297 struct pipe_constant_buffer saved_const0; 298 struct pipe_shader_buffer saved_ssbo[3]; 299 }; 300 301 #endif /* R600_QUERY_H */ 302