xref: /aosp_15_r20/external/mesa3d/src/gallium/drivers/r600/r600_query.h (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright 2015 Advanced Micro Devices, Inc.
3  * Authors:
4  *  Nicolai Hähnle <[email protected]>
5  * SPDX-License-Identifier: MIT
6  */
7 
8 #ifndef R600_QUERY_H
9 #define R600_QUERY_H
10 
11 #include "util/u_threaded_context.h"
12 
/* Forward declarations. These let the function-pointer tables and prototypes
 * below name the types through pointers; struct r600_query and
 * struct r600_query_hw are fully defined later in this header, the others
 * are not defined here. */
13 struct pipe_context;
14 struct pipe_query;
15 struct pipe_resource;
16 
17 struct r600_common_context;
18 struct r600_common_screen;
19 struct r600_query;
20 struct r600_query_hw;
21 struct r600_resource;
22 
/* Driver-specific query type identifiers.
 *
 * The first enumerator is pinned to PIPE_QUERY_DRIVER_SPECIFIC and every
 * following one takes the next integer, so the order of the entries defines
 * their numeric values: inserting or reordering entries renumbers everything
 * after the change. */
23 enum {
24 	R600_QUERY_DRAW_CALLS = PIPE_QUERY_DRIVER_SPECIFIC,
25 	R600_QUERY_DECOMPRESS_CALLS,
26 	R600_QUERY_MRT_DRAW_CALLS,
27 	R600_QUERY_PRIM_RESTART_CALLS,
28 	R600_QUERY_SPILL_DRAW_CALLS,
29 	R600_QUERY_COMPUTE_CALLS,
30 	R600_QUERY_SPILL_COMPUTE_CALLS,
31 	R600_QUERY_DMA_CALLS,
32 	R600_QUERY_CP_DMA_CALLS,
33 	R600_QUERY_NUM_VS_FLUSHES,
34 	R600_QUERY_NUM_PS_FLUSHES,
35 	R600_QUERY_NUM_CS_FLUSHES,
36 	R600_QUERY_NUM_CB_CACHE_FLUSHES,
37 	R600_QUERY_NUM_DB_CACHE_FLUSHES,
38 	R600_QUERY_NUM_RESIDENT_HANDLES,
39 	R600_QUERY_TC_OFFLOADED_SLOTS,
40 	R600_QUERY_TC_DIRECT_SLOTS,
41 	R600_QUERY_TC_NUM_SYNCS,
42 	R600_QUERY_CS_THREAD_BUSY,
43 	R600_QUERY_GALLIUM_THREAD_BUSY,
44 	R600_QUERY_REQUESTED_VRAM,
45 	R600_QUERY_REQUESTED_GTT,
46 	R600_QUERY_MAPPED_VRAM,
47 	R600_QUERY_MAPPED_GTT,
48 	R600_QUERY_BUFFER_WAIT_TIME,
49 	R600_QUERY_NUM_MAPPED_BUFFERS,
50 	R600_QUERY_NUM_GFX_IBS,
51 	R600_QUERY_NUM_SDMA_IBS,
52 	R600_QUERY_GFX_BO_LIST_SIZE,
53 	R600_QUERY_NUM_BYTES_MOVED,
54 	R600_QUERY_NUM_EVICTIONS,
55 	R600_QUERY_NUM_VRAM_CPU_PAGE_FAULTS,
56 	R600_QUERY_VRAM_USAGE,
57 	R600_QUERY_VRAM_VIS_USAGE,
58 	R600_QUERY_GTT_USAGE,
59 	R600_QUERY_GPU_TEMPERATURE,
60 	R600_QUERY_CURRENT_GPU_SCLK,
61 	R600_QUERY_CURRENT_GPU_MCLK,
62 	R600_QUERY_GPU_LOAD,
63 	R600_QUERY_GPU_SHADERS_BUSY,
64 	R600_QUERY_GPU_TA_BUSY,
65 	R600_QUERY_GPU_GDS_BUSY,
66 	R600_QUERY_GPU_VGT_BUSY,
67 	R600_QUERY_GPU_IA_BUSY,
68 	R600_QUERY_GPU_SX_BUSY,
69 	R600_QUERY_GPU_WD_BUSY,
70 	R600_QUERY_GPU_BCI_BUSY,
71 	R600_QUERY_GPU_SC_BUSY,
72 	R600_QUERY_GPU_PA_BUSY,
73 	R600_QUERY_GPU_DB_BUSY,
74 	R600_QUERY_GPU_CP_BUSY,
75 	R600_QUERY_GPU_CB_BUSY,
76 	R600_QUERY_GPU_SDMA_BUSY,
77 	R600_QUERY_GPU_PFP_BUSY,
78 	R600_QUERY_GPU_MEQ_BUSY,
79 	R600_QUERY_GPU_ME_BUSY,
80 	R600_QUERY_GPU_SURF_SYNC_BUSY,
81 	R600_QUERY_GPU_CP_DMA_BUSY,
82 	R600_QUERY_GPU_SCRATCH_RAM_BUSY,
83 	R600_QUERY_NUM_COMPILATIONS,
84 	R600_QUERY_NUM_SHADERS_CREATED,
85 	R600_QUERY_NUM_SHADER_CACHE_HITS,
86 	R600_QUERY_GPIN_ASIC_ID,
87 	R600_QUERY_GPIN_NUM_SIMD,
88 	R600_QUERY_GPIN_NUM_RB,
89 	R600_QUERY_GPIN_NUM_SPI,
90 	R600_QUERY_GPIN_NUM_SE,
91 
	/* Explicitly placed at PIPE_QUERY_DRIVER_SPECIFIC + 100 instead of
	 * continuing the sequence. The sequential entries above must stay
	 * below that offset, otherwise their values would overlap with this
	 * one. Presumably the first id handed out to performance-counter
	 * queries -- TODO confirm against the perfcounter implementation. */
92 	R600_QUERY_FIRST_PERFCOUNTER = PIPE_QUERY_DRIVER_SPECIFIC + 100,
93 };
94 
/* Query group indices, starting at 0.
 *
 * R600_NUM_SW_QUERY_GROUPS is deliberately the last enumerator: it is not a
 * group itself but evaluates to the number of groups listed before it
 * (currently 1). New groups must be added above it. */
95 enum {
96 	R600_QUERY_GROUP_GPIN = 0,
97 	R600_NUM_SW_QUERY_GROUPS
98 };
99 
/* Table of function pointers implementing the operations on a query.
 *
 * struct r600_query stores a pointer to one of these tables. The
 * r600_query_hw_destroy/begin/end/get_result functions declared further down
 * in this header have signatures matching the first four members.
 *
 * The meaning of the bool return values is defined by the implementations,
 * which are not visible in this header. */
100 struct r600_query_ops {
	/* Takes the screen (not a context) and the query to destroy. */
101 	void (*destroy)(struct r600_common_screen *, struct r600_query *);
102 	bool (*begin)(struct r600_common_context *, struct r600_query *);
103 	bool (*end)(struct r600_common_context *, struct r600_query *);
	/* Reads the query result into *result; 'wait' presumably selects
	 * between blocking and non-blocking readback -- confirm in the
	 * implementation. */
104 	bool (*get_result)(struct r600_common_context *,
105 			   struct r600_query *, bool wait,
106 			   union pipe_query_result *result);
	/* Variant that targets a pipe_resource at a given offset instead of
	 * a CPU-side result union. */
107 	void (*get_result_resource)(struct r600_common_context *,
108 				    struct r600_query *,
109 				    enum pipe_query_flags flags,
110 				    enum pipe_query_value_type result_type,
111 				    int index,
112 				    struct pipe_resource *resource,
113 				    unsigned offset);
114 };
115 
/* Base query object.
 *
 * Embeds struct threaded_query as its first member and is itself embedded as
 * the first member of struct r600_query_hw. */
116 struct r600_query {
117 	struct threaded_query b;
	/* Operations table used to act on this query. */
118 	struct r600_query_ops *ops;
119 
120 	/* The type of query */
121 	unsigned type;
122 };
123 
/* Bit flags, each occupying a distinct bit so they can be OR-ed together.
 * Presumably stored in r600_query_hw::flags -- TODO confirm at the use
 * sites. */
124 enum {
	/* NOTE(review): no description in the original; by its name the query
	 * has no start part -- confirm against the implementation. */
125 	R600_QUERY_HW_FLAG_NO_START = (1 << 0),
126 	/* gap: bit 1 is not assigned to any flag in this enum */
127 	/* whether begin_query doesn't clear the result */
128 	R600_QUERY_HW_FLAG_BEGIN_RESUMES = (1 << 2),
129 };
130 
/* Table of function pointers specific to struct r600_query_hw.
 *
 * struct r600_query_hw stores a pointer to one of these tables in its 'ops'
 * member, in addition to the generic r600_query_ops pointer it inherits
 * through its embedded struct r600_query. */
131 struct r600_query_hw_ops {
	/* Takes the screen, the query and a buffer resource; the meaning of
	 * the bool return is defined by the implementation (not visible
	 * here). */
132 	bool (*prepare_buffer)(struct r600_common_screen *,
133 			       struct r600_query_hw *,
134 			       struct r600_resource *);
	/* emit_start and emit_stop share one signature: context, query, a
	 * buffer resource and a 64-bit 'va' (presumably the GPU virtual
	 * address inside that buffer -- confirm). */
135 	void (*emit_start)(struct r600_common_context *,
136 			   struct r600_query_hw *,
137 			   struct r600_resource *buffer, uint64_t va);
138 	void (*emit_stop)(struct r600_common_context *,
139 			  struct r600_query_hw *,
140 			  struct r600_resource *buffer, uint64_t va);
	/* clear_result and add_result both operate on a pipe_query_result;
	 * add_result additionally receives an untyped pointer to buffer
	 * contents. */
141 	void (*clear_result)(struct r600_query_hw *, union pipe_query_result *);
142 	void (*add_result)(struct r600_common_screen *screen,
143 			   struct r600_query_hw *, void *buffer,
144 			   union pipe_query_result *result);
145 };
146 
/* One node in a chain of query result buffers.
 *
 * Nodes are linked through 'previous'; struct r600_query_hw embeds the head
 * node by value in its 'buffer' member. */
147 struct r600_query_buffer {
148 	/* The buffer where query results are stored. */
149 	struct r600_resource		*buf;
150 	/* Offset of the next free result after current query data */
151 	unsigned			results_end;
152 	/* If a query buffer is full, a new buffer is created and the old one
153 	 * is put in here. When we calculate the result, we sum up the samples
154 	 * from all buffers. */
155 	struct r600_query_buffer	*previous;
156 };
157 
/* Query object that extends struct r600_query.
 *
 * The base object is the first member ('b'), so a pointer to this struct
 * points at its struct r600_query as well. On top of the base it adds a
 * second operations table, flags, the result buffer chain and sizing
 * information. */
158 struct r600_query_hw {
159 	struct r600_query b;
	/* Operations table specific to this kind of query. */
160 	struct r600_query_hw_ops *ops;
	/* Presumably a combination of R600_QUERY_HW_FLAG_* bits -- TODO
	 * confirm at the use sites. */
161 	unsigned flags;
162 
163 	/* The query buffer and how many results are in it. */
164 	struct r600_query_buffer buffer;
165 	/* Size of the result in memory for both begin_query and end_query,
166 	 * this can be one or two numbers, or it could even be a size of a structure. */
167 	unsigned result_size;
168 	/* The number of dwords for begin_query or end_query. */
169 	unsigned num_cs_dw_begin;
170 	unsigned num_cs_dw_end;
171 	/* Linked list of queries */
172 	struct list_head list;
173 	/* For transform feedback: which stream the query is for */
174 	unsigned stream;
175 };
176 
/* Functions operating on struct r600_query_hw; their definitions are not in
 * this header.
 *
 * r600_query_hw_init takes the r600_query_hw directly. The other four take
 * the base struct r600_query pointer, and their signatures match the
 * destroy/begin/end/get_result members of struct r600_query_ops, so they can
 * be stored in such a table. The meaning of the bool return values is
 * defined by the implementations. */
177 bool r600_query_hw_init(struct r600_common_screen *rscreen,
178 			struct r600_query_hw *query);
179 void r600_query_hw_destroy(struct r600_common_screen *rscreen,
180 			   struct r600_query *rquery);
181 bool r600_query_hw_begin(struct r600_common_context *rctx,
182 			 struct r600_query *rquery);
183 bool r600_query_hw_end(struct r600_common_context *rctx,
184 		       struct r600_query *rquery);
185 bool r600_query_hw_get_result(struct r600_common_context *rctx,
186 			      struct r600_query *rquery,
187 			      bool wait,
188 			      union pipe_query_result *result);
189 
190 /* Performance counters */
/* Bit flags describing a performance-counter block. Each flag occupies its
 * own bit (bits 0..4), so they can be OR-ed together. Presumably stored in
 * r600_perfcounter_block::flags -- TODO confirm at the use sites. */
191 enum {
192 	/* This block is part of the shader engine */
193 	R600_PC_BLOCK_SE = (1 << 0),
194 
195 	/* Expose per-instance groups instead of summing all instances (within
196 	 * an SE). */
197 	R600_PC_BLOCK_INSTANCE_GROUPS = (1 << 1),
198 
199 	/* Expose per-SE groups instead of summing instances across SEs. */
200 	R600_PC_BLOCK_SE_GROUPS = (1 << 2),
201 
202 	/* Shader block */
203 	R600_PC_BLOCK_SHADER = (1 << 3),
204 
205 	/* Non-shader block with perfcounters windowed by shaders. */
206 	R600_PC_BLOCK_SHADER_WINDOWED = (1 << 4),
207 };
208 
209 /* Describes a hardware block with performance counters. Multiple instances of
210  * each block, possibly per-SE, may exist on the chip. Depending on the block
211  * and on the user's configuration, we either
212  *  (a) expose every instance as a performance counter group,
213  *  (b) expose a single performance counter group that reports the sum over all
214  *      instances, or
215  *  (c) expose one performance counter group per instance, but summed over all
216  *      shader engines.
217  */
218 struct r600_perfcounter_block {
	/* Name of the block. The parameter order of
	 * r600_perfcounters_add_block (name, flags, counters, selectors,
	 * instances, data) lines up with the members basename, flags,
	 * num_counters, num_selectors, num_instances and data -- presumably
	 * that function fills them in; confirm in its definition. */
219 	const char *basename;
220 	unsigned flags;
221 	unsigned num_counters;
222 	unsigned num_selectors;
223 	unsigned num_instances;
224 
225 	unsigned num_groups;
	/* NOTE(review): a name storage pointer paired with a stride; this
	 * looks like fixed-stride name records in one allocation -- confirm
	 * against the code that fills it. */
226 	char *group_names;
227 	unsigned group_name_stride;
228 
	/* Same pointer-plus-stride pairing as the group names above. */
229 	char *selector_names;
230 	unsigned selector_name_stride;
231 
	/* Untyped pointer; its interpretation is left to code outside this
	 * header. */
232 	void *data;
233 };
234 
/* Top-level performance-counter state: the array of blocks, several
 * command-stream dword counts, shader-type tables and a set of callbacks.
 *
 * Who installs the callbacks and their exact contracts are not visible in
 * this header. */
235 struct r600_perfcounters {
236 	unsigned num_groups;
237 	unsigned num_blocks;
	/* Array of block descriptions; num_blocks presumably counts the
	 * valid entries -- confirm in r600_perfcounters_init/add_block. */
238 	struct r600_perfcounter_block *blocks;
239 
	/* Dword counts, by name for the start/stop/instance/shaders command
	 * sequences -- presumably used to reserve command-stream space; TODO
	 * confirm. */
240 	unsigned num_start_cs_dwords;
241 	unsigned num_stop_cs_dwords;
242 	unsigned num_instance_cs_dwords;
243 	unsigned num_shaders_cs_dwords;
244 
	/* Two tables of strings and bit values; presumably both are indexed
	 * by shader type and hold num_shader_types entries -- confirm. */
245 	unsigned num_shader_types;
246 	const char * const *shader_type_suffixes;
247 	const unsigned *shader_type_bits;
248 
	/* Reports two sizes through the num_select_dw and num_read_dw output
	 * pointers for 'count' selectors of the given block. */
249 	void (*get_size)(struct r600_perfcounter_block *,
250 			 unsigned count, unsigned *selectors,
251 			 unsigned *num_select_dw, unsigned *num_read_dw);
252 
	/* Emit callbacks; each takes the context it acts on. */
253 	void (*emit_instance)(struct r600_common_context *,
254 			      int se, int instance);
255 	void (*emit_shaders)(struct r600_common_context *, unsigned shaders);
256 	void (*emit_select)(struct r600_common_context *,
257 			    struct r600_perfcounter_block *,
258 			    unsigned count, unsigned *selectors);
259 	void (*emit_start)(struct r600_common_context *,
260 			  struct r600_resource *buffer, uint64_t va);
261 	void (*emit_stop)(struct r600_common_context *,
262 			  struct r600_resource *buffer, uint64_t va);
263 	void (*emit_read)(struct r600_common_context *,
264 			  struct r600_perfcounter_block *,
265 			  unsigned count, unsigned *selectors,
266 			  struct r600_resource *buffer, uint64_t va);
267 
	/* Takes the screen rather than this struct. */
268 	void (*cleanup)(struct r600_common_screen *);
269 
	/* NOTE(review): by name these select per-SE / per-instance exposure
	 * (compare R600_PC_BLOCK_SE_GROUPS and
	 * R600_PC_BLOCK_INSTANCE_GROUPS) -- confirm where they are read. */
270 	bool separate_se;
271 	bool separate_instance;
272 };
273 
/* Creates a query from an array of num_queries query type ids and returns it
 * as a generic pipe_query pointer. Failure behaviour is not visible in this
 * header. */
274 struct pipe_query *r600_create_batch_query(struct pipe_context *ctx,
275 					   unsigned num_queries,
276 					   unsigned *query_types);
277 
/* Fill *info for the performance counter / performance counter group
 * selected by 'index'. The meaning of the int return value is defined by the
 * implementations (not visible here). */
278 int r600_get_perfcounter_info(struct r600_common_screen *,
279 			      unsigned index,
280 			      struct pipe_driver_query_info *info);
281 int r600_get_perfcounter_group_info(struct r600_common_screen *,
282 				    unsigned index,
283 				    struct pipe_driver_query_group_info *info);
284 
/* Set-up and tear-down of struct r600_perfcounters. The add_block parameters
 * (name, flags, counters, selectors, instances, data) line up with members
 * of struct r600_perfcounter_block. */
285 bool r600_perfcounters_init(struct r600_perfcounters *, unsigned num_blocks);
286 void r600_perfcounters_add_block(struct r600_common_screen *,
287 				 struct r600_perfcounters *,
288 				 const char *name, unsigned flags,
289 				 unsigned counters, unsigned selectors,
290 				 unsigned instances, void *data);
291 void r600_perfcounters_do_destroy(struct r600_perfcounters *);
/* Operates on the buffers of the given r600_query_hw; the exact reset
 * semantics are defined by the implementation. */
292 void r600_query_hw_reset_buffers(struct r600_common_context *rctx,
293 				 struct r600_query_hw *query);
294 
/* Holds one untyped compute-state pointer, one constant buffer and three
 * shader buffers, all by value except the pointer.
 *
 * NOTE(review): the 'saved_' prefix suggests these are bindings stashed so
 * they can be restored later -- confirm at the use sites, which are not in
 * this header. */
295 struct r600_qbo_state {
296 	void *saved_compute;
297 	struct pipe_constant_buffer saved_const0;
298 	struct pipe_shader_buffer saved_ssbo[3];
299 };
300 
301 #endif /* R600_QUERY_H */
302