1 /*
2 * Copyright 2015 Advanced Micro Devices, Inc.
3 * Authors:
4 * Nicolai Hähnle <[email protected]>
5 * SPDX-License-Identifier: MIT
6 */
7
8 #include "util/u_memory.h"
9 #include "r600_query.h"
10 #include "r600_pipe_common.h"
11 #include "r600d_common.h"
12
13 /* Max counters per HW block */
14 #define R600_QUERY_MAX_COUNTERS 16
15
16 static struct r600_perfcounter_block *
lookup_counter(struct r600_perfcounters * pc,unsigned index,unsigned * base_gid,unsigned * sub_index)17 lookup_counter(struct r600_perfcounters *pc, unsigned index,
18 unsigned *base_gid, unsigned *sub_index)
19 {
20 struct r600_perfcounter_block *block = pc->blocks;
21 unsigned bid;
22
23 *base_gid = 0;
24 for (bid = 0; bid < pc->num_blocks; ++bid, ++block) {
25 unsigned total = block->num_groups * block->num_selectors;
26
27 if (index < total) {
28 *sub_index = index;
29 return block;
30 }
31
32 index -= total;
33 *base_gid += block->num_groups;
34 }
35
36 return NULL;
37 }
38
39 static struct r600_perfcounter_block *
lookup_group(struct r600_perfcounters * pc,unsigned * index)40 lookup_group(struct r600_perfcounters *pc, unsigned *index)
41 {
42 unsigned bid;
43 struct r600_perfcounter_block *block = pc->blocks;
44
45 for (bid = 0; bid < pc->num_blocks; ++bid, ++block) {
46 if (*index < block->num_groups)
47 return block;
48 *index -= block->num_groups;
49 }
50
51 return NULL;
52 }
53
/* Per-query state for one (block, sub-group) combination.  Groups form a
 * singly-linked list hanging off r600_query_pc::groups. */
struct r600_pc_group {
	struct r600_pc_group *next;
	struct r600_perfcounter_block *block;
	unsigned sub_gid; /* only used during init */
	unsigned result_base; /* only used during init */
	int se;       /* shader engine, or -1 to read/sum over all SEs */
	int instance; /* block instance, or -1 to read/sum over all instances */
	unsigned num_counters; /* number of valid entries in selectors[] */
	unsigned selectors[R600_QUERY_MAX_COUNTERS];
};
64
/* Describes where one user-requested counter's values live in the query
 * result buffer; consumed by r600_pc_query_add_result. */
struct r600_pc_counter {
	unsigned base;   /* index of the first result slot for this counter */
	unsigned qwords; /* number of slots to sum (per-SE/per-instance copies) */
	unsigned stride; /* in uint64s */
};
70
/* Flag bit stored in r600_query_pc::shaders: shader masking was requested
 * without selecting a specific shader type (see get_group_state). */
#define R600_PC_SHADERS_WINDOWING (1 << 31)

/* A batched performance-counter query; subclass of r600_query_hw. */
struct r600_query_pc {
	struct r600_query_hw b;

	unsigned shaders;      /* shader selection mask, may carry the windowing flag */
	unsigned num_counters; /* number of counters the user requested */
	struct r600_pc_counter *counters; /* one entry per requested counter */
	struct r600_pc_group *groups;     /* list of (block, sub-group) states */
};
81
r600_pc_query_destroy(struct r600_common_screen * rscreen,struct r600_query * rquery)82 static void r600_pc_query_destroy(struct r600_common_screen *rscreen,
83 struct r600_query *rquery)
84 {
85 struct r600_query_pc *query = (struct r600_query_pc *)rquery;
86
87 while (query->groups) {
88 struct r600_pc_group *group = query->groups;
89 query->groups = group->next;
90 FREE(group);
91 }
92
93 FREE(query->counters);
94
95 r600_query_hw_destroy(rscreen, rquery);
96 }
97
/* r600_query_hw_ops::prepare_buffer hook.  Perfcounter results are written
 * by explicit read commands in emit_stop, so the buffer needs no
 * pre-initialization here. */
static bool r600_pc_query_prepare_buffer(struct r600_common_screen *screen,
					 struct r600_query_hw *hwquery,
					 struct r600_resource *buffer)
{
	/* no-op */
	return true;
}
105
/* r600_query_hw_ops::emit_start hook: program the counter selection for
 * every group and start counting. */
static void r600_pc_query_emit_start(struct r600_common_context *ctx,
				     struct r600_query_hw *hwquery,
				     struct r600_resource *buffer, uint64_t va)
{
	struct r600_query_pc *query = (struct r600_query_pc *)hwquery;
	struct r600_perfcounters *pc = ctx->screen->perfcounters;
	struct r600_pc_group *grp;
	int se = -1;
	int instance = -1;

	if (query->shaders)
		pc->emit_shaders(ctx, query->shaders);

	for (grp = query->groups; grp; grp = grp->next) {
		/* Only re-emit the SE/instance selection when it changes. */
		if (se != grp->se || instance != grp->instance) {
			se = grp->se;
			instance = grp->instance;
			pc->emit_instance(ctx, se, instance);
		}

		pc->emit_select(ctx, grp->block, grp->num_counters,
				grp->selectors);
	}

	/* Restore broadcast if we narrowed the selection above. */
	if (se != -1 || instance != -1)
		pc->emit_instance(ctx, -1, -1);

	pc->emit_start(ctx, buffer, va);
}
136
/* r600_query_hw_ops::emit_stop hook: stop counting and read every counter
 * value back into the result buffer.
 *
 * Groups with se/instance == -1 are read once per SE and/or per instance;
 * the copies are summed on the CPU in r600_pc_query_add_result.  The
 * do/while structure guarantees at least one read per group. */
static void r600_pc_query_emit_stop(struct r600_common_context *ctx,
				    struct r600_query_hw *hwquery,
				    struct r600_resource *buffer, uint64_t va)
{
	struct r600_perfcounters *pc = ctx->screen->perfcounters;
	struct r600_query_pc *query = (struct r600_query_pc *)hwquery;
	struct r600_pc_group *group;

	pc->emit_stop(ctx, buffer, va);

	for (group = query->groups; group; group = group->next) {
		struct r600_perfcounter_block *block = group->block;
		unsigned se = group->se >= 0 ? group->se : 0;
		unsigned se_end = se + 1;

		/* se < 0 on a per-SE block means: iterate over all SEs. */
		if ((block->flags & R600_PC_BLOCK_SE) && (group->se < 0))
			se_end = ctx->screen->info.max_se;

		do {
			unsigned instance = group->instance >= 0 ? group->instance : 0;

			do {
				pc->emit_instance(ctx, se, instance);
				pc->emit_read(ctx, block,
					      group->num_counters, group->selectors,
					      buffer, va);
				/* One 64-bit result slot per counter per read. */
				va += sizeof(uint64_t) * group->num_counters;
			} while (group->instance < 0 && ++instance < block->num_instances);
		} while (++se < se_end);
	}

	/* Restore broadcast to all SEs and instances. */
	pc->emit_instance(ctx, -1, -1);
}
170
/* r600_query_hw_ops::clear_result hook: zero the accumulators before
 * add_result sums the per-SE/per-instance copies into them. */
static void r600_pc_query_clear_result(struct r600_query_hw *hwquery,
				       union pipe_query_result *result)
{
	struct r600_query_pc *query = (struct r600_query_pc *)hwquery;

	/* Only the first num_counters batch entries are ever written. */
	memset(result, 0, sizeof(result->batch[0]) * query->num_counters);
}
178
/* r600_query_hw_ops::add_result hook: accumulate one result buffer's
 * values into the user-visible result, using the per-counter layout
 * computed in r600_create_batch_query. */
static void r600_pc_query_add_result(struct r600_common_screen *rscreen,
				     struct r600_query_hw *hwquery,
				     void *buffer,
				     union pipe_query_result *result)
{
	struct r600_query_pc *query = (struct r600_query_pc *)hwquery;
	uint64_t *results = buffer;
	unsigned i, j;

	for (i = 0; i < query->num_counters; ++i) {
		struct r600_pc_counter *counter = &query->counters[i];

		for (j = 0; j < counter->qwords; ++j) {
			/* NOTE(review): deliberate truncation to 32 bits —
			 * presumably the hardware writes only 32-bit counter
			 * values into the 64-bit slots; confirm against the
			 * emit_read implementation. */
			uint32_t value = results[counter->base + j * counter->stride];
			result->batch[i].u64 += value;
		}
	}
}
197
/* Virtual table plugging batch perfcounter queries into the generic
 * r600_query machinery; only destroy is perfcounter-specific. */
static struct r600_query_ops batch_query_ops = {
	.destroy = r600_pc_query_destroy,
	.begin = r600_query_hw_begin,
	.end = r600_query_hw_end,
	.get_result = r600_query_hw_get_result
};
204
/* Hardware-query hooks implemented by this file (see r600_query_hw_ops). */
static struct r600_query_hw_ops batch_query_hw_ops = {
	.prepare_buffer = r600_pc_query_prepare_buffer,
	.emit_start = r600_pc_query_emit_start,
	.emit_stop = r600_pc_query_emit_stop,
	.clear_result = r600_pc_query_clear_result,
	.add_result = r600_pc_query_add_result,
};
212
/* Find the group state for (block, sub_gid) in the query, creating and
 * linking a new one on first use.
 *
 * For shader blocks this also decodes the shader-type component of
 * sub_gid and updates query->shaders; mixing incompatible shader groups
 * in one query is rejected.  Returns NULL on allocation failure or on an
 * incompatible shader-group mix. */
static struct r600_pc_group *get_group_state(struct r600_common_screen *screen,
					     struct r600_query_pc *query,
					     struct r600_perfcounter_block *block,
					     unsigned sub_gid)
{
	struct r600_pc_group *group = query->groups;

	/* Re-use an existing group if this (block, sub_gid) was seen before. */
	while (group) {
		if (group->block == block && group->sub_gid == sub_gid)
			return group;
		group = group->next;
	}

	group = CALLOC_STRUCT(r600_pc_group);
	if (!group)
		return NULL;

	group->block = block;
	group->sub_gid = sub_gid;

	if (block->flags & R600_PC_BLOCK_SHADER) {
		/* Peel the shader-type component off sub_gid first:
		 * sub_gid = shader_id * sub_gids + remainder. */
		unsigned sub_gids = block->num_instances;
		unsigned shader_id;
		unsigned shaders;
		unsigned query_shaders;

		if (block->flags & R600_PC_BLOCK_SE_GROUPS)
			sub_gids = sub_gids * screen->info.max_se;
		shader_id = sub_gid / sub_gids;
		sub_gid = sub_gid % sub_gids;

		shaders = screen->perfcounters->shader_type_bits[shader_id];

		/* All shader blocks in one query must select the same shaders. */
		query_shaders = query->shaders & ~R600_PC_SHADERS_WINDOWING;
		if (query_shaders && query_shaders != shaders) {
			fprintf(stderr, "r600_perfcounter: incompatible shader groups\n");
			FREE(group);
			return NULL;
		}
		query->shaders = shaders;
	}

	if (block->flags & R600_PC_BLOCK_SHADER_WINDOWED && !query->shaders) {
		// A non-zero value in query->shaders ensures that the shader
		// masking is reset unless the user explicitly requests one.
		query->shaders = R600_PC_SHADERS_WINDOWING;
	}

	/* The remaining sub_gid encodes se * num_instances + instance when
	 * the corresponding *_GROUPS flags are set; -1 means "all". */
	if (block->flags & R600_PC_BLOCK_SE_GROUPS) {
		group->se = sub_gid / block->num_instances;
		sub_gid = sub_gid % block->num_instances;
	} else {
		group->se = -1;
	}

	if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS) {
		group->instance = sub_gid;
	} else {
		group->instance = -1;
	}

	/* Prepend the new group to the query's list. */
	group->next = query->groups;
	query->groups = group;

	return group;
}
279
r600_create_batch_query(struct pipe_context * ctx,unsigned num_queries,unsigned * query_types)280 struct pipe_query *r600_create_batch_query(struct pipe_context *ctx,
281 unsigned num_queries,
282 unsigned *query_types)
283 {
284 struct r600_common_screen *screen =
285 (struct r600_common_screen *)ctx->screen;
286 struct r600_perfcounters *pc = screen->perfcounters;
287 struct r600_perfcounter_block *block;
288 struct r600_pc_group *group;
289 struct r600_query_pc *query;
290 unsigned base_gid, sub_gid, sub_index;
291 unsigned i, j;
292
293 if (!pc)
294 return NULL;
295
296 query = CALLOC_STRUCT(r600_query_pc);
297 if (!query)
298 return NULL;
299
300 query->b.b.ops = &batch_query_ops;
301 query->b.ops = &batch_query_hw_ops;
302
303 query->num_counters = num_queries;
304
305 /* Collect selectors per group */
306 for (i = 0; i < num_queries; ++i) {
307 unsigned sub_gid;
308
309 if (query_types[i] < R600_QUERY_FIRST_PERFCOUNTER)
310 goto error;
311
312 block = lookup_counter(pc, query_types[i] - R600_QUERY_FIRST_PERFCOUNTER,
313 &base_gid, &sub_index);
314 if (!block)
315 goto error;
316
317 sub_gid = sub_index / block->num_selectors;
318 sub_index = sub_index % block->num_selectors;
319
320 group = get_group_state(screen, query, block, sub_gid);
321 if (!group)
322 goto error;
323
324 if (group->num_counters >= block->num_counters) {
325 fprintf(stderr,
326 "perfcounter group %s: too many selected\n",
327 block->basename);
328 goto error;
329 }
330 group->selectors[group->num_counters] = sub_index;
331 ++group->num_counters;
332 }
333
334 /* Compute result bases and CS size per group */
335 query->b.num_cs_dw_begin = pc->num_start_cs_dwords;
336 query->b.num_cs_dw_end = pc->num_stop_cs_dwords;
337
338 query->b.num_cs_dw_begin += pc->num_instance_cs_dwords; /* conservative */
339 query->b.num_cs_dw_end += pc->num_instance_cs_dwords;
340
341 i = 0;
342 for (group = query->groups; group; group = group->next) {
343 struct r600_perfcounter_block *block = group->block;
344 unsigned select_dw, read_dw;
345 unsigned instances = 1;
346
347 if ((block->flags & R600_PC_BLOCK_SE) && group->se < 0)
348 instances = screen->info.max_se;
349 if (group->instance < 0)
350 instances *= block->num_instances;
351
352 group->result_base = i;
353 query->b.result_size += sizeof(uint64_t) * instances * group->num_counters;
354 i += instances * group->num_counters;
355
356 pc->get_size(block, group->num_counters, group->selectors,
357 &select_dw, &read_dw);
358 query->b.num_cs_dw_begin += select_dw;
359 query->b.num_cs_dw_end += instances * read_dw;
360 query->b.num_cs_dw_begin += pc->num_instance_cs_dwords; /* conservative */
361 query->b.num_cs_dw_end += instances * pc->num_instance_cs_dwords;
362 }
363
364 if (query->shaders) {
365 if (query->shaders == R600_PC_SHADERS_WINDOWING)
366 query->shaders = 0xffffffff;
367 query->b.num_cs_dw_begin += pc->num_shaders_cs_dwords;
368 }
369
370 /* Map user-supplied query array to result indices */
371 query->counters = CALLOC(num_queries, sizeof(*query->counters));
372 for (i = 0; i < num_queries; ++i) {
373 struct r600_pc_counter *counter = &query->counters[i];
374 struct r600_perfcounter_block *block;
375
376 block = lookup_counter(pc, query_types[i] - R600_QUERY_FIRST_PERFCOUNTER,
377 &base_gid, &sub_index);
378
379 sub_gid = sub_index / block->num_selectors;
380 sub_index = sub_index % block->num_selectors;
381
382 group = get_group_state(screen, query, block, sub_gid);
383 assert(group != NULL);
384
385 for (j = 0; j < group->num_counters; ++j) {
386 if (group->selectors[j] == sub_index)
387 break;
388 }
389
390 counter->base = group->result_base + j;
391 counter->stride = group->num_counters;
392
393 counter->qwords = 1;
394 if ((block->flags & R600_PC_BLOCK_SE) && group->se < 0)
395 counter->qwords = screen->info.max_se;
396 if (group->instance < 0)
397 counter->qwords *= block->num_instances;
398 }
399
400 if (!r600_query_hw_init(screen, &query->b))
401 goto error;
402
403 return (struct pipe_query *)query;
404
405 error:
406 r600_pc_query_destroy(screen, &query->b.b);
407 return NULL;
408 }
409
/* Build the human-readable group and selector name tables for a block.
 *
 * Group names have the form "<basename>[<shader suffix>][<se>[_]][<inst>]";
 * selector names append "_%03d".  Both tables use a fixed stride so a
 * name can be located by index without scanning.  Returns false on
 * allocation failure; already-allocated tables are released later by
 * r600_perfcounters_do_destroy. */
static bool r600_init_block_names(struct r600_common_screen *screen,
				  struct r600_perfcounter_block *block)
{
	unsigned i, j, k;
	unsigned groups_shader = 1, groups_se = 1, groups_instance = 1;
	unsigned namelen;
	char *groupname;
	char *p;

	if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS)
		groups_instance = block->num_instances;
	if (block->flags & R600_PC_BLOCK_SE_GROUPS)
		groups_se = screen->info.max_se;
	if (block->flags & R600_PC_BLOCK_SHADER)
		groups_shader = screen->perfcounters->num_shader_types;

	/* Worst-case name length; the stride includes the NUL terminator. */
	namelen = strlen(block->basename);
	block->group_name_stride = namelen + 1;
	if (block->flags & R600_PC_BLOCK_SHADER)
		block->group_name_stride += 3; /* NOTE(review): assumes shader suffixes are at most 3 chars — confirm shader_type_suffixes */
	if (block->flags & R600_PC_BLOCK_SE_GROUPS) {
		assert(groups_se <= 10); /* one digit for the SE number */
		block->group_name_stride += 1;

		if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS)
			block->group_name_stride += 1; /* '_' separator */
	}
	if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS) {
		assert(groups_instance <= 100); /* at most two digits */
		block->group_name_stride += 2;
	}

	block->group_names = MALLOC(block->num_groups * block->group_name_stride);
	if (!block->group_names)
		return false;

	/* Nesting order must match the group numbering used elsewhere:
	 * shader-major, then SE, then instance. */
	groupname = block->group_names;
	for (i = 0; i < groups_shader; ++i) {
		const char *shader_suffix = screen->perfcounters->shader_type_suffixes[i];
		unsigned shaderlen = strlen(shader_suffix);
		for (j = 0; j < groups_se; ++j) {
			for (k = 0; k < groups_instance; ++k) {
				strcpy(groupname, block->basename);
				p = groupname + namelen;

				if (block->flags & R600_PC_BLOCK_SHADER) {
					strcpy(p, shader_suffix);
					p += shaderlen;
				}

				if (block->flags & R600_PC_BLOCK_SE_GROUPS) {
					p += sprintf(p, "%d", j);
					if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS)
						*p++ = '_';
				}

				if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS)
					p += sprintf(p, "%d", k);

				groupname += block->group_name_stride;
			}
		}
	}

	/* Selector names: "<groupname>_NNN", NNN zero-padded to 3 digits. */
	assert(block->num_selectors <= 1000);
	block->selector_name_stride = block->group_name_stride + 4;
	block->selector_names = MALLOC(block->num_groups * block->num_selectors *
				       block->selector_name_stride);
	if (!block->selector_names)
		return false;

	groupname = block->group_names;
	p = block->selector_names;
	for (i = 0; i < block->num_groups; ++i) {
		for (j = 0; j < block->num_selectors; ++j) {
			sprintf(p, "%s_%03d", groupname, j);
			p += block->selector_name_stride;
		}
		groupname += block->group_name_stride;
	}

	return true;
}
493
r600_get_perfcounter_info(struct r600_common_screen * screen,unsigned index,struct pipe_driver_query_info * info)494 int r600_get_perfcounter_info(struct r600_common_screen *screen,
495 unsigned index,
496 struct pipe_driver_query_info *info)
497 {
498 struct r600_perfcounters *pc = screen->perfcounters;
499 struct r600_perfcounter_block *block;
500 unsigned base_gid, sub;
501
502 if (!pc)
503 return 0;
504
505 if (!info) {
506 unsigned bid, num_queries = 0;
507
508 for (bid = 0; bid < pc->num_blocks; ++bid) {
509 num_queries += pc->blocks[bid].num_selectors *
510 pc->blocks[bid].num_groups;
511 }
512
513 return num_queries;
514 }
515
516 block = lookup_counter(pc, index, &base_gid, &sub);
517 if (!block)
518 return 0;
519
520 if (!block->selector_names) {
521 if (!r600_init_block_names(screen, block))
522 return 0;
523 }
524 info->name = block->selector_names + sub * block->selector_name_stride;
525 info->query_type = R600_QUERY_FIRST_PERFCOUNTER + index;
526 info->max_value.u64 = 0;
527 info->type = PIPE_DRIVER_QUERY_TYPE_UINT64;
528 info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE;
529 info->group_id = base_gid + sub / block->num_selectors;
530 info->flags = PIPE_DRIVER_QUERY_FLAG_BATCH;
531 if (sub > 0 && sub + 1 < block->num_selectors * block->num_groups)
532 info->flags |= PIPE_DRIVER_QUERY_FLAG_DONT_LIST;
533 return 1;
534 }
535
r600_get_perfcounter_group_info(struct r600_common_screen * screen,unsigned index,struct pipe_driver_query_group_info * info)536 int r600_get_perfcounter_group_info(struct r600_common_screen *screen,
537 unsigned index,
538 struct pipe_driver_query_group_info *info)
539 {
540 struct r600_perfcounters *pc = screen->perfcounters;
541 struct r600_perfcounter_block *block;
542
543 if (!pc)
544 return 0;
545
546 if (!info)
547 return pc->num_groups;
548
549 block = lookup_group(pc, &index);
550 if (!block)
551 return 0;
552
553 if (!block->group_names) {
554 if (!r600_init_block_names(screen, block))
555 return 0;
556 }
557 info->name = block->group_names + index * block->group_name_stride;
558 info->num_queries = block->num_selectors;
559 info->max_active_queries = block->num_counters;
560 return 1;
561 }
562
r600_perfcounters_destroy(struct r600_common_screen * rscreen)563 void r600_perfcounters_destroy(struct r600_common_screen *rscreen)
564 {
565 if (rscreen->perfcounters)
566 rscreen->perfcounters->cleanup(rscreen);
567 }
568
r600_perfcounters_init(struct r600_perfcounters * pc,unsigned num_blocks)569 bool r600_perfcounters_init(struct r600_perfcounters *pc,
570 unsigned num_blocks)
571 {
572 pc->blocks = CALLOC(num_blocks, sizeof(struct r600_perfcounter_block));
573 if (!pc->blocks)
574 return false;
575
576 pc->separate_se = debug_get_bool_option("RADEON_PC_SEPARATE_SE", false);
577 pc->separate_instance = debug_get_bool_option("RADEON_PC_SEPARATE_INSTANCE", false);
578
579 return true;
580 }
581
r600_perfcounters_add_block(struct r600_common_screen * rscreen,struct r600_perfcounters * pc,const char * name,unsigned flags,unsigned counters,unsigned selectors,unsigned instances,void * data)582 void r600_perfcounters_add_block(struct r600_common_screen *rscreen,
583 struct r600_perfcounters *pc,
584 const char *name, unsigned flags,
585 unsigned counters, unsigned selectors,
586 unsigned instances, void *data)
587 {
588 struct r600_perfcounter_block *block = &pc->blocks[pc->num_blocks];
589
590 assert(counters <= R600_QUERY_MAX_COUNTERS);
591
592 block->basename = name;
593 block->flags = flags;
594 block->num_counters = counters;
595 block->num_selectors = selectors;
596 block->num_instances = MAX2(instances, 1);
597 block->data = data;
598
599 if (pc->separate_se && (block->flags & R600_PC_BLOCK_SE))
600 block->flags |= R600_PC_BLOCK_SE_GROUPS;
601 if (pc->separate_instance && block->num_instances > 1)
602 block->flags |= R600_PC_BLOCK_INSTANCE_GROUPS;
603
604 if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS) {
605 block->num_groups = block->num_instances;
606 } else {
607 block->num_groups = 1;
608 }
609
610 if (block->flags & R600_PC_BLOCK_SE_GROUPS)
611 block->num_groups *= rscreen->info.max_se;
612 if (block->flags & R600_PC_BLOCK_SHADER)
613 block->num_groups *= pc->num_shader_types;
614
615 ++pc->num_blocks;
616 pc->num_groups += block->num_groups;
617 }
618
r600_perfcounters_do_destroy(struct r600_perfcounters * pc)619 void r600_perfcounters_do_destroy(struct r600_perfcounters *pc)
620 {
621 unsigned i;
622
623 for (i = 0; i < pc->num_blocks; ++i) {
624 FREE(pc->blocks[i].group_names);
625 FREE(pc->blocks[i].selector_names);
626 }
627 FREE(pc->blocks);
628 FREE(pc);
629 }
630