1 #include "zink_query.h"
2
3 #include "zink_context.h"
4 #include "zink_clear.h"
5 #include "zink_program.h"
6 #include "zink_resource.h"
7 #include "zink_screen.h"
8
9 #include "util/u_dump.h"
10 #include "util/u_inlines.h"
11 #include "util/u_memory.h"
12
13 #define NUM_QUERIES 500
14
15 #define ZINK_QUERY_RENDER_PASSES (PIPE_QUERY_DRIVER_SPECIFIC + 0)
16
/* a pool of NUM_QUERIES vulkan queries of one vk_query_type (and, for
 * pipeline-statistics pools, one statistics flag set); query ids are handed
 * out linearly via last_range, and the pool is kept alive by refcount
 * (owned by the zink_vk_query objects that use it)
 */
struct zink_query_pool {
   struct list_head list;
   VkQueryType vk_query_type;
   VkQueryPipelineStatisticFlags pipeline_stats;
   VkQueryPool query_pool;
   unsigned last_range; /* next unallocated query id in this pool */
   unsigned refcount;
};
25
/* a set of staging buffers holding copied-back query results,
 * one buffer per vulkan query backing the gallium query
 */
struct zink_query_buffer {
   struct list_head list;
   unsigned num_results; /* number of result slots written into the buffers */
   struct pipe_resource *buffers[PIPE_MAX_VERTEX_STREAMS];
};
31
/* a single refcounted vulkan query: a (pool, id) pair plus begin/reset state */
struct zink_vk_query {
   struct zink_query_pool *pool;
   unsigned query_id; /* index of this query within the pool */
   bool needs_reset;  /* vkCmdResetQueryPool required before next begin */
   bool started;      /* vkCmdBeginQuery* has been recorded and not yet ended */
   uint32_t refcount;
};
39
/* one "start" of a gallium query: the vulkan queries used for that span
 * plus per-span draw state; the flag bools are unioned with 'data' so they
 * can be cleared in one store
 */
struct zink_query_start {
   union {
      struct {
         bool have_gs;
         bool have_xfb;
         bool was_line_loop;
      };
      uint32_t data;
   };
   struct zink_vk_query *vkq[PIPE_MAX_VERTEX_STREAMS];
};
51
/* the gallium-level query object: aggregates one or more vulkan queries
 * (recorded as "starts") and the staging buffers their results are copied to
 */
struct zink_query {
   struct threaded_query base;
   enum pipe_query_type type;

   /* Everytime the gallium query needs
    * another vulkan query, add a new start.
    */
   struct util_dynarray starts;
   unsigned start_offset; /* first start not yet copied into the qbo */

   VkQueryType vkqtype;
   unsigned index; /* statistic index / vertex stream, depending on type */
   bool precise;   /* use VK_QUERY_CONTROL_PRECISE_BIT (occlusion counter) */

   bool active; /* query is considered active by vk */
   bool needs_reset; /* query is considered active by vk and cannot be destroyed */
   bool dead; /* query should be destroyed when its fence finishes */
   bool needs_update; /* query needs to update its qbos */
   bool needs_rast_discard_workaround; /* query needs discard disabled */
   bool suspended;
   bool started_in_rp; //needs to be stopped in rp

   struct list_head active_list;

   struct list_head stats_list; /* when active, statistics queries are added to ctx->primitives_generated_queries */
   bool has_draws; /* have_gs and have_xfb are valid for idx=curr_query */

   struct zink_batch_usage *batch_uses; //batch that the query was started in

   /* list of zink_query_buffer; curr_qbo points at the one being filled */
   struct list_head buffers;
   union {
      struct zink_query_buffer *curr_qbo;
      struct pipe_fence_handle *fence; //PIPE_QUERY_GPU_FINISHED
   };

   struct zink_resource *predicate; /* buffer used for conditional rendering */
   bool predicate_dirty;
};
90
/* driver-specific queries exposed to gallium (indexed from PIPE_QUERY_DRIVER_SPECIFIC) */
static const struct pipe_driver_query_info zink_specific_queries[] = {
   {"render-passes", ZINK_QUERY_RENDER_PASSES, { 0 }},
};
94
95 static inline int
get_num_starts(struct zink_query * q)96 get_num_starts(struct zink_query *q)
97 {
98 return util_dynarray_num_elements(&q->starts, struct zink_query_start);
99 }
100
101 static void
102 update_query_id(struct zink_context *ctx, struct zink_query *q);
103
104
105 static VkQueryPipelineStatisticFlags
pipeline_statistic_convert(enum pipe_statistics_query_index idx)106 pipeline_statistic_convert(enum pipe_statistics_query_index idx)
107 {
108 unsigned map[] = {
109 [PIPE_STAT_QUERY_IA_VERTICES] = VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_VERTICES_BIT,
110 [PIPE_STAT_QUERY_IA_PRIMITIVES] = VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_PRIMITIVES_BIT,
111 [PIPE_STAT_QUERY_VS_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_VERTEX_SHADER_INVOCATIONS_BIT,
112 [PIPE_STAT_QUERY_GS_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_INVOCATIONS_BIT,
113 [PIPE_STAT_QUERY_GS_PRIMITIVES] = VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT,
114 [PIPE_STAT_QUERY_C_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_CLIPPING_INVOCATIONS_BIT,
115 [PIPE_STAT_QUERY_C_PRIMITIVES] = VK_QUERY_PIPELINE_STATISTIC_CLIPPING_PRIMITIVES_BIT,
116 [PIPE_STAT_QUERY_PS_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_FRAGMENT_SHADER_INVOCATIONS_BIT,
117 [PIPE_STAT_QUERY_HS_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_CONTROL_SHADER_PATCHES_BIT,
118 [PIPE_STAT_QUERY_DS_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_EVALUATION_SHADER_INVOCATIONS_BIT,
119 [PIPE_STAT_QUERY_CS_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_COMPUTE_SHADER_INVOCATIONS_BIT
120 };
121 assert(idx < ARRAY_SIZE(map));
122 return map[idx];
123 }
124
125 static void
begin_vk_query_indexed(struct zink_context * ctx,struct zink_vk_query * vkq,int index,VkQueryControlFlags flags)126 begin_vk_query_indexed(struct zink_context *ctx, struct zink_vk_query *vkq, int index,
127 VkQueryControlFlags flags)
128 {
129 if (!vkq->started) {
130 VKCTX(CmdBeginQueryIndexedEXT)(ctx->bs->cmdbuf,
131 vkq->pool->query_pool,
132 vkq->query_id,
133 flags,
134 index);
135 vkq->started = true;
136 }
137 }
138
139 static void
end_vk_query_indexed(struct zink_context * ctx,struct zink_vk_query * vkq,int index)140 end_vk_query_indexed(struct zink_context *ctx, struct zink_vk_query *vkq, int index)
141 {
142 if (vkq->started) {
143 VKCTX(CmdEndQueryIndexedEXT)(ctx->bs->cmdbuf,
144 vkq->pool->query_pool,
145 vkq->query_id, index);
146 vkq->started = false;
147 }
148 }
149
150 static void
reset_vk_query_pool(struct zink_context * ctx,struct zink_vk_query * vkq)151 reset_vk_query_pool(struct zink_context *ctx, struct zink_vk_query *vkq)
152 {
153 if (vkq->needs_reset) {
154 VKCTX(CmdResetQueryPool)(ctx->bs->reordered_cmdbuf, vkq->pool->query_pool, vkq->query_id, 1);
155 ctx->bs->has_reordered_work = true;
156 }
157 vkq->needs_reset = false;
158 }
159
/* destroy every vulkan query pool owned by the context; called at context teardown */
void
zink_context_destroy_query_pools(struct zink_context *ctx)
{
   struct zink_screen *screen = zink_screen(ctx->base.screen);
   list_for_each_entry_safe(struct zink_query_pool, pool, &ctx->query_pools, list) {
      VKSCR(DestroyQueryPool)(screen->dev, pool->query_pool, NULL);
      list_del(&pool->list);
      FREE(pool);
   }
}
170
/* find an existing query pool matching the vulkan query type (and, for
 * pipeline-statistics pools, the exact statistics flags) that this query
 * needs, or create a new NUM_QUERIES-sized pool; 'idx' selects which pool
 * of a multi-pool (emulated primgen) query is wanted.
 * returns NULL on allocation failure
 */
static struct zink_query_pool *
find_or_allocate_qp(struct zink_context *ctx, struct zink_query *q, unsigned idx)
{
   VkQueryPipelineStatisticFlags pipeline_stats = 0;
   /* emulated primgen counts GS primitives + clipper invocations via statistics */
   if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED && q->vkqtype != VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT)
      pipeline_stats = VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT |
                       VK_QUERY_PIPELINE_STATISTIC_CLIPPING_INVOCATIONS_BIT;
   else if (q->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE)
      pipeline_stats = pipeline_statistic_convert(q->index);

   VkQueryType vk_query_type = q->vkqtype;
   /* if xfb is active, we need to use an xfb query, otherwise we need pipeline statistics */
   if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED && idx == 1) {
      vk_query_type = VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT;
      pipeline_stats = 0;
   }

   struct zink_screen *screen = zink_screen(ctx->base.screen);
   /* non-statistics pools match on type alone; statistics pools must also
    * match the exact flag set
    */
   list_for_each_entry(struct zink_query_pool, pool, &ctx->query_pools, list) {
      if (pool->vk_query_type == vk_query_type) {
         if (vk_query_type == VK_QUERY_TYPE_PIPELINE_STATISTICS) {
            if (pool->pipeline_stats == pipeline_stats)
               return pool;
         } else
            return pool;
      }
   }

   struct zink_query_pool *new_pool = CALLOC_STRUCT(zink_query_pool);
   if (!new_pool)
      return NULL;

   new_pool->vk_query_type = vk_query_type;
   new_pool->pipeline_stats = pipeline_stats;

   VkQueryPoolCreateInfo pool_create = {0};
   pool_create.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO;
   pool_create.queryType = vk_query_type;
   pool_create.queryCount = NUM_QUERIES;
   pool_create.pipelineStatistics = pipeline_stats;

   VkResult status = VKSCR(CreateQueryPool)(screen->dev, &pool_create, NULL, &new_pool->query_pool);
   if (status != VK_SUCCESS) {
      mesa_loge("ZINK: vkCreateQueryPool failed (%s)", vk_Result_to_str(status));
      FREE(new_pool);
      return NULL;
   }

   list_addtail(&new_pool->list, &ctx->query_pools);
   return new_pool;
}
222
223 static void
224 update_qbo(struct zink_context *ctx, struct zink_query *q);
225 static void
226 reset_qbos(struct zink_context *ctx, struct zink_query *q);
227
228
229 static bool
is_emulated_primgen(const struct zink_query * q)230 is_emulated_primgen(const struct zink_query *q)
231 {
232 return q->type == PIPE_QUERY_PRIMITIVES_GENERATED &&
233 q->vkqtype != VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT;
234 }
235
/* number of vulkan query pools backing this gallium query */
static inline unsigned
get_num_query_pools(struct zink_query *q)
{
   /* emulated primgen needs a pipeline-statistics pool and an xfb pool */
   return is_emulated_primgen(q) ? 2 : 1;
}
243
244 static inline unsigned
get_num_queries(struct zink_query * q)245 get_num_queries(struct zink_query *q)
246 {
247 if (is_emulated_primgen(q))
248 return 2;
249 if (q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
250 return PIPE_MAX_VERTEX_STREAMS;
251 return 1;
252 }
253
254 static inline unsigned
get_num_results(struct zink_query * q)255 get_num_results(struct zink_query *q)
256 {
257 if (q->type < PIPE_QUERY_DRIVER_SPECIFIC &&
258 q->vkqtype == VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT)
259 return 1;
260 switch (q->type) {
261 case PIPE_QUERY_OCCLUSION_COUNTER:
262 case PIPE_QUERY_OCCLUSION_PREDICATE:
263 case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
264 case PIPE_QUERY_TIME_ELAPSED:
265 case PIPE_QUERY_TIMESTAMP:
266 case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE:
267 return 1;
268 case PIPE_QUERY_PRIMITIVES_GENERATED:
269 case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
270 case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
271 case PIPE_QUERY_PRIMITIVES_EMITTED:
272 return 2;
273 default:
274 debug_printf("unknown query: %s\n",
275 util_str_query_type(q->type, true));
276 unreachable("zink: unknown query type");
277 }
278 }
279
280 static void
timestamp_to_nanoseconds(struct zink_screen * screen,uint64_t * timestamp)281 timestamp_to_nanoseconds(struct zink_screen *screen, uint64_t *timestamp)
282 {
283 /* The number of valid bits in a timestamp value is determined by
284 * the VkQueueFamilyProperties::timestampValidBits property of the queue on which the timestamp is written.
285 * - 17.5. Timestamp Queries
286 */
287 if (screen->timestamp_valid_bits < 64)
288 *timestamp &= (1ull << screen->timestamp_valid_bits) - 1;
289
290 /* The number of nanoseconds it takes for a timestamp value to be incremented by 1
291 * can be obtained from VkPhysicalDeviceLimits::timestampPeriod
292 * - 17.5. Timestamp Queries
293 */
294 *timestamp *= (double)screen->info.props.limits.timestampPeriod;
295 }
296
297 static VkQueryType
convert_query_type(struct zink_screen * screen,enum pipe_query_type query_type,bool * precise)298 convert_query_type(struct zink_screen *screen, enum pipe_query_type query_type, bool *precise)
299 {
300 *precise = false;
301 switch (query_type) {
302 case PIPE_QUERY_OCCLUSION_COUNTER:
303 *precise = true;
304 FALLTHROUGH;
305 case PIPE_QUERY_OCCLUSION_PREDICATE:
306 case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
307 return VK_QUERY_TYPE_OCCLUSION;
308 case PIPE_QUERY_TIME_ELAPSED:
309 case PIPE_QUERY_TIMESTAMP:
310 return VK_QUERY_TYPE_TIMESTAMP;
311 case PIPE_QUERY_PRIMITIVES_GENERATED:
312 return screen->info.have_EXT_primitives_generated_query ?
313 VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT :
314 VK_QUERY_TYPE_PIPELINE_STATISTICS;
315 case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE:
316 return VK_QUERY_TYPE_PIPELINE_STATISTICS;
317 case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
318 case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
319 case PIPE_QUERY_PRIMITIVES_EMITTED:
320 return VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT;
321 default:
322 debug_printf("unknown query: %s\n",
323 util_str_query_type(query_type, true));
324 unreachable("zink: unknown query type");
325 }
326 }
327
328 static bool
needs_stats_list(struct zink_query * query)329 needs_stats_list(struct zink_query *query)
330 {
331 return is_emulated_primgen(query) ||
332 query->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE ||
333 query->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE;
334 }
335
336 static bool
is_time_query(struct zink_query * query)337 is_time_query(struct zink_query *query)
338 {
339 return query->type == PIPE_QUERY_TIMESTAMP || query->type == PIPE_QUERY_TIME_ELAPSED;
340 }
341
342 static bool
is_so_overflow_query(struct zink_query * query)343 is_so_overflow_query(struct zink_query *query)
344 {
345 return query->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE || query->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE;
346 }
347
348 static bool
is_bool_query(struct zink_query * query)349 is_bool_query(struct zink_query *query)
350 {
351 return is_so_overflow_query(query) ||
352 query->type == PIPE_QUERY_OCCLUSION_PREDICATE ||
353 query->type == PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE ||
354 query->type == PIPE_QUERY_GPU_FINISHED;
355 }
356
357 static bool
qbo_append(struct pipe_screen * screen,struct zink_query * query)358 qbo_append(struct pipe_screen *screen, struct zink_query *query)
359 {
360 if (query->curr_qbo && query->curr_qbo->list.next)
361 return true;
362 struct zink_query_buffer *qbo = CALLOC_STRUCT(zink_query_buffer);
363 if (!qbo)
364 return false;
365 int num_buffers = get_num_queries(query);
366
367 for (unsigned i = 0; i < num_buffers; i++) {
368 qbo->buffers[i] = pipe_buffer_create(screen, PIPE_BIND_QUERY_BUFFER,
369 PIPE_USAGE_STAGING,
370 /* this is the maximum possible size of the results in a given buffer */
371 (query->type == PIPE_QUERY_TIMESTAMP ? 1 : NUM_QUERIES) * get_num_results(query) * sizeof(uint64_t));
372 if (!qbo->buffers[i])
373 goto fail;
374 }
375 list_addtail(&qbo->list, &query->buffers);
376
377 return true;
378 fail:
379 for (unsigned i = 0; i < num_buffers; i++)
380 pipe_resource_reference(&qbo->buffers[i], NULL);
381 FREE(qbo);
382 return false;
383 }
384
385 static void
unref_vk_pool(struct zink_context * ctx,struct zink_query_pool * pool)386 unref_vk_pool(struct zink_context *ctx, struct zink_query_pool *pool)
387 {
388 if (!pool || --pool->refcount)
389 return;
390 util_dynarray_append(&ctx->bs->dead_querypools, VkQueryPool, pool->query_pool);
391 if (list_is_linked(&pool->list))
392 list_del(&pool->list);
393 FREE(pool);
394 }
395
396 static void
unref_vk_query(struct zink_context * ctx,struct zink_vk_query * vkq)397 unref_vk_query(struct zink_context *ctx, struct zink_vk_query *vkq)
398 {
399 if (!vkq)
400 return;
401 unref_vk_pool(ctx, vkq->pool);
402 vkq->refcount--;
403 if (vkq->refcount == 0)
404 FREE(vkq);
405 }
406
/* free all CPU-side state for a query: its vulkan queries, its result
 * buffers, and the query struct itself; callers must guarantee that no
 * batch still uses the query (asserted below)
 */
static void
destroy_query(struct zink_context *ctx, struct zink_query *query)
{
   ASSERTED struct zink_screen *screen = zink_screen(ctx->base.screen);
   assert(zink_screen_usage_check_completion(screen, query->batch_uses));
   struct zink_query_buffer *qbo, *next;

   /* walk the full dynarray capacity rather than its size: grown-but-unused
    * entries were zeroed when the array was resized (see query_pool_get_range),
    * so unref_vk_query() no-ops on their NULL pointers
    */
   struct zink_query_start *starts = query->starts.data;
   unsigned num_starts = query->starts.capacity / sizeof(struct zink_query_start);
   for (unsigned j = 0; j < num_starts; j++) {
      for (unsigned i = 0; i < PIPE_MAX_VERTEX_STREAMS; i++) {
         unref_vk_query(ctx, starts[j].vkq[i]);
      }
   }

   util_dynarray_fini(&query->starts);
   LIST_FOR_EACH_ENTRY_SAFE(qbo, next, &query->buffers, list) {
      for (unsigned i = 0; i < ARRAY_SIZE(qbo->buffers); i++)
         pipe_resource_reference(&qbo->buffers[i], NULL);
      FREE(qbo);
   }
   pipe_resource_reference((struct pipe_resource**)&query->predicate, NULL);
   FREE(query);
}
431
432 static void
reset_qbo(struct zink_query * q)433 reset_qbo(struct zink_query *q)
434 {
435 q->curr_qbo = list_first_entry(&q->buffers, struct zink_query_buffer, list);
436 q->curr_qbo->num_results = 0;
437 }
438
439 static void
query_pool_get_range(struct zink_context * ctx,struct zink_query * q)440 query_pool_get_range(struct zink_context *ctx, struct zink_query *q)
441 {
442 bool is_timestamp = q->type == PIPE_QUERY_TIMESTAMP;
443 struct zink_query_start *start;
444 int num_queries = get_num_queries(q);
445 if (!is_timestamp || get_num_starts(q) == 0) {
446 size_t size = q->starts.capacity;
447 start = util_dynarray_grow(&q->starts, struct zink_query_start, 1);
448 if (size != q->starts.capacity) {
449 /* when resizing, always zero the new data to avoid garbage */
450 uint8_t *data = q->starts.data;
451 memset(data + size, 0, q->starts.capacity - size);
452 }
453 } else {
454 start = util_dynarray_top_ptr(&q->starts, struct zink_query_start);
455 }
456 start->data = 0;
457
458 unsigned num_pools = get_num_query_pools(q);
459 for (unsigned i = 0; i < num_queries; i++) {
460 int pool_idx = num_pools > 1 ? i : 0;
461 /* try and find the active query for this */
462 struct zink_vk_query *vkq;
463 int xfb_idx = num_queries == 4 ? i : q->index;
464 if ((q->vkqtype == VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT ||
465 (pool_idx == 1)) && ctx->curr_xfb_queries[xfb_idx]) {
466 vkq = ctx->curr_xfb_queries[xfb_idx];
467 vkq->refcount++;
468 vkq->pool->refcount++;
469 } else {
470 struct zink_query_pool *pool = find_or_allocate_qp(ctx, q, pool_idx);
471 if (pool->last_range == NUM_QUERIES) {
472 list_del(&pool->list);
473 pool = find_or_allocate_qp(ctx, q, pool_idx);
474 }
475 vkq = CALLOC_STRUCT(zink_vk_query);
476 if (!vkq) {
477 mesa_loge("ZINK: failed to allocate vkq!");
478 return;
479 }
480
481 pool->refcount++;
482 vkq->refcount = 1;
483 vkq->needs_reset = true;
484 vkq->pool = pool;
485 vkq->started = false;
486 vkq->query_id = pool->last_range++;
487 }
488 unref_vk_query(ctx, start->vkq[i]);
489 start->vkq[i] = vkq;
490 }
491 }
492
/* pipe_context::create_query — allocate a gallium query object; queries with
 * no vulkan backing (driver-specific, GPU_FINISHED, TIMESTAMP_DISJOINT)
 * return early, everything else gets a vk query type and a result buffer
 */
static struct pipe_query *
zink_create_query(struct pipe_context *pctx,
                  unsigned query_type, unsigned index)
{
   struct zink_context *ctx = zink_context(pctx);
   struct zink_screen *screen = zink_screen(pctx->screen);
   struct zink_query *query = CALLOC_STRUCT(zink_query);

   if (!query)
      return NULL;
   list_inithead(&query->buffers);

   query->index = index;
   query->type = query_type;

   /* driver-specific queries (e.g. render-passes) need no vulkan state */
   if (query->type >= PIPE_QUERY_DRIVER_SPECIFIC)
      return (struct pipe_query *)query;

   if (query->type == PIPE_QUERY_GPU_FINISHED || query->type == PIPE_QUERY_TIMESTAMP_DISJOINT)
      return (struct pipe_query *)query;
   query->vkqtype = convert_query_type(screen, query_type, &query->precise);
   if (query->vkqtype == -1)
      return NULL;

   util_dynarray_init(&query->starts, NULL);

   assert(!query->precise || query->vkqtype == VK_QUERY_TYPE_OCCLUSION);

   /* use emulated path for drivers without full support */
   if (query->vkqtype == VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT && index &&
       !screen->info.primgen_feats.primitivesGeneratedQueryWithNonZeroStreams)
      query->vkqtype = VK_QUERY_TYPE_PIPELINE_STATISTICS;

   /* primgen queries may need rasterizer discard force-disabled while active */
   if (query->vkqtype == VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT) {
      query->needs_rast_discard_workaround = !screen->info.primgen_feats.primitivesGeneratedQueryWithRasterizerDiscard;
   } else if (query_type == PIPE_QUERY_PRIMITIVES_GENERATED) {
      query->needs_rast_discard_workaround = true;
   }

   if (!qbo_append(pctx->screen, query))
      goto fail;
   ctx->bs->has_work = true;
   query->needs_reset = true;
   query->predicate_dirty = true;
   if (query->type == PIPE_QUERY_TIMESTAMP) {
      query->active = true;
      /* defer pool reset until end_query since we're guaranteed to be threadsafe then */
      reset_qbo(query);
   }
   return (struct pipe_query *)query;
fail:
   destroy_query(zink_context(pctx), query);
   return NULL;
}
547
548 static void
zink_destroy_query(struct pipe_context * pctx,struct pipe_query * q)549 zink_destroy_query(struct pipe_context *pctx,
550 struct pipe_query *q)
551 {
552 struct zink_query *query = (struct zink_query *)q;
553
554 /* only destroy if this query isn't active on any batches,
555 * otherwise just mark dead and wait
556 */
557 if (query->batch_uses) {
558 query->dead = true;
559 return;
560 }
561
562 destroy_query(zink_context(pctx), query);
563 }
564
565 void
zink_prune_query(struct zink_batch_state * bs,struct zink_query * query)566 zink_prune_query(struct zink_batch_state *bs, struct zink_query *query)
567 {
568 if (!zink_batch_usage_matches(query->batch_uses, bs))
569 return;
570 query->batch_uses = NULL;
571 if (query->dead)
572 destroy_query(bs->ctx, query);
573 }
574
/* accumulate the raw 64-bit values copied back from the query buffers into
 * the gallium result; 'results' holds get_num_results(query) values per
 * recorded start, and 'xfb_results' is the secondary (xfb) buffer used by
 * emulated primgen.
 * NOTE(review): the num_starts parameter is unused here — iteration covers
 * every recorded start via the dynarray itself
 */
static void
check_query_results(struct zink_query *query, union pipe_query_result *result,
                    int num_starts, uint64_t *results, uint64_t *xfb_results)
{
   uint64_t last_val = 0;
   int result_size = get_num_results(query);
   int idx = 0;
   util_dynarray_foreach(&query->starts, struct zink_query_start, start) {
      unsigned i = idx * result_size; /* offset of this start's values */
      idx++;
      switch (query->type) {
      case PIPE_QUERY_OCCLUSION_PREDICATE:
      case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
      case PIPE_QUERY_GPU_FINISHED:
         result->b |= results[i] != 0;
         break;

      case PIPE_QUERY_TIME_ELAPSED:
      case PIPE_QUERY_TIMESTAMP:
         /* the application can sum the differences between all N queries to determine the total execution time.
          * - 17.5. Timestamp Queries
          */
         if (query->type != PIPE_QUERY_TIME_ELAPSED || i)
            result->u64 += results[i] - last_val;
         last_val = results[i];
         break;
      case PIPE_QUERY_OCCLUSION_COUNTER:
         result->u64 += results[i];
         break;
      case PIPE_QUERY_PRIMITIVES_GENERATED:
         if (query->vkqtype == VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT)
            result->u64 += results[i];
         else if (start->have_xfb || query->index)
            /* emulated path with xfb active: use numPrimitivesNeeded from the xfb query */
            result->u64 += xfb_results[i + 1];
         else
            /* if a given draw had a geometry shader, we need to use the first result */
            result->u64 += results[i + !start->have_gs];
         break;
      case PIPE_QUERY_PRIMITIVES_EMITTED:
         /* A query pool created with this type will capture 2 integers -
          * numPrimitivesWritten and numPrimitivesNeeded -
          * for the specified vertex stream output from the last vertex processing stage.
          * - from VK_EXT_transform_feedback spec
          */
         result->u64 += results[i];
         break;
      case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
         /* A query pool created with this type will capture 2 integers -
          * numPrimitivesWritten and numPrimitivesNeeded -
          * for the specified vertex stream output from the last vertex processing stage.
          * - from VK_EXT_transform_feedback spec
          */
         if (start->have_xfb)
            result->b |= results[i] != results[i + 1];
         break;
      case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE:
         switch (query->index) {
         case PIPE_STAT_QUERY_IA_VERTICES:
            /* line loops were emulated with doubled vertices; halve the count */
            result->u64 += start->was_line_loop ? results[i] / 2 : results[i];
            break;
         default:
            result->u64 += results[i];
            break;
         }
         break;

      default:
         debug_printf("unhandled query type: %s\n",
                      util_str_query_type(query->type, true));
         unreachable("unexpected query type");
      }
   }
}
649
/* read back and accumulate the query's results from its qbo chain;
 * returns false if a non-blocking map could not complete (wait=false)
 */
static bool
get_query_result(struct pipe_context *pctx,
                 struct pipe_query *q,
                 bool wait,
                 union pipe_query_result *result)
{
   struct zink_screen *screen = zink_screen(pctx->screen);
   struct zink_query *query = (struct zink_query *)q;
   unsigned flags = PIPE_MAP_READ;

   if (!wait)
      flags |= PIPE_MAP_DONTBLOCK;
   if (query->base.flushed)
      /* this is not a context-safe operation; ensure map doesn't use slab alloc */
      flags |= PIPE_MAP_THREAD_SAFE;

   util_query_clear_result(result, query->type);

   int num_starts = get_num_starts(query);
   /* no results: return zero */
   if (!num_starts)
      return true;
   int result_size = get_num_results(query) * sizeof(uint64_t);
   int num_maps = get_num_queries(query);

   struct zink_query_buffer *qbo;
   struct pipe_transfer *xfer[PIPE_MAX_VERTEX_STREAMS] = { 0 };
   LIST_FOR_EACH_ENTRY(qbo, &query->buffers, list) {
      uint64_t *results[PIPE_MAX_VERTEX_STREAMS] = { NULL, NULL };
      bool is_timestamp = query->type == PIPE_QUERY_TIMESTAMP;
      if (!qbo->num_results)
         continue;

      /* map every per-stream buffer of this qbo; any failure unwinds all maps */
      for (unsigned i = 0; i < num_maps; i++) {
         results[i] = pipe_buffer_map_range(pctx, qbo->buffers[i], 0,
                                            (is_timestamp ? 1 : qbo->num_results) * result_size, flags, &xfer[i]);
         if (!results[i]) {
            if (wait)
               debug_printf("zink: qbo read failed!");
            goto fail;
         }
      }
      if (query->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) {
         /* check each vertex stream until an overflow is found */
         for (unsigned i = 0; i < PIPE_MAX_VERTEX_STREAMS && !result->b; i++) {
            check_query_results(query, result, num_starts, results[i], NULL);
         }
      } else
         check_query_results(query, result, num_starts, results[0], results[1]);

      for (unsigned i = 0 ; i < num_maps; i++)
         pipe_buffer_unmap(pctx, xfer[i]);

      /* if overflow is detected we can stop */
      if (query->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE && result->b)
         break;
   }

   if (is_time_query(query))
      timestamp_to_nanoseconds(screen, &result->u64);

   return true;
fail:
   /* unmap whatever was mapped before the failure */
   for (unsigned i = 0 ; i < num_maps; i++)
      if (xfer[i])
         pipe_buffer_unmap(pctx, xfer[i]);
   return false;
}
717
718 static void
force_cpu_read(struct zink_context * ctx,struct pipe_query * pquery,enum pipe_query_value_type result_type,struct pipe_resource * pres,unsigned offset)719 force_cpu_read(struct zink_context *ctx, struct pipe_query *pquery, enum pipe_query_value_type result_type, struct pipe_resource *pres, unsigned offset)
720 {
721 struct pipe_context *pctx = &ctx->base;
722 unsigned result_size = result_type <= PIPE_QUERY_TYPE_U32 ? sizeof(uint32_t) : sizeof(uint64_t);
723 struct zink_query *query = (struct zink_query*)pquery;
724 union pipe_query_result result = {0};
725
726 if (query->needs_update)
727 update_qbo(ctx, query);
728
729 bool success = get_query_result(pctx, pquery, true, &result);
730 if (!success) {
731 debug_printf("zink: getting query result failed\n");
732 return;
733 }
734
735 if (result_type <= PIPE_QUERY_TYPE_U32) {
736 uint32_t u32;
737 uint32_t limit;
738 if (result_type == PIPE_QUERY_TYPE_I32)
739 limit = INT_MAX;
740 else
741 limit = UINT_MAX;
742 if (is_bool_query(query))
743 u32 = result.b;
744 else
745 u32 = MIN2(limit, result.u64);
746 tc_buffer_write(pctx, pres, offset, result_size, &u32);
747 } else {
748 uint64_t u64;
749 if (is_bool_query(query))
750 u64 = result.b;
751 else
752 u64 = result.u64;
753 tc_buffer_write(pctx, pres, offset, result_size, &u64);
754 }
755 }
756
/* record vkCmdCopyQueryPoolResults copying num_results consecutive results
 * starting at query_id into 'res' at 'offset'; handles render-pass exit,
 * batch tracking, and access-flag bookkeeping for the destination
 */
static void
copy_pool_results_to_buffer(struct zink_context *ctx, struct zink_query *query, VkQueryPool pool,
                            unsigned query_id, struct zink_resource *res, unsigned offset,
                            int num_results, VkQueryResultFlags flags)
{
   unsigned type_size = (flags & VK_QUERY_RESULT_64_BIT) ? sizeof(uint64_t) : sizeof(uint32_t);
   unsigned base_result_size = get_num_results(query) * type_size;
   unsigned result_size = base_result_size * num_results;
   /* availability adds one extra value after each result */
   if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
      result_size += type_size;

   bool marker = zink_cmd_debug_marker_begin(ctx, VK_NULL_HANDLE, "update_qbo(%s: id=%u, num_results=%d)", vk_QueryType_to_str(query->vkqtype), query_id, num_results);

   /* copies cannot be recorded inside a render pass */
   zink_batch_no_rp(ctx);
   /* if it's a single query that doesn't need special handling, we can copy it and be done */
   zink_batch_reference_resource_rw(ctx, res, true);
   res->obj->access = VK_ACCESS_TRANSFER_WRITE_BIT;
   res->obj->access_stage = VK_PIPELINE_STAGE_TRANSFER_BIT;
   util_range_add(&res->base.b, &res->valid_buffer_range, offset, offset + result_size);
   assert(query_id < NUM_QUERIES);
   res->obj->unordered_read = res->obj->unordered_write = false;
   ctx->bs->has_work = true;
   VKCTX(CmdCopyQueryPoolResults)(ctx->bs->cmdbuf, pool, query_id, num_results, res->obj->buffer,
                                  offset, base_result_size, flags);
   zink_cmd_debug_marker_end(ctx, ctx->bs->cmdbuf, marker);
}
783
784 static void
copy_results_to_buffer(struct zink_context * ctx,struct zink_query * query,struct zink_resource * res,unsigned offset,int num_results,VkQueryResultFlags flags)785 copy_results_to_buffer(struct zink_context *ctx, struct zink_query *query, struct zink_resource *res, unsigned offset, int num_results, VkQueryResultFlags flags)
786 {
787 struct zink_query_start *start = util_dynarray_top_ptr(&query->starts, struct zink_query_start);
788 copy_pool_results_to_buffer(ctx, query, start->vkq[0]->pool->query_pool, start->vkq[0]->query_id, res, offset, num_results, flags);
789 }
790
791
792 static void
reset_query_range(struct zink_context * ctx,struct zink_query * q)793 reset_query_range(struct zink_context *ctx, struct zink_query *q)
794 {
795 int num_queries = get_num_queries(q);
796 struct zink_query_start *start = util_dynarray_top_ptr(&q->starts, struct zink_query_start);
797 for (unsigned i = 0; i < num_queries; i++) {
798 reset_vk_query_pool(ctx, start->vkq[i]);
799 }
800 }
801
802 static void
reset_qbos(struct zink_context * ctx,struct zink_query * q)803 reset_qbos(struct zink_context *ctx, struct zink_query *q)
804 {
805 if (q->needs_update)
806 update_qbo(ctx, q);
807
808 q->needs_reset = false;
809 /* create new qbo for non-timestamp queries:
810 * timestamp queries should never need more than 2 entries in the qbo
811 */
812 if (q->type == PIPE_QUERY_TIMESTAMP)
813 return;
814 if (qbo_append(ctx->base.screen, q))
815 reset_qbo(q);
816 else
817 debug_printf("zink: qbo alloc failed on reset!");
818 }
819
820 static inline unsigned
get_buffer_offset(struct zink_query * q)821 get_buffer_offset(struct zink_query *q)
822 {
823 return (get_num_starts(q) - 1) * get_num_results(q) * sizeof(uint64_t);
824 }
825
/* copy all not-yet-copied starts' results from their vulkan query pools into
 * the current qbo, merging runs of consecutive query ids within the same
 * pool into a single vkCmdCopyQueryPoolResults call
 */
static void
update_qbo(struct zink_context *ctx, struct zink_query *q)
{
   struct zink_query_buffer *qbo = q->curr_qbo;
   unsigned num_starts = get_num_starts(q);
   struct zink_query_start *starts = q->starts.data;
   bool is_timestamp = q->type == PIPE_QUERY_TIMESTAMP;
   /* timestamp queries just write to offset 0 always */
   int num_queries = get_num_queries(q);
   unsigned num_results = qbo->num_results; /* snapshot to compute how many were added */
   for (unsigned i = 0; i < num_queries; i++) {
      unsigned start_offset = q->start_offset;
      while (start_offset < num_starts) {
         unsigned num_merged_copies = 0;
         VkQueryPool qp = starts[start_offset].vkq[i]->pool->query_pool;
         unsigned base_id = starts[start_offset].vkq[i]->query_id;
         /* iterate over all the starts to see how many can be merged */
         for (unsigned j = start_offset; j < num_starts; j++, num_merged_copies++) {
            if (starts[j].vkq[i]->pool->query_pool != qp || starts[j].vkq[i]->query_id != base_id + num_merged_copies)
               break;
         }
         assert(num_merged_copies);
         unsigned cur_offset = start_offset * get_num_results(q) * sizeof(uint64_t);
         unsigned offset = is_timestamp ? 0 : cur_offset;
         copy_pool_results_to_buffer(ctx, q, starts[start_offset].vkq[i]->pool->query_pool, starts[start_offset].vkq[i]->query_id,
                                     zink_resource(qbo->buffers[i]),
                                     offset,
                                     num_merged_copies,
                                     /*
                                        there is an implicit execution dependency from
                                        each such query command to all query commands previously submitted to the same queue. There
                                        is one significant exception to this; if the flags parameter of vkCmdCopyQueryPoolResults does not
                                        include VK_QUERY_RESULT_WAIT_BIT, execution of vkCmdCopyQueryPoolResults may happen-before
                                        the results of vkCmdEndQuery are available.

                                      * - Chapter 18. Queries
                                      */
                                     VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);
         if (!is_timestamp)
            q->curr_qbo->num_results += num_merged_copies;
         start_offset += num_merged_copies;
      }
   }
   /* advance past the starts copied this round */
   q->start_offset += q->curr_qbo->num_results - num_results;


   if (is_timestamp)
      q->curr_qbo->num_results = 1;

   q->needs_update = false;
}
877
static void
/* Emit the vk commands that actually start a query on the current cmdbuf
 * and register the query with the current batch state.
 */
begin_query(struct zink_context *ctx, struct zink_query *q)
{
   VkQueryControlFlags flags = 0;

   /* fake queries (disjoint, driver-specific) have no vk object to start */
   if (q->type == PIPE_QUERY_TIMESTAMP_DISJOINT || q->type >= PIPE_QUERY_DRIVER_SPECIFIC)
      return;

   if (q->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE && q->index == PIPE_STAT_QUERY_CS_INVOCATIONS && ctx->in_rp) {
      /* refuse to start CS queries in renderpasses */
      if (!list_is_linked(&q->active_list))
         list_addtail(&q->active_list, &ctx->suspended_queries);
      q->suspended = true;
      return;
   }

   /* grab a fresh vk query range for this start */
   update_query_id(ctx, q);
   q->predicate_dirty = true;
   if (q->needs_reset)
      reset_qbos(ctx, q);
   reset_query_range(ctx, q);
   q->active = true;
   ctx->bs->has_work = true;

   struct zink_query_start *start = util_dynarray_top_ptr(&q->starts, struct zink_query_start);
   if (q->type == PIPE_QUERY_TIME_ELAPSED) {
      /* elapsed time = end timestamp - begin timestamp; write the begin one here */
      VKCTX(CmdWriteTimestamp)(ctx->bs->cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, start->vkq[0]->pool->query_pool, start->vkq[0]->query_id);
      if (!ctx->in_rp)
         update_qbo(ctx, q);
      zink_batch_usage_set(&q->batch_uses, ctx->bs);
      _mesa_set_add(&ctx->bs->active_queries, q);
   }
   /* ignore the rest of begin_query for timestamps */
   if (is_time_query(q))
      return;

   /* A query must either begin and end inside the same subpass of a render pass
      instance, or must both begin and end outside of a render pass instance
      (i.e. contain entire render pass instances).
      - 18.2. Query Operation
    */
   q->started_in_rp = ctx->in_rp;

   if (q->precise)
      flags |= VK_QUERY_CONTROL_PRECISE_BIT;

   if (q->type == PIPE_QUERY_PRIMITIVES_EMITTED ||
       is_emulated_primgen(q) ||
       q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE) {
      /* xfb-based queries prefer the vkq[1] slot when present */
      struct zink_vk_query *vkq = start->vkq[1] ? start->vkq[1] : start->vkq[0];
      assert(!ctx->curr_xfb_queries[q->index] || ctx->curr_xfb_queries[q->index] == vkq);
      ctx->curr_xfb_queries[q->index] = vkq;

      begin_vk_query_indexed(ctx, vkq, q->index, flags);
   } else if (q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) {
      /* "any" overflow must watch every vertex stream */
      for (unsigned i = 0; i < PIPE_MAX_VERTEX_STREAMS; i++) {
         assert(!ctx->curr_xfb_queries[i] || ctx->curr_xfb_queries[i] == start->vkq[i]);
         ctx->curr_xfb_queries[i] = start->vkq[i];

         begin_vk_query_indexed(ctx, start->vkq[i], i, flags);
      }
   } else if (q->vkqtype == VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT) {
      begin_vk_query_indexed(ctx, start->vkq[0], q->index, flags);
   }
   /* indexed query types were begun above; everything else uses plain vkCmdBeginQuery */
   if (q->vkqtype != VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT && q->vkqtype != VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT)
      VKCTX(CmdBeginQuery)(ctx->bs->cmdbuf, start->vkq[0]->pool->query_pool, start->vkq[0]->query_id, flags);
   if (q->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE && q->index == PIPE_STAT_QUERY_IA_VERTICES) {
      /* only one IA-vertices query can be active at a time */
      assert(!ctx->vertices_query);
      ctx->vertices_query = q;
   }
   if (needs_stats_list(q))
      list_addtail(&q->stats_list, &ctx->primitives_generated_queries);
   zink_batch_usage_set(&q->batch_uses, ctx->bs);
   _mesa_set_add(&ctx->bs->active_queries, q);
   if (q->needs_rast_discard_workaround) {
      /* discard must be disabled while the query runs; swap in a null fs if needed */
      ctx->primitives_generated_active = true;
      if (zink_set_rasterizer_discard(ctx, true))
         zink_set_null_fs(ctx);
   }
}
958
959 static bool
zink_begin_query(struct pipe_context * pctx,struct pipe_query * q)960 zink_begin_query(struct pipe_context *pctx,
961 struct pipe_query *q)
962 {
963 struct zink_query *query = (struct zink_query *)q;
964 struct zink_context *ctx = zink_context(pctx);
965
966 /* drop all past results */
967 reset_qbo(query);
968
969 if (query->type < PIPE_QUERY_DRIVER_SPECIFIC && query->vkqtype == VK_QUERY_TYPE_OCCLUSION)
970 ctx->occlusion_query_active = true;
971 if (query->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE && query->index == PIPE_STAT_QUERY_PS_INVOCATIONS)
972 ctx->fs_query_active = true;
973
974 query->predicate_dirty = true;
975
976 util_dynarray_clear(&query->starts);
977 query->start_offset = 0;
978
979 if (ctx->in_rp || (query->type == PIPE_QUERY_TIME_ELAPSED)) {
980 begin_query(ctx, query);
981 } else {
982 /* never directly start queries out of renderpass, always defer */
983 list_addtail(&query->active_list, &ctx->suspended_queries);
984 query->suspended = true;
985 if (query->type == PIPE_QUERY_PRIMITIVES_GENERATED)
986 ctx->primitives_generated_suspended = query->needs_rast_discard_workaround;
987 }
988
989 return true;
990 }
991
992 static void
update_query_id(struct zink_context * ctx,struct zink_query * q)993 update_query_id(struct zink_context *ctx, struct zink_query *q)
994 {
995 query_pool_get_range(ctx, q);
996 ctx->bs->has_work = true;
997 q->has_draws = false;
998 }
999
static void
/* Emit the vk commands that stop a running (non-timestamp) query on the
 * current cmdbuf; must be called in the same renderpass state the query
 * was begun in.
 */
end_query(struct zink_context *ctx, struct zink_query *q)
{
   /* fake queries have no vk object to end */
   if (q->type == PIPE_QUERY_TIMESTAMP_DISJOINT || q->type >= PIPE_QUERY_DRIVER_SPECIFIC)
      return;

   ASSERTED struct zink_query_buffer *qbo = q->curr_qbo;
   assert(qbo);
   assert(!is_time_query(q));
   q->active = false;
   /* vk requires begin/end to be both inside or both outside a renderpass */
   assert(q->started_in_rp == ctx->in_rp);
   struct zink_query_start *start = util_dynarray_top_ptr(&q->starts, struct zink_query_start);

   if (q->type == PIPE_QUERY_PRIMITIVES_EMITTED ||
       is_emulated_primgen(q) ||
       q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE) {
      /* end the same vkq slot that begin_query selected (vkq[1] if present) */
      struct zink_vk_query *vkq = start->vkq[1] ? start->vkq[1] : start->vkq[0];

      end_vk_query_indexed(ctx, vkq, q->index);
      ctx->curr_xfb_queries[q->index] = NULL;
   }
   else if (q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) {
      /* all streams were begun, so all must be ended */
      for (unsigned i = 0; i < PIPE_MAX_VERTEX_STREAMS; i++) {
         end_vk_query_indexed(ctx, start->vkq[i], i);
         ctx->curr_xfb_queries[i] = NULL;
      }
   } else if (q->vkqtype == VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT) {
      end_vk_query_indexed(ctx, start->vkq[0], q->index);
   }
   /* indexed query types were ended above; everything else uses plain vkCmdEndQuery */
   if (q->vkqtype != VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT &&
       q->vkqtype != VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT && !is_time_query(q))
      VKCTX(CmdEndQuery)(ctx->bs->cmdbuf, start->vkq[0]->pool->query_pool, start->vkq[0]->query_id);

   if (q->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE &&
       q->index == PIPE_STAT_QUERY_IA_VERTICES)
      ctx->vertices_query = NULL;

   if (needs_stats_list(q))
      list_delinit(&q->stats_list);

   /* results for this start now exist and must be copied to the qbo later */
   q->needs_update = true;
   if (q->needs_rast_discard_workaround) {
      /* restore rasterizer discard / fs state that begin_query overrode */
      ctx->primitives_generated_active = false;
      if (zink_set_rasterizer_discard(ctx, false))
         zink_set_null_fs(ctx);
   }
}
1047
1048 static bool
zink_end_query(struct pipe_context * pctx,struct pipe_query * q)1049 zink_end_query(struct pipe_context *pctx,
1050 struct pipe_query *q)
1051 {
1052 struct zink_context *ctx = zink_context(pctx);
1053 struct zink_query *query = (struct zink_query *)q;
1054
1055 if (query->type == PIPE_QUERY_TIMESTAMP_DISJOINT || query->type >= PIPE_QUERY_DRIVER_SPECIFIC)
1056 return true;
1057
1058 if (query->type == PIPE_QUERY_GPU_FINISHED) {
1059 pctx->flush(pctx, &query->fence, PIPE_FLUSH_DEFERRED);
1060 return true;
1061 }
1062
1063 /* FIXME: this can be called from a thread, but it needs to write to the cmdbuf */
1064 threaded_context_unwrap_sync(pctx);
1065
1066 if (query->vkqtype == VK_QUERY_TYPE_OCCLUSION)
1067 ctx->occlusion_query_active = true;
1068 if (query->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE && query->index == PIPE_STAT_QUERY_PS_INVOCATIONS)
1069 ctx->fs_query_active = true;
1070
1071 bool unset_null_fs = query->type == PIPE_QUERY_PRIMITIVES_GENERATED && (ctx->primitives_generated_suspended || ctx->primitives_generated_active);
1072 if (query->type == PIPE_QUERY_PRIMITIVES_GENERATED)
1073 ctx->primitives_generated_suspended = false;
1074
1075 if (list_is_linked(&query->stats_list))
1076 list_delinit(&query->stats_list);
1077 if (query->suspended) {
1078 list_delinit(&query->active_list);
1079 query->suspended = false;
1080 }
1081 if (is_time_query(query)) {
1082 update_query_id(ctx, query);
1083 if (query->needs_reset)
1084 reset_qbos(ctx, query);
1085 reset_query_range(ctx, query);
1086 struct zink_query_start *start = util_dynarray_top_ptr(&query->starts, struct zink_query_start);
1087 VKCTX(CmdWriteTimestamp)(ctx->bs->cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
1088 start->vkq[0]->pool->query_pool, start->vkq[0]->query_id);
1089 ctx->bs->has_work = true;
1090 zink_batch_usage_set(&query->batch_uses, ctx->bs);
1091 _mesa_set_add(&ctx->bs->active_queries, query);
1092 query->needs_update = true;
1093 } else if (query->active) {
1094 /* this should be a tc-optimized query end that doesn't split a renderpass */
1095 if (!query->started_in_rp)
1096 zink_batch_no_rp(ctx);
1097 end_query(ctx, query);
1098 }
1099
1100 if (unset_null_fs)
1101 zink_set_null_fs(ctx);
1102
1103 return true;
1104 }
1105
1106 static bool
zink_get_query_result(struct pipe_context * pctx,struct pipe_query * q,bool wait,union pipe_query_result * result)1107 zink_get_query_result(struct pipe_context *pctx,
1108 struct pipe_query *q,
1109 bool wait,
1110 union pipe_query_result *result)
1111 {
1112 struct zink_query *query = (void*)q;
1113 struct zink_context *ctx = zink_context(pctx);
1114
1115 if (query->type == PIPE_QUERY_TIMESTAMP_DISJOINT) {
1116 result->timestamp_disjoint.frequency = zink_screen(pctx->screen)->info.props.limits.timestampPeriod * 1000000.0;
1117 result->timestamp_disjoint.disjoint = false;
1118 return true;
1119 }
1120
1121 if (query->type == PIPE_QUERY_GPU_FINISHED) {
1122 struct pipe_screen *screen = pctx->screen;
1123
1124 result->b = screen->fence_finish(screen, query->base.flushed ? NULL : pctx,
1125 query->fence, wait ? OS_TIMEOUT_INFINITE : 0);
1126 return result->b;
1127 }
1128
1129 if (query->type == ZINK_QUERY_RENDER_PASSES) {
1130 result->u64 = ctx->hud.render_passes;
1131 ctx->hud.render_passes = 0;
1132 return true;
1133 }
1134
1135 if (query->needs_update) {
1136 assert(!ctx->tc || !threaded_query(q)->flushed);
1137 update_qbo(ctx, query);
1138 }
1139
1140 if (zink_batch_usage_is_unflushed(query->batch_uses)) {
1141 if (!threaded_query(q)->flushed)
1142 pctx->flush(pctx, NULL, 0);
1143 if (!wait)
1144 return false;
1145 }
1146
1147 return get_query_result(pctx, q, wait, result);
1148 }
1149
1150 static void
suspend_query(struct zink_context * ctx,struct zink_query * query)1151 suspend_query(struct zink_context *ctx, struct zink_query *query)
1152 {
1153 /* if a query isn't active here then we don't need to reactivate it on the next batch */
1154 if (query->active && !is_time_query(query))
1155 end_query(ctx, query);
1156 if (query->needs_update && !ctx->in_rp)
1157 update_qbo(ctx, query);
1158 }
1159
1160 static void
suspend_queries(struct zink_context * ctx,bool rp_only)1161 suspend_queries(struct zink_context *ctx, bool rp_only)
1162 {
1163 set_foreach(&ctx->bs->active_queries, entry) {
1164 struct zink_query *query = (void*)entry->key;
1165 if (query->suspended || (rp_only && !query->started_in_rp))
1166 continue;
1167 if (query->active && !is_time_query(query)) {
1168 /* the fence is going to steal the set off the batch, so we have to copy
1169 * the active queries onto a list
1170 */
1171 list_addtail(&query->active_list, &ctx->suspended_queries);
1172 query->suspended = true;
1173 if (query->type == PIPE_QUERY_PRIMITIVES_GENERATED)
1174 ctx->primitives_generated_suspended = query->needs_rast_discard_workaround;
1175 }
1176 suspend_query(ctx, query);
1177 }
1178 }
1179
1180 void
zink_suspend_queries(struct zink_context * ctx)1181 zink_suspend_queries(struct zink_context *ctx)
1182 {
1183 suspend_queries(ctx, false);
1184 }
1185
1186 void
zink_resume_queries(struct zink_context * ctx)1187 zink_resume_queries(struct zink_context *ctx)
1188 {
1189 struct zink_query *query, *next;
1190 LIST_FOR_EACH_ENTRY_SAFE(query, next, &ctx->suspended_queries, active_list) {
1191 list_delinit(&query->active_list);
1192 query->suspended = false;
1193 if (query->type == PIPE_QUERY_PRIMITIVES_GENERATED)
1194 ctx->primitives_generated_suspended = false;
1195 if (query->needs_update && !ctx->in_rp)
1196 update_qbo(ctx, query);
1197 begin_query(ctx, query);
1198 }
1199 }
1200
1201 void
zink_resume_cs_query(struct zink_context * ctx)1202 zink_resume_cs_query(struct zink_context *ctx)
1203 {
1204 struct zink_query *query, *next;
1205 LIST_FOR_EACH_ENTRY_SAFE(query, next, &ctx->suspended_queries, active_list) {
1206 if (query->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE && query->index == PIPE_STAT_QUERY_CS_INVOCATIONS) {
1207 list_delinit(&query->active_list);
1208 query->suspended = false;
1209 begin_query(ctx, query);
1210 }
1211 }
1212 }
1213
1214 void
zink_query_renderpass_suspend(struct zink_context * ctx)1215 zink_query_renderpass_suspend(struct zink_context *ctx)
1216 {
1217 suspend_queries(ctx, true);
1218 }
1219
void
/* Called around draws: if the gs/xfb/line-loop state relevant to active
 * statistics queries changed since their last draw, suspend and resume
 * them so each query start has consistent per-draw metadata, then record
 * the current state on the (possibly new) last start.
 */
zink_query_update_gs_states(struct zink_context *ctx)
{
   struct zink_query *query;
   bool suspendall = false;
   bool have_gs = !!ctx->gfx_stages[MESA_SHADER_GEOMETRY];
   bool have_xfb = !!ctx->num_so_targets;

   /* pass 1: detect any mismatch between recorded and current state */
   LIST_FOR_EACH_ENTRY(query, &ctx->primitives_generated_queries, stats_list) {
      struct zink_query_start *last_start = util_dynarray_top_ptr(&query->starts, struct zink_query_start);
      assert(query->active);
      /* has_draws means last_start's flags were actually recorded for a draw */
      if (query->has_draws) {
         if (last_start->have_gs != have_gs ||
             last_start->have_xfb != have_xfb) {
            suspendall = true;
         }
      }
   }

   if (ctx->vertices_query) {
      query = ctx->vertices_query;
      struct zink_query_start *last_start = util_dynarray_top_ptr(&query->starts, struct zink_query_start);
      assert(query->active);
      if (last_start->was_line_loop != ctx->was_line_loop) {
         suspendall = true;
      }
   }
   /* suspend+resume allocates a fresh start for every active query */
   if (suspendall) {
      zink_suspend_queries(ctx);
      zink_resume_queries(ctx);
   }

   /* pass 2: stamp the current state onto each query's newest start */
   LIST_FOR_EACH_ENTRY(query, &ctx->primitives_generated_queries, stats_list) {
      struct zink_query_start *last_start = util_dynarray_top_ptr(&query->starts, struct zink_query_start);
      last_start->have_gs = have_gs;
      last_start->have_xfb = have_xfb;
      query->has_draws = true;
   }
   if (ctx->vertices_query) {
      query = ctx->vertices_query;
      struct zink_query_start *last_start = util_dynarray_top_ptr(&query->starts, struct zink_query_start);
      last_start->was_line_loop = ctx->was_line_loop;
      query->has_draws = true;
   }
}
1265
1266 static void
zink_set_active_query_state(struct pipe_context * pctx,bool enable)1267 zink_set_active_query_state(struct pipe_context *pctx, bool enable)
1268 {
1269 struct zink_context *ctx = zink_context(pctx);
1270 /* unordered blits already disable queries */
1271 if (ctx->unordered_blitting)
1272 return;
1273 ctx->queries_disabled = !enable;
1274
1275 if (ctx->queries_disabled)
1276 zink_suspend_queries(ctx);
1277 else if (ctx->in_rp)
1278 zink_resume_queries(ctx);
1279 }
1280
1281 void
zink_query_sync(struct zink_context * ctx,struct zink_query * query)1282 zink_query_sync(struct zink_context *ctx, struct zink_query *query)
1283 {
1284 if (query->needs_update)
1285 update_qbo(ctx, query);
1286 }
1287
1288 void
zink_start_conditional_render(struct zink_context * ctx)1289 zink_start_conditional_render(struct zink_context *ctx)
1290 {
1291 if (unlikely(!zink_screen(ctx->base.screen)->info.have_EXT_conditional_rendering) || ctx->render_condition.active)
1292 return;
1293 VkConditionalRenderingFlagsEXT begin_flags = 0;
1294 if (ctx->render_condition.inverted)
1295 begin_flags = VK_CONDITIONAL_RENDERING_INVERTED_BIT_EXT;
1296 VkConditionalRenderingBeginInfoEXT begin_info = {0};
1297 begin_info.sType = VK_STRUCTURE_TYPE_CONDITIONAL_RENDERING_BEGIN_INFO_EXT;
1298 begin_info.buffer = ctx->render_condition.query->predicate->obj->buffer;
1299 begin_info.flags = begin_flags;
1300 ctx->render_condition.query->predicate->obj->unordered_read = false;
1301 VKCTX(CmdBeginConditionalRenderingEXT)(ctx->bs->cmdbuf, &begin_info);
1302 zink_batch_reference_resource_rw(ctx, ctx->render_condition.query->predicate, false);
1303 ctx->render_condition.active = true;
1304 }
1305
1306 void
zink_stop_conditional_render(struct zink_context * ctx)1307 zink_stop_conditional_render(struct zink_context *ctx)
1308 {
1309 zink_clear_apply_conditionals(ctx);
1310 if (unlikely(!zink_screen(ctx->base.screen)->info.have_EXT_conditional_rendering) || !ctx->render_condition.active)
1311 return;
1312 VKCTX(CmdEndConditionalRenderingEXT)(ctx->bs->cmdbuf);
1313 ctx->render_condition.active = false;
1314 }
1315
static void
/* Gallium render_condition hook: stores the query result into a small
 * predicate buffer that VK_EXT_conditional_rendering reads from, then arms
 * (or disarms, for query==NULL) conditional rendering.
 */
zink_render_condition(struct pipe_context *pctx,
                      struct pipe_query *pquery,
                      bool condition,
                      enum pipe_render_cond_flag mode)
{
   struct zink_context *ctx = zink_context(pctx);
   struct zink_query *query = (struct zink_query *)pquery;
   /* predicate copies/writes below must happen outside a renderpass */
   zink_batch_no_rp(ctx);
   VkQueryResultFlagBits flags = 0;

   ctx->bs->has_work = true;
   if (query == NULL) {
      /* force conditional clears if they exist */
      if (ctx->clears_enabled && !ctx->in_rp)
         zink_batch_rp(ctx);
      zink_stop_conditional_render(ctx);
      ctx->render_condition_active = false;
      ctx->render_condition.query = NULL;
      return;
   }

   if (!query->predicate) {
      struct pipe_resource *pres;

      /* need to create a vulkan buffer to copy the data into */
      pres = pipe_buffer_create(pctx->screen, PIPE_BIND_QUERY_BUFFER, PIPE_USAGE_DEFAULT, sizeof(uint64_t));
      if (!pres)
         return;

      query->predicate = zink_resource(pres);
   }
   if (query->predicate_dirty) {
      struct zink_resource *res = query->predicate;

      if (mode == PIPE_RENDER_COND_WAIT || mode == PIPE_RENDER_COND_BY_REGION_WAIT)
         flags |= VK_QUERY_RESULT_WAIT_BIT;

      flags |= VK_QUERY_RESULT_64_BIT;
      int num_results = get_num_starts(query);
      if (num_results) {
         if (!is_emulated_primgen(query) &&
             !is_so_overflow_query(query) &&
             num_results == 1) {
            /* single result from a simple query type: copy directly on the gpu */
            copy_results_to_buffer(ctx, query, res, 0, num_results, flags);
         } else {
            /* these need special handling */
            force_cpu_read(ctx, pquery, PIPE_QUERY_TYPE_U32, &res->base.b, 0);
         }
      } else {
         /* query was never started: predicate is trivially zero */
         uint64_t zero = 0;
         tc_buffer_write(pctx, &res->base.b, 0, sizeof(zero), &zero);
      }
      /* make the predicate buffer visible to the conditional-rendering stage */
      zink_screen(ctx->base.screen)->buffer_barrier(ctx, res, VK_ACCESS_CONDITIONAL_RENDERING_READ_BIT_EXT, VK_PIPELINE_STAGE_CONDITIONAL_RENDERING_BIT_EXT);
      query->predicate_dirty = false;
   }
   ctx->render_condition.inverted = condition;
   ctx->render_condition_active = true;
   ctx->render_condition.query = query;
   if (ctx->in_rp)
      zink_start_conditional_render(ctx);
}
1378
static void
/* Gallium get_query_result_resource hook: writes a query's result (or its
 * availability, for index==-1) into a caller-provided buffer at 'offset'.
 */
zink_get_query_result_resource(struct pipe_context *pctx,
                               struct pipe_query *pquery,
                               enum pipe_query_flags flags,
                               enum pipe_query_value_type result_type,
                               int index,
                               struct pipe_resource *pres,
                               unsigned offset)
{
   struct zink_context *ctx = zink_context(pctx);
   struct zink_screen *screen = zink_screen(pctx->screen);
   struct zink_query *query = (struct zink_query*)pquery;
   struct zink_resource *res = zink_resource(pres);
   /* 32 vs 64 bit destination, chosen by the caller's result_type */
   unsigned result_size = result_type <= PIPE_QUERY_TYPE_U32 ? sizeof(uint32_t) : sizeof(uint64_t);
   VkQueryResultFlagBits size_flags = result_type <= PIPE_QUERY_TYPE_U32 ? 0 : VK_QUERY_RESULT_64_BIT;
   unsigned num_queries = get_num_starts(query);

   /* it's possible that a query may have no data at all: write out zeroes to the buffer and return */
   uint64_t u64[4] = {0};
   /* with WITH_AVAILABILITY, the availability word follows the result data */
   unsigned src_offset = result_size * get_num_results(query);
   if (!num_queries) {
      tc_buffer_write(pctx, pres, offset, result_size, (unsigned char*)u64 + src_offset);
      return;
   }

   if (index == -1) {
      /* VK_QUERY_RESULT_WITH_AVAILABILITY_BIT will ALWAYS write some kind of result data
       * in addition to the availability result, which is a problem if we're just trying to get availability data
       *
       * if we know that there's no valid buffer data in the preceding buffer range, then we can just
       * stomp on it with a glorious queued buffer copy instead of forcing a stall to manually write to the
       * buffer
       */

      VkQueryResultFlags flag = is_time_query(query) ? 0 : VK_QUERY_RESULT_PARTIAL_BIT;
      if (zink_batch_usage_check_completion(ctx, query->batch_uses)) {
         /* work already done on the gpu: read availability directly on the cpu */
         struct zink_query_start *start = util_dynarray_top_ptr(&query->starts, struct zink_query_start);
         unsigned query_id = start->vkq[0]->query_id;
         VkResult result = VKCTX(GetQueryPoolResults)(screen->dev, start->vkq[0]->pool->query_pool, query_id, 1,
                                                      sizeof(u64), u64, 0, size_flags | VK_QUERY_RESULT_WITH_AVAILABILITY_BIT | flag);
         if (result == VK_SUCCESS) {
            tc_buffer_write(pctx, pres, offset, result_size, (unsigned char*)u64 + src_offset);
            return;
         } else {
            mesa_loge("ZINK: vkGetQueryPoolResults failed (%s)", vk_Result_to_str(result));
         }
      }
      /* gpu path: copy result+availability to a staging buffer, then copy just the availability word out */
      struct pipe_resource *staging = pipe_buffer_create(pctx->screen, 0, PIPE_USAGE_STAGING, src_offset + result_size);
      copy_results_to_buffer(ctx, query, zink_resource(staging), 0, 1, size_flags | VK_QUERY_RESULT_WITH_AVAILABILITY_BIT | flag);
      zink_copy_buffer(ctx, res, zink_resource(staging), offset, result_size * get_num_results(query), result_size);
      pipe_resource_reference(&staging, NULL);
      return;
   }

   /*
      there is an implicit execution dependency from
      each such query command to all query commands previously submitted to the same queue. There
      is one significant exception to this; if the flags parameter of vkCmdCopyQueryPoolResults does not
      include VK_QUERY_RESULT_WAIT_BIT, execution of vkCmdCopyQueryPoolResults may happen-before
      the results of vkCmdEndQuery are available.

    * - Chapter 18. Queries
    */
   size_flags |= VK_QUERY_RESULT_WAIT_BIT;
   if (!is_time_query(query) && !is_bool_query(query)) {
      if (num_queries == 1 && !is_emulated_primgen(query) &&
          query->type != PIPE_QUERY_PRIMITIVES_EMITTED &&
          !is_bool_query(query)) {
         if (size_flags == VK_QUERY_RESULT_64_BIT) {
            if (query->needs_update)
               update_qbo(ctx, query);
            /* internal qbo always writes 64bit value so we can just direct copy */
            zink_copy_buffer(ctx, res, zink_resource(query->curr_qbo->buffers[0]), offset,
                             get_buffer_offset(query),
                             result_size);
         } else
            /* have to do a new copy for 32bit */
            copy_results_to_buffer(ctx, query, res, offset, 1, size_flags);
         return;
      }
   }

   /* TODO: use CS to aggregate results */

   /* unfortunately, there's no way to accumulate results from multiple queries on the gpu without either
    * clobbering all but the last result or writing the results sequentially, so we have to manually write the result
    */
   force_cpu_read(ctx, pquery, result_type, pres, offset);
}
1468
1469 uint64_t
zink_get_timestamp(struct pipe_screen * pscreen)1470 zink_get_timestamp(struct pipe_screen *pscreen)
1471 {
1472 struct zink_screen *screen = zink_screen(pscreen);
1473 uint64_t timestamp, deviation;
1474 if (screen->info.have_EXT_calibrated_timestamps) {
1475 VkCalibratedTimestampInfoEXT cti = {0};
1476 cti.sType = VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT;
1477 cti.timeDomain = VK_TIME_DOMAIN_DEVICE_EXT;
1478 VkResult result = VKSCR(GetCalibratedTimestampsEXT)(screen->dev, 1, &cti, ×tamp, &deviation);
1479 if (result != VK_SUCCESS) {
1480 mesa_loge("ZINK: vkGetCalibratedTimestampsEXT failed (%s)", vk_Result_to_str(result));
1481 }
1482 } else {
1483 zink_screen_lock_context(screen);
1484 struct pipe_context *pctx = &screen->copy_context->base;
1485 struct pipe_query *pquery = pctx->create_query(pctx, PIPE_QUERY_TIMESTAMP, 0);
1486 if (!pquery)
1487 return 0;
1488 union pipe_query_result result = {0};
1489 pctx->begin_query(pctx, pquery);
1490 pctx->end_query(pctx, pquery);
1491 pctx->get_query_result(pctx, pquery, true, &result);
1492 pctx->destroy_query(pctx, pquery);
1493 zink_screen_unlock_context(screen);
1494 timestamp = result.u64;
1495 }
1496 timestamp_to_nanoseconds(screen, ×tamp);
1497 return timestamp;
1498 }
1499
1500 void
zink_context_query_init(struct pipe_context * pctx)1501 zink_context_query_init(struct pipe_context *pctx)
1502 {
1503 struct zink_context *ctx = zink_context(pctx);
1504 list_inithead(&ctx->suspended_queries);
1505 list_inithead(&ctx->primitives_generated_queries);
1506
1507 pctx->create_query = zink_create_query;
1508 pctx->destroy_query = zink_destroy_query;
1509 pctx->begin_query = zink_begin_query;
1510 pctx->end_query = zink_end_query;
1511 pctx->get_query_result = zink_get_query_result;
1512 pctx->get_query_result_resource = zink_get_query_result_resource;
1513 pctx->set_active_query_state = zink_set_active_query_state;
1514 pctx->render_condition = zink_render_condition;
1515 }
1516
1517 int
zink_get_driver_query_group_info(struct pipe_screen * pscreen,unsigned index,struct pipe_driver_query_group_info * info)1518 zink_get_driver_query_group_info(struct pipe_screen *pscreen, unsigned index,
1519 struct pipe_driver_query_group_info *info)
1520 {
1521 if (!info)
1522 return 1;
1523
1524 assert(index == 0);
1525 info->name = "Zink counters";
1526 info->max_active_queries = ARRAY_SIZE(zink_specific_queries);
1527 info->num_queries = ARRAY_SIZE(zink_specific_queries);
1528
1529 return 1;
1530 }
1531
1532 int
zink_get_driver_query_info(struct pipe_screen * pscreen,unsigned index,struct pipe_driver_query_info * info)1533 zink_get_driver_query_info(struct pipe_screen *pscreen, unsigned index,
1534 struct pipe_driver_query_info *info)
1535 {
1536 if (!info)
1537 return ARRAY_SIZE(zink_specific_queries);
1538
1539 assert(index < ARRAY_SIZE(zink_specific_queries));
1540 *info = zink_specific_queries[index];
1541
1542 return 1;
1543 }
1544