xref: /aosp_15_r20/external/mesa3d/src/gallium/drivers/zink/zink_query.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 #include "zink_query.h"
2 
3 #include "zink_context.h"
4 #include "zink_clear.h"
5 #include "zink_program.h"
6 #include "zink_resource.h"
7 #include "zink_screen.h"
8 
9 #include "util/u_dump.h"
10 #include "util/u_inlines.h"
11 #include "util/u_memory.h"
12 
13 #define NUM_QUERIES 500
14 
15 #define ZINK_QUERY_RENDER_PASSES (PIPE_QUERY_DRIVER_SPECIFIC + 0)
16 
/* a VkQueryPool shared between queries of the same vulkan type;
 * queries are suballocated from it sequentially via last_range
 */
struct zink_query_pool {
   struct list_head list;                        /* link in zink_context::query_pools */
   VkQueryType vk_query_type;                    /* vulkan type of every query in this pool */
   VkQueryPipelineStatisticFlags pipeline_stats; /* counter set; only meaningful for PIPELINE_STATISTICS pools */
   VkQueryPool query_pool;
   unsigned last_range;                          /* next unallocated query id (pool is exhausted at NUM_QUERIES) */
   unsigned refcount;                            /* zink_vk_query references; freed at zero (see unref_vk_pool) */
};
25 
/* a set of result buffers (one per sub-query) that query results are
 * copied into on the gpu before being read back
 */
struct zink_query_buffer {
   struct list_head list;    /* link in zink_query::buffers */
   unsigned num_results;     /* number of result groups already copied into the buffers */
   struct pipe_resource *buffers[PIPE_MAX_VERTEX_STREAMS];
};
31 
/* a single vulkan query suballocated from a zink_query_pool */
struct zink_vk_query {
   struct zink_query_pool *pool; /* owning pool; each reference also holds a pool refcount */
   unsigned query_id;            /* index of this query within the pool */
   bool needs_reset;             /* vkCmdResetQueryPool must run before the query is (re)used */
   bool started;                 /* a begin has been recorded without a matching end */
   uint32_t refcount;            /* freed at zero (see unref_vk_query) */
};
39 
/* per-"start" bookkeeping: one entry is appended to zink_query::starts
 * each time the gallium query needs a fresh set of vulkan queries
 */
struct zink_query_start {
   union {
      struct {
         bool have_gs;       /* a draw in this range used a geometry shader */
         bool have_xfb;      /* a draw in this range used transform feedback */
         bool was_line_loop; /* draw was a line loop; IA_VERTICES result is halved (see check_query_results) */
      };
      uint32_t data;         /* aliases the flags above for bulk zeroing */
   };
   struct zink_vk_query *vkq[PIPE_MAX_VERTEX_STREAMS];
};
51 
/* gallium-level query object; may be backed by multiple vulkan queries
 * (one zink_query_start per begin/resume), with results accumulated
 * through the qbo list
 */
struct zink_query {
   struct threaded_query base;
   enum pipe_query_type type;

   /* Everytime the gallium query needs
    * another vulkan query, add a new start.
    */
   struct util_dynarray starts;
   unsigned start_offset;

   VkQueryType vkqtype;
   unsigned index; /* statistics index (PIPELINE_STATISTICS_SINGLE) or xfb stream index */
   bool precise;   /* use precise occlusion counting; only valid for VK_QUERY_TYPE_OCCLUSION */

   bool active; /* query is considered active by vk */
   bool needs_reset; /* query is considered active by vk and cannot be destroyed */
   bool dead; /* query should be destroyed when its fence finishes */
   bool needs_update; /* query needs to update its qbos */
   bool needs_rast_discard_workaround; /* query needs discard disabled */
   bool suspended;
   bool started_in_rp; //needs to be stopped in rp

   struct list_head active_list;

   struct list_head stats_list; /* when active, statistics queries are added to ctx->primitives_generated_queries */
   bool has_draws; /* have_gs and have_xfb are valid for idx=curr_query */

   struct zink_batch_usage *batch_uses; //batch that the query was started in

   struct list_head buffers; /* list of zink_query_buffer */
   union {
      struct zink_query_buffer *curr_qbo;
      struct pipe_fence_handle *fence; //PIPE_QUERY_GPU_FINISHED
   };

   struct zink_resource *predicate; /* NOTE(review): presumably backs conditional rendering; usage not in this chunk */
   bool predicate_dirty;
};
90 
/* zink driver-specific queries; "render-passes" presumably reports a
 * render-pass count — the consumer is not visible in this chunk
 */
static const struct pipe_driver_query_info zink_specific_queries[] = {
   {"render-passes", ZINK_QUERY_RENDER_PASSES, { 0 }},
};
94 
/* number of zink_query_start entries recorded so far for this query */
static inline int
get_num_starts(struct zink_query *q)
{
   return util_dynarray_num_elements(&q->starts, struct zink_query_start);
}
100 
101 static void
102 update_query_id(struct zink_context *ctx, struct zink_query *q);
103 
104 
/* map a gallium pipeline-statistics index to the corresponding vulkan
 * pipeline-statistic flag bit; idx must be a valid map entry
 */
static VkQueryPipelineStatisticFlags
pipeline_statistic_convert(enum pipe_statistics_query_index idx)
{
   unsigned map[] = {
      [PIPE_STAT_QUERY_IA_VERTICES] = VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_VERTICES_BIT,
      [PIPE_STAT_QUERY_IA_PRIMITIVES] = VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_PRIMITIVES_BIT,
      [PIPE_STAT_QUERY_VS_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_VERTEX_SHADER_INVOCATIONS_BIT,
      [PIPE_STAT_QUERY_GS_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_INVOCATIONS_BIT,
      [PIPE_STAT_QUERY_GS_PRIMITIVES] = VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT,
      [PIPE_STAT_QUERY_C_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_CLIPPING_INVOCATIONS_BIT,
      [PIPE_STAT_QUERY_C_PRIMITIVES] = VK_QUERY_PIPELINE_STATISTIC_CLIPPING_PRIMITIVES_BIT,
      [PIPE_STAT_QUERY_PS_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_FRAGMENT_SHADER_INVOCATIONS_BIT,
      [PIPE_STAT_QUERY_HS_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_CONTROL_SHADER_PATCHES_BIT,
      [PIPE_STAT_QUERY_DS_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_EVALUATION_SHADER_INVOCATIONS_BIT,
      [PIPE_STAT_QUERY_CS_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_COMPUTE_SHADER_INVOCATIONS_BIT
   };
   assert(idx < ARRAY_SIZE(map));
   return map[idx];
}
124 
125 static void
begin_vk_query_indexed(struct zink_context * ctx,struct zink_vk_query * vkq,int index,VkQueryControlFlags flags)126 begin_vk_query_indexed(struct zink_context *ctx, struct zink_vk_query *vkq, int index,
127                        VkQueryControlFlags flags)
128 {
129    if (!vkq->started) {
130       VKCTX(CmdBeginQueryIndexedEXT)(ctx->bs->cmdbuf,
131                                      vkq->pool->query_pool,
132                                      vkq->query_id,
133                                      flags,
134                                      index);
135       vkq->started = true;
136    }
137 }
138 
139 static void
end_vk_query_indexed(struct zink_context * ctx,struct zink_vk_query * vkq,int index)140 end_vk_query_indexed(struct zink_context *ctx, struct zink_vk_query *vkq, int index)
141 {
142    if (vkq->started) {
143       VKCTX(CmdEndQueryIndexedEXT)(ctx->bs->cmdbuf,
144                                    vkq->pool->query_pool,
145                                    vkq->query_id, index);
146       vkq->started = false;
147    }
148 }
149 
150 static void
reset_vk_query_pool(struct zink_context * ctx,struct zink_vk_query * vkq)151 reset_vk_query_pool(struct zink_context *ctx, struct zink_vk_query *vkq)
152 {
153    if (vkq->needs_reset) {
154       VKCTX(CmdResetQueryPool)(ctx->bs->reordered_cmdbuf, vkq->pool->query_pool, vkq->query_id, 1);
155       ctx->bs->has_reordered_work = true;
156    }
157    vkq->needs_reset = false;
158 }
159 
/* destroy every remaining query pool on the context (context teardown) */
void
zink_context_destroy_query_pools(struct zink_context *ctx)
{
   struct zink_screen *screen = zink_screen(ctx->base.screen);
   list_for_each_entry_safe(struct zink_query_pool, pool, &ctx->query_pools, list) {
      VKSCR(DestroyQueryPool)(screen->dev, pool->query_pool, NULL);
      list_del(&pool->list);
      FREE(pool);
   }
}
170 
/* Find an existing pool on the context matching the vulkan query type
 * (and, for pipeline statistics, the exact counter set) needed by q's
 * sub-query idx, or create a new NUM_QUERIES-sized pool.
 * Returns NULL on allocation or vkCreateQueryPool failure.
 */
static struct zink_query_pool *
find_or_allocate_qp(struct zink_context *ctx, struct zink_query *q, unsigned idx)
{
   VkQueryPipelineStatisticFlags pipeline_stats = 0;
   /* emulated primitives-generated uses pipeline-statistics counters */
   if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED && q->vkqtype != VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT)
      pipeline_stats = VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT |
                       VK_QUERY_PIPELINE_STATISTIC_CLIPPING_INVOCATIONS_BIT;
   else if (q->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE)
      pipeline_stats = pipeline_statistic_convert(q->index);

   VkQueryType vk_query_type = q->vkqtype;
   /* if xfb is active, we need to use an xfb query, otherwise we need pipeline statistics */
   if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED && idx == 1) {
      vk_query_type = VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT;
      pipeline_stats = 0;
   }

   struct zink_screen *screen = zink_screen(ctx->base.screen);
   /* reuse a compatible pool if one already exists */
   list_for_each_entry(struct zink_query_pool, pool, &ctx->query_pools, list) {
      if (pool->vk_query_type == vk_query_type) {
         if (vk_query_type == VK_QUERY_TYPE_PIPELINE_STATISTICS) {
            /* statistics pools must also match the exact counter set */
            if (pool->pipeline_stats == pipeline_stats)
               return pool;
         } else
            return pool;
      }
   }

   struct zink_query_pool *new_pool = CALLOC_STRUCT(zink_query_pool);
   if (!new_pool)
      return NULL;

   new_pool->vk_query_type = vk_query_type;
   new_pool->pipeline_stats = pipeline_stats;

   VkQueryPoolCreateInfo pool_create = {0};
   pool_create.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO;
   pool_create.queryType = vk_query_type;
   pool_create.queryCount = NUM_QUERIES;
   pool_create.pipelineStatistics = pipeline_stats;

   VkResult status = VKSCR(CreateQueryPool)(screen->dev, &pool_create, NULL, &new_pool->query_pool);
   if (status != VK_SUCCESS) {
      mesa_loge("ZINK: vkCreateQueryPool failed (%s)", vk_Result_to_str(status));
      FREE(new_pool);
      return NULL;
   }

   list_addtail(&new_pool->list, &ctx->query_pools);
   return new_pool;
}
222 
223 static void
224 update_qbo(struct zink_context *ctx, struct zink_query *q);
225 static void
226 reset_qbos(struct zink_context *ctx, struct zink_query *q);
227 
228 
229 static bool
is_emulated_primgen(const struct zink_query * q)230 is_emulated_primgen(const struct zink_query *q)
231 {
232    return q->type == PIPE_QUERY_PRIMITIVES_GENERATED &&
233           q->vkqtype != VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT;
234 }
235 
/* number of distinct vulkan pools this query draws from; emulated
 * primgen needs both a statistics pool and an xfb pool
 */
static inline unsigned
get_num_query_pools(struct zink_query *q)
{
   return is_emulated_primgen(q) ? 2 : 1;
}
243 
244 static inline unsigned
get_num_queries(struct zink_query * q)245 get_num_queries(struct zink_query *q)
246 {
247    if (is_emulated_primgen(q))
248       return 2;
249    if (q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
250       return PIPE_MAX_VERTEX_STREAMS;
251    return 1;
252 }
253 
/* number of 64-bit values each vulkan query of this type produces
 * (xfb-style queries report written+needed pairs)
 */
static inline unsigned
get_num_results(struct zink_query *q)
{
   /* native primitives-generated reports a single counter */
   if (q->type < PIPE_QUERY_DRIVER_SPECIFIC &&
       q->vkqtype == VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT)
      return 1;
   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
   case PIPE_QUERY_TIME_ELAPSED:
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE:
      return 1;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
   case PIPE_QUERY_PRIMITIVES_EMITTED:
      return 2;
   default:
      debug_printf("unknown query: %s\n",
                   util_str_query_type(q->type, true));
      unreachable("zink: unknown query type");
   }
}
279 
280 static void
timestamp_to_nanoseconds(struct zink_screen * screen,uint64_t * timestamp)281 timestamp_to_nanoseconds(struct zink_screen *screen, uint64_t *timestamp)
282 {
283    /* The number of valid bits in a timestamp value is determined by
284     * the VkQueueFamilyProperties::timestampValidBits property of the queue on which the timestamp is written.
285     * - 17.5. Timestamp Queries
286     */
287    if (screen->timestamp_valid_bits < 64)
288       *timestamp &= (1ull << screen->timestamp_valid_bits) - 1;
289 
290    /* The number of nanoseconds it takes for a timestamp value to be incremented by 1
291     * can be obtained from VkPhysicalDeviceLimits::timestampPeriod
292     * - 17.5. Timestamp Queries
293     */
294    *timestamp *= (double)screen->info.props.limits.timestampPeriod;
295 }
296 
/* map a gallium query type to the backing vulkan query type; *precise is
 * set only for PIPE_QUERY_OCCLUSION_COUNTER (exact occlusion counts)
 */
static VkQueryType
convert_query_type(struct zink_screen *screen, enum pipe_query_type query_type, bool *precise)
{
   *precise = false;
   switch (query_type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
      *precise = true;
      FALLTHROUGH;
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
      return VK_QUERY_TYPE_OCCLUSION;
   case PIPE_QUERY_TIME_ELAPSED:
   case PIPE_QUERY_TIMESTAMP:
      return VK_QUERY_TYPE_TIMESTAMP;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
      /* prefer the native extension; fall back to pipeline statistics */
      return screen->info.have_EXT_primitives_generated_query ?
             VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT :
             VK_QUERY_TYPE_PIPELINE_STATISTICS;
   case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE:
      return VK_QUERY_TYPE_PIPELINE_STATISTICS;
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
   case PIPE_QUERY_PRIMITIVES_EMITTED:
      return VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT;
   default:
      debug_printf("unknown query: %s\n",
                   util_str_query_type(query_type, true));
      unreachable("zink: unknown query type");
   }
}
327 
328 static bool
needs_stats_list(struct zink_query * query)329 needs_stats_list(struct zink_query *query)
330 {
331    return is_emulated_primgen(query) ||
332           query->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE ||
333           query->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE;
334 }
335 
336 static bool
is_time_query(struct zink_query * query)337 is_time_query(struct zink_query *query)
338 {
339    return query->type == PIPE_QUERY_TIMESTAMP || query->type == PIPE_QUERY_TIME_ELAPSED;
340 }
341 
342 static bool
is_so_overflow_query(struct zink_query * query)343 is_so_overflow_query(struct zink_query *query)
344 {
345    return query->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE || query->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE;
346 }
347 
348 static bool
is_bool_query(struct zink_query * query)349 is_bool_query(struct zink_query *query)
350 {
351    return is_so_overflow_query(query) ||
352           query->type == PIPE_QUERY_OCCLUSION_PREDICATE ||
353           query->type == PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE ||
354           query->type == PIPE_QUERY_GPU_FINISHED;
355 }
356 
/* Ensure the query has a spare qbo linked after curr_qbo: allocate one
 * result buffer per sub-query, each sized for the maximum possible number
 * of results. Returns false on allocation failure.
 */
static bool
qbo_append(struct pipe_screen *screen, struct zink_query *query)
{
   /* list.next is NULL only for an unlinked (freshly-zeroed) qbo, so a
    * linked curr_qbo means a buffer already exists
    */
   if (query->curr_qbo && query->curr_qbo->list.next)
      return true;
   struct zink_query_buffer *qbo = CALLOC_STRUCT(zink_query_buffer);
   if (!qbo)
      return false;
   int num_buffers = get_num_queries(query);

   for (unsigned i = 0; i < num_buffers; i++) {
      qbo->buffers[i] = pipe_buffer_create(screen, PIPE_BIND_QUERY_BUFFER,
                                           PIPE_USAGE_STAGING,
                                           /* this is the maximum possible size of the results in a given buffer */
                                           (query->type == PIPE_QUERY_TIMESTAMP ? 1 : NUM_QUERIES) * get_num_results(query) * sizeof(uint64_t));
      if (!qbo->buffers[i])
         goto fail;
   }
   list_addtail(&qbo->list, &query->buffers);

   return true;
fail:
   /* unallocated slots are NULL from CALLOC, so releasing all is safe */
   for (unsigned i = 0; i < num_buffers; i++)
      pipe_resource_reference(&qbo->buffers[i], NULL);
   FREE(qbo);
   return false;
}
384 
385 static void
unref_vk_pool(struct zink_context * ctx,struct zink_query_pool * pool)386 unref_vk_pool(struct zink_context *ctx, struct zink_query_pool *pool)
387 {
388    if (!pool || --pool->refcount)
389       return;
390    util_dynarray_append(&ctx->bs->dead_querypools, VkQueryPool, pool->query_pool);
391    if (list_is_linked(&pool->list))
392       list_del(&pool->list);
393    FREE(pool);
394 }
395 
396 static void
unref_vk_query(struct zink_context * ctx,struct zink_vk_query * vkq)397 unref_vk_query(struct zink_context *ctx, struct zink_vk_query *vkq)
398 {
399    if (!vkq)
400       return;
401    unref_vk_pool(ctx, vkq->pool);
402    vkq->refcount--;
403    if (vkq->refcount == 0)
404       FREE(vkq);
405 }
406 
/* Immediately free a query and everything it owns; callers must ensure
 * the query's batch usage has completed (asserted below).
 */
static void
destroy_query(struct zink_context *ctx, struct zink_query *query)
{
   ASSERTED struct zink_screen *screen = zink_screen(ctx->base.screen);
   assert(zink_screen_usage_check_completion(screen, query->batch_uses));
   struct zink_query_buffer *qbo, *next;

   /* iterate the full dynarray capacity, not just the element count:
    * grown-but-unused entries are zeroed by query_pool_get_range() and
    * unref_vk_query() ignores NULL
    */
   struct zink_query_start *starts = query->starts.data;
   unsigned num_starts = query->starts.capacity / sizeof(struct zink_query_start);
   for (unsigned j = 0; j < num_starts; j++) {
      for (unsigned i = 0; i < PIPE_MAX_VERTEX_STREAMS; i++) {
         unref_vk_query(ctx, starts[j].vkq[i]);
      }
   }

   util_dynarray_fini(&query->starts);
   /* release every result buffer */
   LIST_FOR_EACH_ENTRY_SAFE(qbo, next, &query->buffers, list) {
      for (unsigned i = 0; i < ARRAY_SIZE(qbo->buffers); i++)
         pipe_resource_reference(&qbo->buffers[i], NULL);
      FREE(qbo);
   }
   pipe_resource_reference((struct pipe_resource**)&query->predicate, NULL);
   FREE(query);
}
431 
/* rewind to the first qbo in the list and mark it as holding no results */
static void
reset_qbo(struct zink_query *q)
{
   q->curr_qbo = list_first_entry(&q->buffers, struct zink_query_buffer, list);
   q->curr_qbo->num_results = 0;
}
438 
439 static void
query_pool_get_range(struct zink_context * ctx,struct zink_query * q)440 query_pool_get_range(struct zink_context *ctx, struct zink_query *q)
441 {
442    bool is_timestamp = q->type == PIPE_QUERY_TIMESTAMP;
443    struct zink_query_start *start;
444    int num_queries = get_num_queries(q);
445    if (!is_timestamp || get_num_starts(q) == 0) {
446       size_t size = q->starts.capacity;
447       start = util_dynarray_grow(&q->starts, struct zink_query_start, 1);
448       if (size != q->starts.capacity) {
449          /* when resizing, always zero the new data to avoid garbage */
450          uint8_t *data = q->starts.data;
451          memset(data + size, 0, q->starts.capacity - size);
452       }
453    } else {
454       start = util_dynarray_top_ptr(&q->starts, struct zink_query_start);
455    }
456    start->data = 0;
457 
458    unsigned num_pools = get_num_query_pools(q);
459    for (unsigned i = 0; i < num_queries; i++) {
460       int pool_idx = num_pools > 1 ? i : 0;
461       /* try and find the active query for this */
462       struct zink_vk_query *vkq;
463       int xfb_idx = num_queries == 4 ? i : q->index;
464       if ((q->vkqtype == VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT ||
465            (pool_idx == 1)) && ctx->curr_xfb_queries[xfb_idx]) {
466          vkq = ctx->curr_xfb_queries[xfb_idx];
467          vkq->refcount++;
468          vkq->pool->refcount++;
469       } else {
470          struct zink_query_pool *pool = find_or_allocate_qp(ctx, q, pool_idx);
471          if (pool->last_range == NUM_QUERIES) {
472             list_del(&pool->list);
473             pool = find_or_allocate_qp(ctx, q, pool_idx);
474          }
475          vkq = CALLOC_STRUCT(zink_vk_query);
476          if (!vkq) {
477             mesa_loge("ZINK: failed to allocate vkq!");
478             return;
479          }
480 
481          pool->refcount++;
482          vkq->refcount = 1;
483          vkq->needs_reset = true;
484          vkq->pool = pool;
485          vkq->started = false;
486          vkq->query_id = pool->last_range++;
487       }
488       unref_vk_query(ctx, start->vkq[i]);
489       start->vkq[i] = vkq;
490    }
491 }
492 
493 static struct pipe_query *
zink_create_query(struct pipe_context * pctx,unsigned query_type,unsigned index)494 zink_create_query(struct pipe_context *pctx,
495                   unsigned query_type, unsigned index)
496 {
497    struct zink_context *ctx = zink_context(pctx);
498    struct zink_screen *screen = zink_screen(pctx->screen);
499    struct zink_query *query = CALLOC_STRUCT(zink_query);
500 
501    if (!query)
502       return NULL;
503    list_inithead(&query->buffers);
504 
505    query->index = index;
506    query->type = query_type;
507 
508    if (query->type >= PIPE_QUERY_DRIVER_SPECIFIC)
509       return (struct pipe_query *)query;
510 
511    if (query->type == PIPE_QUERY_GPU_FINISHED || query->type == PIPE_QUERY_TIMESTAMP_DISJOINT)
512       return (struct pipe_query *)query;
513    query->vkqtype = convert_query_type(screen, query_type, &query->precise);
514    if (query->vkqtype == -1)
515       return NULL;
516 
517    util_dynarray_init(&query->starts, NULL);
518 
519    assert(!query->precise || query->vkqtype == VK_QUERY_TYPE_OCCLUSION);
520 
521    /* use emulated path for drivers without full support */
522    if (query->vkqtype == VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT && index &&
523        !screen->info.primgen_feats.primitivesGeneratedQueryWithNonZeroStreams)
524       query->vkqtype = VK_QUERY_TYPE_PIPELINE_STATISTICS;
525 
526    if (query->vkqtype == VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT) {
527       query->needs_rast_discard_workaround = !screen->info.primgen_feats.primitivesGeneratedQueryWithRasterizerDiscard;
528    } else if (query_type == PIPE_QUERY_PRIMITIVES_GENERATED) {
529       query->needs_rast_discard_workaround = true;
530    }
531 
532    if (!qbo_append(pctx->screen, query))
533       goto fail;
534    ctx->bs->has_work = true;
535    query->needs_reset = true;
536    query->predicate_dirty = true;
537    if (query->type == PIPE_QUERY_TIMESTAMP) {
538       query->active = true;
539       /* defer pool reset until end_query since we're guaranteed to be threadsafe then */
540       reset_qbo(query);
541    }
542    return (struct pipe_query *)query;
543 fail:
544    destroy_query(zink_context(pctx), query);
545    return NULL;
546 }
547 
548 static void
zink_destroy_query(struct pipe_context * pctx,struct pipe_query * q)549 zink_destroy_query(struct pipe_context *pctx,
550                    struct pipe_query *q)
551 {
552    struct zink_query *query = (struct zink_query *)q;
553 
554    /* only destroy if this query isn't active on any batches,
555     * otherwise just mark dead and wait
556     */
557    if (query->batch_uses) {
558       query->dead = true;
559       return;
560    }
561 
562    destroy_query(zink_context(pctx), query);
563 }
564 
565 void
zink_prune_query(struct zink_batch_state * bs,struct zink_query * query)566 zink_prune_query(struct zink_batch_state *bs, struct zink_query *query)
567 {
568    if (!zink_batch_usage_matches(query->batch_uses, bs))
569       return;
570    query->batch_uses = NULL;
571    if (query->dead)
572       destroy_query(bs->ctx, query);
573 }
574 
/* Accumulate raw vulkan results into the gallium result union, one
 * result group (get_num_results values) per recorded start.
 * NOTE(review): num_starts is unused; iteration is driven by query->starts.
 * xfb_results is only read for emulated primitives-generated.
 */
static void
check_query_results(struct zink_query *query, union pipe_query_result *result,
                    int num_starts, uint64_t *results, uint64_t *xfb_results)
{
   uint64_t last_val = 0;
   int result_size = get_num_results(query);
   int idx = 0;
   util_dynarray_foreach(&query->starts, struct zink_query_start, start) {
      /* i indexes the first value of this start's result group */
      unsigned i = idx * result_size;
      idx++;
      switch (query->type) {
      case PIPE_QUERY_OCCLUSION_PREDICATE:
      case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
      case PIPE_QUERY_GPU_FINISHED:
         result->b |= results[i] != 0;
         break;

      case PIPE_QUERY_TIME_ELAPSED:
      case PIPE_QUERY_TIMESTAMP:
         /* the application can sum the differences between all N queries to determine the total execution time.
          * - 17.5. Timestamp Queries
          */
         if (query->type != PIPE_QUERY_TIME_ELAPSED || i)
            result->u64 += results[i] - last_val;
         last_val = results[i];
         break;
      case PIPE_QUERY_OCCLUSION_COUNTER:
         result->u64 += results[i];
         break;
      case PIPE_QUERY_PRIMITIVES_GENERATED:
         if (query->vkqtype == VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT)
            result->u64 += results[i];
         else if (start->have_xfb || query->index)
            /* emulated path with xfb active: use the xfb query's primitives-needed value */
            result->u64 += xfb_results[i + 1];
         else
            /* if a given draw had a geometry shader, we need to use the first result */
            result->u64 += results[i + !start->have_gs];
         break;
      case PIPE_QUERY_PRIMITIVES_EMITTED:
         /* A query pool created with this type will capture 2 integers -
          * numPrimitivesWritten and numPrimitivesNeeded -
          * for the specified vertex stream output from the last vertex processing stage.
          * - from VK_EXT_transform_feedback spec
          */
         result->u64 += results[i];
         break;
      case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
         /* A query pool created with this type will capture 2 integers -
          * numPrimitivesWritten and numPrimitivesNeeded -
          * for the specified vertex stream output from the last vertex processing stage.
          * - from VK_EXT_transform_feedback spec
          */
         if (start->have_xfb)
            result->b |= results[i] != results[i + 1];
         break;
      case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE:
         switch (query->index) {
         case PIPE_STAT_QUERY_IA_VERTICES:
            /* line loops double-count vertices; halve to compensate */
            result->u64 += start->was_line_loop ? results[i] / 2 : results[i];
            break;
         default:
            result->u64 += results[i];
            break;
         }
         break;

      default:
         debug_printf("unhandled query type: %s\n",
                      util_str_query_type(query->type, true));
         unreachable("unexpected query type");
      }
   }
}
649 
/* Map each qbo's result buffers, accumulate into *result via
 * check_query_results(), and unmap. Returns false if a non-blocking map
 * fails (wait==false) or on map error.
 */
static bool
get_query_result(struct pipe_context *pctx,
                      struct pipe_query *q,
                      bool wait,
                      union pipe_query_result *result)
{
   struct zink_screen *screen = zink_screen(pctx->screen);
   struct zink_query *query = (struct zink_query *)q;
   unsigned flags = PIPE_MAP_READ;

   if (!wait)
      flags |= PIPE_MAP_DONTBLOCK;
   if (query->base.flushed)
      /* this is not a context-safe operation; ensure map doesn't use slab alloc */
      flags |= PIPE_MAP_THREAD_SAFE;

   util_query_clear_result(result, query->type);

   int num_starts = get_num_starts(query);
   /* no results: return zero */
   if (!num_starts)
      return true;
   int result_size = get_num_results(query) * sizeof(uint64_t);
   int num_maps = get_num_queries(query);

   struct zink_query_buffer *qbo;
   struct pipe_transfer *xfer[PIPE_MAX_VERTEX_STREAMS] = { 0 };
   LIST_FOR_EACH_ENTRY(qbo, &query->buffers, list) {
      uint64_t *results[PIPE_MAX_VERTEX_STREAMS] = { NULL, NULL };
      bool is_timestamp = query->type == PIPE_QUERY_TIMESTAMP;
      if (!qbo->num_results)
         continue;

      /* map one buffer per sub-query */
      for (unsigned i = 0; i < num_maps; i++) {
         results[i] = pipe_buffer_map_range(pctx, qbo->buffers[i], 0,
                                            (is_timestamp ? 1 : qbo->num_results) * result_size, flags, &xfer[i]);
         if (!results[i]) {
            if (wait)
               debug_printf("zink: qbo read failed!");
            goto fail;
         }
      }
      if (query->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) {
         /* check every stream until an overflow is found */
         for (unsigned i = 0; i < PIPE_MAX_VERTEX_STREAMS && !result->b; i++) {
            check_query_results(query, result, num_starts, results[i], NULL);
         }
      } else
         check_query_results(query, result, num_starts, results[0], results[1]);

      for (unsigned i = 0 ; i < num_maps; i++)
         pipe_buffer_unmap(pctx, xfer[i]);

      /* if overflow is detected we can stop */
      if (query->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE && result->b)
         break;
   }

   if (is_time_query(query))
      timestamp_to_nanoseconds(screen, &result->u64);

   return true;
fail:
   /* unmap whatever was successfully mapped before the failure */
   for (unsigned i = 0 ; i < num_maps; i++)
      if (xfer[i])
         pipe_buffer_unmap(pctx, xfer[i]);
   return false;
}
717 
/* Resolve a query result on the cpu (blocking) and write it into pres at
 * offset as a u32/i32 or u64 depending on result_type; used when a gpu
 * resolve isn't possible.
 */
static void
force_cpu_read(struct zink_context *ctx, struct pipe_query *pquery, enum pipe_query_value_type result_type, struct pipe_resource *pres, unsigned offset)
{
   struct pipe_context *pctx = &ctx->base;
   unsigned result_size = result_type <= PIPE_QUERY_TYPE_U32 ? sizeof(uint32_t) : sizeof(uint64_t);
   struct zink_query *query = (struct zink_query*)pquery;
   union pipe_query_result result = {0};

   /* flush pending qbo copies before reading */
   if (query->needs_update)
      update_qbo(ctx, query);

   bool success = get_query_result(pctx, pquery, true, &result);
   if (!success) {
      debug_printf("zink: getting query result failed\n");
      return;
   }

   if (result_type <= PIPE_QUERY_TYPE_U32) {
      uint32_t u32;
      uint32_t limit;
      /* clamp to the destination type's range */
      if (result_type == PIPE_QUERY_TYPE_I32)
         limit = INT_MAX;
      else
         limit = UINT_MAX;
      if (is_bool_query(query))
         u32 = result.b;
      else
         u32 = MIN2(limit, result.u64);
      tc_buffer_write(pctx, pres, offset, result_size, &u32);
   } else {
      uint64_t u64;
      if (is_bool_query(query))
         u64 = result.b;
      else
         u64 = result.u64;
      tc_buffer_write(pctx, pres, offset, result_size, &u64);
   }
}
756 
/* Record a vkCmdCopyQueryPoolResults of num_results results (starting at
 * query_id) from pool into res at offset, updating the resource's access
 * tracking; must run outside a render pass.
 */
static void
copy_pool_results_to_buffer(struct zink_context *ctx, struct zink_query *query, VkQueryPool pool,
                            unsigned query_id, struct zink_resource *res, unsigned offset,
                            int num_results, VkQueryResultFlags flags)
{
   unsigned type_size = (flags & VK_QUERY_RESULT_64_BIT) ? sizeof(uint64_t) : sizeof(uint32_t);
   unsigned base_result_size = get_num_results(query) * type_size;
   unsigned result_size = base_result_size * num_results;
   /* availability adds one extra value after the results */
   if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
      result_size += type_size;

   bool marker = zink_cmd_debug_marker_begin(ctx, VK_NULL_HANDLE, "update_qbo(%s: id=%u, num_results=%d)", vk_QueryType_to_str(query->vkqtype), query_id, num_results);

   zink_batch_no_rp(ctx);
   /* if it's a single query that doesn't need special handling, we can copy it and be done */
   zink_batch_reference_resource_rw(ctx, res, true);
   res->obj->access = VK_ACCESS_TRANSFER_WRITE_BIT;
   res->obj->access_stage = VK_PIPELINE_STAGE_TRANSFER_BIT;
   util_range_add(&res->base.b, &res->valid_buffer_range, offset, offset + result_size);
   assert(query_id < NUM_QUERIES);
   res->obj->unordered_read = res->obj->unordered_write = false;
   ctx->bs->has_work = true;
   VKCTX(CmdCopyQueryPoolResults)(ctx->bs->cmdbuf, pool, query_id, num_results, res->obj->buffer,
                                  offset, base_result_size, flags);
   zink_cmd_debug_marker_end(ctx, ctx->bs->cmdbuf, marker);
}
783 
784 static void
copy_results_to_buffer(struct zink_context * ctx,struct zink_query * query,struct zink_resource * res,unsigned offset,int num_results,VkQueryResultFlags flags)785 copy_results_to_buffer(struct zink_context *ctx, struct zink_query *query, struct zink_resource *res, unsigned offset, int num_results, VkQueryResultFlags flags)
786 {
787    struct zink_query_start *start = util_dynarray_top_ptr(&query->starts, struct zink_query_start);
788    copy_pool_results_to_buffer(ctx, query, start->vkq[0]->pool->query_pool, start->vkq[0]->query_id, res, offset, num_results, flags);
789 }
790 
791 
792 static void
reset_query_range(struct zink_context * ctx,struct zink_query * q)793 reset_query_range(struct zink_context *ctx, struct zink_query *q)
794 {
795    int num_queries = get_num_queries(q);
796    struct zink_query_start *start = util_dynarray_top_ptr(&q->starts, struct zink_query_start);
797    for (unsigned i = 0; i < num_queries; i++) {
798       reset_vk_query_pool(ctx, start->vkq[i]);
799    }
800 }
801 
802 static void
reset_qbos(struct zink_context * ctx,struct zink_query * q)803 reset_qbos(struct zink_context *ctx, struct zink_query *q)
804 {
805    if (q->needs_update)
806       update_qbo(ctx, q);
807 
808    q->needs_reset = false;
809    /* create new qbo for non-timestamp queries:
810     * timestamp queries should never need more than 2 entries in the qbo
811     */
812    if (q->type == PIPE_QUERY_TIMESTAMP)
813       return;
814    if (qbo_append(ctx->base.screen, q))
815       reset_qbo(q);
816    else
817       debug_printf("zink: qbo alloc failed on reset!");
818 }
819 
820 static inline unsigned
get_buffer_offset(struct zink_query * q)821 get_buffer_offset(struct zink_query *q)
822 {
823    return (get_num_starts(q) - 1) * get_num_results(q) * sizeof(uint64_t);
824 }
825 
/* Copy all not-yet-copied vk query results into the query's qbo, merging
 * adjacent query ids from the same pool into a single copy command.
 */
static void
update_qbo(struct zink_context *ctx, struct zink_query *q)
{
   struct zink_query_buffer *qbo = q->curr_qbo;
   unsigned num_starts = get_num_starts(q);
   struct zink_query_start *starts = q->starts.data;
   bool is_timestamp = q->type == PIPE_QUERY_TIMESTAMP;
   /* timestamp queries just write to offset 0 always */
   int num_queries = get_num_queries(q);
   unsigned num_results = qbo->num_results;
   for (unsigned i = 0; i < num_queries; i++) {
      /* starts below start_offset were already copied by a previous call */
      unsigned start_offset = q->start_offset;
      while (start_offset < num_starts) {
         unsigned num_merged_copies = 0;
         VkQueryPool qp = starts[start_offset].vkq[i]->pool->query_pool;
         unsigned base_id = starts[start_offset].vkq[i]->query_id;
         /* iterate over all the starts to see how many can be merged */
         for (unsigned j = start_offset; j < num_starts; j++, num_merged_copies++) {
            if (starts[j].vkq[i]->pool->query_pool != qp || starts[j].vkq[i]->query_id != base_id + num_merged_copies)
               break;
         }
         assert(num_merged_copies);
         unsigned cur_offset = start_offset * get_num_results(q) * sizeof(uint64_t);
         unsigned offset = is_timestamp ? 0 : cur_offset;
         copy_pool_results_to_buffer(ctx, q, starts[start_offset].vkq[i]->pool->query_pool, starts[start_offset].vkq[i]->query_id,
                                    zink_resource(qbo->buffers[i]),
                                    offset,
                                    num_merged_copies,
                                    /*
                                       there is an implicit execution dependency from
                                       each such query command to all query commands previously submitted to the same queue. There
                                       is one significant exception to this; if the flags parameter of vkCmdCopyQueryPoolResults does not
                                       include VK_QUERY_RESULT_WAIT_BIT, execution of vkCmdCopyQueryPoolResults may happen-before
                                       the results of vkCmdEndQuery are available.

                                    * - Chapter 18. Queries
                                    */
                                    VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);
         if (!is_timestamp)
            q->curr_qbo->num_results += num_merged_copies;
         start_offset += num_merged_copies;
      }
   }
   /* advance past everything copied this call */
   q->start_offset += q->curr_qbo->num_results - num_results;


   if (is_timestamp)
      q->curr_qbo->num_results = 1;

   q->needs_update = false;
}
877 
/* Record commands to start `q` on the current batch, allocating a fresh
 * vk query range and handling the per-type begin variants (timestamps,
 * xfb/indexed queries, rasterizer-discard workaround).
 */
static void
begin_query(struct zink_context *ctx, struct zink_query *q)
{
   VkQueryControlFlags flags = 0;

   /* these query types have no vk commands to record */
   if (q->type == PIPE_QUERY_TIMESTAMP_DISJOINT || q->type >= PIPE_QUERY_DRIVER_SPECIFIC)
      return;

   if (q->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE && q->index == PIPE_STAT_QUERY_CS_INVOCATIONS && ctx->in_rp) {
      /* refuse to start CS queries in renderpasses */
      if (!list_is_linked(&q->active_list))
         list_addtail(&q->active_list, &ctx->suspended_queries);
      q->suspended = true;
      return;
   }

   update_query_id(ctx, q);
   q->predicate_dirty = true;
   if (q->needs_reset)
      reset_qbos(ctx, q);
   reset_query_range(ctx, q);
   q->active = true;
   ctx->bs->has_work = true;

   struct zink_query_start *start = util_dynarray_top_ptr(&q->starts, struct zink_query_start);
   if (q->type == PIPE_QUERY_TIME_ELAPSED) {
      /* elapsed time = end timestamp - begin timestamp; record the begin stamp */
      VKCTX(CmdWriteTimestamp)(ctx->bs->cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, start->vkq[0]->pool->query_pool, start->vkq[0]->query_id);
      if (!ctx->in_rp)
         update_qbo(ctx, q);
      zink_batch_usage_set(&q->batch_uses, ctx->bs);
      _mesa_set_add(&ctx->bs->active_queries, q);
   }
   /* ignore the rest of begin_query for timestamps */
   if (is_time_query(q))
      return;

   /* A query must either begin and end inside the same subpass of a render pass
      instance, or must both begin and end outside of a render pass instance
      (i.e. contain entire render pass instances).
      - 18.2. Query Operation
    */
   q->started_in_rp = ctx->in_rp;

   if (q->precise)
      flags |= VK_QUERY_CONTROL_PRECISE_BIT;

   if (q->type == PIPE_QUERY_PRIMITIVES_EMITTED ||
       is_emulated_primgen(q) ||
       q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE) {
      /* vkq[1] is the xfb-specific query when one was allocated */
      struct zink_vk_query *vkq = start->vkq[1] ? start->vkq[1] : start->vkq[0];
      assert(!ctx->curr_xfb_queries[q->index] || ctx->curr_xfb_queries[q->index] == vkq);
      ctx->curr_xfb_queries[q->index] = vkq;

      begin_vk_query_indexed(ctx, vkq, q->index, flags);
   } else if (q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) {
      /* "any" overflow needs a query on every vertex stream */
      for (unsigned i = 0; i < PIPE_MAX_VERTEX_STREAMS; i++) {
         assert(!ctx->curr_xfb_queries[i] || ctx->curr_xfb_queries[i] == start->vkq[i]);
         ctx->curr_xfb_queries[i] = start->vkq[i];

         begin_vk_query_indexed(ctx, start->vkq[i], i, flags);
      }
   } else if (q->vkqtype == VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT) {
      begin_vk_query_indexed(ctx, start->vkq[0], q->index, flags);
   }
   /* indexed query types were begun above; everything else uses plain begin */
   if (q->vkqtype != VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT && q->vkqtype != VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT)
      VKCTX(CmdBeginQuery)(ctx->bs->cmdbuf, start->vkq[0]->pool->query_pool, start->vkq[0]->query_id, flags);
   if (q->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE && q->index == PIPE_STAT_QUERY_IA_VERTICES)  {
      assert(!ctx->vertices_query);
      ctx->vertices_query = q;
   }
   if (needs_stats_list(q))
      list_addtail(&q->stats_list, &ctx->primitives_generated_queries);
   zink_batch_usage_set(&q->batch_uses, ctx->bs);
   _mesa_set_add(&ctx->bs->active_queries, q);
   if (q->needs_rast_discard_workaround) {
      ctx->primitives_generated_active = true;
      if (zink_set_rasterizer_discard(ctx, true))
         zink_set_null_fs(ctx);
   }
}
958 
/* pipe_context::begin_query: reset accumulated state and either start the
 * query immediately (in-renderpass or TIME_ELAPSED) or defer it by placing
 * it on the suspended list until the next renderpass begins.
 */
static bool
zink_begin_query(struct pipe_context *pctx,
                 struct pipe_query *q)
{
   struct zink_query *query = (struct zink_query *)q;
   struct zink_context *ctx = zink_context(pctx);

   /* drop all past results */
   reset_qbo(query);

   if (query->type < PIPE_QUERY_DRIVER_SPECIFIC && query->vkqtype == VK_QUERY_TYPE_OCCLUSION)
      ctx->occlusion_query_active = true;
   if (query->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE && query->index == PIPE_STAT_QUERY_PS_INVOCATIONS)
      ctx->fs_query_active = true;

   query->predicate_dirty = true;

   /* restart the list of vk query starts for this gallium query */
   util_dynarray_clear(&query->starts);
   query->start_offset = 0;

   if (ctx->in_rp || (query->type == PIPE_QUERY_TIME_ELAPSED)) {
      begin_query(ctx, query);
   } else {
      /* never directly start queries out of renderpass, always defer */
      list_addtail(&query->active_list, &ctx->suspended_queries);
      query->suspended = true;
      if (query->type == PIPE_QUERY_PRIMITIVES_GENERATED)
         ctx->primitives_generated_suspended = query->needs_rast_discard_workaround;
   }

   return true;
}
991 
992 static void
update_query_id(struct zink_context * ctx,struct zink_query * q)993 update_query_id(struct zink_context *ctx, struct zink_query *q)
994 {
995    query_pool_get_range(ctx, q);
996    ctx->bs->has_work = true;
997    q->has_draws = false;
998 }
999 
/* Record commands to end `q` on the current batch, mirroring the per-type
 * begin variants in begin_query(), and undo any begin-time context state.
 */
static void
end_query(struct zink_context *ctx, struct zink_query *q)
{
   /* nothing was recorded for these types */
   if (q->type == PIPE_QUERY_TIMESTAMP_DISJOINT || q->type >= PIPE_QUERY_DRIVER_SPECIFIC)
      return;

   ASSERTED struct zink_query_buffer *qbo = q->curr_qbo;
   assert(qbo);
   assert(!is_time_query(q));
   q->active = false;
   /* vk requires begin/end to be both inside or both outside a renderpass */
   assert(q->started_in_rp == ctx->in_rp);
   struct zink_query_start *start = util_dynarray_top_ptr(&q->starts, struct zink_query_start);

   if (q->type == PIPE_QUERY_PRIMITIVES_EMITTED ||
       is_emulated_primgen(q) ||
       q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE) {
      /* vkq[1] is the xfb-specific query when one was allocated */
      struct zink_vk_query *vkq = start->vkq[1] ? start->vkq[1] : start->vkq[0];

      end_vk_query_indexed(ctx, vkq, q->index);
      ctx->curr_xfb_queries[q->index] = NULL;
   }
   else if (q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) {
      for (unsigned i = 0; i < PIPE_MAX_VERTEX_STREAMS; i++) {
         end_vk_query_indexed(ctx, start->vkq[i], i);
         ctx->curr_xfb_queries[i] = NULL;
      }
   } else if (q->vkqtype == VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT) {
      end_vk_query_indexed(ctx, start->vkq[0], q->index);
   }
   /* indexed query types were ended above */
   if (q->vkqtype != VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT &&
       q->vkqtype != VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT && !is_time_query(q))
      VKCTX(CmdEndQuery)(ctx->bs->cmdbuf, start->vkq[0]->pool->query_pool, start->vkq[0]->query_id);

   if (q->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE &&
       q->index == PIPE_STAT_QUERY_IA_VERTICES)
      ctx->vertices_query = NULL;

   if (needs_stats_list(q))
      list_delinit(&q->stats_list);

   /* results must be copied to the qbo before they can be read */
   q->needs_update = true;
   if (q->needs_rast_discard_workaround) {
      ctx->primitives_generated_active = false;
      if (zink_set_rasterizer_discard(ctx, false))
         zink_set_null_fs(ctx);
   }
}
1047 
/* pipe_context::end_query: handle the special query types (GPU_FINISHED,
 * timestamps) directly, otherwise record the end commands for an active query.
 */
static bool
zink_end_query(struct pipe_context *pctx,
               struct pipe_query *q)
{
   struct zink_context *ctx = zink_context(pctx);
   struct zink_query *query = (struct zink_query *)q;

   if (query->type == PIPE_QUERY_TIMESTAMP_DISJOINT || query->type >= PIPE_QUERY_DRIVER_SPECIFIC)
      return true;

   if (query->type == PIPE_QUERY_GPU_FINISHED) {
      /* result is just a fence; flush deferred and wait in get_query_result */
      pctx->flush(pctx, &query->fence, PIPE_FLUSH_DEFERRED);
      return true;
   }

   /* FIXME: this can be called from a thread, but it needs to write to the cmdbuf */
   threaded_context_unwrap_sync(pctx);

   if (query->vkqtype == VK_QUERY_TYPE_OCCLUSION)
      ctx->occlusion_query_active = true;
   if (query->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE && query->index == PIPE_STAT_QUERY_PS_INVOCATIONS)
      ctx->fs_query_active = true;

   /* the null-fs workaround must be undone once no primgen query remains */
   bool unset_null_fs = query->type == PIPE_QUERY_PRIMITIVES_GENERATED && (ctx->primitives_generated_suspended || ctx->primitives_generated_active);
   if (query->type == PIPE_QUERY_PRIMITIVES_GENERATED)
      ctx->primitives_generated_suspended = false;

   if (list_is_linked(&query->stats_list))
      list_delinit(&query->stats_list);
   if (query->suspended) {
      list_delinit(&query->active_list);
      query->suspended = false;
   }
   if (is_time_query(query)) {
      /* timestamps are only written at end_query time */
      update_query_id(ctx, query);
      if (query->needs_reset)
         reset_qbos(ctx, query);
      reset_query_range(ctx, query);
      struct zink_query_start *start = util_dynarray_top_ptr(&query->starts, struct zink_query_start);
      VKCTX(CmdWriteTimestamp)(ctx->bs->cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
                               start->vkq[0]->pool->query_pool, start->vkq[0]->query_id);
      ctx->bs->has_work = true;
      zink_batch_usage_set(&query->batch_uses, ctx->bs);
      _mesa_set_add(&ctx->bs->active_queries, query);
      query->needs_update = true;
   } else if (query->active) {
      /* this should be a tc-optimized query end that doesn't split a renderpass */
      if (!query->started_in_rp)
         zink_batch_no_rp(ctx);
      end_query(ctx, query);
   }

   if (unset_null_fs)
      zink_set_null_fs(ctx);

   return true;
}
1105 
/* pipe_context::get_query_result: answer driver-specific and fence-backed
 * query types directly, otherwise flush pending qbo copies (and the batch,
 * if unflushed) before reading the result back.
 */
static bool
zink_get_query_result(struct pipe_context *pctx,
                      struct pipe_query *q,
                      bool wait,
                      union pipe_query_result *result)
{
   struct zink_query *query = (void*)q;
   struct zink_context *ctx = zink_context(pctx);

   if (query->type == PIPE_QUERY_TIMESTAMP_DISJOINT) {
      /* frequency is ticks/second derived from timestampPeriod (ns/tick) */
      result->timestamp_disjoint.frequency = zink_screen(pctx->screen)->info.props.limits.timestampPeriod * 1000000.0;
      result->timestamp_disjoint.disjoint = false;
      return true;
   }

   if (query->type == PIPE_QUERY_GPU_FINISHED) {
      struct pipe_screen *screen = pctx->screen;

      result->b = screen->fence_finish(screen, query->base.flushed ? NULL : pctx,
                                        query->fence, wait ? OS_TIMEOUT_INFINITE : 0);
      return result->b;
   }

   if (query->type == ZINK_QUERY_RENDER_PASSES) {
      /* HUD counter: read-and-reset */
      result->u64 = ctx->hud.render_passes;
      ctx->hud.render_passes = 0;
      return true;
   }

   if (query->needs_update) {
      assert(!ctx->tc || !threaded_query(q)->flushed);
      update_qbo(ctx, query);
   }

   if (zink_batch_usage_is_unflushed(query->batch_uses)) {
      if (!threaded_query(q)->flushed)
         pctx->flush(pctx, NULL, 0);
      if (!wait)
         return false;
   }

   return get_query_result(pctx, q, wait, result);
}
1149 
1150 static void
suspend_query(struct zink_context * ctx,struct zink_query * query)1151 suspend_query(struct zink_context *ctx, struct zink_query *query)
1152 {
1153    /* if a query isn't active here then we don't need to reactivate it on the next batch */
1154    if (query->active && !is_time_query(query))
1155       end_query(ctx, query);
1156    if (query->needs_update && !ctx->in_rp)
1157       update_qbo(ctx, query);
1158 }
1159 
/* Suspend every active query on the current batch; with `rp_only`, touch only
 * queries that were begun inside the current renderpass.
 */
static void
suspend_queries(struct zink_context *ctx, bool rp_only)
{
   set_foreach(&ctx->bs->active_queries, entry) {
      struct zink_query *query = (void*)entry->key;
      if (query->suspended || (rp_only && !query->started_in_rp))
         continue;
      if (query->active && !is_time_query(query)) {
         /* the fence is going to steal the set off the batch, so we have to copy
          * the active queries onto a list
          */
         list_addtail(&query->active_list, &ctx->suspended_queries);
         query->suspended = true;
         if (query->type == PIPE_QUERY_PRIMITIVES_GENERATED)
            ctx->primitives_generated_suspended = query->needs_rast_discard_workaround;
      }
      suspend_query(ctx, query);
   }
}
1179 
1180 void
zink_suspend_queries(struct zink_context * ctx)1181 zink_suspend_queries(struct zink_context *ctx)
1182 {
1183    suspend_queries(ctx, false);
1184 }
1185 
/* Restart every query on the suspended list, flushing any pending qbo copies
 * first when outside a renderpass.
 */
void
zink_resume_queries(struct zink_context *ctx)
{
   struct zink_query *query, *next;
   /* begin_query() may re-add entries, so iterate safely */
   LIST_FOR_EACH_ENTRY_SAFE(query, next, &ctx->suspended_queries, active_list) {
      list_delinit(&query->active_list);
      query->suspended = false;
      if (query->type == PIPE_QUERY_PRIMITIVES_GENERATED)
         ctx->primitives_generated_suspended = false;
      if (query->needs_update && !ctx->in_rp)
         update_qbo(ctx, query);
      begin_query(ctx, query);
   }
}
1200 
1201 void
zink_resume_cs_query(struct zink_context * ctx)1202 zink_resume_cs_query(struct zink_context *ctx)
1203 {
1204    struct zink_query *query, *next;
1205    LIST_FOR_EACH_ENTRY_SAFE(query, next, &ctx->suspended_queries, active_list) {
1206       if (query->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE && query->index == PIPE_STAT_QUERY_CS_INVOCATIONS) {
1207          list_delinit(&query->active_list);
1208          query->suspended = false;
1209          begin_query(ctx, query);
1210       }
1211    }
1212 }
1213 
1214 void
zink_query_renderpass_suspend(struct zink_context * ctx)1215 zink_query_renderpass_suspend(struct zink_context *ctx)
1216 {
1217    suspend_queries(ctx, true);
1218 }
1219 
/* Split primitives-generated / vertex-count queries into new vk query ranges
 * whenever GS presence, xfb state, or line-loop emulation changes mid-query,
 * since those affect how the counters must be interpreted.
 */
void
zink_query_update_gs_states(struct zink_context *ctx)
{
   struct zink_query *query;
   bool suspendall = false;
   bool have_gs = !!ctx->gfx_stages[MESA_SHADER_GEOMETRY];
   bool have_xfb = !!ctx->num_so_targets;

   /* pass 1: detect any state change relative to the last recorded start */
   LIST_FOR_EACH_ENTRY(query, &ctx->primitives_generated_queries, stats_list) {
      struct zink_query_start *last_start = util_dynarray_top_ptr(&query->starts, struct zink_query_start);
      assert(query->active);
      if (query->has_draws) {
         if (last_start->have_gs != have_gs ||
             last_start->have_xfb != have_xfb) {
            suspendall = true;
         }
      }
   }

   if (ctx->vertices_query) {
      query = ctx->vertices_query;
      struct zink_query_start *last_start = util_dynarray_top_ptr(&query->starts, struct zink_query_start);
      assert(query->active);
      if (last_start->was_line_loop != ctx->was_line_loop) {
         suspendall = true;
      }
   }
   /* suspend+resume allocates a fresh start for every affected query */
   if (suspendall) {
     zink_suspend_queries(ctx);
     zink_resume_queries(ctx);
   }

   /* pass 2: stamp the current state onto the (possibly new) latest start */
   LIST_FOR_EACH_ENTRY(query, &ctx->primitives_generated_queries, stats_list) {
      struct zink_query_start *last_start = util_dynarray_top_ptr(&query->starts, struct zink_query_start);
      last_start->have_gs = have_gs;
      last_start->have_xfb = have_xfb;
      query->has_draws = true;
   }
   if (ctx->vertices_query) {
      query = ctx->vertices_query;
      struct zink_query_start *last_start = util_dynarray_top_ptr(&query->starts, struct zink_query_start);
      last_start->was_line_loop = ctx->was_line_loop;
      query->has_draws = true;
   }
}
1265 
1266 static void
zink_set_active_query_state(struct pipe_context * pctx,bool enable)1267 zink_set_active_query_state(struct pipe_context *pctx, bool enable)
1268 {
1269    struct zink_context *ctx = zink_context(pctx);
1270    /* unordered blits already disable queries */
1271    if (ctx->unordered_blitting)
1272       return;
1273    ctx->queries_disabled = !enable;
1274 
1275    if (ctx->queries_disabled)
1276       zink_suspend_queries(ctx);
1277    else if (ctx->in_rp)
1278       zink_resume_queries(ctx);
1279 }
1280 
1281 void
zink_query_sync(struct zink_context * ctx,struct zink_query * query)1282 zink_query_sync(struct zink_context *ctx, struct zink_query *query)
1283 {
1284    if (query->needs_update)
1285       update_qbo(ctx, query);
1286 }
1287 
/* Begin VK_EXT_conditional_rendering predication using the current render
 * condition's predicate buffer; no-op if unsupported or already active.
 */
void
zink_start_conditional_render(struct zink_context *ctx)
{
   if (unlikely(!zink_screen(ctx->base.screen)->info.have_EXT_conditional_rendering) || ctx->render_condition.active)
      return;
   VkConditionalRenderingFlagsEXT begin_flags = 0;
   if (ctx->render_condition.inverted)
      begin_flags = VK_CONDITIONAL_RENDERING_INVERTED_BIT_EXT;
   VkConditionalRenderingBeginInfoEXT begin_info = {0};
   begin_info.sType = VK_STRUCTURE_TYPE_CONDITIONAL_RENDERING_BEGIN_INFO_EXT;
   begin_info.buffer = ctx->render_condition.query->predicate->obj->buffer;
   begin_info.flags = begin_flags;
   /* the predicate read must be ordered after the result copy */
   ctx->render_condition.query->predicate->obj->unordered_read = false;
   VKCTX(CmdBeginConditionalRenderingEXT)(ctx->bs->cmdbuf, &begin_info);
   zink_batch_reference_resource_rw(ctx, ctx->render_condition.query->predicate, false);
   ctx->render_condition.active = true;
}
1305 
1306 void
zink_stop_conditional_render(struct zink_context * ctx)1307 zink_stop_conditional_render(struct zink_context *ctx)
1308 {
1309    zink_clear_apply_conditionals(ctx);
1310    if (unlikely(!zink_screen(ctx->base.screen)->info.have_EXT_conditional_rendering) || !ctx->render_condition.active)
1311       return;
1312    VKCTX(CmdEndConditionalRenderingEXT)(ctx->bs->cmdbuf);
1313    ctx->render_condition.active = false;
1314 }
1315 
/* pipe_context::render_condition: set or clear the predicate used for
 * conditional rendering, (re)filling the predicate buffer from the query's
 * results when dirty.
 */
static void
zink_render_condition(struct pipe_context *pctx,
                      struct pipe_query *pquery,
                      bool condition,
                      enum pipe_render_cond_flag mode)
{
   struct zink_context *ctx = zink_context(pctx);
   struct zink_query *query = (struct zink_query *)pquery;
   /* predicate copies are transfer ops: get out of the renderpass first */
   zink_batch_no_rp(ctx);
   VkQueryResultFlagBits flags = 0;

   ctx->bs->has_work = true;
   if (query == NULL) {
      /* force conditional clears if they exist */
      if (ctx->clears_enabled && !ctx->in_rp)
         zink_batch_rp(ctx);
      zink_stop_conditional_render(ctx);
      ctx->render_condition_active = false;
      ctx->render_condition.query = NULL;
      return;
   }

   if (!query->predicate) {
      struct pipe_resource *pres;

      /* need to create a vulkan buffer to copy the data into */
      pres = pipe_buffer_create(pctx->screen, PIPE_BIND_QUERY_BUFFER, PIPE_USAGE_DEFAULT, sizeof(uint64_t));
      if (!pres)
         return;

      query->predicate = zink_resource(pres);
   }
   if (query->predicate_dirty) {
      struct zink_resource *res = query->predicate;

      if (mode == PIPE_RENDER_COND_WAIT || mode == PIPE_RENDER_COND_BY_REGION_WAIT)
         flags |= VK_QUERY_RESULT_WAIT_BIT;

      flags |= VK_QUERY_RESULT_64_BIT;
      int num_results = get_num_starts(query);
      if (num_results) {
         if (!is_emulated_primgen(query) &&
            !is_so_overflow_query(query) &&
            num_results == 1) {
            /* single plain result: copy it straight into the predicate buffer */
            copy_results_to_buffer(ctx, query, res, 0, num_results, flags);
         } else {
            /* these need special handling */
            force_cpu_read(ctx, pquery, PIPE_QUERY_TYPE_U32, &res->base.b, 0);
         }
      } else {
         /* query was never started: predicate is trivially zero */
         uint64_t zero = 0;
         tc_buffer_write(pctx, &res->base.b, 0, sizeof(zero), &zero);
      }
      zink_screen(ctx->base.screen)->buffer_barrier(ctx, res, VK_ACCESS_CONDITIONAL_RENDERING_READ_BIT_EXT, VK_PIPELINE_STAGE_CONDITIONAL_RENDERING_BIT_EXT);
      query->predicate_dirty = false;
   }
   ctx->render_condition.inverted = condition;
   ctx->render_condition_active = true;
   ctx->render_condition.query = query;
   if (ctx->in_rp)
      zink_start_conditional_render(ctx);
}
1378 
/* pipe_context::get_query_result_resource: write a query result (or, for
 * index == -1, just its availability) into buffer `pres` at `offset`,
 * preferring GPU-side copies and falling back to a CPU readback for
 * results that need accumulation or conversion.
 */
static void
zink_get_query_result_resource(struct pipe_context *pctx,
                               struct pipe_query *pquery,
                               enum pipe_query_flags flags,
                               enum pipe_query_value_type result_type,
                               int index,
                               struct pipe_resource *pres,
                               unsigned offset)
{
   struct zink_context *ctx = zink_context(pctx);
   struct zink_screen *screen = zink_screen(pctx->screen);
   struct zink_query *query = (struct zink_query*)pquery;
   struct zink_resource *res = zink_resource(pres);
   unsigned result_size = result_type <= PIPE_QUERY_TYPE_U32 ? sizeof(uint32_t) : sizeof(uint64_t);
   VkQueryResultFlagBits size_flags = result_type <= PIPE_QUERY_TYPE_U32 ? 0 : VK_QUERY_RESULT_64_BIT;
   unsigned num_queries = get_num_starts(query);

   /* it's possible that a query may have no data at all: write out zeroes to the buffer and return */
   uint64_t u64[4] = {0};
   /* availability (or the wanted value) lives after the per-counter results */
   unsigned src_offset = result_size * get_num_results(query);
   if (!num_queries) {
      tc_buffer_write(pctx, pres, offset, result_size, (unsigned char*)u64 + src_offset);
      return;
   }

   if (index == -1) {
      /* VK_QUERY_RESULT_WITH_AVAILABILITY_BIT will ALWAYS write some kind of result data
       * in addition to the availability result, which is a problem if we're just trying to get availability data
       *
       * if we know that there's no valid buffer data in the preceding buffer range, then we can just
       * stomp on it with a glorious queued buffer copy instead of forcing a stall to manually write to the
       * buffer
       */

      VkQueryResultFlags flag = is_time_query(query) ? 0 : VK_QUERY_RESULT_PARTIAL_BIT;
      if (zink_batch_usage_check_completion(ctx, query->batch_uses)) {
         /* batch already finished: read availability directly on the CPU */
         struct zink_query_start *start = util_dynarray_top_ptr(&query->starts, struct zink_query_start);
         unsigned query_id = start->vkq[0]->query_id;
         VkResult result = VKCTX(GetQueryPoolResults)(screen->dev, start->vkq[0]->pool->query_pool, query_id, 1,
                                   sizeof(u64), u64, 0, size_flags | VK_QUERY_RESULT_WITH_AVAILABILITY_BIT | flag);
         if (result == VK_SUCCESS) {
            tc_buffer_write(pctx, pres, offset, result_size, (unsigned char*)u64 + src_offset);
            return;
         } else {
            mesa_loge("ZINK: vkGetQueryPoolResults failed (%s)", vk_Result_to_str(result));
         }
      }
      /* otherwise copy result+availability into a staging buffer and extract
       * just the availability word with a buffer copy */
      struct pipe_resource *staging = pipe_buffer_create(pctx->screen, 0, PIPE_USAGE_STAGING, src_offset + result_size);
      copy_results_to_buffer(ctx, query, zink_resource(staging), 0, 1, size_flags | VK_QUERY_RESULT_WITH_AVAILABILITY_BIT | flag);
      zink_copy_buffer(ctx, res, zink_resource(staging), offset, result_size * get_num_results(query), result_size);
      pipe_resource_reference(&staging, NULL);
      return;
   }

   /*
      there is an implicit execution dependency from
      each such query command to all query commands previously submitted to the same queue. There
      is one significant exception to this; if the flags parameter of vkCmdCopyQueryPoolResults does not
      include VK_QUERY_RESULT_WAIT_BIT, execution of vkCmdCopyQueryPoolResults may happen-before
      the results of vkCmdEndQuery are available.

    * - Chapter 18. Queries
    */
   size_flags |= VK_QUERY_RESULT_WAIT_BIT;
   if (!is_time_query(query) && !is_bool_query(query)) {
      if (num_queries == 1 && !is_emulated_primgen(query) &&
                              query->type != PIPE_QUERY_PRIMITIVES_EMITTED &&
                              !is_bool_query(query)) {
         if (size_flags == VK_QUERY_RESULT_64_BIT) {
            if (query->needs_update)
               update_qbo(ctx, query);
            /* internal qbo always writes 64bit value so we can just direct copy */
            zink_copy_buffer(ctx, res, zink_resource(query->curr_qbo->buffers[0]), offset,
                             get_buffer_offset(query),
                             result_size);
         } else
            /* have to do a new copy for 32bit */
            copy_results_to_buffer(ctx, query, res, offset, 1, size_flags);
         return;
      }
   }

   /* TODO: use CS to aggregate results */

   /* unfortunately, there's no way to accumulate results from multiple queries on the gpu without either
    * clobbering all but the last result or writing the results sequentially, so we have to manually write the result
    */
   force_cpu_read(ctx, pquery, result_type, pres, offset);
}
1468 
1469 uint64_t
zink_get_timestamp(struct pipe_screen * pscreen)1470 zink_get_timestamp(struct pipe_screen *pscreen)
1471 {
1472    struct zink_screen *screen = zink_screen(pscreen);
1473    uint64_t timestamp, deviation;
1474    if (screen->info.have_EXT_calibrated_timestamps) {
1475       VkCalibratedTimestampInfoEXT cti = {0};
1476       cti.sType = VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT;
1477       cti.timeDomain = VK_TIME_DOMAIN_DEVICE_EXT;
1478       VkResult result = VKSCR(GetCalibratedTimestampsEXT)(screen->dev, 1, &cti, &timestamp, &deviation);
1479       if (result != VK_SUCCESS) {
1480          mesa_loge("ZINK: vkGetCalibratedTimestampsEXT failed (%s)", vk_Result_to_str(result));
1481       }
1482    } else {
1483       zink_screen_lock_context(screen);
1484       struct pipe_context *pctx = &screen->copy_context->base;
1485       struct pipe_query *pquery = pctx->create_query(pctx, PIPE_QUERY_TIMESTAMP, 0);
1486       if (!pquery)
1487          return 0;
1488       union pipe_query_result result = {0};
1489       pctx->begin_query(pctx, pquery);
1490       pctx->end_query(pctx, pquery);
1491       pctx->get_query_result(pctx, pquery, true, &result);
1492       pctx->destroy_query(pctx, pquery);
1493       zink_screen_unlock_context(screen);
1494       timestamp = result.u64;
1495    }
1496    timestamp_to_nanoseconds(screen, &timestamp);
1497    return timestamp;
1498 }
1499 
/* Initialize per-context query state and install the gallium query hooks. */
void
zink_context_query_init(struct pipe_context *pctx)
{
   struct zink_context *ctx = zink_context(pctx);
   /* lists used to track queries across batch/renderpass boundaries */
   list_inithead(&ctx->suspended_queries);
   list_inithead(&ctx->primitives_generated_queries);

   pctx->create_query = zink_create_query;
   pctx->destroy_query = zink_destroy_query;
   pctx->begin_query = zink_begin_query;
   pctx->end_query = zink_end_query;
   pctx->get_query_result = zink_get_query_result;
   pctx->get_query_result_resource = zink_get_query_result_resource;
   pctx->set_active_query_state = zink_set_active_query_state;
   pctx->render_condition = zink_render_condition;
}
1516 
1517 int
zink_get_driver_query_group_info(struct pipe_screen * pscreen,unsigned index,struct pipe_driver_query_group_info * info)1518 zink_get_driver_query_group_info(struct pipe_screen *pscreen, unsigned index,
1519                                  struct pipe_driver_query_group_info *info)
1520 {
1521    if (!info)
1522       return 1;
1523 
1524    assert(index == 0);
1525    info->name = "Zink counters";
1526    info->max_active_queries = ARRAY_SIZE(zink_specific_queries);
1527    info->num_queries = ARRAY_SIZE(zink_specific_queries);
1528 
1529    return 1;
1530 }
1531 
1532 int
zink_get_driver_query_info(struct pipe_screen * pscreen,unsigned index,struct pipe_driver_query_info * info)1533 zink_get_driver_query_info(struct pipe_screen *pscreen, unsigned index,
1534                            struct pipe_driver_query_info *info)
1535 {
1536    if (!info)
1537       return ARRAY_SIZE(zink_specific_queries);
1538 
1539    assert(index < ARRAY_SIZE(zink_specific_queries));
1540    *info = zink_specific_queries[index];
1541 
1542    return 1;
1543 }
1544