/*
 * Copyright 2022 Alyssa Rosenzweig
 * Copyright 2019-2020 Collabora, Ltd.
 * SPDX-License-Identifier: MIT
 */

#include <stdint.h>
#include "pipe/p_defines.h"
#include "util/bitset.h"
#include "util/macros.h"
#include "util/ralloc.h"
#include "util/u_dump.h"
#include "util/u_inlines.h"
#include "util/u_prim.h"
#include "agx_bo.h"
#include "agx_device.h"
#include "agx_state.h"
#include "nir.h"
#include "nir_builder.h"
#include "nir_builder_opcodes.h"
#include "pool.h"
#include "shader_enums.h"

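/* Query implementation for the Asahi (AGX) Gallium driver. Occlusion queries
 * are allocated out of a single shared heap so they can be referenced
 * compactly by index; all other query types get their own small BO. Batches
 * that write a query are tracked with per-batch generation counters rather
 * than explicit lists, so flushing or syncing a query's writers reduces to a
 * generation comparison.
 */
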
static bool
is_occlusion(struct agx_query *query)
{
   switch (query->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
      return true;
   default:
      return false;
   }
}

static bool
is_timer(struct agx_query *query)
{
   switch (query->type) {
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_TIME_ELAPSED:
      return true;
   default:
      return false;
   }
}

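/* The occlusion query heap is a single BO holding one 64-bit counter per
 * query, with a bitset tracking which slots are free. Keeping all occlusion
 * results in one allocation lets a batch reference a query by its 16-bit
 * index into the heap (see agx_get_oq_index below).
 */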
struct agx_oq_heap {
   struct agx_device *dev;

   /* The GPU allocation itself */
   struct agx_bo *bo;

   /* Bitset of query indices that are available (free) */
   BITSET_DECLARE(available, AGX_MAX_OCCLUSION_QUERIES);
};

static void
agx_destroy_oq_heap(void *heap_)
{
   struct agx_oq_heap *heap = heap_;
   agx_bo_unreference(heap->dev, heap->bo);
}

static struct agx_oq_heap *
agx_alloc_oq_heap(struct agx_context *ctx)
{
   struct agx_oq_heap *heap = rzalloc(ctx, struct agx_oq_heap);
   ralloc_set_destructor(heap, agx_destroy_oq_heap);

   heap->dev = agx_device(ctx->base.screen);
   heap->bo =
      agx_bo_create(heap->dev, AGX_MAX_OCCLUSION_QUERIES * sizeof(uint64_t), 0,
                    AGX_BO_WRITEBACK, "Occlusion query heap");

   /* At the start, everything is available */
   BITSET_ONES(heap->available);

   return heap;
}

static struct agx_oq_heap *
agx_get_oq_heap(struct agx_context *ctx)
{
   if (!ctx->oq)
      ctx->oq = agx_alloc_oq_heap(ctx);

   return ctx->oq;
}

static struct agx_ptr
agx_alloc_oq(struct agx_context *ctx)
{
   struct agx_oq_heap *heap = agx_get_oq_heap(ctx);

   /* Find first available */
   int ffs = BITSET_FFS(heap->available);
   if (!ffs)
      return (struct agx_ptr){NULL, 0};

   /* Allocate it */
   unsigned index = ffs - 1;
   BITSET_CLEAR(heap->available, index);

   unsigned offset = index * sizeof(uint64_t);

   return (struct agx_ptr){
      (uint8_t *)heap->bo->map + offset,
      heap->bo->va->addr + offset,
   };
}

static unsigned
agx_oq_index(struct agx_context *ctx, struct agx_query *q)
{
   assert(is_occlusion(q));

   return (q->ptr.gpu - ctx->oq->bo->va->addr) / sizeof(uint64_t);
}

static void
agx_free_oq(struct agx_context *ctx, struct agx_query *q)
{
   struct agx_oq_heap *heap = agx_get_oq_heap(ctx);
   unsigned index = agx_oq_index(ctx, q);

   assert(index < AGX_MAX_OCCLUSION_QUERIES);
   assert(!BITSET_TEST(heap->available, index));

   BITSET_SET(heap->available, index);
}

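/* Return the GPU address of the occlusion query heap, or 0 if this batch
 * does not use the heap's BO (and hence writes no occlusion queries).
 */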
uint64_t
agx_get_occlusion_heap(struct agx_batch *batch)
{
   if (!batch->ctx->oq)
      return 0;

   struct agx_bo *bo = batch->ctx->oq->bo;

   if (agx_batch_uses_bo(batch, bo))
      return bo->va->addr;
   else
      return 0;
}

static struct pipe_query *
agx_create_query(struct pipe_context *ctx, unsigned query_type, unsigned index)
{
   struct agx_query *query = calloc(1, sizeof(struct agx_query));

   query->type = query_type;
   query->index = index;

   /* Set all writer generations to a sentinel that will always compare as
    * false, since a freshly created query has no writers.
    */
   for (unsigned i = 0; i < ARRAY_SIZE(query->writer_generation); ++i) {
      query->writer_generation[i] = UINT64_MAX;
   }

   if (is_occlusion(query)) {
      query->ptr = agx_alloc_oq(agx_context(ctx));
   } else {
      /* TODO: a BO for the query is wasteful, but we benefit from BO list
       * tracking / reference counting to deal with lifetimes.
       */
      query->bo = agx_bo_create(agx_device(ctx->screen), sizeof(uint64_t) * 2,
                                0, AGX_BO_WRITEBACK, "Query");
      query->ptr = (struct agx_ptr){
         .gpu = query->bo->va->addr,
         .cpu = query->bo->map,
      };
   }

   if (!query->ptr.gpu) {
      free(query);
      return NULL;
   }

   return (struct pipe_query *)query;
}

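/* A query remembers which batches write it by recording, per batch slot, the
 * generation the batch had when the query was attached
 * (agx_add_query_to_batch). When a batch finishes, its slot's generation is
 * incremented (agx_finish_batch_queries), so a stale writer_generation entry
 * no longer matches and the query is implicitly detached from that batch.
 */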
static void
flush_query_writers(struct agx_context *ctx, struct agx_query *query,
                    const char *reason)
{
   STATIC_ASSERT(ARRAY_SIZE(ctx->batches.generation) == AGX_MAX_BATCHES);
   STATIC_ASSERT(ARRAY_SIZE(ctx->batches.slots) == AGX_MAX_BATCHES);
   STATIC_ASSERT(ARRAY_SIZE(query->writer_generation) == AGX_MAX_BATCHES);

   for (unsigned i = 0; i < AGX_MAX_BATCHES; ++i) {
      if (query->writer_generation[i] == ctx->batches.generation[i])
         agx_flush_batch_for_reason(ctx, &ctx->batches.slots[i], reason);
   }
}

static void
sync_query_writers(struct agx_context *ctx, struct agx_query *query,
                   const char *reason)
{
   for (unsigned i = 0; i < AGX_MAX_BATCHES; ++i) {
      if (query->writer_generation[i] == ctx->batches.generation[i])
         agx_sync_batch_for_reason(ctx, &ctx->batches.slots[i], reason);
   }
}

static bool
is_query_busy(struct agx_context *ctx, struct agx_query *query)
{
   for (unsigned i = 0; i < AGX_MAX_BATCHES; ++i) {
      if (query->writer_generation[i] == ctx->batches.generation[i])
         return true;
   }

   return false;
}

static void
agx_destroy_query(struct pipe_context *pctx, struct pipe_query *pquery)
{
   struct agx_context *ctx = agx_context(pctx);
   struct agx_query *query = (struct agx_query *)pquery;
   struct agx_device *dev = agx_device(pctx->screen);

   /* We don't reference count the occlusion query allocations, so we need to
    * sync writers when destroying so we can freely write from the CPU after
    * it's destroyed, since the driver will assume an available query is idle.
    *
    * For other queries, the BO itself is reference counted after the
    * pipe_query is destroyed, so we don't need to flush.
    */
   if (is_occlusion(query)) {
      sync_query_writers(ctx, query, "Occlusion query destroy");
      agx_free_oq(ctx, query);
   } else {
      agx_bo_unreference(dev, query->bo);
   }

   free(pquery);
}

static bool
agx_begin_query(struct pipe_context *pctx, struct pipe_query *pquery)
{
   struct agx_context *ctx = agx_context(pctx);
   struct agx_query *query = (struct agx_query *)pquery;

   ctx->dirty |= AGX_DIRTY_QUERY;

   switch (query->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
      ctx->occlusion_query = query;
      break;

   case PIPE_QUERY_PRIMITIVES_GENERATED:
      ctx->prims_generated[query->index] = query;
      break;

   case PIPE_QUERY_PRIMITIVES_EMITTED:
      ctx->tf_prims_generated[query->index] = query;
      break;

   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      ctx->tf_overflow[query->index] = query;
      break;

   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      ctx->tf_any_overflow = query;
      break;

   case PIPE_QUERY_TIME_ELAPSED:
      ctx->time_elapsed = query;
      break;

   case PIPE_QUERY_TIMESTAMP:
      /* No-op */
      break;

   case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE:
      assert(query->index < ARRAY_SIZE(ctx->pipeline_statistics));
      ctx->pipeline_statistics[query->index] = query;
      break;

   default:
      return false;
   }

   /* begin_query zeroes the result, so sync writers first to make that CPU
    * write safe.
    */
   sync_query_writers(ctx, query, "Query overwritten");

   uint64_t *ptr = query->ptr.cpu;
   ptr[0] = 0;

   if (query->type == PIPE_QUERY_TIME_ELAPSED) {
      /* The begin timestamp lives in the second slot, the end timestamp in
       * the first. Initialize begin to UINT64_MAX so the MIN-accumulation in
       * agx_finish_batch_queries works.
       */
      ptr[1] = UINT64_MAX;
   }

   return true;
}

static bool
agx_end_query(struct pipe_context *pctx, struct pipe_query *pquery)
{
   struct agx_context *ctx = agx_context(pctx);
   struct agx_device *dev = agx_device(pctx->screen);
   struct agx_query *query = (struct agx_query *)pquery;

   ctx->dirty |= AGX_DIRTY_QUERY;

   switch (query->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
      ctx->occlusion_query = NULL;
      return true;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
      ctx->prims_generated[query->index] = NULL;
      return true;
   case PIPE_QUERY_PRIMITIVES_EMITTED:
      ctx->tf_prims_generated[query->index] = NULL;
      return true;
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      ctx->tf_overflow[query->index] = NULL;
      return true;
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      ctx->tf_any_overflow = NULL;
      return true;
   case PIPE_QUERY_TIME_ELAPSED:
      ctx->time_elapsed = NULL;
      return true;
   case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE:
      assert(query->index < ARRAY_SIZE(ctx->pipeline_statistics));
      ctx->pipeline_statistics[query->index] = NULL;
      return true;
   case PIPE_QUERY_TIMESTAMP: {
      /* The timestamp is logically written now. Arrange for in-flight batches
       * to MAX their finish times into the result; if there are no batches,
       * the result is simply the current timestamp.
       */
      agx_add_timestamp_end_query(ctx, query);

      uint64_t *value = query->ptr.cpu;
      *value = agx_get_gpu_timestamp(dev);

      return true;
   }
   default:
      return false;
   }
}

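/* Query results are stored internally as 64-bit counters. When copying a
 * result out (to the CPU or to a buffer on the GPU), queries are classified
 * by how the raw counter must be normalized: booleans collapse to 0/1, and
 * timer queries convert GPU time to nanoseconds.
 */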
enum query_copy_type {
   QUERY_COPY_NORMAL,
   QUERY_COPY_BOOL32,
   QUERY_COPY_BOOL64,
   QUERY_COPY_TIMESTAMP,
   QUERY_COPY_TIME_ELAPSED,
};

static enum query_copy_type
classify_query_type(enum pipe_query_type type)
{
   switch (type) {
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
      return QUERY_COPY_BOOL32;

   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      return QUERY_COPY_BOOL64;

   case PIPE_QUERY_TIMESTAMP:
      return QUERY_COPY_TIMESTAMP;

   case PIPE_QUERY_TIME_ELAPSED:
      return QUERY_COPY_TIME_ELAPSED;

   default:
      return QUERY_COPY_NORMAL;
   }
}

static bool
agx_get_query_result(struct pipe_context *pctx, struct pipe_query *pquery,
                     bool wait, union pipe_query_result *vresult)
{
   struct agx_query *query = (struct agx_query *)pquery;
   struct agx_context *ctx = agx_context(pctx);
   struct agx_device *dev = agx_device(pctx->screen);

   /* TODO: Honour `wait` */
   sync_query_writers(ctx, query, "Reading query results");

   uint64_t *ptr = query->ptr.cpu;
   uint64_t value = *ptr;

   switch (classify_query_type(query->type)) {
   case QUERY_COPY_BOOL32:
      vresult->b = value;
      return true;

   case QUERY_COPY_BOOL64:
      vresult->b = value > 0;
      return true;

   case QUERY_COPY_NORMAL:
      vresult->u64 = value;
      return true;

   case QUERY_COPY_TIMESTAMP:
      vresult->u64 = agx_gpu_time_to_ns(dev, value);
      return true;

   case QUERY_COPY_TIME_ELAPSED:
      /* end - begin */
      vresult->u64 = agx_gpu_time_to_ns(dev, ptr[0] - ptr[1]);
      return true;

   default:
      unreachable("Other queries not yet supported");
   }
}

static unsigned
result_type_size(enum pipe_query_value_type result_type)
{
   return (result_type <= PIPE_QUERY_TYPE_U32) ? 4 : 8;
}

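/* CPU fallback for get_query_result_resource: read back the result (syncing
 * writers), normalize it, and write it into the buffer with
 * pipe_buffer_write. Used for availability queries and anything the GPU copy
 * path below does not handle.
 */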
static void
agx_get_query_result_resource_cpu(struct agx_context *ctx,
                                  struct agx_query *query,
                                  enum pipe_query_flags flags,
                                  enum pipe_query_value_type result_type,
                                  int index, struct pipe_resource *resource,
                                  unsigned offset)
{
   union pipe_query_result result;
   if (index < 0) {
      /* availability */
      result.u64 = !is_query_busy(ctx, query);
   } else {
      bool ready =
         agx_get_query_result(&ctx->base, (void *)query, true, &result);

      assert(ready);

      switch (classify_query_type(query->type)) {
      case QUERY_COPY_BOOL32:
      case QUERY_COPY_BOOL64:
         result.u64 = result.b;
         break;
      default:
         break;
      }
   }

   /* Clamp to the result type, as required by the
    * arb_query_buffer_object-qbo tests.
    */
   if (result_type == PIPE_QUERY_TYPE_U32) {
      result.u32 = MIN2(result.u64, u_uintN_max(32));
   } else if (result_type == PIPE_QUERY_TYPE_I32) {
      int64_t x = result.u64;
      x = MAX2(MIN2(x, u_intN_max(32)), u_intN_min(32));
      result.u32 = x;
   }

   pipe_buffer_write(&ctx->base, resource, offset,
                     result_type_size(result_type), &result.u64);
}

struct query_copy_key {
   enum pipe_query_value_type result;
   enum query_copy_type query;
};

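/* Build a tiny compute shader that copies a single query result on the GPU.
 * The launcher passes two 64-bit parameters in constant buffer 0: the
 * address of the query result and the destination address within the query
 * buffer object. The shader loads the 64-bit value, normalizes booleans to
 * 0/1, clamps to the destination type, and stores it.
 */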
static void
agx_nir_query_copy(nir_builder *b, const void *key_)
{
   const struct query_copy_key *key = key_;
   b->shader->info.num_ubos = 1;

   nir_def *params =
      nir_load_ubo(b, 2, 64, nir_imm_int(b, 0), nir_imm_int(b, 0),
                   .align_mul = 8, .range = 8);

   nir_def *value =
      nir_load_global_constant(b, nir_channel(b, params, 0), 8, 1, 64);

   if (key->query == QUERY_COPY_BOOL32 || key->query == QUERY_COPY_BOOL64) {
      if (key->query == QUERY_COPY_BOOL32)
         value = nir_u2u32(b, value);

      value = nir_u2u64(b, nir_ine_imm(b, value, 0));
   }

   if (key->result == PIPE_QUERY_TYPE_U32) {
      value =
         nir_u2u32(b, nir_umin(b, value, nir_imm_int64(b, u_uintN_max(32))));
   } else if (key->result == PIPE_QUERY_TYPE_I32) {
      value =
         nir_u2u32(b, nir_iclamp(b, value, nir_imm_int64(b, u_intN_min(32)),
                                 nir_imm_int64(b, u_intN_max(32))));
   }

   nir_store_global(b, nir_channel(b, params, 1), result_type_size(key->result),
                    value, nir_component_mask(1));
}

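/* GPU path for get_query_result_resource: instead of syncing, flush the
 * query's writers and append a one-thread compute dispatch that copies the
 * result into the destination buffer. Returns false if the query must be
 * handled on the CPU instead.
 */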
static bool
agx_get_query_result_resource_gpu(struct agx_context *ctx,
                                  struct agx_query *query,
                                  enum pipe_query_flags flags,
                                  enum pipe_query_value_type result_type,
                                  int index, struct pipe_resource *prsrc,
                                  unsigned offset)
{
   /* Handle availability queries on the CPU */
   if (index < 0)
      return false;

   /* TODO: timer queries on the GPU */
   if (query->type == PIPE_QUERY_TIMESTAMP ||
       query->type == PIPE_QUERY_TIME_ELAPSED)
      return false;

   flush_query_writers(ctx, query, util_str_query_type(query->type, true));

   struct agx_resource *rsrc = agx_resource(prsrc);

   struct query_copy_key key = {
      .result = result_type,
      .query = classify_query_type(query->type),
   };

   struct agx_compiled_shader *cs =
      agx_build_meta_shader(ctx, agx_nir_query_copy, &key, sizeof(key));

   struct agx_batch *batch = agx_get_compute_batch(ctx);
   agx_batch_init_state(batch);
   agx_dirty_all(ctx);

   /* Save the constant buffer so we can restore it after the dispatch */
   struct agx_stage *stage = &ctx->stage[PIPE_SHADER_COMPUTE];
   struct pipe_constant_buffer saved_cb = {NULL};
   pipe_resource_reference(&saved_cb.buffer, stage->cb[0].buffer);
   memcpy(&saved_cb, &stage->cb[0], sizeof(struct pipe_constant_buffer));

   /* Set params: source query address and destination address */
   uint64_t params[2] = {query->ptr.gpu, rsrc->bo->va->addr + offset};
   agx_batch_writes_range(batch, rsrc, offset, result_type_size(result_type));

   struct pipe_constant_buffer cb = {
      .buffer_size = sizeof(params),
      .user_buffer = &params,
   };
   ctx->base.set_constant_buffer(&ctx->base, PIPE_SHADER_COMPUTE, 0, false,
                                 &cb);

   struct agx_grid grid = agx_grid_direct(1, 1, 1, 1, 1, 1);
   agx_launch(batch, &grid, cs, NULL, PIPE_SHADER_COMPUTE, 0);

   /* take_ownership=true, so do not unreference */
   ctx->base.set_constant_buffer(&ctx->base, PIPE_SHADER_COMPUTE, 0, true,
                                 &saved_cb);
   return true;
}

static void
agx_get_query_result_resource(struct pipe_context *pipe, struct pipe_query *q,
                              enum pipe_query_flags flags,
                              enum pipe_query_value_type result_type, int index,
                              struct pipe_resource *resource, unsigned offset)
{
   struct agx_query *query = (struct agx_query *)q;
   struct agx_context *ctx = agx_context(pipe);

   /* Try to copy on the GPU */
   if (!agx_get_query_result_resource_gpu(ctx, query, flags, result_type, index,
                                          resource, offset)) {

      /* Otherwise, fall back to the CPU */
      agx_get_query_result_resource_cpu(ctx, query, flags, result_type, index,
                                        resource, offset);
   }
}

static void
agx_set_active_query_state(struct pipe_context *pipe, bool enable)
{
   struct agx_context *ctx = agx_context(pipe);

   ctx->active_queries = enable;
   ctx->dirty |= AGX_DIRTY_QUERY;
}

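/* Attach a query to a batch as a writer: add the backing BO to the batch's
 * BO list and record the batch's current generation so the flush/sync/busy
 * checks above recognize the dependency.
 */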
static void
agx_add_query_to_batch(struct agx_batch *batch, struct agx_query *query)
{
   unsigned idx = agx_batch_idx(batch);
   struct agx_bo *bo = is_occlusion(query) ? batch->ctx->oq->bo : query->bo;

   agx_batch_add_bo(batch, bo);
   query->writer_generation[idx] = batch->ctx->batches.generation[idx];
}

void
agx_batch_add_timestamp_query(struct agx_batch *batch, struct agx_query *q)
{
   if (q) {
      agx_add_query_to_batch(batch, q);
      util_dynarray_append(&batch->timestamps, struct agx_ptr, q->ptr);
   }
}

uint16_t
agx_get_oq_index(struct agx_batch *batch, struct agx_query *query)
{
   agx_add_query_to_batch(batch, query);
   return agx_oq_index(batch->ctx, query);
}

uint64_t
agx_get_query_address(struct agx_batch *batch, struct agx_query *query)
{
   if (query) {
      agx_add_query_to_batch(batch, query);
      return query->ptr.gpu;
   } else {
      return 0;
   }
}

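/* Called when a batch completes. Timestamp queries use a two-slot layout
 * (end in ptr[0], begin in ptr[1], initialized to 0 and UINT64_MAX in
 * agx_begin_query), so accumulating with MAX/MIN yields the overall interval
 * across all contributing batches.
 */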
void
agx_finish_batch_queries(struct agx_batch *batch, uint64_t begin_ts,
                         uint64_t end_ts)
{
   /* Remove this batch as a writer from all queries by incrementing the
    * batch's generation, invalidating any matching writer_generation entries.
    */
   batch->ctx->batches.generation[agx_batch_idx(batch)]++;

   /* Write out timestamps */
   util_dynarray_foreach(&batch->timestamps, struct agx_ptr, it) {
      uint64_t *ptr = it->cpu;

      ptr[0] = MAX2(ptr[0], end_ts);
      ptr[1] = MIN2(ptr[1], begin_ts);
   }
}

void
agx_query_increment_cpu(struct agx_context *ctx, struct agx_query *query,
                        uint64_t increment)
{
   if (!query)
      return;

   sync_query_writers(ctx, query, "CPU query increment");

   uint64_t *value = query->ptr.cpu;
   *value += increment;
}

static void
agx_render_condition(struct pipe_context *pipe, struct pipe_query *query,
                     bool condition, enum pipe_render_cond_flag mode)
{
   struct agx_context *ctx = agx_context(pipe);

   ctx->cond_query = query;
   ctx->cond_cond = condition;
   ctx->cond_mode = mode;
}

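/* Evaluate the render condition on the CPU by reading back the query.
 * Rendering proceeds when the result does not match the user's condition
 * flag; if the result is unavailable, we conservatively render.
 */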
bool
agx_render_condition_check_inner(struct agx_context *ctx)
{
   assert(ctx->cond_query != NULL && "precondition");

   perf_debug_ctx(ctx, "Implementing conditional rendering on the CPU");

   union pipe_query_result res = {0};
   bool wait = ctx->cond_mode != PIPE_RENDER_COND_NO_WAIT &&
               ctx->cond_mode != PIPE_RENDER_COND_BY_REGION_NO_WAIT;

   struct pipe_query *pq = (struct pipe_query *)ctx->cond_query;

   if (agx_get_query_result(&ctx->base, pq, wait, &res))
      return res.u64 != ctx->cond_cond;

   return true;
}

void
agx_init_query_functions(struct pipe_context *pctx)
{
   pctx->create_query = agx_create_query;
   pctx->destroy_query = agx_destroy_query;
   pctx->begin_query = agx_begin_query;
   pctx->end_query = agx_end_query;
   pctx->get_query_result = agx_get_query_result;
   pctx->get_query_result_resource = agx_get_query_result_resource;
   pctx->set_active_query_state = agx_set_active_query_state;
   pctx->render_condition = agx_render_condition;

   /* By default, queries are active */
   agx_context(pctx)->active_queries = true;
}