xref: /aosp_15_r20/external/mesa3d/src/gallium/drivers/freedreno/freedreno_query_acc.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
/*
 * Copyright © 2017 Rob Clark <[email protected]>
 * SPDX-License-Identifier: MIT
 *
 * Authors:
 *    Rob Clark <[email protected]>
 */

#include "util/u_inlines.h"
#include "util/u_memory.h"

#include "freedreno_context.h"
#include "freedreno_query_acc.h"
#include "freedreno_resource.h"
#include "freedreno_util.h"

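/* Remove the query from the active list, drop the reference to its result
 * buffer, and free it.
 */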
static void
fd_acc_destroy_query(struct fd_context *ctx, struct fd_query *q) assert_dt
{
   struct fd_acc_query *aq = fd_acc_query(q);

   DBG("%p", q);

   pipe_resource_reference(&aq->prsc, NULL);
   list_del(&aq->node);

   free(aq->query_data);
   free(aq);
}

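/* (Re)allocate and zero the buffer object that the query samples are
 * accumulated into, so a fresh begin_query starts from a clean slate.
 */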
static void
realloc_query_bo(struct fd_context *ctx, struct fd_acc_query *aq)
{
   struct fd_resource *rsc;
   void *map;

   pipe_resource_reference(&aq->prsc, NULL);

   aq->prsc =
      pipe_buffer_create(&ctx->screen->base, PIPE_BIND_QUERY_BUFFER, 0, 0x1000);

   /* don't assume the buffer is zero-initialized: */
   rsc = fd_resource(aq->prsc);

   fd_bo_cpu_prep(rsc->bo, ctx->pipe, FD_BO_PREP_WRITE);

   map = fd_bo_map(rsc->bo);
   memset(map, 0, aq->size);
}

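/* Emit the provider's pause commands into the query's current batch (if any)
 * and detach the query from that batch.
 */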
static void
fd_acc_query_pause(struct fd_acc_query *aq) assert_dt
{
   const struct fd_acc_sample_provider *p = aq->provider;

   if (!aq->batch)
      return;

   fd_batch_needs_flush(aq->batch);
   p->pause(aq, aq->batch);
   aq->batch = NULL;
}

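/* Attach the query to a batch, mark the query buffer as written by that
 * batch, and emit the provider's resume commands.
 */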
static void
fd_acc_query_resume(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
{
   const struct fd_acc_sample_provider *p = aq->provider;

   fd_screen_lock(batch->ctx->screen);
   fd_batch_resource_write(batch, fd_resource(aq->prsc));
   fd_screen_unlock(batch->ctx->screen);

   aq->batch = batch;
   fd_batch_needs_flush(aq->batch);
   p->resume(aq, aq->batch);
}

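/* Start a query: reset its result buffer, add it to the context's list of
 * active queries, and flag that per-batch query state must be updated before
 * the next draw.
 */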
static void
fd_acc_begin_query(struct fd_context *ctx, struct fd_query *q) assert_dt
{
   struct fd_acc_query *aq = fd_acc_query(q);

   DBG("%p", q);

   /* ->begin_query() discards previous results, so realloc bo: */
   realloc_query_bo(ctx, aq);

   /* Signal that we need to update the active queries on the next draw */
   fd_context_dirty(ctx, FD_DIRTY_QUERY);

   /* add to active list: */
   assert(list_is_empty(&aq->node));
   list_addtail(&aq->node, &ctx->acc_active_queries);

   /* TIMESTAMP/GPU_FINISHED queries don't do normal bracketing at draw time,
    * so we need to emit the capture at this moment.
    */
   if (skip_begin_query(q->type)) {
      struct fd_batch *batch = fd_context_batch(ctx);
      fd_acc_query_resume(aq, batch);
      fd_batch_reference(&batch, NULL);
   }
}

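/* Finish a query: pause sampling, remove the query from the active list, and
 * emit a "result available" marker into the query buffer from the tile
 * epilogue.
 */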
static void
fd_acc_end_query(struct fd_context *ctx, struct fd_query *q) assert_dt
{
   struct fd_acc_query *aq = fd_acc_query(q);

   DBG("%p", q);

   fd_acc_query_pause(aq);

   /* remove from active list: */
   list_delinit(&aq->node);

   /* mark the result available: */
   struct fd_batch *batch = fd_context_batch(ctx);
   struct fd_ringbuffer *ring = fd_batch_get_tile_epilogue(batch);
   struct fd_resource *rsc = fd_resource(aq->prsc);

   if (ctx->screen->gen < 5) {
      OUT_PKT3(ring, CP_MEM_WRITE, 3);
      OUT_RELOC(ring, rsc->bo, 0, 0, 0);
      OUT_RING(ring, 1);     /* low 32b */
      OUT_RING(ring, 0);     /* high 32b */
   } else {
      OUT_PKT7(ring, CP_MEM_WRITE, 4);
      OUT_RELOC(ring, rsc->bo, 0, 0, 0);
      OUT_RING(ring, 1);     /* low 32b */
      OUT_RING(ring, 0);     /* high 32b */
   }

   fd_batch_reference(&batch, NULL);
}

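/* Read the accumulated result back on the CPU, flushing and/or waiting on the
 * GPU as needed, and convert it via the provider's result() callback.
 */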
static bool
fd_acc_get_query_result(struct fd_context *ctx, struct fd_query *q, bool wait,
                        union pipe_query_result *result)
{
   struct fd_acc_query *aq = fd_acc_query(q);
   const struct fd_acc_sample_provider *p = aq->provider;
   struct fd_resource *rsc = fd_resource(aq->prsc);

   DBG("%p: wait=%d", q, wait);

   assert(list_is_empty(&aq->node));

   /* ARB_occlusion_query says:
    *
    *     "Querying the state for a given occlusion query forces that
    *      occlusion query to complete within a finite amount of time."
    *
    * So, regardless of whether we are supposed to wait or not, we do need to
    * flush now.
    */
   if (fd_get_query_result_in_driver_thread(q)) {
      tc_assert_driver_thread(ctx->tc);
      fd_context_access_begin(ctx);
      fd_bc_flush_writer(ctx, rsc);
      fd_context_access_end(ctx);
   }

   if (!wait) {
      int ret = fd_resource_wait(
         ctx, rsc, FD_BO_PREP_READ | FD_BO_PREP_NOSYNC | FD_BO_PREP_FLUSH);
      if (ret)
         return false;
   } else {
      fd_resource_wait(ctx, rsc, FD_BO_PREP_READ);
   }

   struct fd_acc_query_sample *s = fd_bo_map(rsc->bo);
   p->result(aq, s, result);

   return true;
}

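/* Write the query result into a GPU buffer object (query_buffer_object path),
 * deferring the real copy to the batch epilogue where the accumulated result
 * is complete.
 */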
static void
fd_acc_get_query_result_resource(struct fd_context *ctx, struct fd_query *q,
                                 enum pipe_query_flags flags,
                                 enum pipe_query_value_type result_type,
                                 int index, struct fd_resource *dst,
                                 unsigned offset)
   assert_dt
{
   struct fd_acc_query *aq = fd_acc_query(q);
   const struct fd_acc_sample_provider *p = aq->provider;
   struct fd_batch *batch = fd_context_batch(ctx);

   assert(ctx->screen->gen >= 5);

   fd_screen_lock(batch->ctx->screen);
   fd_batch_resource_write(batch, dst);
   fd_screen_unlock(batch->ctx->screen);

   /* query_buffer_object isn't really the greatest thing for a tiler,
    * if the app tries to use the result of the query in the same batch.
    * In general the query result isn't truly ready until the last gmem
    * bin/tile.
    *
    * So, we mark the query result as not being available in the draw
    * ring (which technically is true), and then in epilogue ring we
    * update the query dst buffer with the *actual* results and status.
    */
   if (index == -1) {
      /* Mark the query as not-ready in the draw ring: */
      struct fd_ringbuffer *ring = batch->draw;
      bool is_64b = result_type >= PIPE_QUERY_TYPE_I64;

      OUT_PKT7(ring, CP_MEM_WRITE, is_64b ? 4 : 3);
      OUT_RELOC(ring, dst->bo, offset, 0, 0);
      OUT_RING(ring, 0);     /* low 32b */
      if (is_64b)
         OUT_RING(ring, 0);  /* high 32b */
   }

   struct fd_ringbuffer *ring = fd_batch_get_epilogue(batch);

   if (index == -1) {
      copy_result(ring, result_type, dst, offset, fd_resource(aq->prsc), 0);
   } else {
      p->result_resource(aq, ring, result_type, index, dst, offset);
   }

   /* If we are told to wait for results, then we need to flush.  For an IMR
    * this would just be a wait on the GPU, but the expectation is that draws
    * following this one see the results of the query, which means we need to
    * use the big flush-hammer :-(
    */
   if (flags & PIPE_QUERY_WAIT)
      fd_batch_flush(batch);

   fd_batch_reference(&batch, NULL);
}

static const struct fd_query_funcs acc_query_funcs = {
   .destroy_query = fd_acc_destroy_query,
   .begin_query = fd_acc_begin_query,
   .end_query = fd_acc_end_query,
   .get_query_result = fd_acc_get_query_result,
   .get_query_result_resource = fd_acc_get_query_result_resource,
};

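/* Create a query backed by an explicitly specified sample provider. */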
struct fd_query *
fd_acc_create_query2(struct fd_context *ctx, unsigned query_type,
                     unsigned index,
                     const struct fd_acc_sample_provider *provider)
{
   struct fd_acc_query *aq;
   struct fd_query *q;

   aq = CALLOC_STRUCT(fd_acc_query);
   if (!aq)
      return NULL;

   DBG("%p: query_type=%u", aq, query_type);

   aq->provider = provider;
   aq->size = provider->size;

   list_inithead(&aq->node);

   q = &aq->base;
   q->funcs = &acc_query_funcs;
   q->type = query_type;
   q->index = index;

   return q;
}

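/* Create a query using whichever sample provider is registered for the given
 * query type, returning NULL if none is registered.
 */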
struct fd_query *
fd_acc_create_query(struct fd_context *ctx, unsigned query_type, unsigned index)
{
   int idx = pidx(query_type);

   if ((idx < 0) || !ctx->acc_sample_providers[idx])
      return NULL;

   return fd_acc_create_query2(ctx, query_type, index,
                               ctx->acc_sample_providers[idx]);
}

/* Called at clear/draw/blit time to enable/disable the appropriate queries in
 * the batch (and transfer active querying between batches in the case of
 * batch reordering).
 */
void
fd_acc_query_update_batch(struct fd_batch *batch, bool disable_all)
{
   struct fd_context *ctx = batch->ctx;

   if (disable_all || (ctx->dirty & FD_DIRTY_QUERY)) {
      struct fd_acc_query *aq;
      LIST_FOR_EACH_ENTRY (aq, &ctx->acc_active_queries, node) {
         bool batch_change = aq->batch != batch;
         bool was_active = aq->batch != NULL;
         bool now_active =
            !disable_all && (ctx->active_queries || aq->provider->always);

         if (was_active && (!now_active || batch_change))
            fd_acc_query_pause(aq);
         if (now_active && (!was_active || batch_change))
            fd_acc_query_resume(aq, batch);
      }
   }
}

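/* Register a hw-generation specific sample provider for a query type,
 * typically called from the per-generation backend at context init.  A
 * hypothetical registration might look like (names below are illustrative,
 * not actual driver symbols):
 *
 *    static const struct fd_acc_sample_provider occlusion_counter = {
 *       .query_type = PIPE_QUERY_OCCLUSION_COUNTER,
 *       .size = sizeof(struct fd_acc_query_sample),
 *       .resume = occlusion_resume,
 *       .pause = occlusion_pause,
 *       .result = occlusion_counter_result,
 *    };
 *    fd_acc_query_register_provider(pctx, &occlusion_counter);
 */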
void
fd_acc_query_register_provider(struct pipe_context *pctx,
                               const struct fd_acc_sample_provider *provider)
{
   struct fd_context *ctx = fd_context(pctx);
   int idx = pidx(provider->query_type);

   assert((0 <= idx) && (idx < MAX_HW_SAMPLE_PROVIDERS));
   assert(!ctx->acc_sample_providers[idx]);

   ctx->acc_sample_providers[idx] = provider;
}
321