/*
 * Copyright © 2017 Rob Clark <[email protected]>
 * SPDX-License-Identifier: MIT
 *
 * Authors:
 *    Rob Clark <[email protected]>
 */

#include "util/u_inlines.h"
#include "util/u_memory.h"

#include "freedreno_context.h"
#include "freedreno_query_acc.h"
#include "freedreno_resource.h"
#include "freedreno_util.h"

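/*
 * "Accumulating" queries: a per-generation fd_acc_sample_provider supplies
 * resume/pause hooks that start and stop accumulating a sample into the
 * query's buffer object, plus hooks to decode the result on the CPU or
 * write it into a destination resource on the GPU.  The code here handles
 * the common parts: allocating/zeroing the query BO, tracking active
 * queries as batches come and go, and the result readback paths.
 */
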
static void
fd_acc_destroy_query(struct fd_context *ctx, struct fd_query *q) assert_dt
{
   struct fd_acc_query *aq = fd_acc_query(q);

   DBG("%p", q);

   pipe_resource_reference(&aq->prsc, NULL);
   list_del(&aq->node);

   free(aq->query_data);
   free(aq);
}

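/* (Re)allocate the query's buffer object.  The new buffer is not assumed to
 * be zero-initialized, so it is cleared from the CPU before use.
 */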
static void
realloc_query_bo(struct fd_context *ctx, struct fd_acc_query *aq)
{
   struct fd_resource *rsc;
   void *map;

   pipe_resource_reference(&aq->prsc, NULL);

   aq->prsc =
      pipe_buffer_create(&ctx->screen->base, PIPE_BIND_QUERY_BUFFER, 0, 0x1000);

   /* don't assume the buffer is zero-initialized: */
   rsc = fd_resource(aq->prsc);

   fd_bo_cpu_prep(rsc->bo, ctx->pipe, FD_BO_PREP_WRITE);

   map = fd_bo_map(rsc->bo);
   memset(map, 0, aq->size);
}

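/* Stop accumulating results for the batch the query is currently attached
 * to (if any), and detach the query from that batch.
 */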
static void
fd_acc_query_pause(struct fd_acc_query *aq) assert_dt
{
   const struct fd_acc_sample_provider *p = aq->provider;

   if (!aq->batch)
      return;

   fd_batch_needs_flush(aq->batch);
   p->pause(aq, aq->batch);
   aq->batch = NULL;
}

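/* Attach the query to a batch and (re)start accumulating results there,
 * recording the batch's write-dependency on the query BO.
 */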
static void
fd_acc_query_resume(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
{
   const struct fd_acc_sample_provider *p = aq->provider;

   fd_screen_lock(batch->ctx->screen);
   fd_batch_resource_write(batch, fd_resource(aq->prsc));
   fd_screen_unlock(batch->ctx->screen);

   aq->batch = batch;
   fd_batch_needs_flush(aq->batch);
   p->resume(aq, aq->batch);
}

static void
fd_acc_begin_query(struct fd_context *ctx, struct fd_query *q) assert_dt
{
   struct fd_acc_query *aq = fd_acc_query(q);

   DBG("%p", q);

   /* ->begin_query() discards previous results, so realloc bo: */
   realloc_query_bo(ctx, aq);

   /* Signal that we need to update the active queries on the next draw: */
   fd_context_dirty(ctx, FD_DIRTY_QUERY);

   /* add to active list: */
   assert(list_is_empty(&aq->node));
   list_addtail(&aq->node, &ctx->acc_active_queries);

   /* TIMESTAMP/GPU_FINISHED queries don't do the normal bracketing at draw
    * time; we need to just emit the capture at this moment.
    */
   if (skip_begin_query(q->type)) {
      struct fd_batch *batch = fd_context_batch(ctx);
      fd_acc_query_resume(aq, batch);
      fd_batch_reference(&batch, NULL);
   }
}

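/* Stop the query and mark its result as available by writing a non-zero
 * value to the start of the query BO from the batch's tile epilogue.
 */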
static void
fd_acc_end_query(struct fd_context *ctx, struct fd_query *q) assert_dt
{
   struct fd_acc_query *aq = fd_acc_query(q);

   DBG("%p", q);

   fd_acc_query_pause(aq);

   /* remove from active list: */
   list_delinit(&aq->node);

   /* mark the result available: */
   struct fd_batch *batch = fd_context_batch(ctx);
   struct fd_ringbuffer *ring = fd_batch_get_tile_epilogue(batch);
   struct fd_resource *rsc = fd_resource(aq->prsc);

   if (ctx->screen->gen < 5) {
      OUT_PKT3(ring, CP_MEM_WRITE, 3);
      OUT_RELOC(ring, rsc->bo, 0, 0, 0);
      OUT_RING(ring, 1); /* low 32b */
      OUT_RING(ring, 0); /* high 32b */
   } else {
      OUT_PKT7(ring, CP_MEM_WRITE, 4);
      OUT_RELOC(ring, rsc->bo, 0, 0, 0);
      OUT_RING(ring, 1); /* low 32b */
      OUT_RING(ring, 0); /* high 32b */
   }

   fd_batch_reference(&batch, NULL);
}

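/* CPU readback path: flush any batch still writing the query BO, optionally
 * wait for the GPU, then let the provider decode the accumulated sample.
 */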
static bool
fd_acc_get_query_result(struct fd_context *ctx, struct fd_query *q, bool wait,
                        union pipe_query_result *result)
{
   struct fd_acc_query *aq = fd_acc_query(q);
   const struct fd_acc_sample_provider *p = aq->provider;
   struct fd_resource *rsc = fd_resource(aq->prsc);

   DBG("%p: wait=%d", q, wait);

   assert(list_is_empty(&aq->node));

   /* ARB_occlusion_query says:
    *
    *     "Querying the state for a given occlusion query forces that
    *      occlusion query to complete within a finite amount of time."
    *
    * So, regardless of whether we are supposed to wait or not, we do need to
    * flush now.
    */
   if (fd_get_query_result_in_driver_thread(q)) {
      tc_assert_driver_thread(ctx->tc);
      fd_context_access_begin(ctx);
      fd_bc_flush_writer(ctx, rsc);
      fd_context_access_end(ctx);
   }

   if (!wait) {
      int ret = fd_resource_wait(
         ctx, rsc, FD_BO_PREP_READ | FD_BO_PREP_NOSYNC | FD_BO_PREP_FLUSH);
      if (ret)
         return false;
   } else {
      fd_resource_wait(ctx, rsc, FD_BO_PREP_READ);
   }

   struct fd_acc_query_sample *s = fd_bo_map(rsc->bo);
   p->result(aq, s, result);

   return true;
}

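/* GPU readback path (query buffer object): write either the query result
 * (index >= 0) or its availability (index == -1) into the destination
 * resource from the command stream rather than the CPU.
 */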
static void
fd_acc_get_query_result_resource(struct fd_context *ctx, struct fd_query *q,
                                 enum pipe_query_flags flags,
                                 enum pipe_query_value_type result_type,
                                 int index, struct fd_resource *dst,
                                 unsigned offset) assert_dt
{
   struct fd_acc_query *aq = fd_acc_query(q);
   const struct fd_acc_sample_provider *p = aq->provider;
   struct fd_batch *batch = fd_context_batch(ctx);

   assert(ctx->screen->gen >= 5);

   fd_screen_lock(batch->ctx->screen);
   fd_batch_resource_write(batch, dst);
   fd_screen_unlock(batch->ctx->screen);

   /* query_buffer_object isn't really the greatest thing for a tiler,
    * if the app tries to use the result of the query in the same batch.
    * In general the query result isn't truly ready until the last gmem
    * bin/tile.
    *
    * So, we mark the query result as not being available in the draw
    * ring (which technically is true), and then in epilogue ring we
    * update the query dst buffer with the *actual* results and status.
    */
   if (index == -1) {
      /* Mark the query as not-ready in the draw ring: */
      struct fd_ringbuffer *ring = batch->draw;
      bool is_64b = result_type >= PIPE_QUERY_TYPE_I64;

      OUT_PKT7(ring, CP_MEM_WRITE, is_64b ? 4 : 3);
      OUT_RELOC(ring, dst->bo, offset, 0, 0);
      OUT_RING(ring, 0); /* low 32b */
      if (is_64b)
         OUT_RING(ring, 0); /* high 32b */
   }

   struct fd_ringbuffer *ring = fd_batch_get_epilogue(batch);

   if (index == -1) {
      copy_result(ring, result_type, dst, offset, fd_resource(aq->prsc), 0);
   } else {
      p->result_resource(aq, ring, result_type, index, dst, offset);
   }

   /* If we are told to wait for results, then we need to flush.  For an IMR
    * this would just be a wait on the GPU, but the expectation is that draws
    * following this one see the results of the query, which means we need to
    * use the big flush-hammer :-(
    */
   if (flags & PIPE_QUERY_WAIT)
      fd_batch_flush(batch);

   fd_batch_reference(&batch, NULL);
}

static const struct fd_query_funcs acc_query_funcs = {
   .destroy_query = fd_acc_destroy_query,
   .begin_query = fd_acc_begin_query,
   .end_query = fd_acc_end_query,
   .get_query_result = fd_acc_get_query_result,
   .get_query_result_resource = fd_acc_get_query_result_resource,
};

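/* Create an accumulating query directly against a given sample provider
 * (the generic entry point below looks the provider up by query type).
 */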
struct fd_query *
fd_acc_create_query2(struct fd_context *ctx, unsigned query_type,
                     unsigned index,
                     const struct fd_acc_sample_provider *provider)
{
   struct fd_acc_query *aq;
   struct fd_query *q;

   aq = CALLOC_STRUCT(fd_acc_query);
   if (!aq)
      return NULL;

   DBG("%p: query_type=%u", aq, query_type);

   aq->provider = provider;
   aq->size = provider->size;

   list_inithead(&aq->node);

   q = &aq->base;
   q->funcs = &acc_query_funcs;
   q->type = query_type;
   q->index = index;

   return q;
}

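/* Generic query creation: look up the registered provider for this query
 * type, returning NULL if no accumulating provider handles it.
 */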
struct fd_query *
fd_acc_create_query(struct fd_context *ctx, unsigned query_type, unsigned index)
{
   int idx = pidx(query_type);

   if ((idx < 0) || !ctx->acc_sample_providers[idx])
      return NULL;

   return fd_acc_create_query2(ctx, query_type, index,
                               ctx->acc_sample_providers[idx]);
}

/* Called at clear/draw/blit time to enable/disable the appropriate queries in
 * the batch (and transfer active querying between batches in the case of
 * batch reordering).
 */
void
fd_acc_query_update_batch(struct fd_batch *batch, bool disable_all)
{
   struct fd_context *ctx = batch->ctx;

   if (disable_all || (ctx->dirty & FD_DIRTY_QUERY)) {
      struct fd_acc_query *aq;
      LIST_FOR_EACH_ENTRY (aq, &ctx->acc_active_queries, node) {
         bool batch_change = aq->batch != batch;
         bool was_active = aq->batch != NULL;
         bool now_active =
            !disable_all && (ctx->active_queries || aq->provider->always);

         if (was_active && (!now_active || batch_change))
            fd_acc_query_pause(aq);
         if (now_active && (!was_active || batch_change))
            fd_acc_query_resume(aq, batch);
      }
   }
}

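/* Plug in a sample provider for a query type, typically from the
 * per-generation backend at context creation.  As a rough sketch (the
 * occlusion_* callbacks and sample size here are hypothetical; see
 * freedreno_query_acc.h for the full provider interface):
 *
 *    static const struct fd_acc_sample_provider occlusion_counter = {
 *       .query_type = PIPE_QUERY_OCCLUSION_COUNTER,
 *       .size = sizeof(struct fd_acc_query_sample),
 *       .resume = occlusion_resume,
 *       .pause = occlusion_pause,
 *       .result = occlusion_result,
 *    };
 *
 *    fd_acc_query_register_provider(pctx, &occlusion_counter);
 */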
void
fd_acc_query_register_provider(struct pipe_context *pctx,
                               const struct fd_acc_sample_provider *provider)
{
   struct fd_context *ctx = fd_context(pctx);
   int idx = pidx(provider->query_type);

   assert((0 <= idx) && (idx < MAX_HW_SAMPLE_PROVIDERS));
   assert(!ctx->acc_sample_providers[idx]);

   ctx->acc_sample_providers[idx] = provider;
}