1 /*
2 * Copyright © Microsoft Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "d3d12_query.h"
25 #include "d3d12_compiler.h"
26 #include "d3d12_compute_transforms.h"
27 #include "d3d12_context.h"
28 #include "d3d12_resource.h"
29 #include "d3d12_screen.h"
30 #include "d3d12_fence.h"
31
32 #include "util/u_dump.h"
33 #include "util/u_inlines.h"
34 #include "util/u_memory.h"
35 #include "util/u_threaded_context.h"
36
37 #include <dxguids/dxguids.h>
38
39 static unsigned
num_sub_queries(unsigned query_type,unsigned index)40 num_sub_queries(unsigned query_type, unsigned index)
41 {
42 switch (query_type) {
43 case PIPE_QUERY_PRIMITIVES_GENERATED:
44 return index == 0 ? 3 : 1;
45 case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
46 return 4;
47 default:
48 return 1;
49 }
50 }
51
52 static D3D12_QUERY_HEAP_TYPE
d3d12_query_heap_type(unsigned query_type,unsigned sub_query)53 d3d12_query_heap_type(unsigned query_type, unsigned sub_query)
54 {
55 switch (query_type) {
56 case PIPE_QUERY_OCCLUSION_COUNTER:
57 case PIPE_QUERY_OCCLUSION_PREDICATE:
58 case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
59 return D3D12_QUERY_HEAP_TYPE_OCCLUSION;
60 case PIPE_QUERY_PIPELINE_STATISTICS:
61 return D3D12_QUERY_HEAP_TYPE_PIPELINE_STATISTICS;
62 case PIPE_QUERY_PRIMITIVES_GENERATED:
63 return sub_query == 0 ?
64 D3D12_QUERY_HEAP_TYPE_SO_STATISTICS :
65 D3D12_QUERY_HEAP_TYPE_PIPELINE_STATISTICS;
66 case PIPE_QUERY_PRIMITIVES_EMITTED:
67 case PIPE_QUERY_SO_STATISTICS:
68 case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
69 case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
70 return D3D12_QUERY_HEAP_TYPE_SO_STATISTICS;
71 case PIPE_QUERY_TIMESTAMP:
72 case PIPE_QUERY_TIME_ELAPSED:
73 return D3D12_QUERY_HEAP_TYPE_TIMESTAMP;
74
75 default:
76 debug_printf("unknown query: %s\n",
77 util_str_query_type(query_type, true));
78 unreachable("d3d12: unknown query type");
79 }
80 }
81
82 static D3D12_QUERY_TYPE
d3d12_query_type(unsigned query_type,unsigned sub_query,unsigned index)83 d3d12_query_type(unsigned query_type, unsigned sub_query, unsigned index)
84 {
85 switch (query_type) {
86 case PIPE_QUERY_OCCLUSION_COUNTER:
87 return D3D12_QUERY_TYPE_OCCLUSION;
88 case PIPE_QUERY_OCCLUSION_PREDICATE:
89 case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
90 return D3D12_QUERY_TYPE_BINARY_OCCLUSION;
91 case PIPE_QUERY_PIPELINE_STATISTICS:
92 return D3D12_QUERY_TYPE_PIPELINE_STATISTICS;
93 case PIPE_QUERY_PRIMITIVES_GENERATED:
94 if (sub_query > 0)
95 return D3D12_QUERY_TYPE_PIPELINE_STATISTICS;
96 FALLTHROUGH;
97 case PIPE_QUERY_PRIMITIVES_EMITTED:
98 case PIPE_QUERY_SO_STATISTICS:
99 case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
100 return (D3D12_QUERY_TYPE)(D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0 + index);
101 case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
102 return (D3D12_QUERY_TYPE)(D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0 + sub_query);
103 case PIPE_QUERY_TIMESTAMP:
104 case PIPE_QUERY_TIME_ELAPSED:
105 return D3D12_QUERY_TYPE_TIMESTAMP;
106 default:
107 debug_printf("unknown query: %s\n",
108 util_str_query_type(query_type, true));
109 unreachable("d3d12: unknown query type");
110 }
111 }
112
113 static struct pipe_query *
d3d12_create_query(struct pipe_context * pctx,unsigned query_type,unsigned index)114 d3d12_create_query(struct pipe_context *pctx,
115 unsigned query_type, unsigned index)
116 {
117 struct d3d12_context *ctx = d3d12_context(pctx);
118 struct d3d12_screen *screen = d3d12_screen(pctx->screen);
119 struct d3d12_query *query = CALLOC_STRUCT(d3d12_query);
120 D3D12_QUERY_HEAP_DESC desc = {};
121
122 if (!query)
123 return NULL;
124
125 pipe_reference_init(&query->reference, 1);
126 query->type = (pipe_query_type)query_type;
127 query->index = index;
128 for (unsigned i = 0; i < num_sub_queries(query_type, index); ++i) {
129 assert(i < MAX_SUBQUERIES);
130 query->subqueries[i].d3d12qtype = d3d12_query_type(query_type, i, index);
131 query->subqueries[i].num_queries = 16;
132
133 /* With timer queries we want a few more queries, especially since we need two slots
134 * per query for TIME_ELAPSED queries
135 * For TIMESTAMP, we don't need more than one slot, since there's nothing to accumulate */
136 if (unlikely(query_type == PIPE_QUERY_TIME_ELAPSED))
137 query->subqueries[i].num_queries = 64;
138 else if (query_type == PIPE_QUERY_TIMESTAMP)
139 query->subqueries[i].num_queries = 1;
140
141 query->subqueries[i].curr_query = 0;
142 desc.Count = query->subqueries[i].num_queries;
143 desc.Type = d3d12_query_heap_type(query_type, i);
144
145 switch (desc.Type) {
146 case D3D12_QUERY_HEAP_TYPE_PIPELINE_STATISTICS:
147 query->subqueries[i].query_size = sizeof(D3D12_QUERY_DATA_PIPELINE_STATISTICS);
148 break;
149 case D3D12_QUERY_HEAP_TYPE_SO_STATISTICS:
150 query->subqueries[i].query_size = sizeof(D3D12_QUERY_DATA_SO_STATISTICS);
151 break;
152 default:
153 query->subqueries[i].query_size = sizeof(uint64_t);
154 break;
155 }
156 if (FAILED(screen->dev->CreateQueryHeap(&desc,
157 IID_PPV_ARGS(&query->subqueries[i].query_heap)))) {
158 FREE(query);
159 return NULL;
160 }
161
162 /* Query result goes into a readback buffer */
163 size_t buffer_size = query->subqueries[i].query_size * query->subqueries[i].num_queries;
164 u_suballocator_alloc(&ctx->query_allocator, buffer_size, 256,
165 &query->subqueries[i].buffer_offset, &query->subqueries[i].buffer);
166
167 query->subqueries[i].active = (query_type == PIPE_QUERY_TIMESTAMP);
168 }
169
170 return (struct pipe_query *)query;
171 }
172
173 void
d3d12_destroy_query(struct d3d12_query * query)174 d3d12_destroy_query(struct d3d12_query *query)
175 {
176 pipe_resource *predicate = &query->predicate->base.b;
177 pipe_resource_reference(&predicate, NULL);
178 for (unsigned i = 0; i < num_sub_queries(query->type, query->index); ++i) {
179 query->subqueries[i].query_heap->Release();
180 pipe_resource_reference(&query->subqueries[i].buffer, NULL);
181 }
182 FREE(query);
183 }
184
185 static void
d3d12_release_query(struct pipe_context * pctx,struct pipe_query * q)186 d3d12_release_query(struct pipe_context *pctx,
187 struct pipe_query *q)
188 {
189 struct d3d12_query *query = (struct d3d12_query *)q;
190 if (pipe_reference(&query->reference, nullptr)) {
191 d3d12_destroy_query(query);
192 }
193 }
194
195 static bool
accumulate_subresult_cpu(struct d3d12_context * ctx,struct d3d12_query * q_parent,unsigned sub_query,union pipe_query_result * result)196 accumulate_subresult_cpu(struct d3d12_context *ctx, struct d3d12_query *q_parent,
197 unsigned sub_query,
198 union pipe_query_result *result)
199 {
200 struct pipe_transfer *transfer = NULL;
201 struct d3d12_screen *screen = d3d12_screen(ctx->base.screen);
202 struct d3d12_query_impl *q = &q_parent->subqueries[sub_query];
203 unsigned access = PIPE_MAP_READ;
204 void *results;
205
206 access |= PIPE_MAP_UNSYNCHRONIZED;
207
208 results = pipe_buffer_map_range(&ctx->base, q->buffer, q->buffer_offset,
209 q->num_queries * q->query_size,
210 access, &transfer);
211
212 if (results == NULL)
213 return false;
214
215 uint64_t *results_u64 = (uint64_t *)results;
216 D3D12_QUERY_DATA_PIPELINE_STATISTICS *results_stats = (D3D12_QUERY_DATA_PIPELINE_STATISTICS *)results;
217 D3D12_QUERY_DATA_SO_STATISTICS *results_so = (D3D12_QUERY_DATA_SO_STATISTICS *)results;
218
219 memset(result, 0, sizeof(*result));
220 for (unsigned i = 0; i < q->curr_query; ++i) {
221 switch (q->d3d12qtype) {
222 case D3D12_QUERY_TYPE_BINARY_OCCLUSION:
223 result->b |= results_u64[i] != 0;
224 break;
225
226 case D3D12_QUERY_TYPE_OCCLUSION:
227 result->u64 += results_u64[i];
228 break;
229
230 case D3D12_QUERY_TYPE_TIMESTAMP:
231 if (q_parent->type == PIPE_QUERY_TIME_ELAPSED)
232 result->u64 += results_u64[2 * i + 1] - results_u64[2 * i];
233 else
234 result->u64 = results_u64[i];
235 break;
236
237 case D3D12_QUERY_TYPE_PIPELINE_STATISTICS:
238 result->pipeline_statistics.ia_vertices += results_stats[i].IAVertices;
239 result->pipeline_statistics.ia_primitives += results_stats[i].IAPrimitives;
240 result->pipeline_statistics.vs_invocations += results_stats[i].VSInvocations;
241 result->pipeline_statistics.gs_invocations += results_stats[i].GSInvocations;
242 result->pipeline_statistics.gs_primitives += results_stats[i].GSPrimitives;
243 result->pipeline_statistics.c_invocations += results_stats[i].CInvocations;
244 result->pipeline_statistics.c_primitives += results_stats[i].CPrimitives;
245 result->pipeline_statistics.ps_invocations += results_stats[i].PSInvocations;
246 result->pipeline_statistics.hs_invocations += results_stats[i].HSInvocations;
247 result->pipeline_statistics.ds_invocations += results_stats[i].DSInvocations;
248 result->pipeline_statistics.cs_invocations += results_stats[i].CSInvocations;
249 break;
250
251 case D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0:
252 case D3D12_QUERY_TYPE_SO_STATISTICS_STREAM1:
253 case D3D12_QUERY_TYPE_SO_STATISTICS_STREAM2:
254 case D3D12_QUERY_TYPE_SO_STATISTICS_STREAM3:
255 if (q_parent->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE ||
256 q_parent->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE) {
257 result->b = results_so[i].NumPrimitivesWritten != results_so[i].PrimitivesStorageNeeded;
258 } else {
259 result->so_statistics.num_primitives_written += results_so[i].NumPrimitivesWritten;
260 result->so_statistics.primitives_storage_needed += results_so[i].PrimitivesStorageNeeded;
261 }
262 break;
263
264 default:
265 debug_printf("unsupported query type: %s\n",
266 util_str_query_type(q_parent->type, true));
267 unreachable("unexpected query type");
268 }
269 }
270
271 pipe_buffer_unmap(&ctx->base, transfer);
272
273 if (q->d3d12qtype == D3D12_QUERY_TYPE_TIMESTAMP)
274 result->u64 = static_cast<uint64_t>(screen->timestamp_multiplier * result->u64);
275
276 return true;
277 }
278
279 static bool
accumulate_result_cpu(struct d3d12_context * ctx,struct d3d12_query * q,union pipe_query_result * result)280 accumulate_result_cpu(struct d3d12_context *ctx, struct d3d12_query *q,
281 union pipe_query_result *result)
282 {
283 union pipe_query_result local_result;
284
285 switch (q->type) {
286 case PIPE_QUERY_PRIMITIVES_GENERATED:
287 if (!accumulate_subresult_cpu(ctx, q, 0, &local_result))
288 return false;
289 result->u64 = local_result.so_statistics.primitives_storage_needed;
290
291 if (q->index == 0) {
292 if (!accumulate_subresult_cpu(ctx, q, 1, &local_result))
293 return false;
294 result->u64 += local_result.pipeline_statistics.gs_primitives;
295
296 if (!accumulate_subresult_cpu(ctx, q, 2, &local_result))
297 return false;
298 result->u64 += local_result.pipeline_statistics.ia_primitives;
299 }
300 return true;
301 case PIPE_QUERY_PRIMITIVES_EMITTED:
302 if (!accumulate_subresult_cpu(ctx, q, 0, &local_result))
303 return false;
304 result->u64 = local_result.so_statistics.num_primitives_written;
305 return true;
306 case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
307 result->b = false;
308 for (uint32_t i = 0; i < num_sub_queries(q->type, q->index); ++i) {
309 if (!accumulate_subresult_cpu(ctx, q, i, &local_result))
310 return false;
311 result->b |= local_result.b;
312 }
313 return true;
314 default:
315 assert(num_sub_queries(q->type, q->index) == 1);
316 return accumulate_subresult_cpu(ctx, q, 0, result);
317 }
318 }
319
320 static bool
subquery_should_be_active(struct d3d12_context * ctx,struct d3d12_query * q,unsigned sub_query)321 subquery_should_be_active(struct d3d12_context *ctx, struct d3d12_query *q, unsigned sub_query)
322 {
323 switch (q->type) {
324 case PIPE_QUERY_PRIMITIVES_GENERATED: {
325 bool has_xfb = !!ctx->gfx_pipeline_state.num_so_targets;
326 struct d3d12_shader_selector *gs = ctx->gfx_stages[PIPE_SHADER_GEOMETRY];
327 bool has_gs = gs && !gs->is_variant;
328 switch (sub_query) {
329 case 0: return has_xfb;
330 case 1: return !has_xfb && has_gs;
331 case 2: return !has_xfb && !has_gs;
332 default: unreachable("Invalid subquery for primitives generated");
333 }
334 break;
335 }
336 default:
337 return true;
338 }
339 }
340
341 static bool
query_ensure_ready(struct d3d12_screen * screen,struct d3d12_context * ctx,struct d3d12_query * query,bool wait)342 query_ensure_ready(struct d3d12_screen* screen, struct d3d12_context* ctx, struct d3d12_query* query, bool wait)
343 {
344 // If the query is not flushed, it won't have
345 // been submitted yet, and won't have a waitable
346 // fence value
347 if (query->fence_value == UINT64_MAX) {
348 d3d12_flush_cmdlist(ctx);
349 }
350
351 if (screen->fence->GetCompletedValue() < query->fence_value){
352 if (!wait)
353 return false;
354
355 screen->fence->SetEventOnCompletion(query->fence_value, NULL);
356 }
357
358 return true;
359 }
360
361 static void
accumulate_subresult_gpu(struct d3d12_context * ctx,struct d3d12_query * q_parent,unsigned sub_query)362 accumulate_subresult_gpu(struct d3d12_context *ctx, struct d3d12_query *q_parent,
363 unsigned sub_query)
364 {
365 d3d12_compute_transform_save_restore save;
366 d3d12_save_compute_transform_state(ctx, &save);
367
368 d3d12_compute_transform_key key;
369 memset(&key, 0, sizeof(key));
370 key.type = d3d12_compute_transform_type::query_resolve;
371 key.query_resolve.is_64bit = true;
372 key.query_resolve.is_resolve_in_place = true;
373 key.query_resolve.num_subqueries = 1;
374 key.query_resolve.pipe_query_type = q_parent->type;
375 key.query_resolve.single_subquery_index = sub_query;
376 key.query_resolve.is_signed = false;
377 key.query_resolve.timestamp_multiplier = 1.0;
378 ctx->base.bind_compute_state(&ctx->base, d3d12_get_compute_transform(ctx, &key));
379
380 ctx->transform_state_vars[0] = q_parent->subqueries[sub_query].curr_query;
381 ctx->transform_state_vars[1] = 0;
382 ctx->transform_state_vars[2] = 0;
383 ctx->transform_state_vars[3] = 0;
384 ctx->transform_state_vars[4] = 0;
385
386 pipe_shader_buffer new_cs_ssbos[1];
387 new_cs_ssbos[0].buffer = q_parent->subqueries[sub_query].buffer;
388 new_cs_ssbos[0].buffer_offset = q_parent->subqueries[sub_query].buffer_offset;
389 new_cs_ssbos[0].buffer_size = q_parent->subqueries[sub_query].query_size * q_parent->subqueries[sub_query].num_queries;
390 ctx->base.set_shader_buffers(&ctx->base, PIPE_SHADER_COMPUTE, 0, 1, new_cs_ssbos, 1);
391
392 pipe_grid_info grid = {};
393 grid.block[0] = grid.block[1] = grid.block[2] = 1;
394 grid.grid[0] = grid.grid[1] = grid.grid[2] = 1;
395 ctx->base.launch_grid(&ctx->base, &grid);
396
397 d3d12_restore_compute_transform_state(ctx, &save);
398 }
399
400 static void
accumulate_result_gpu(struct d3d12_context * ctx,struct d3d12_query * q,struct pipe_resource * dst,uint32_t dst_offset,int index,enum pipe_query_value_type result_type)401 accumulate_result_gpu(struct d3d12_context *ctx, struct d3d12_query *q,
402 struct pipe_resource *dst, uint32_t dst_offset,
403 int index, enum pipe_query_value_type result_type)
404 {
405 d3d12_compute_transform_save_restore save;
406 d3d12_save_compute_transform_state(ctx, &save);
407
408 d3d12_compute_transform_key key;
409 memset(&key, 0, sizeof(key));
410 key.type = d3d12_compute_transform_type::query_resolve;
411 key.query_resolve.is_64bit = result_type == PIPE_QUERY_TYPE_I64 || result_type == PIPE_QUERY_TYPE_U64;
412 key.query_resolve.is_resolve_in_place = false;
413 key.query_resolve.num_subqueries = num_sub_queries(q->type, q->index);
414 key.query_resolve.pipe_query_type = q->type;
415 key.query_resolve.single_result_field_offset = index;
416 key.query_resolve.is_signed = result_type == PIPE_QUERY_TYPE_I32 || result_type == PIPE_QUERY_TYPE_I64;
417 key.query_resolve.timestamp_multiplier = d3d12_screen(ctx->base.screen)->timestamp_multiplier;
418 ctx->base.bind_compute_state(&ctx->base, d3d12_get_compute_transform(ctx, &key));
419
420 pipe_shader_buffer new_cs_ssbos[5];
421 uint32_t num_ssbos = 0;
422 for (uint32_t i = 0; i < key.query_resolve.num_subqueries; ++i) {
423 ctx->transform_state_vars[i] = q->subqueries[i].curr_query;
424 new_cs_ssbos[num_ssbos].buffer = q->subqueries[i].buffer;
425 new_cs_ssbos[num_ssbos].buffer_offset = q->subqueries[i].buffer_offset;
426 new_cs_ssbos[num_ssbos].buffer_size = q->subqueries[i].query_size * q->subqueries[i].num_queries;
427 num_ssbos++;
428 }
429
430 assert(dst_offset % (key.query_resolve.is_64bit ? 8 : 4) == 0);
431 ctx->transform_state_vars[4] = dst_offset / (key.query_resolve.is_64bit ? 8 : 4);
432
433 new_cs_ssbos[num_ssbos].buffer = dst;
434 new_cs_ssbos[num_ssbos].buffer_offset = 0;
435 new_cs_ssbos[num_ssbos].buffer_size = dst->width0;
436 num_ssbos++;
437
438 ctx->base.set_shader_buffers(&ctx->base, PIPE_SHADER_COMPUTE, 0, num_ssbos, new_cs_ssbos, 1 << (num_ssbos - 1));
439
440 pipe_grid_info grid = {};
441 grid.block[0] = grid.block[1] = grid.block[2] = 1;
442 grid.grid[0] = grid.grid[1] = grid.grid[2] = 1;
443 ctx->base.launch_grid(&ctx->base, &grid);
444
445 d3d12_restore_compute_transform_state(ctx, &save);
446 }
447
448 static void
begin_subquery(struct d3d12_context * ctx,struct d3d12_query * q_parent,unsigned sub_query)449 begin_subquery(struct d3d12_context *ctx, struct d3d12_query *q_parent, unsigned sub_query)
450 {
451 struct d3d12_query_impl *q = &q_parent->subqueries[sub_query];
452 if (q->curr_query == q->num_queries) {
453 /* Accumulate current results and store in first slot */
454 accumulate_subresult_gpu(ctx, q_parent, sub_query);
455 q->curr_query = 1;
456 }
457
458 ctx->cmdlist->BeginQuery(q->query_heap, q->d3d12qtype, q->curr_query);
459 q->active = true;
460 }
461
462 static void
begin_query(struct d3d12_context * ctx,struct d3d12_query * q_parent,bool restart)463 begin_query(struct d3d12_context *ctx, struct d3d12_query *q_parent, bool restart)
464 {
465 for (unsigned i = 0; i < num_sub_queries(q_parent->type, q_parent->index); ++i) {
466 if (restart)
467 q_parent->subqueries[i].curr_query = 0;
468
469 if (!subquery_should_be_active(ctx, q_parent, i))
470 continue;
471
472 begin_subquery(ctx, q_parent, i);
473 }
474 }
475
476
477 static void
begin_timer_query(struct d3d12_context * ctx,struct d3d12_query * q_parent,bool restart)478 begin_timer_query(struct d3d12_context *ctx, struct d3d12_query *q_parent, bool restart)
479 {
480 struct d3d12_query_impl *q = &q_parent->subqueries[0];
481
482 /* For PIPE_QUERY_TIME_ELAPSED we record one time with BeginQuery and one in
483 * EndQuery, so we need two query slots */
484 unsigned query_index = 2 * q->curr_query;
485
486 if (restart) {
487 q->curr_query = 0;
488 query_index = 0;
489 } else if (query_index == q->num_queries) {
490 /* Accumulate current results and store in first slot */
491 accumulate_subresult_gpu(ctx, q_parent, 0);
492 q->curr_query = 1;
493 }
494
495 ctx->cmdlist->EndQuery(q->query_heap, q->d3d12qtype, query_index);
496 q->active = true;
497 }
498
499 static bool
d3d12_begin_query(struct pipe_context * pctx,struct pipe_query * q)500 d3d12_begin_query(struct pipe_context *pctx,
501 struct pipe_query *q)
502 {
503 struct d3d12_context *ctx = d3d12_context(pctx);
504 struct d3d12_query *query = (struct d3d12_query *)q;
505
506 assert(query->type != PIPE_QUERY_TIMESTAMP);
507
508 if (unlikely(query->type == PIPE_QUERY_TIME_ELAPSED))
509 begin_timer_query(ctx, query, true);
510 else {
511 begin_query(ctx, query, true);
512 list_addtail(&query->active_list, &ctx->active_queries);
513 }
514
515 return true;
516 }
517
518 static void
end_subquery(struct d3d12_context * ctx,struct d3d12_query * q_parent,unsigned sub_query)519 end_subquery(struct d3d12_context *ctx, struct d3d12_query *q_parent, unsigned sub_query)
520 {
521 struct d3d12_query_impl *q = &q_parent->subqueries[sub_query];
522
523 uint64_t offset = 0;
524 struct d3d12_batch *batch = d3d12_current_batch(ctx);
525 struct d3d12_resource *res = (struct d3d12_resource *)q->buffer;
526 ID3D12Resource *d3d12_res = d3d12_resource_underlying(res, &offset);
527
528 /* For TIMESTAMP, there's only one slot */
529 if (q_parent->type == PIPE_QUERY_TIMESTAMP)
530 q->curr_query = 0;
531
532 /* With QUERY_TIME_ELAPSED we have recorded one value at
533 * (2 * q->curr_query), and now we record a value at (2 * q->curr_query + 1)
534 * and when resolving the query we subtract the latter from the former */
535
536 unsigned resolve_count = q_parent->type == PIPE_QUERY_TIME_ELAPSED ? 2 : 1;
537 unsigned resolve_index = resolve_count * q->curr_query;
538 unsigned end_index = resolve_index + resolve_count - 1;
539
540 offset += q->buffer_offset + resolve_index * q->query_size;
541 ctx->cmdlist->EndQuery(q->query_heap, q->d3d12qtype, end_index);
542 d3d12_transition_resource_state(ctx, res, D3D12_RESOURCE_STATE_COPY_DEST, D3D12_TRANSITION_FLAG_INVALIDATE_BINDINGS);
543 d3d12_apply_resource_states(ctx, false);
544 ctx->cmdlist->ResolveQueryData(q->query_heap, q->d3d12qtype, resolve_index,
545 resolve_count, d3d12_res, offset);
546
547 d3d12_batch_reference_object(batch, q->query_heap);
548 d3d12_batch_reference_resource(batch, res, true);
549
550 assert(q->curr_query < q->num_queries);
551 q->curr_query++;
552 q->active = (q_parent->type == PIPE_QUERY_TIMESTAMP);
553 }
554
555 static void
end_query(struct d3d12_context * ctx,struct d3d12_query * q_parent)556 end_query(struct d3d12_context *ctx, struct d3d12_query *q_parent)
557 {
558 for (unsigned i = 0; i < num_sub_queries(q_parent->type, q_parent->index); ++i) {
559 struct d3d12_query_impl *q = &q_parent->subqueries[i];
560 if (!q->active)
561 continue;
562
563 end_subquery(ctx, q_parent, i);
564 }
565 }
566
567 static bool
d3d12_end_query(struct pipe_context * pctx,struct pipe_query * q)568 d3d12_end_query(struct pipe_context *pctx,
569 struct pipe_query *q)
570 {
571 struct d3d12_context *ctx = d3d12_context(pctx);
572 struct d3d12_query *query = (struct d3d12_query *)q;
573
574 // Assign the sentinel and track now that the query is ended
575 query->fence_value = UINT64_MAX;
576 d3d12_batch_reference_query(d3d12_current_batch(ctx), query);
577
578 end_query(ctx, query);
579
580 if (query->type != PIPE_QUERY_TIMESTAMP &&
581 query->type != PIPE_QUERY_TIME_ELAPSED)
582 list_delinit(&query->active_list);
583 return true;
584 }
585
586 static bool
d3d12_get_query_result(struct pipe_context * pctx,struct pipe_query * q,bool wait,union pipe_query_result * result)587 d3d12_get_query_result(struct pipe_context *pctx,
588 struct pipe_query *q,
589 bool wait,
590 union pipe_query_result *result)
591 {
592 struct d3d12_context *ctx = d3d12_context(pctx);
593 struct d3d12_screen *screen = d3d12_screen(ctx->base.screen);
594 struct d3d12_query *query = (struct d3d12_query *)q;
595
596 if (!query_ensure_ready(screen, ctx, query, wait))
597 return false;
598
599 return accumulate_result_cpu(ctx, query, result);
600 }
601
602 static void
d3d12_get_query_result_resource(struct pipe_context * pctx,struct pipe_query * q,enum pipe_query_flags flags,enum pipe_query_value_type result_type,int index,struct pipe_resource * resource,unsigned offset)603 d3d12_get_query_result_resource(struct pipe_context *pctx,
604 struct pipe_query *q,
605 enum pipe_query_flags flags,
606 enum pipe_query_value_type result_type,
607 int index,
608 struct pipe_resource *resource,
609 unsigned offset)
610 {
611 struct d3d12_context *ctx = d3d12_context(pctx);
612
613 if (index == -1) {
614 /* Write the "available" bit, which is always true */
615 struct d3d12_resource *res = d3d12_resource(resource);
616 d3d12_transition_resource_state(ctx, res, D3D12_RESOURCE_STATE_COPY_DEST, D3D12_TRANSITION_FLAG_NONE);
617 d3d12_apply_resource_states(ctx, false);
618
619 D3D12_GPU_VIRTUAL_ADDRESS gpuva_base = d3d12_resource_gpu_virtual_address(res) + offset;
620 D3D12_WRITEBUFFERIMMEDIATE_PARAMETER params[2] = {
621 { gpuva_base, 1 },
622 { gpuva_base + sizeof(uint32_t), 0 },
623 };
624 D3D12_WRITEBUFFERIMMEDIATE_MODE modes[2] = { D3D12_WRITEBUFFERIMMEDIATE_MODE_DEFAULT, D3D12_WRITEBUFFERIMMEDIATE_MODE_DEFAULT };
625 ctx->cmdlist8->WriteBufferImmediate(result_type == PIPE_QUERY_TYPE_I64 || result_type == PIPE_QUERY_TYPE_U64 ? 2 : 1,
626 params, modes);
627 return;
628 }
629
630 struct d3d12_query *query = (struct d3d12_query *)q;
631 accumulate_result_gpu(ctx, query, resource, offset, index, result_type);
632 }
633
634 void
d3d12_suspend_queries(struct d3d12_context * ctx)635 d3d12_suspend_queries(struct d3d12_context *ctx)
636 {
637 list_for_each_entry(struct d3d12_query, query, &ctx->active_queries, active_list) {
638 end_query(ctx, query);
639 }
640 }
641
642 void
d3d12_resume_queries(struct d3d12_context * ctx)643 d3d12_resume_queries(struct d3d12_context *ctx)
644 {
645 list_for_each_entry(struct d3d12_query, query, &ctx->active_queries, active_list) {
646 begin_query(ctx, query, false);
647 }
648 }
649
650 void
d3d12_validate_queries(struct d3d12_context * ctx)651 d3d12_validate_queries(struct d3d12_context *ctx)
652 {
653 /* Nothing to do, all queries are suspended */
654 if (ctx->queries_disabled)
655 return;
656
657 list_for_each_entry(struct d3d12_query, query, &ctx->active_queries, active_list) {
658 for (unsigned i = 0; i < num_sub_queries(query->type, query->index); ++i) {
659 if (query->subqueries[i].active && !subquery_should_be_active(ctx, query, i))
660 end_subquery(ctx, query, i);
661 else if (!query->subqueries[i].active && subquery_should_be_active(ctx, query, i))
662 begin_subquery(ctx, query, i);
663 }
664 }
665 }
666
667 static void
d3d12_set_active_query_state(struct pipe_context * pctx,bool enable)668 d3d12_set_active_query_state(struct pipe_context *pctx, bool enable)
669 {
670 struct d3d12_context *ctx = d3d12_context(pctx);
671 ctx->queries_disabled = !enable;
672
673 if (enable)
674 d3d12_resume_queries(ctx);
675 else
676 d3d12_suspend_queries(ctx);
677 }
678
679 static void
d3d12_render_condition(struct pipe_context * pctx,struct pipe_query * pquery,bool condition,enum pipe_render_cond_flag mode)680 d3d12_render_condition(struct pipe_context *pctx,
681 struct pipe_query *pquery,
682 bool condition,
683 enum pipe_render_cond_flag mode)
684 {
685 struct d3d12_context *ctx = d3d12_context(pctx);
686 struct d3d12_query *query = (struct d3d12_query *)pquery;
687
688 if (query == nullptr) {
689 ctx->cmdlist->SetPredication(nullptr, 0, D3D12_PREDICATION_OP_EQUAL_ZERO);
690 ctx->current_predication = nullptr;
691 return;
692 }
693
694 if (!query->predicate)
695 query->predicate = d3d12_resource(pipe_buffer_create(pctx->screen, 0,
696 PIPE_USAGE_DEFAULT, sizeof(uint64_t)));
697
698 accumulate_result_gpu(ctx, query, &query->predicate->base.b, 0, 0, PIPE_QUERY_TYPE_U64);
699
700 d3d12_transition_resource_state(ctx, query->predicate, D3D12_RESOURCE_STATE_PREDICATION, D3D12_TRANSITION_FLAG_NONE);
701 d3d12_apply_resource_states(ctx, false);
702
703 ctx->current_predication = query->predicate;
704 ctx->predication_condition = condition;
705 d3d12_enable_predication(ctx);
706 }
707
708 void
d3d12_enable_predication(struct d3d12_context * ctx)709 d3d12_enable_predication(struct d3d12_context *ctx)
710 {
711 /* documentation of ID3D12GraphicsCommandList::SetPredication method:
712 * "resource manipulation commands are _not_ actually performed
713 * if the resulting predicate data of the predicate is equal to
714 * the operation specified."
715 */
716 ctx->cmdlist->SetPredication(d3d12_resource_resource(ctx->current_predication), 0,
717 ctx->predication_condition ? D3D12_PREDICATION_OP_NOT_EQUAL_ZERO :
718 D3D12_PREDICATION_OP_EQUAL_ZERO);
719 }
720
721 void
d3d12_context_query_init(struct pipe_context * pctx)722 d3d12_context_query_init(struct pipe_context *pctx)
723 {
724 struct d3d12_context *ctx = d3d12_context(pctx);
725 list_inithead(&ctx->active_queries);
726
727 u_suballocator_init(&ctx->query_allocator, &ctx->base, 4096, 0, PIPE_USAGE_STAGING,
728 0, true);
729
730 pctx->create_query = d3d12_create_query;
731 pctx->destroy_query = d3d12_release_query;
732 pctx->begin_query = d3d12_begin_query;
733 pctx->end_query = d3d12_end_query;
734 pctx->get_query_result = d3d12_get_query_result;
735 pctx->get_query_result_resource = d3d12_get_query_result_resource;
736 pctx->set_active_query_state = d3d12_set_active_query_state;
737 pctx->render_condition = d3d12_render_condition;
738 }
739