xref: /aosp_15_r20/external/mesa3d/src/gallium/drivers/d3d12/d3d12_draw.cpp (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © Microsoft Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "d3d12_cmd_signature.h"
25 #include "d3d12_compiler.h"
26 #include "d3d12_compute_transforms.h"
27 #include "d3d12_context.h"
28 #include "d3d12_format.h"
29 #include "d3d12_query.h"
30 #include "d3d12_resource.h"
31 #include "d3d12_root_signature.h"
32 #include "d3d12_screen.h"
33 #include "d3d12_surface.h"
34 
35 #include "indices/u_primconvert.h"
36 #include "util/u_debug.h"
37 #include "util/u_draw.h"
38 #include "util/u_helpers.h"
39 #include "util/u_inlines.h"
40 #include "util/u_prim.h"
41 #include "util/u_prim_restart.h"
42 #include "util/u_math.h"
43 
44 static const D3D12_RECT MAX_SCISSOR = { 0, 0, 16384, 16384 };
45 
46 static const D3D12_RECT MAX_SCISSOR_ARRAY[] = {
47    MAX_SCISSOR, MAX_SCISSOR, MAX_SCISSOR, MAX_SCISSOR,
48    MAX_SCISSOR, MAX_SCISSOR, MAX_SCISSOR, MAX_SCISSOR,
49    MAX_SCISSOR, MAX_SCISSOR, MAX_SCISSOR, MAX_SCISSOR,
50    MAX_SCISSOR, MAX_SCISSOR, MAX_SCISSOR, MAX_SCISSOR
51 };
52 static_assert(ARRAY_SIZE(MAX_SCISSOR_ARRAY) == PIPE_MAX_VIEWPORTS, "Wrong scissor count");
53 
54 static D3D12_GPU_DESCRIPTOR_HANDLE
fill_cbv_descriptors(struct d3d12_context * ctx,struct d3d12_shader * shader,int stage)55 fill_cbv_descriptors(struct d3d12_context *ctx,
56                      struct d3d12_shader *shader,
57                      int stage)
58 {
59    struct d3d12_batch *batch = d3d12_current_batch(ctx);
60    struct d3d12_descriptor_handle table_start;
61    d2d12_descriptor_heap_get_next_handle(batch->view_heap, &table_start);
62 
63    for (unsigned i = shader->begin_ubo_binding; i < shader->end_ubo_binding; i++) {
64       struct pipe_constant_buffer *buffer = &ctx->cbufs[stage][i];
65 
66       D3D12_CONSTANT_BUFFER_VIEW_DESC cbv_desc = {};
67       if (buffer && buffer->buffer) {
68          struct d3d12_resource *res = d3d12_resource(buffer->buffer);
69          d3d12_transition_resource_state(ctx, res, D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER, D3D12_TRANSITION_FLAG_ACCUMULATE_STATE);
70          cbv_desc.BufferLocation = d3d12_resource_gpu_virtual_address(res) + buffer->buffer_offset;
71          cbv_desc.SizeInBytes = MIN2(D3D12_REQ_CONSTANT_BUFFER_ELEMENT_COUNT * 16,
72             align(buffer->buffer_size, 256));
73          d3d12_batch_reference_resource(batch, res, false);
74       }
75 
76       struct d3d12_descriptor_handle handle;
77       d3d12_descriptor_heap_alloc_handle(batch->view_heap, &handle);
78       d3d12_screen(ctx->base.screen)->dev->CreateConstantBufferView(&cbv_desc, handle.cpu_handle);
79    }
80 
81    return table_start.gpu_handle;
82 }
83 
84 static D3D12_GPU_DESCRIPTOR_HANDLE
fill_srv_descriptors(struct d3d12_context * ctx,struct d3d12_shader * shader,unsigned stage)85 fill_srv_descriptors(struct d3d12_context *ctx,
86                      struct d3d12_shader *shader,
87                      unsigned stage)
88 {
89    struct d3d12_batch *batch = d3d12_current_batch(ctx);
90    struct d3d12_screen *screen = d3d12_screen(ctx->base.screen);
91    D3D12_CPU_DESCRIPTOR_HANDLE descs[PIPE_MAX_SHADER_SAMPLER_VIEWS];
92    struct d3d12_descriptor_handle table_start;
93 
94    d2d12_descriptor_heap_get_next_handle(batch->view_heap, &table_start);
95 
96    for (unsigned i = shader->begin_srv_binding; i < shader->end_srv_binding; i++)
97    {
98       struct d3d12_sampler_view *view;
99 
100       if (i == shader->pstipple_binding) {
101          view = (struct d3d12_sampler_view*)ctx->pstipple.sampler_view;
102       } else {
103          view = (struct d3d12_sampler_view*)ctx->sampler_views[stage][i];
104       }
105 
106       unsigned desc_idx = i - shader->begin_srv_binding;
107       if (view != NULL) {
108          descs[desc_idx] = view->handle.cpu_handle;
109          d3d12_batch_reference_sampler_view(batch, view);
110 
111          struct d3d12_resource *res = d3d12_resource(view->base.texture);
112          /* If this is a buffer that's been replaced, re-create the descriptor */
113          if (view->texture_generation_id != res->generation_id) {
114             d3d12_init_sampler_view_descriptor(view);
115             view->texture_generation_id = res->generation_id;
116          }
117 
118          D3D12_RESOURCE_STATES state = (stage == PIPE_SHADER_FRAGMENT) ?
119                                        D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE :
120                                        D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE;
121          if (view->base.texture->target == PIPE_BUFFER) {
122             d3d12_transition_resource_state(ctx, d3d12_resource(view->base.texture),
123                                             state,
124                                             D3D12_TRANSITION_FLAG_ACCUMULATE_STATE);
125          } else {
126             d3d12_transition_subresources_state(ctx, d3d12_resource(view->base.texture),
127                                                 view->base.u.tex.first_level, view->mip_levels,
128                                                 view->base.u.tex.first_layer, view->array_size,
129                                                 d3d12_get_format_start_plane(view->base.format),
130                                                 d3d12_get_format_num_planes(view->base.format),
131                                                 state,
132                                                 D3D12_TRANSITION_FLAG_ACCUMULATE_STATE);
133          }
134       } else {
135          descs[desc_idx] = screen->null_srvs[shader->srv_bindings[i].dimension].cpu_handle;
136       }
137    }
138 
139    d3d12_descriptor_heap_append_handles(batch->view_heap, descs, shader->end_srv_binding - shader->begin_srv_binding);
140 
141    return table_start.gpu_handle;
142 }
143 
144 static D3D12_GPU_DESCRIPTOR_HANDLE
fill_ssbo_descriptors(struct d3d12_context * ctx,const struct d3d12_shader * shader,int stage)145 fill_ssbo_descriptors(struct d3d12_context *ctx,
146                      const struct d3d12_shader *shader,
147                      int stage)
148 {
149    struct d3d12_batch *batch = d3d12_current_batch(ctx);
150    struct d3d12_descriptor_handle table_start;
151 
152    d2d12_descriptor_heap_get_next_handle(batch->view_heap, &table_start);
153 
154    for (unsigned i = 0; i < shader->nir->info.num_ssbos; i++)
155    {
156       struct pipe_shader_buffer *view = &ctx->ssbo_views[stage][i];
157 
158       D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc;
159       uav_desc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER;
160       uav_desc.Format = DXGI_FORMAT_R32_TYPELESS;
161       uav_desc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_RAW;
162       uav_desc.Buffer.StructureByteStride = 0;
163       uav_desc.Buffer.CounterOffsetInBytes = 0;
164       uav_desc.Buffer.FirstElement = 0;
165       uav_desc.Buffer.NumElements = 0;
166       ID3D12Resource *d3d12_res = nullptr;
167       if (view->buffer) {
168          struct d3d12_resource *res = d3d12_resource(view->buffer);
169          uint64_t res_offset = 0;
170          d3d12_res = d3d12_resource_underlying(res, &res_offset);
171          d3d12_transition_resource_state(ctx, res, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_TRANSITION_FLAG_ACCUMULATE_STATE);
172          uav_desc.Buffer.FirstElement = (view->buffer_offset + res_offset) / 4;
173          uav_desc.Buffer.NumElements = DIV_ROUND_UP(view->buffer_size, 4);
174          d3d12_batch_reference_resource(batch, res, true);
175       }
176 
177       struct d3d12_descriptor_handle handle;
178       d3d12_descriptor_heap_alloc_handle(batch->view_heap, &handle);
179       d3d12_screen(ctx->base.screen)->dev->CreateUnorderedAccessView(d3d12_res, nullptr, &uav_desc, handle.cpu_handle);
180    }
181 
182    return table_start.gpu_handle;
183 }
184 
185 static D3D12_GPU_DESCRIPTOR_HANDLE
fill_sampler_descriptors(struct d3d12_context * ctx,const struct d3d12_shader_selector * shader_sel,unsigned stage)186 fill_sampler_descriptors(struct d3d12_context *ctx,
187                          const struct d3d12_shader_selector *shader_sel,
188                          unsigned stage)
189 {
190    const struct d3d12_shader *shader = shader_sel->current;
191    struct d3d12_batch *batch = d3d12_current_batch(ctx);
192    struct d3d12_sampler_desc_table_key view;
193 
194    view.count = 0;
195    for (unsigned i = shader->begin_srv_binding; i < shader->end_srv_binding; i++, view.count++) {
196       struct d3d12_sampler_state *sampler;
197 
198       if (i == shader->pstipple_binding) {
199          sampler = ctx->pstipple.sampler_cso;
200       } else {
201          sampler = ctx->samplers[stage][i];
202       }
203 
204       unsigned desc_idx = i - shader->begin_srv_binding;
205       if (sampler != NULL) {
206          if (sampler->is_shadow_sampler && shader_sel->compare_with_lod_bias_grad)
207             view.descs[desc_idx] = sampler->handle_without_shadow.cpu_handle;
208          else
209             view.descs[desc_idx] = sampler->handle.cpu_handle;
210       } else
211          view.descs[desc_idx] = ctx->null_sampler.cpu_handle;
212    }
213 
214    hash_entry* sampler_entry =
215       (hash_entry*)_mesa_hash_table_search(batch->sampler_tables, &view);
216 
217    if (!sampler_entry) {
218       d3d12_sampler_desc_table_key* sampler_table_key = MALLOC_STRUCT(d3d12_sampler_desc_table_key);
219       sampler_table_key->count = view.count;
220       memcpy(sampler_table_key->descs, &view.descs, view.count * sizeof(view.descs[0]));
221 
222       d3d12_descriptor_handle* sampler_table_data = MALLOC_STRUCT(d3d12_descriptor_handle);
223       d2d12_descriptor_heap_get_next_handle(batch->sampler_heap, sampler_table_data);
224 
225       d3d12_descriptor_heap_append_handles(batch->sampler_heap, view.descs, shader->end_srv_binding - shader->begin_srv_binding);
226 
227       _mesa_hash_table_insert(batch->sampler_tables, sampler_table_key, sampler_table_data);
228 
229       return sampler_table_data->gpu_handle;
230    } else
231       return ((d3d12_descriptor_handle*)sampler_entry->data)->gpu_handle;
232 
233 }
234 
235 static D3D12_UAV_DIMENSION
image_view_dimension(enum pipe_texture_target target)236 image_view_dimension(enum pipe_texture_target target)
237 {
238    switch (target) {
239    case PIPE_BUFFER: return D3D12_UAV_DIMENSION_BUFFER;
240    case PIPE_TEXTURE_1D: return D3D12_UAV_DIMENSION_TEXTURE1D;
241    case PIPE_TEXTURE_1D_ARRAY: return D3D12_UAV_DIMENSION_TEXTURE1DARRAY;
242    case PIPE_TEXTURE_RECT:
243    case PIPE_TEXTURE_2D:
244       return D3D12_UAV_DIMENSION_TEXTURE2D;
245    case PIPE_TEXTURE_2D_ARRAY:
246    case PIPE_TEXTURE_CUBE:
247    case PIPE_TEXTURE_CUBE_ARRAY:
248       return D3D12_UAV_DIMENSION_TEXTURE2DARRAY;
249    case PIPE_TEXTURE_3D: return D3D12_UAV_DIMENSION_TEXTURE3D;
250    default:
251       unreachable("unexpected target");
252    }
253 }
254 
255 static D3D12_GPU_DESCRIPTOR_HANDLE
fill_image_descriptors(struct d3d12_context * ctx,const struct d3d12_shader * shader,int stage)256 fill_image_descriptors(struct d3d12_context *ctx,
257                        const struct d3d12_shader *shader,
258                        int stage)
259 {
260    struct d3d12_screen *screen = d3d12_screen(ctx->base.screen);
261    struct d3d12_batch *batch = d3d12_current_batch(ctx);
262    struct d3d12_descriptor_handle table_start;
263 
264    d2d12_descriptor_heap_get_next_handle(batch->view_heap, &table_start);
265 
266    for (unsigned i = 0; i < shader->nir->info.num_images; i++)
267    {
268       struct pipe_image_view *view = &ctx->image_views[stage][i];
269 
270       if (view->resource) {
271          D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc;
272          struct d3d12_resource *res = d3d12_resource(view->resource);
273          uint64_t offset = 0;
274          ID3D12Resource *d3d12_res = d3d12_resource_underlying(res, &offset);
275 
276          enum pipe_format view_format = ctx->image_view_emulation_formats[stage][i];
277          if (view_format == PIPE_FORMAT_NONE)
278             view_format = view->format;
279          uav_desc.Format = d3d12_get_format(view_format);
280          uav_desc.ViewDimension = image_view_dimension(res->base.b.target);
281 
282          unsigned array_size = view->u.tex.last_layer - view->u.tex.first_layer + 1;
283          switch (uav_desc.ViewDimension) {
284          case D3D12_UAV_DIMENSION_TEXTURE1D:
285             if (view->u.tex.first_layer > 0)
286                debug_printf("D3D12: can't create 1D UAV from layer %d\n",
287                             view->u.tex.first_layer);
288             uav_desc.Texture1D.MipSlice = view->u.tex.level;
289             break;
290          case D3D12_UAV_DIMENSION_TEXTURE1DARRAY:
291             uav_desc.Texture1DArray.FirstArraySlice = view->u.tex.first_layer;
292             uav_desc.Texture1DArray.ArraySize = array_size;
293             uav_desc.Texture1DArray.MipSlice = view->u.tex.level;
294             break;
295          case D3D12_UAV_DIMENSION_TEXTURE2D:
296             if (view->u.tex.first_layer > 0)
297                debug_printf("D3D12: can't create 2D UAV from layer %d\n",
298                             view->u.tex.first_layer);
299             uav_desc.Texture2D.MipSlice = view->u.tex.level;
300             uav_desc.Texture2D.PlaneSlice = 0;
301             break;
302          case D3D12_UAV_DIMENSION_TEXTURE2DARRAY:
303             uav_desc.Texture2DArray.FirstArraySlice = view->u.tex.first_layer;
304             uav_desc.Texture2DArray.ArraySize = array_size;
305             uav_desc.Texture2DArray.MipSlice = view->u.tex.level;
306             uav_desc.Texture2DArray.PlaneSlice = 0;
307             break;
308          case D3D12_UAV_DIMENSION_TEXTURE3D:
309             uav_desc.Texture3D.MipSlice = view->u.tex.level;
310             uav_desc.Texture3D.FirstWSlice = view->u.tex.first_layer;
311             uav_desc.Texture3D.WSize = array_size;
312             break;
313          case D3D12_UAV_DIMENSION_BUFFER: {
314             uint format_size = util_format_get_blocksize(view_format);
315             offset += view->u.buf.offset;
316             uav_desc.Buffer.CounterOffsetInBytes = 0;
317             uav_desc.Buffer.FirstElement = offset / format_size;
318             uav_desc.Buffer.NumElements = MIN2(view->u.buf.size / format_size,
319                                                1 << D3D12_REQ_BUFFER_RESOURCE_TEXEL_COUNT_2_TO_EXP);
320             uav_desc.Buffer.StructureByteStride = 0;
321             uav_desc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_NONE;
322             break;
323          }
324          default:
325             unreachable("Unexpected image view dimension");
326          }
327 
328          d3d12_transition_flags transition_flags = (d3d12_transition_flags)(D3D12_TRANSITION_FLAG_ACCUMULATE_STATE |
329             (batch->pending_memory_barrier ? D3D12_TRANSITION_FLAG_PENDING_MEMORY_BARRIER : 0));
330          if (res->base.b.target == PIPE_BUFFER) {
331             d3d12_transition_resource_state(ctx, res, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, transition_flags);
332          } else {
333             unsigned transition_first_layer = view->u.tex.first_layer;
334             unsigned transition_array_size = array_size;
335             if (res->base.b.target == PIPE_TEXTURE_3D) {
336                transition_first_layer = 0;
337                transition_array_size = 0;
338             }
339             d3d12_transition_subresources_state(ctx, res,
340                                                 view->u.tex.level, 1,
341                                                 transition_first_layer, transition_array_size,
342                                                 0, 1,
343                                                 D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
344                                                 transition_flags);
345          }
346          d3d12_batch_reference_resource(batch, res, true);
347 
348          struct d3d12_descriptor_handle handle;
349          d3d12_descriptor_heap_alloc_handle(batch->view_heap, &handle);
350          d3d12_screen(ctx->base.screen)->dev->CreateUnorderedAccessView(d3d12_res, nullptr, &uav_desc, handle.cpu_handle);
351       } else {
352          d3d12_descriptor_heap_append_handles(batch->view_heap, &screen->null_uavs[shader->uav_bindings[i].dimension].cpu_handle, 1);
353       }
354    }
355 
356    return table_start.gpu_handle;
357 }
358 
359 static unsigned
fill_graphics_state_vars(struct d3d12_context * ctx,const struct pipe_draw_info * dinfo,unsigned drawid,const struct pipe_draw_start_count_bias * draw,struct d3d12_shader * shader,uint32_t * values,unsigned cur_root_param_idx,struct d3d12_cmd_signature_key * cmd_sig_key)360 fill_graphics_state_vars(struct d3d12_context *ctx,
361                          const struct pipe_draw_info *dinfo,
362                          unsigned drawid,
363                          const struct pipe_draw_start_count_bias *draw,
364                          struct d3d12_shader *shader,
365                          uint32_t *values,
366                          unsigned cur_root_param_idx,
367                          struct d3d12_cmd_signature_key *cmd_sig_key)
368 {
369    unsigned size = 0;
370 
371    for (unsigned j = 0; j < shader->num_state_vars; ++j) {
372       uint32_t *ptr = values + size;
373 
374       switch (shader->state_vars[j].var) {
375       case D3D12_STATE_VAR_Y_FLIP:
376          ptr[0] = fui(ctx->flip_y);
377          size += 4;
378          break;
379       case D3D12_STATE_VAR_PT_SPRITE:
380          ptr[0] = fui(1.0 / ctx->viewports[0].Width);
381          ptr[1] = fui(1.0 / ctx->viewports[0].Height);
382          ptr[2] = fui(ctx->gfx_pipeline_state.rast->base.point_size);
383          ptr[3] = fui(D3D12_MAX_POINT_SIZE);
384          size += 4;
385          break;
386       case D3D12_STATE_VAR_DRAW_PARAMS:
387          ptr[0] = dinfo->index_size ? draw->index_bias : draw->start;
388          ptr[1] = dinfo->start_instance;
389          ptr[2] = drawid;
390          ptr[3] = dinfo->index_size ? -1 : 0;
391          assert(!cmd_sig_key->draw_or_dispatch_params); // Should only be set once
392          cmd_sig_key->draw_or_dispatch_params = 1;
393          cmd_sig_key->root_sig = ctx->gfx_pipeline_state.root_signature;
394          cmd_sig_key->params_root_const_offset = size;
395          cmd_sig_key->params_root_const_param = cur_root_param_idx;
396          size += 4;
397          break;
398       case D3D12_STATE_VAR_DEPTH_TRANSFORM:
399          ptr[0] = fui(2.0f * ctx->viewport_states[0].scale[2]);
400          ptr[1] = fui(ctx->viewport_states[0].translate[2] - ctx->viewport_states[0].scale[2]);
401          size += 4;
402          break;
403       case D3D12_STATE_VAR_DEFAULT_INNER_TESS_LEVEL:
404          memcpy(ptr, ctx->default_inner_tess_factor, sizeof(ctx->default_inner_tess_factor));
405          size += 4;
406          break;
407       case D3D12_STATE_VAR_DEFAULT_OUTER_TESS_LEVEL:
408          memcpy(ptr, ctx->default_outer_tess_factor, sizeof(ctx->default_outer_tess_factor));
409          size += 4;
410          break;
411       case D3D12_STATE_VAR_PATCH_VERTICES_IN:
412          ptr[0] = ctx->patch_vertices;
413          size += 4;
414          break;
415       default:
416          unreachable("unknown state variable");
417       }
418    }
419 
420    return size;
421 }
422 
423 static unsigned
fill_compute_state_vars(struct d3d12_context * ctx,const struct pipe_grid_info * info,struct d3d12_shader * shader,uint32_t * values,struct d3d12_cmd_signature_key * cmd_sig_key)424 fill_compute_state_vars(struct d3d12_context *ctx,
425                         const struct pipe_grid_info *info,
426                         struct d3d12_shader *shader,
427                         uint32_t *values,
428                         struct d3d12_cmd_signature_key *cmd_sig_key)
429 {
430    unsigned size = 0;
431 
432    for (unsigned j = 0; j < shader->num_state_vars; ++j) {
433       uint32_t *ptr = values + size;
434 
435       switch (shader->state_vars[j].var) {
436       case D3D12_STATE_VAR_NUM_WORKGROUPS:
437          ptr[0] = info->grid[0];
438          ptr[1] = info->grid[1];
439          ptr[2] = info->grid[2];
440          cmd_sig_key->draw_or_dispatch_params = 1;
441          cmd_sig_key->root_sig = ctx->compute_pipeline_state.root_signature;
442          cmd_sig_key->params_root_const_offset = size;
443          size += 4;
444          break;
445       case D3D12_STATE_VAR_TRANSFORM_GENERIC0:
446       case D3D12_STATE_VAR_TRANSFORM_GENERIC1: {
447          unsigned idx = shader->state_vars[j].var - D3D12_STATE_VAR_TRANSFORM_GENERIC0;
448          ptr[0] = ctx->transform_state_vars[idx * 4];
449          ptr[1] = ctx->transform_state_vars[idx * 4 + 1];
450          ptr[2] = ctx->transform_state_vars[idx * 4 + 2];
451          ptr[3] = ctx->transform_state_vars[idx * 4 + 3];
452          size += 4;
453          break;
454       }
455       default:
456          unreachable("unknown state variable");
457       }
458    }
459 
460    return size;
461 }
462 
463 static bool
check_descriptors_left(struct d3d12_context * ctx,bool compute)464 check_descriptors_left(struct d3d12_context *ctx, bool compute)
465 {
466    struct d3d12_batch *batch = d3d12_current_batch(ctx);
467    unsigned needed_descs = 0;
468 
469    unsigned count = compute ? 1 : D3D12_GFX_SHADER_STAGES;
470    for (unsigned i = 0; i < count; ++i) {
471       struct d3d12_shader_selector *shader = compute ? ctx->compute_state : ctx->gfx_stages[i];
472 
473       if (!shader)
474          continue;
475 
476       needed_descs += shader->current->end_ubo_binding;
477       needed_descs += shader->current->end_srv_binding - shader->current->begin_srv_binding;
478       needed_descs += shader->current->nir->info.num_ssbos;
479       needed_descs += shader->current->nir->info.num_images;
480    }
481 
482    if (d3d12_descriptor_heap_get_remaining_handles(batch->view_heap) < needed_descs)
483       return false;
484 
485    needed_descs = 0;
486    for (unsigned i = 0; i < count; ++i) {
487       struct d3d12_shader_selector *shader = compute ? ctx->compute_state : ctx->gfx_stages[i];
488 
489       if (!shader)
490          continue;
491 
492       needed_descs += shader->current->end_srv_binding - shader->current->begin_srv_binding;
493    }
494 
495    if (d3d12_descriptor_heap_get_remaining_handles(batch->sampler_heap) < needed_descs)
496       return false;
497 
498    return true;
499 }
500 
501 #define MAX_DESCRIPTOR_TABLES (D3D12_GFX_SHADER_STAGES * 4)
502 
503 static void
update_shader_stage_root_parameters(struct d3d12_context * ctx,const struct d3d12_shader_selector * shader_sel,unsigned & num_params,unsigned & num_root_descriptors,D3D12_GPU_DESCRIPTOR_HANDLE root_desc_tables[MAX_DESCRIPTOR_TABLES],int root_desc_indices[MAX_DESCRIPTOR_TABLES])504 update_shader_stage_root_parameters(struct d3d12_context *ctx,
505                                     const struct d3d12_shader_selector *shader_sel,
506                                     unsigned &num_params,
507                                     unsigned &num_root_descriptors,
508                                     D3D12_GPU_DESCRIPTOR_HANDLE root_desc_tables[MAX_DESCRIPTOR_TABLES],
509                                     int root_desc_indices[MAX_DESCRIPTOR_TABLES])
510 {
511    auto stage = shader_sel->stage;
512    struct d3d12_shader *shader = shader_sel->current;
513    uint64_t dirty = ctx->shader_dirty[stage];
514    assert(shader);
515 
516    if (shader->end_ubo_binding - shader->begin_ubo_binding > 0) {
517       if (dirty & D3D12_SHADER_DIRTY_CONSTBUF) {
518          assert(num_root_descriptors < MAX_DESCRIPTOR_TABLES);
519          root_desc_tables[num_root_descriptors] = fill_cbv_descriptors(ctx, shader, stage);
520          root_desc_indices[num_root_descriptors++] = num_params;
521       }
522       num_params++;
523    }
524    if (shader->end_srv_binding > 0) {
525       if (dirty & D3D12_SHADER_DIRTY_SAMPLER_VIEWS) {
526          assert(num_root_descriptors < MAX_DESCRIPTOR_TABLES);
527          root_desc_tables[num_root_descriptors] = fill_srv_descriptors(ctx, shader, stage);
528          root_desc_indices[num_root_descriptors++] = num_params;
529       }
530       num_params++;
531       if (dirty & D3D12_SHADER_DIRTY_SAMPLERS) {
532          assert(num_root_descriptors < MAX_DESCRIPTOR_TABLES);
533          root_desc_tables[num_root_descriptors] = fill_sampler_descriptors(ctx, shader_sel, stage);
534          root_desc_indices[num_root_descriptors++] = num_params;
535       }
536       num_params++;
537    }
538    if (shader->nir->info.num_ssbos > 0) {
539       if (dirty & D3D12_SHADER_DIRTY_SSBO) {
540          assert(num_root_descriptors < MAX_DESCRIPTOR_TABLES);
541          root_desc_tables[num_root_descriptors] = fill_ssbo_descriptors(ctx, shader, stage);
542          root_desc_indices[num_root_descriptors++] = num_params;
543       }
544       num_params++;
545    }
546    if (shader->nir->info.num_images > 0) {
547       if (dirty & D3D12_SHADER_DIRTY_IMAGE) {
548          assert(num_root_descriptors < MAX_DESCRIPTOR_TABLES);
549          root_desc_tables[num_root_descriptors] = fill_image_descriptors(ctx, shader, stage);
550          root_desc_indices[num_root_descriptors++] = num_params;
551       }
552       num_params++;
553    }
554 }
555 
556 static unsigned
update_graphics_root_parameters(struct d3d12_context * ctx,const struct pipe_draw_info * dinfo,unsigned drawid,const struct pipe_draw_start_count_bias * draw,D3D12_GPU_DESCRIPTOR_HANDLE root_desc_tables[MAX_DESCRIPTOR_TABLES],int root_desc_indices[MAX_DESCRIPTOR_TABLES],struct d3d12_cmd_signature_key * cmd_sig_key)557 update_graphics_root_parameters(struct d3d12_context *ctx,
558                                 const struct pipe_draw_info *dinfo,
559                                 unsigned drawid,
560                                 const struct pipe_draw_start_count_bias *draw,
561                                 D3D12_GPU_DESCRIPTOR_HANDLE root_desc_tables[MAX_DESCRIPTOR_TABLES],
562                                 int root_desc_indices[MAX_DESCRIPTOR_TABLES],
563                                 struct d3d12_cmd_signature_key *cmd_sig_key)
564 {
565    unsigned num_params = 0;
566    unsigned num_root_descriptors = 0;
567 
568    for (unsigned i = 0; i < D3D12_GFX_SHADER_STAGES; ++i) {
569       struct d3d12_shader_selector *shader_sel = ctx->gfx_stages[i];
570       if (!shader_sel)
571          continue;
572 
573       update_shader_stage_root_parameters(ctx, shader_sel, num_params, num_root_descriptors, root_desc_tables, root_desc_indices);
574       /* TODO Don't always update state vars */
575       if (shader_sel->current->num_state_vars > 0) {
576          uint32_t constants[D3D12_MAX_GRAPHICS_STATE_VARS * 4];
577          unsigned size = fill_graphics_state_vars(ctx, dinfo, drawid, draw, shader_sel->current, constants, num_params, cmd_sig_key);
578          ctx->cmdlist->SetGraphicsRoot32BitConstants(num_params, size, constants, 0);
579          num_params++;
580       }
581    }
582    return num_root_descriptors;
583 }
584 
585 static unsigned
update_compute_root_parameters(struct d3d12_context * ctx,const struct pipe_grid_info * info,D3D12_GPU_DESCRIPTOR_HANDLE root_desc_tables[MAX_DESCRIPTOR_TABLES],int root_desc_indices[MAX_DESCRIPTOR_TABLES],struct d3d12_cmd_signature_key * cmd_sig_key)586 update_compute_root_parameters(struct d3d12_context *ctx,
587                                const struct pipe_grid_info *info,
588                                D3D12_GPU_DESCRIPTOR_HANDLE root_desc_tables[MAX_DESCRIPTOR_TABLES],
589                                int root_desc_indices[MAX_DESCRIPTOR_TABLES],
590                                struct d3d12_cmd_signature_key *cmd_sig_key)
591 {
592    unsigned num_params = 0;
593    unsigned num_root_descriptors = 0;
594 
595    struct d3d12_shader_selector *shader_sel = ctx->compute_state;
596    if (shader_sel) {
597       update_shader_stage_root_parameters(ctx, shader_sel, num_params, num_root_descriptors, root_desc_tables, root_desc_indices);
598       /* TODO Don't always update state vars */
599       if (shader_sel->current->num_state_vars > 0) {
600          uint32_t constants[D3D12_MAX_COMPUTE_STATE_VARS * 4];
601          unsigned size = fill_compute_state_vars(ctx, info, shader_sel->current, constants, cmd_sig_key);
602          if (cmd_sig_key->draw_or_dispatch_params)
603             cmd_sig_key->params_root_const_param = num_params;
604          ctx->cmdlist->SetComputeRoot32BitConstants(num_params, size, constants, 0);
605          num_params++;
606       }
607    }
608    return num_root_descriptors;
609 }
610 
611 static bool
validate_stream_output_targets(struct d3d12_context * ctx)612 validate_stream_output_targets(struct d3d12_context *ctx)
613 {
614    unsigned factor = 0;
615 
616    if (ctx->gfx_pipeline_state.num_so_targets &&
617        ctx->gfx_pipeline_state.stages[PIPE_SHADER_GEOMETRY])
618       factor = ctx->gfx_pipeline_state.stages[PIPE_SHADER_GEOMETRY]->key.gs.stream_output_factor;
619 
620    if (factor > 1)
621       return d3d12_enable_fake_so_buffers(ctx, factor);
622    else
623       return d3d12_disable_fake_so_buffers(ctx);
624 }
625 
626 static D3D_PRIMITIVE_TOPOLOGY
topology(enum mesa_prim prim_type,uint8_t patch_vertices)627 topology(enum mesa_prim prim_type, uint8_t patch_vertices)
628 {
629    switch (prim_type) {
630    case MESA_PRIM_POINTS:
631       return D3D_PRIMITIVE_TOPOLOGY_POINTLIST;
632 
633    case MESA_PRIM_LINES:
634       return D3D_PRIMITIVE_TOPOLOGY_LINELIST;
635 
636    case MESA_PRIM_LINE_STRIP:
637       return D3D_PRIMITIVE_TOPOLOGY_LINESTRIP;
638 
639    case MESA_PRIM_TRIANGLES:
640       return D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST;
641 
642    case MESA_PRIM_TRIANGLE_STRIP:
643       return D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP;
644 
645    case MESA_PRIM_LINES_ADJACENCY:
646       return D3D_PRIMITIVE_TOPOLOGY_LINELIST_ADJ;
647 
648    case MESA_PRIM_LINE_STRIP_ADJACENCY:
649       return D3D_PRIMITIVE_TOPOLOGY_LINESTRIP_ADJ;
650 
651    case MESA_PRIM_TRIANGLES_ADJACENCY:
652       return D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ;
653 
654    case MESA_PRIM_TRIANGLE_STRIP_ADJACENCY:
655       return D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ;
656 
657    case MESA_PRIM_PATCHES:
658       return (D3D_PRIMITIVE_TOPOLOGY)(D3D_PRIMITIVE_TOPOLOGY_1_CONTROL_POINT_PATCHLIST + patch_vertices - 1);
659 
660    case MESA_PRIM_QUADS:
661    case MESA_PRIM_QUAD_STRIP:
662       return D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; /* HACK: this is just wrong! */
663 
664    default:
665       debug_printf("mesa_prim: %s\n", u_prim_name(prim_type));
666       unreachable("unexpected enum mesa_prim");
667    }
668 }
669 
670 static DXGI_FORMAT
ib_format(unsigned index_size)671 ib_format(unsigned index_size)
672 {
673    switch (index_size) {
674    case 1: return DXGI_FORMAT_R8_UINT;
675    case 2: return DXGI_FORMAT_R16_UINT;
676    case 4: return DXGI_FORMAT_R32_UINT;
677 
678    default:
679       unreachable("unexpected index-buffer size");
680    }
681 }
682 
683 static void
twoface_emulation(struct d3d12_context * ctx,struct d3d12_rasterizer_state * rast,const struct pipe_draw_info * dinfo,const struct pipe_draw_indirect_info * indirect,const struct pipe_draw_start_count_bias * draw)684 twoface_emulation(struct d3d12_context *ctx,
685                   struct d3d12_rasterizer_state *rast,
686                   const struct pipe_draw_info *dinfo,
687                   const struct pipe_draw_indirect_info *indirect,
688                   const struct pipe_draw_start_count_bias *draw)
689 {
690    /* draw backfaces */
691    ctx->base.bind_rasterizer_state(&ctx->base, rast->twoface_back);
692    d3d12_draw_vbo(&ctx->base, dinfo, 0, indirect, draw, 1);
693 
694    /* restore real state */
695    ctx->base.bind_rasterizer_state(&ctx->base, rast);
696 }
697 
698 static void
transition_surface_subresources_state(struct d3d12_context * ctx,struct pipe_surface * psurf,struct pipe_resource * pres,D3D12_RESOURCE_STATES state)699 transition_surface_subresources_state(struct d3d12_context *ctx,
700                                       struct pipe_surface *psurf,
701                                       struct pipe_resource *pres,
702                                       D3D12_RESOURCE_STATES state)
703 {
704    struct d3d12_resource *res = d3d12_resource(pres);
705    unsigned start_layer, num_layers;
706    if (!d3d12_subresource_id_uses_layer(res->base.b.target)) {
707       start_layer = 0;
708       num_layers = 1;
709    } else {
710       start_layer = psurf->u.tex.first_layer;
711       num_layers = psurf->u.tex.last_layer - psurf->u.tex.first_layer + 1;
712    }
713    d3d12_transition_subresources_state(ctx, res,
714                                        psurf->u.tex.level, 1,
715                                        start_layer, num_layers,
716                                        d3d12_get_format_start_plane(psurf->format),
717                                        d3d12_get_format_num_planes(psurf->format),
718                                        state,
719                                        D3D12_TRANSITION_FLAG_ACCUMULATE_STATE);
720 }
721 
722 static bool
prim_supported(enum mesa_prim prim_type)723 prim_supported(enum mesa_prim prim_type)
724 {
725    switch (prim_type) {
726    case MESA_PRIM_POINTS:
727    case MESA_PRIM_LINES:
728    case MESA_PRIM_LINE_STRIP:
729    case MESA_PRIM_TRIANGLES:
730    case MESA_PRIM_TRIANGLE_STRIP:
731    case MESA_PRIM_LINES_ADJACENCY:
732    case MESA_PRIM_LINE_STRIP_ADJACENCY:
733    case MESA_PRIM_TRIANGLES_ADJACENCY:
734    case MESA_PRIM_TRIANGLE_STRIP_ADJACENCY:
735    case MESA_PRIM_PATCHES:
736       return true;
737 
738    default:
739       return false;
740    }
741 }
742 
743 static inline struct d3d12_shader_selector *
d3d12_last_vertex_stage(struct d3d12_context * ctx)744 d3d12_last_vertex_stage(struct d3d12_context *ctx)
745 {
746    struct d3d12_shader_selector *sel = ctx->gfx_stages[PIPE_SHADER_GEOMETRY];
747    if (!sel || sel->is_variant)
748       sel = ctx->gfx_stages[PIPE_SHADER_TESS_EVAL];
749    if (!sel)
750       sel = ctx->gfx_stages[PIPE_SHADER_VERTEX];
751    return sel;
752 }
753 
754 static bool
update_draw_indirect_with_sysvals(struct d3d12_context * ctx,const struct pipe_draw_info * dinfo,unsigned drawid,const struct pipe_draw_indirect_info ** indirect_inout,struct pipe_draw_indirect_info * indirect_out)755 update_draw_indirect_with_sysvals(struct d3d12_context *ctx,
756    const struct pipe_draw_info *dinfo,
757    unsigned drawid,
758    const struct pipe_draw_indirect_info **indirect_inout,
759    struct pipe_draw_indirect_info *indirect_out)
760 {
761    if (*indirect_inout == nullptr ||
762       ctx->gfx_stages[PIPE_SHADER_VERTEX] == nullptr)
763       return false;
764 
765    auto sys_values_read = ctx->gfx_stages[PIPE_SHADER_VERTEX]->initial->info.system_values_read;
766    bool any =  BITSET_TEST(sys_values_read, SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) ||
767                BITSET_TEST(sys_values_read, SYSTEM_VALUE_BASE_VERTEX) ||
768                BITSET_TEST(sys_values_read, SYSTEM_VALUE_FIRST_VERTEX) ||
769                BITSET_TEST(sys_values_read, SYSTEM_VALUE_BASE_INSTANCE) ||
770                BITSET_TEST(sys_values_read, SYSTEM_VALUE_DRAW_ID);
771 
772    if (!any)
773       return false;
774 
775    d3d12_compute_transform_save_restore save;
776    d3d12_save_compute_transform_state(ctx, &save);
777 
778    auto indirect_in = *indirect_inout;
779    *indirect_inout = indirect_out;
780 
781    d3d12_compute_transform_key key;
782    memset(&key, 0, sizeof(key));
783    key.type = d3d12_compute_transform_type::base_vertex;
784    key.base_vertex.indexed = dinfo->index_size > 0;
785    key.base_vertex.dynamic_count = indirect_in->indirect_draw_count != nullptr;
786    ctx->base.bind_compute_state(&ctx->base, d3d12_get_compute_transform(ctx, &key));
787 
788    ctx->transform_state_vars[0] = indirect_in->stride;
789    ctx->transform_state_vars[1] = indirect_in->offset;
790    ctx->transform_state_vars[2] = drawid;
791 
792    if (indirect_in->indirect_draw_count) {
793       pipe_constant_buffer draw_count_cbuf;
794       draw_count_cbuf.buffer = indirect_in->indirect_draw_count;
795       draw_count_cbuf.buffer_offset = indirect_in->indirect_draw_count_offset;
796       draw_count_cbuf.buffer_size = 4;
797       draw_count_cbuf.user_buffer = nullptr;
798       ctx->base.set_constant_buffer(&ctx->base, PIPE_SHADER_COMPUTE, 1, false, &draw_count_cbuf);
799    }
800 
801    pipe_shader_buffer new_cs_ssbos[2];
802    new_cs_ssbos[0].buffer = indirect_in->buffer;
803    new_cs_ssbos[0].buffer_offset = 0;
804    new_cs_ssbos[0].buffer_size = indirect_in->buffer->width0;
805 
806    /* 4 additional uints for base vertex, base instance, draw ID, and a bool for indexed draw */
807    unsigned out_stride = sizeof(uint32_t) * ((key.base_vertex.indexed ? 5 : 4) + 4);
808    pipe_resource output_buf_templ = {};
809    output_buf_templ.target = PIPE_BUFFER;
810    output_buf_templ.width0 = out_stride * indirect_in->draw_count;
811    output_buf_templ.height0 = output_buf_templ.depth0 = output_buf_templ.array_size =
812       output_buf_templ.last_level = 1;
813    output_buf_templ.usage = PIPE_USAGE_DEFAULT;
814 
815    new_cs_ssbos[1].buffer = ctx->base.screen->resource_create(ctx->base.screen, &output_buf_templ);
816    new_cs_ssbos[1].buffer_offset = 0;
817    new_cs_ssbos[1].buffer_size = output_buf_templ.width0;
818    ctx->base.set_shader_buffers(&ctx->base, PIPE_SHADER_COMPUTE, 0, 2, new_cs_ssbos, 2);
819 
820    pipe_grid_info grid = {};
821    grid.block[0] = grid.block[1] = grid.block[2] = 1;
822    grid.grid[0] = indirect_in->draw_count;
823    grid.grid[1] = grid.grid[2] = 1;
824    ctx->base.launch_grid(&ctx->base, &grid);
825 
826    d3d12_restore_compute_transform_state(ctx, &save);
827 
828    *indirect_out = *indirect_in;
829    indirect_out->buffer = new_cs_ssbos[1].buffer;
830    indirect_out->offset = 0;
831    indirect_out->stride = out_stride;
832    return true;
833 }
834 
835 static bool
update_draw_auto(struct d3d12_context * ctx,const struct pipe_draw_indirect_info ** indirect_inout,struct pipe_draw_indirect_info * indirect_out)836 update_draw_auto(struct d3d12_context *ctx,
837    const struct pipe_draw_indirect_info **indirect_inout,
838    struct pipe_draw_indirect_info *indirect_out)
839 {
840    if (*indirect_inout == nullptr ||
841        (*indirect_inout)->count_from_stream_output == nullptr ||
842        ctx->gfx_stages[PIPE_SHADER_VERTEX] == nullptr)
843       return false;
844 
845    d3d12_compute_transform_save_restore save;
846    d3d12_save_compute_transform_state(ctx, &save);
847 
848    auto indirect_in = *indirect_inout;
849    *indirect_inout = indirect_out;
850 
851    d3d12_compute_transform_key key;
852    memset(&key, 0, sizeof(key));
853    key.type = d3d12_compute_transform_type::draw_auto;
854    ctx->base.bind_compute_state(&ctx->base, d3d12_get_compute_transform(ctx, &key));
855 
856    auto so_arg = indirect_in->count_from_stream_output;
857    d3d12_stream_output_target *target = (d3d12_stream_output_target *)so_arg;
858 
859    ctx->transform_state_vars[0] = ctx->gfx_pipeline_state.ves->strides[0];
860    ctx->transform_state_vars[1] = 0;
861 
862    pipe_shader_buffer new_cs_ssbo;
863    new_cs_ssbo.buffer = target->fill_buffer;
864    new_cs_ssbo.buffer_offset = target->fill_buffer_offset;
865    new_cs_ssbo.buffer_size = target->fill_buffer->width0 - new_cs_ssbo.buffer_offset;
866    ctx->base.set_shader_buffers(&ctx->base, PIPE_SHADER_COMPUTE, 0, 1, &new_cs_ssbo, 1);
867 
868    pipe_grid_info grid = {};
869    grid.block[0] = grid.block[1] = grid.block[2] = 1;
870    grid.grid[0] = grid.grid[1] = grid.grid[2] = 1;
871    ctx->base.launch_grid(&ctx->base, &grid);
872 
873    d3d12_restore_compute_transform_state(ctx, &save);
874 
875    *indirect_out = *indirect_in;
876    pipe_resource_reference(&indirect_out->buffer, target->fill_buffer);
877    indirect_out->offset = target->fill_buffer_offset + 4;
878    indirect_out->stride = sizeof(D3D12_DRAW_ARGUMENTS);
879    indirect_out->count_from_stream_output = nullptr;
880    return true;
881 }
882 
883 void
d3d12_draw_vbo(struct pipe_context * pctx,const struct pipe_draw_info * dinfo,unsigned drawid_offset,const struct pipe_draw_indirect_info * indirect,const struct pipe_draw_start_count_bias * draws,unsigned num_draws)884 d3d12_draw_vbo(struct pipe_context *pctx,
885                const struct pipe_draw_info *dinfo,
886                unsigned drawid_offset,
887                const struct pipe_draw_indirect_info *indirect,
888                const struct pipe_draw_start_count_bias *draws,
889                unsigned num_draws)
890 {
891    if (num_draws > 1) {
892       util_draw_multi(pctx, dinfo, drawid_offset, indirect, draws, num_draws);
893       return;
894    }
895 
896    if (!indirect && (!draws[0].count || !dinfo->instance_count))
897       return;
898 
899    struct d3d12_context *ctx = d3d12_context(pctx);
900    struct d3d12_screen *screen = d3d12_screen(pctx->screen);
901    struct d3d12_batch *batch;
902    struct pipe_resource *index_buffer = NULL;
903    unsigned index_offset = 0;
904    enum d3d12_surface_conversion_mode conversion_modes[PIPE_MAX_COLOR_BUFS] = {};
905    struct pipe_draw_indirect_info patched_indirect = {};
906 
907    if (!prim_supported((enum mesa_prim)dinfo->mode) ||
908        dinfo->index_size == 1 ||
909        (dinfo->primitive_restart && dinfo->restart_index != 0xffff &&
910         dinfo->restart_index != 0xffffffff)) {
911 
912       if (!dinfo->primitive_restart &&
913           !indirect &&
914           !u_trim_pipe_prim((enum mesa_prim)dinfo->mode, (unsigned *)&draws[0].count))
915          return;
916 
917       ctx->initial_api_prim = (enum mesa_prim)dinfo->mode;
918       util_primconvert_save_rasterizer_state(ctx->primconvert, &ctx->gfx_pipeline_state.rast->base);
919       util_primconvert_draw_vbo(ctx->primconvert, dinfo, drawid_offset, indirect, draws, num_draws);
920       return;
921    }
922 
923    bool draw_auto = update_draw_auto(ctx, &indirect, &patched_indirect);
924    bool indirect_with_sysvals = !draw_auto && update_draw_indirect_with_sysvals(ctx, dinfo, drawid_offset, &indirect, &patched_indirect);
925    struct d3d12_cmd_signature_key cmd_sig_key;
926    memset(&cmd_sig_key, 0, sizeof(cmd_sig_key));
927 
928    if (indirect) {
929       cmd_sig_key.compute = false;
930       cmd_sig_key.indexed = dinfo->index_size > 0;
931       if (indirect->draw_count > 1 ||
932           indirect->indirect_draw_count ||
933           indirect_with_sysvals)
934          cmd_sig_key.multi_draw_stride = indirect->stride;
935       else if (cmd_sig_key.indexed)
936          cmd_sig_key.multi_draw_stride = sizeof(D3D12_DRAW_INDEXED_ARGUMENTS);
937       else
938          cmd_sig_key.multi_draw_stride = sizeof(D3D12_DRAW_ARGUMENTS);
939    }
940 
941    for (int i = 0; i < ctx->fb.nr_cbufs; ++i) {
942       if (ctx->fb.cbufs[i]) {
943          struct d3d12_surface *surface = d3d12_surface(ctx->fb.cbufs[i]);
944          conversion_modes[i] = d3d12_surface_update_pre_draw(pctx, surface, d3d12_rtv_format(ctx, i));
945          if (conversion_modes[i] != D3D12_SURFACE_CONVERSION_NONE)
946             ctx->cmdlist_dirty |= D3D12_DIRTY_FRAMEBUFFER;
947       }
948    }
949 
950    struct d3d12_rasterizer_state *rast = ctx->gfx_pipeline_state.rast;
951    if (rast->twoface_back) {
952       enum mesa_prim saved_mode = ctx->initial_api_prim;
953       twoface_emulation(ctx, rast, dinfo, indirect, &draws[0]);
954       ctx->initial_api_prim = saved_mode;
955    }
956 
957    if (ctx->pstipple.enabled && ctx->gfx_pipeline_state.rast->base.poly_stipple_enable)
958       ctx->shader_dirty[PIPE_SHADER_FRAGMENT] |= D3D12_SHADER_DIRTY_SAMPLER_VIEWS |
959                                                  D3D12_SHADER_DIRTY_SAMPLERS;
960 
961    /* this should *really* be fixed at a higher level than here! */
962    enum mesa_prim reduced_prim = u_reduced_prim((enum mesa_prim)dinfo->mode);
963    if (reduced_prim == MESA_PRIM_TRIANGLES &&
964        ctx->gfx_pipeline_state.rast->base.cull_face == PIPE_FACE_FRONT_AND_BACK)
965       return;
966 
967    if (ctx->gfx_pipeline_state.prim_type != dinfo->mode) {
968       ctx->gfx_pipeline_state.prim_type = (enum mesa_prim)dinfo->mode;
969       ctx->state_dirty |= D3D12_DIRTY_PRIM_MODE;
970    }
971 
972    d3d12_select_shader_variants(ctx, dinfo);
973    d3d12_validate_queries(ctx);
974    for (unsigned i = 0; i < D3D12_GFX_SHADER_STAGES; ++i) {
975       struct d3d12_shader *shader = ctx->gfx_stages[i] ? ctx->gfx_stages[i]->current : NULL;
976       if (ctx->gfx_pipeline_state.stages[i] != shader) {
977          ctx->gfx_pipeline_state.stages[i] = shader;
978          ctx->state_dirty |= D3D12_DIRTY_SHADER;
979       }
980    }
981 
982    /* Reset to an invalid value after it's been used */
983    ctx->initial_api_prim = MESA_PRIM_COUNT;
984 
985    /* Copy the stream output info from the current vertex/geometry shader */
986    if (ctx->state_dirty & D3D12_DIRTY_SHADER) {
987       struct d3d12_shader_selector *sel = d3d12_last_vertex_stage(ctx);
988       if (sel) {
989          ctx->gfx_pipeline_state.so_info = sel->so_info;
990       } else {
991          memset(&ctx->gfx_pipeline_state.so_info, 0, sizeof(sel->so_info));
992       }
993    }
994    if (!validate_stream_output_targets(ctx)) {
995       debug_printf("validate_stream_output_targets() failed\n");
996       return;
997    }
998 
999    D3D12_INDEX_BUFFER_STRIP_CUT_VALUE ib_strip_cut_value =
1000       D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_DISABLED;
1001    if (dinfo->index_size > 0) {
1002       assert(dinfo->index_size != 1);
1003 
1004       if (dinfo->has_user_indices) {
1005          if (!util_upload_index_buffer(pctx, dinfo, &draws[0], &index_buffer,
1006              &index_offset, 4)) {
1007             debug_printf("util_upload_index_buffer() failed\n");
1008             return;
1009          }
1010       } else {
1011          index_buffer = dinfo->index.resource;
1012       }
1013 
1014       if (dinfo->primitive_restart) {
1015          assert(dinfo->restart_index == 0xffff ||
1016                 dinfo->restart_index == 0xffffffff);
1017          ib_strip_cut_value = dinfo->restart_index == 0xffff ?
1018             D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFF :
1019             D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFFFFFF;
1020       }
1021    }
1022 
1023    if (ctx->gfx_pipeline_state.ib_strip_cut_value != ib_strip_cut_value) {
1024       ctx->gfx_pipeline_state.ib_strip_cut_value = ib_strip_cut_value;
1025       ctx->state_dirty |= D3D12_DIRTY_STRIP_CUT_VALUE;
1026    }
1027 
1028    if (!ctx->gfx_pipeline_state.root_signature || ctx->state_dirty & D3D12_DIRTY_SHADER) {
1029       ID3D12RootSignature *root_signature = d3d12_get_root_signature(ctx, false);
1030       if (ctx->gfx_pipeline_state.root_signature != root_signature) {
1031          ctx->gfx_pipeline_state.root_signature = root_signature;
1032          ctx->state_dirty |= D3D12_DIRTY_ROOT_SIGNATURE;
1033          for (int i = 0; i < D3D12_GFX_SHADER_STAGES; ++i)
1034             ctx->shader_dirty[i] |= D3D12_SHADER_DIRTY_ALL;
1035       }
1036    }
1037 
1038    if (!ctx->current_gfx_pso || ctx->state_dirty & D3D12_DIRTY_GFX_PSO) {
1039       ctx->current_gfx_pso = d3d12_get_gfx_pipeline_state(ctx);
1040       assert(ctx->current_gfx_pso);
1041    }
1042 
1043    ctx->cmdlist_dirty |= ctx->state_dirty;
1044 
1045    if (!check_descriptors_left(ctx, false))
1046       d3d12_flush_cmdlist(ctx);
1047    batch = d3d12_current_batch(ctx);
1048 
1049    if (ctx->cmdlist_dirty & D3D12_DIRTY_ROOT_SIGNATURE) {
1050       d3d12_batch_reference_object(batch, ctx->gfx_pipeline_state.root_signature);
1051       ctx->cmdlist->SetGraphicsRootSignature(ctx->gfx_pipeline_state.root_signature);
1052    }
1053 
1054    if (ctx->cmdlist_dirty & D3D12_DIRTY_GFX_PSO) {
1055       assert(ctx->current_gfx_pso);
1056       d3d12_batch_reference_object(batch, ctx->current_gfx_pso);
1057       ctx->cmdlist->SetPipelineState(ctx->current_gfx_pso);
1058    }
1059 
1060    D3D12_GPU_DESCRIPTOR_HANDLE root_desc_tables[MAX_DESCRIPTOR_TABLES];
1061    int root_desc_indices[MAX_DESCRIPTOR_TABLES];
1062    unsigned num_root_descriptors = update_graphics_root_parameters(ctx, dinfo, drawid_offset, &draws[0],
1063       root_desc_tables, root_desc_indices, &cmd_sig_key);
1064 
1065    bool need_zero_one_depth_range = d3d12_need_zero_one_depth_range(ctx);
1066    if (need_zero_one_depth_range != ctx->need_zero_one_depth_range) {
1067       ctx->cmdlist_dirty |= D3D12_DIRTY_VIEWPORT;
1068       ctx->need_zero_one_depth_range = need_zero_one_depth_range;
1069    }
1070 
1071    if (ctx->cmdlist_dirty & D3D12_DIRTY_VIEWPORT) {
1072       D3D12_VIEWPORT viewports[PIPE_MAX_VIEWPORTS];
1073       for (unsigned i = 0; i < ctx->num_viewports; ++i) {
1074          viewports[i] = ctx->viewports[i];
1075          if (ctx->need_zero_one_depth_range) {
1076             viewports[i].MinDepth = 0.0f;
1077             viewports[i].MaxDepth = 1.0f;
1078          }
1079          if (ctx->fb.nr_cbufs == 0 && !ctx->fb.zsbuf) {
1080             viewports[i].TopLeftX = MAX2(0.0f, viewports[i].TopLeftX);
1081             viewports[i].TopLeftY = MAX2(0.0f, viewports[i].TopLeftY);
1082             viewports[i].Width = MIN2(ctx->fb.width, viewports[i].Width);
1083             viewports[i].Height = MIN2(ctx->fb.height, viewports[i].Height);
1084          }
1085       }
1086       ctx->cmdlist->RSSetViewports(ctx->num_viewports, viewports);
1087    }
1088 
1089    if (ctx->cmdlist_dirty & D3D12_DIRTY_SCISSOR) {
1090       if (ctx->gfx_pipeline_state.rast->base.scissor && ctx->num_viewports > 0)
1091          ctx->cmdlist->RSSetScissorRects(ctx->num_viewports, ctx->scissors);
1092       else
1093          ctx->cmdlist->RSSetScissorRects(PIPE_MAX_VIEWPORTS, MAX_SCISSOR_ARRAY);
1094    }
1095 
1096    if (ctx->cmdlist_dirty & D3D12_DIRTY_BLEND_COLOR) {
1097       unsigned blend_factor_flags = ctx->gfx_pipeline_state.blend->blend_factor_flags;
1098       if (blend_factor_flags & (D3D12_BLEND_FACTOR_COLOR | D3D12_BLEND_FACTOR_ANY)) {
1099          ctx->cmdlist->OMSetBlendFactor(ctx->blend_factor);
1100       } else if (blend_factor_flags & D3D12_BLEND_FACTOR_ALPHA) {
1101          float alpha_const[4] = { ctx->blend_factor[3], ctx->blend_factor[3],
1102                                  ctx->blend_factor[3], ctx->blend_factor[3] };
1103          ctx->cmdlist->OMSetBlendFactor(alpha_const);
1104       }
1105    }
1106 
1107    if (ctx->cmdlist_dirty & D3D12_DIRTY_STENCIL_REF) {
1108       if (ctx->gfx_pipeline_state.zsa->backface_enabled &&
1109           screen->opts14.IndependentFrontAndBackStencilRefMaskSupported &&
1110           ctx->cmdlist8 != nullptr)
1111          ctx->cmdlist8->OMSetFrontAndBackStencilRef(ctx->stencil_ref.ref_value[0], ctx->stencil_ref.ref_value[1]);
1112       else
1113          ctx->cmdlist->OMSetStencilRef(ctx->stencil_ref.ref_value[0]);
1114    }
1115 
1116    if (ctx->cmdlist_dirty & D3D12_DIRTY_PRIM_MODE)
1117       ctx->cmdlist->IASetPrimitiveTopology(topology((enum mesa_prim)dinfo->mode, ctx->patch_vertices));
1118 
1119    for (unsigned i = 0; i < ctx->num_vbs; ++i) {
1120       if (ctx->vbs[i].buffer.resource) {
1121          struct d3d12_resource *res = d3d12_resource(ctx->vbs[i].buffer.resource);
1122          d3d12_transition_resource_state(ctx, res, D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER, D3D12_TRANSITION_FLAG_ACCUMULATE_STATE);
1123          if (ctx->cmdlist_dirty & D3D12_DIRTY_VERTEX_BUFFERS)
1124             d3d12_batch_reference_resource(batch, res, false);
1125       }
1126    }
1127    if (ctx->cmdlist_dirty & (D3D12_DIRTY_VERTEX_BUFFERS | D3D12_DIRTY_VERTEX_ELEMENTS)) {
1128       uint16_t *strides = ctx->gfx_pipeline_state.ves ? ctx->gfx_pipeline_state.ves->strides : NULL;
1129       if (strides) {
1130          for (unsigned i = 0; i < ctx->num_vbs; i++)
1131             ctx->vbvs[i].StrideInBytes = strides[i];
1132       } else {
1133          for (unsigned i = 0; i < ctx->num_vbs; i++)
1134             ctx->vbvs[i].StrideInBytes = 0;
1135       }
1136       ctx->cmdlist->IASetVertexBuffers(0, ctx->num_vbs, ctx->vbvs);
1137    }
1138 
1139    if (index_buffer) {
1140       D3D12_INDEX_BUFFER_VIEW ibv;
1141       struct d3d12_resource *res = d3d12_resource(index_buffer);
1142       ibv.BufferLocation = d3d12_resource_gpu_virtual_address(res) + index_offset;
1143       ibv.SizeInBytes = res->base.b.width0 - index_offset;
1144       ibv.Format = ib_format(dinfo->index_size);
1145       d3d12_transition_resource_state(ctx, res, D3D12_RESOURCE_STATE_INDEX_BUFFER, D3D12_TRANSITION_FLAG_ACCUMULATE_STATE);
1146       if (ctx->cmdlist_dirty & D3D12_DIRTY_INDEX_BUFFER ||
1147           memcmp(&ctx->ibv, &ibv, sizeof(D3D12_INDEX_BUFFER_VIEW)) != 0) {
1148          ctx->ibv = ibv;
1149          d3d12_batch_reference_resource(batch, res, false);
1150          ctx->cmdlist->IASetIndexBuffer(&ibv);
1151       }
1152 
1153       if (dinfo->has_user_indices)
1154          pipe_resource_reference(&index_buffer, NULL);
1155    }
1156 
1157    if (ctx->cmdlist_dirty & D3D12_DIRTY_FRAMEBUFFER) {
1158       D3D12_CPU_DESCRIPTOR_HANDLE render_targets[PIPE_MAX_COLOR_BUFS] = {};
1159       D3D12_CPU_DESCRIPTOR_HANDLE *depth_desc = NULL, tmp_desc;
1160       for (int i = 0; i < ctx->fb.nr_cbufs; ++i) {
1161          if (ctx->fb.cbufs[i]) {
1162             struct d3d12_surface *surface = d3d12_surface(ctx->fb.cbufs[i]);
1163             render_targets[i] = d3d12_surface_get_handle(surface, conversion_modes[i]);
1164             d3d12_batch_reference_surface_texture(batch, surface);
1165          } else
1166             render_targets[i] = screen->null_rtv.cpu_handle;
1167       }
1168       if (ctx->fb.zsbuf) {
1169          struct d3d12_surface *surface = d3d12_surface(ctx->fb.zsbuf);
1170          tmp_desc = surface->desc_handle.cpu_handle;
1171          d3d12_batch_reference_surface_texture(batch, surface);
1172          depth_desc = &tmp_desc;
1173       }
1174       ctx->cmdlist->OMSetRenderTargets(ctx->fb.nr_cbufs, render_targets, false, depth_desc);
1175    }
1176 
1177    struct pipe_stream_output_target **so_targets = ctx->fake_so_buffer_factor ? ctx->fake_so_targets
1178                                                                               : ctx->so_targets;
1179    D3D12_STREAM_OUTPUT_BUFFER_VIEW *so_buffer_views = ctx->fake_so_buffer_factor ? ctx->fake_so_buffer_views
1180                                                                                  : ctx->so_buffer_views;
1181    for (unsigned i = 0; i < ctx->gfx_pipeline_state.num_so_targets; ++i) {
1182       struct d3d12_stream_output_target *target = (struct d3d12_stream_output_target *)so_targets[i];
1183 
1184       if (!target)
1185          continue;
1186 
1187       struct d3d12_resource *so_buffer = d3d12_resource(target->base.buffer);
1188       struct d3d12_resource *fill_buffer = d3d12_resource(target->fill_buffer);
1189 
1190       if (ctx->cmdlist_dirty & D3D12_DIRTY_STREAM_OUTPUT) {
1191          d3d12_batch_reference_resource(batch, so_buffer, true);
1192          d3d12_batch_reference_resource(batch, fill_buffer, true);
1193       }
1194 
1195       d3d12_transition_resource_state(ctx, so_buffer, D3D12_RESOURCE_STATE_STREAM_OUT, D3D12_TRANSITION_FLAG_ACCUMULATE_STATE);
1196       d3d12_transition_resource_state(ctx, fill_buffer, D3D12_RESOURCE_STATE_STREAM_OUT, D3D12_TRANSITION_FLAG_ACCUMULATE_STATE);
1197    }
1198    if (ctx->cmdlist_dirty & D3D12_DIRTY_STREAM_OUTPUT)
1199       ctx->cmdlist->SOSetTargets(0, 4, so_buffer_views);
1200 
1201    for (int i = 0; i < ctx->fb.nr_cbufs; ++i) {
1202       struct pipe_surface *psurf = ctx->fb.cbufs[i];
1203       if (!psurf)
1204          continue;
1205 
1206       struct pipe_resource *pres = conversion_modes[i] == D3D12_SURFACE_CONVERSION_BGRA_UINT ?
1207                                       d3d12_surface(psurf)->rgba_texture : psurf->texture;
1208       transition_surface_subresources_state(ctx, psurf, pres,
1209          D3D12_RESOURCE_STATE_RENDER_TARGET);
1210    }
1211    if (ctx->fb.zsbuf) {
1212       struct pipe_surface *psurf = ctx->fb.zsbuf;
1213       transition_surface_subresources_state(ctx, psurf, psurf->texture,
1214          D3D12_RESOURCE_STATE_DEPTH_WRITE);
1215    }
1216 
1217    ID3D12Resource *indirect_arg_buf = nullptr;
1218    ID3D12Resource *indirect_count_buf = nullptr;
1219    uint64_t indirect_arg_offset = 0, indirect_count_offset = 0;
1220    if (indirect) {
1221       if (indirect->buffer) {
1222          struct d3d12_resource *indirect_buf = d3d12_resource(indirect->buffer);
1223          uint64_t buf_offset = 0;
1224          indirect_arg_buf = d3d12_resource_underlying(indirect_buf, &buf_offset);
1225          indirect_arg_offset = indirect->offset + buf_offset;
1226          d3d12_transition_resource_state(ctx, indirect_buf,
1227             D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT, D3D12_TRANSITION_FLAG_ACCUMULATE_STATE);
1228          d3d12_batch_reference_resource(batch, indirect_buf, false);
1229       }
1230       if (indirect->indirect_draw_count) {
1231          struct d3d12_resource *count_buf = d3d12_resource(indirect->indirect_draw_count);
1232          uint64_t count_offset = 0;
1233          indirect_count_buf = d3d12_resource_underlying(count_buf, &count_offset);
1234          indirect_count_offset = indirect->indirect_draw_count_offset + count_offset;
1235          d3d12_transition_resource_state(ctx, count_buf,
1236             D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT, D3D12_TRANSITION_FLAG_ACCUMULATE_STATE);
1237          d3d12_batch_reference_resource(batch, count_buf, false);
1238       }
1239       assert(!indirect->count_from_stream_output);
1240    }
1241 
1242    d3d12_apply_resource_states(ctx, false);
1243 
1244    for (unsigned i = 0; i < num_root_descriptors; ++i)
1245       ctx->cmdlist->SetGraphicsRootDescriptorTable(root_desc_indices[i], root_desc_tables[i]);
1246 
1247    if (indirect) {
1248       unsigned draw_count = draw_auto ? 1 : indirect->draw_count;
1249       ID3D12CommandSignature *cmd_sig = d3d12_get_cmd_signature(ctx, &cmd_sig_key);
1250       ctx->cmdlist->ExecuteIndirect(cmd_sig, draw_count, indirect_arg_buf,
1251          indirect_arg_offset, indirect_count_buf, indirect_count_offset);
1252    } else {
1253       if (dinfo->index_size > 0)
1254          ctx->cmdlist->DrawIndexedInstanced(draws[0].count, dinfo->instance_count,
1255                                             draws[0].start, draws[0].index_bias,
1256                                             dinfo->start_instance);
1257       else
1258          ctx->cmdlist->DrawInstanced(draws[0].count, dinfo->instance_count,
1259                                      draws[0].start, dinfo->start_instance);
1260    }
1261 
1262    ctx->state_dirty &= D3D12_DIRTY_COMPUTE_MASK;
1263    batch->pending_memory_barrier = false;
1264 
1265    ctx->cmdlist_dirty &= D3D12_DIRTY_COMPUTE_MASK |
1266       (index_buffer ? 0 : D3D12_DIRTY_INDEX_BUFFER);
1267 
1268    /* The next dispatch needs to reassert the compute PSO */
1269    ctx->cmdlist_dirty |= D3D12_DIRTY_COMPUTE_SHADER;
1270 
1271    for (unsigned i = 0; i < D3D12_GFX_SHADER_STAGES; ++i)
1272       ctx->shader_dirty[i] = 0;
1273 
1274    for (int i = 0; i < ctx->fb.nr_cbufs; ++i) {
1275       if (ctx->fb.cbufs[i]) {
1276          struct d3d12_surface *surface = d3d12_surface(ctx->fb.cbufs[i]);
1277          d3d12_surface_update_post_draw(pctx, surface, conversion_modes[i]);
1278       }
1279    }
1280 
1281    pipe_resource_reference(&patched_indirect.buffer, NULL);
1282 }
1283 
1284 static bool
update_dispatch_indirect_with_sysvals(struct d3d12_context * ctx,struct pipe_resource ** indirect_inout,unsigned * indirect_offset_inout,struct pipe_resource ** indirect_out)1285 update_dispatch_indirect_with_sysvals(struct d3d12_context *ctx,
1286                                       struct pipe_resource **indirect_inout,
1287                                       unsigned *indirect_offset_inout,
1288                                       struct pipe_resource **indirect_out)
1289 {
1290    if (*indirect_inout == nullptr ||
1291        ctx->compute_state == nullptr)
1292       return false;
1293 
1294    if (!BITSET_TEST(ctx->compute_state->initial->info.system_values_read, SYSTEM_VALUE_NUM_WORKGROUPS))
1295       return false;
1296 
1297    if (ctx->current_predication)
1298       ctx->cmdlist->SetPredication(nullptr, 0, D3D12_PREDICATION_OP_EQUAL_ZERO);
1299 
1300    auto indirect_in = *indirect_inout;
1301 
1302    /* 6 uints: 2 copies of the indirect arg buffer */
1303    pipe_resource output_buf_templ = {};
1304    output_buf_templ.target = PIPE_BUFFER;
1305    output_buf_templ.width0 = sizeof(uint32_t) * 6;
1306    output_buf_templ.height0 = output_buf_templ.depth0 = output_buf_templ.array_size =
1307       output_buf_templ.last_level = 1;
1308    output_buf_templ.usage = PIPE_USAGE_DEFAULT;
1309    *indirect_out = ctx->base.screen->resource_create(ctx->base.screen, &output_buf_templ);
1310 
1311    struct pipe_box src_box;
1312    u_box_3d((int)*indirect_offset_inout, 0, 0, sizeof(uint32_t) * 3, 1, 1, &src_box);
1313    ctx->base.resource_copy_region(&ctx->base, *indirect_out, 0, 0, 0, 0, indirect_in, 0, &src_box);
1314    ctx->base.resource_copy_region(&ctx->base, *indirect_out, 0, src_box.width, 0, 0, indirect_in, 0, &src_box);
1315 
1316    if (ctx->current_predication)
1317       d3d12_enable_predication(ctx);
1318 
1319    *indirect_inout = *indirect_out;
1320    *indirect_offset_inout = 0;
1321    return true;
1322 }
1323 
1324 void
d3d12_launch_grid(struct pipe_context * pctx,const struct pipe_grid_info * info)1325 d3d12_launch_grid(struct pipe_context *pctx, const struct pipe_grid_info *info)
1326 {
1327    struct d3d12_context *ctx = d3d12_context(pctx);
1328    struct d3d12_batch *batch;
1329    struct pipe_resource *patched_indirect = nullptr;
1330 
1331    struct d3d12_cmd_signature_key cmd_sig_key;
1332    memset(&cmd_sig_key, 0, sizeof(cmd_sig_key));
1333    cmd_sig_key.compute = 1;
1334    cmd_sig_key.multi_draw_stride = sizeof(D3D12_DISPATCH_ARGUMENTS);
1335 
1336    struct pipe_resource *indirect = info->indirect;
1337    unsigned indirect_offset = info->indirect_offset;
1338    if (indirect && update_dispatch_indirect_with_sysvals(ctx, &indirect, &indirect_offset, &patched_indirect))
1339       cmd_sig_key.multi_draw_stride = sizeof(D3D12_DISPATCH_ARGUMENTS) * 2;
1340 
1341    d3d12_select_compute_shader_variants(ctx, info);
1342    d3d12_validate_queries(ctx);
1343    struct d3d12_shader *shader = ctx->compute_state ? ctx->compute_state->current : NULL;
1344    if (ctx->compute_pipeline_state.stage != shader) {
1345       ctx->compute_pipeline_state.stage = shader;
1346       ctx->state_dirty |= D3D12_DIRTY_COMPUTE_SHADER;
1347    }
1348 
1349    if (!ctx->compute_pipeline_state.root_signature || ctx->state_dirty & D3D12_DIRTY_COMPUTE_SHADER) {
1350       ID3D12RootSignature *root_signature = d3d12_get_root_signature(ctx, true);
1351       if (ctx->compute_pipeline_state.root_signature != root_signature) {
1352          ctx->compute_pipeline_state.root_signature = root_signature;
1353          ctx->state_dirty |= D3D12_DIRTY_COMPUTE_ROOT_SIGNATURE;
1354          ctx->shader_dirty[PIPE_SHADER_COMPUTE] |= D3D12_SHADER_DIRTY_ALL;
1355       }
1356    }
1357 
1358    if (!ctx->current_compute_pso || ctx->state_dirty & D3D12_DIRTY_COMPUTE_PSO) {
1359       ctx->current_compute_pso = d3d12_get_compute_pipeline_state(ctx);
1360       assert(ctx->current_compute_pso);
1361    }
1362 
1363    ctx->cmdlist_dirty |= ctx->state_dirty;
1364 
1365    if (!check_descriptors_left(ctx, true))
1366       d3d12_flush_cmdlist(ctx);
1367    batch = d3d12_current_batch(ctx);
1368 
1369    if (ctx->cmdlist_dirty & D3D12_DIRTY_COMPUTE_ROOT_SIGNATURE) {
1370       d3d12_batch_reference_object(batch, ctx->compute_pipeline_state.root_signature);
1371       ctx->cmdlist->SetComputeRootSignature(ctx->compute_pipeline_state.root_signature);
1372    }
1373 
1374    if (ctx->cmdlist_dirty & D3D12_DIRTY_COMPUTE_PSO) {
1375       assert(ctx->current_compute_pso);
1376       d3d12_batch_reference_object(batch, ctx->current_compute_pso);
1377       ctx->cmdlist->SetPipelineState(ctx->current_compute_pso);
1378    }
1379 
1380    D3D12_GPU_DESCRIPTOR_HANDLE root_desc_tables[MAX_DESCRIPTOR_TABLES];
1381    int root_desc_indices[MAX_DESCRIPTOR_TABLES];
1382    unsigned num_root_descriptors = update_compute_root_parameters(ctx, info, root_desc_tables, root_desc_indices, &cmd_sig_key);
1383 
1384    ID3D12Resource *indirect_arg_buf = nullptr;
1385    uint64_t indirect_arg_offset = 0;
1386    if (indirect) {
1387       struct d3d12_resource *indirect_buf = d3d12_resource(indirect);
1388       uint64_t buf_offset = 0;
1389       indirect_arg_buf = d3d12_resource_underlying(indirect_buf, &buf_offset);
1390       indirect_arg_offset = indirect_offset + buf_offset;
1391       d3d12_transition_resource_state(ctx, indirect_buf,
1392          D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT, D3D12_TRANSITION_FLAG_ACCUMULATE_STATE);
1393       d3d12_batch_reference_resource(batch, indirect_buf, false);
1394    }
1395 
1396    d3d12_apply_resource_states(ctx, ctx->compute_state->is_variant);
1397 
1398    for (unsigned i = 0; i < num_root_descriptors; ++i)
1399       ctx->cmdlist->SetComputeRootDescriptorTable(root_desc_indices[i], root_desc_tables[i]);
1400 
1401    if (indirect) {
1402       ID3D12CommandSignature *cmd_sig = d3d12_get_cmd_signature(ctx, &cmd_sig_key);
1403       ctx->cmdlist->ExecuteIndirect(cmd_sig, 1, indirect_arg_buf, indirect_arg_offset, nullptr, 0);
1404    } else {
1405       ctx->cmdlist->Dispatch(info->grid[0], info->grid[1], info->grid[2]);
1406    }
1407 
1408    ctx->state_dirty &= D3D12_DIRTY_GFX_MASK;
1409    ctx->cmdlist_dirty &= D3D12_DIRTY_GFX_MASK;
1410 
1411    /* The next draw needs to reassert the graphics PSO */
1412    ctx->cmdlist_dirty |= D3D12_DIRTY_SHADER;
1413    batch->pending_memory_barrier = false;
1414 
1415    ctx->shader_dirty[PIPE_SHADER_COMPUTE] = 0;
1416    pipe_resource_reference(&patched_indirect, nullptr);
1417 }
1418