/*
 * Copyright © Microsoft Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "d3d12_compiler.h"
#include "d3d12_context.h"
#include "d3d12_debug.h"
#include "d3d12_screen.h"
#include "d3d12_nir_passes.h"
#include "nir_to_dxil.h"
#include "dxil_nir.h"
#include "dxil_nir_lower_int_cubemaps.h"

#include "pipe/p_state.h"

#include "nir.h"
#include "nir/nir_draw_helpers.h"
#include "nir/tgsi_to_nir.h"
#include "compiler/nir/nir_builder.h"

#include "util/hash_table.h"
#include "util/u_memory.h"
#include "util/u_prim.h"
#include "util/u_simple_shaders.h"
#include "util/u_dl.h"

#include <dxguids/dxguids.h>

#ifdef _WIN32
#include "dxil_validator.h"
#endif

const void *
d3d12_get_compiler_options(struct pipe_screen *screen,
                           enum pipe_shader_ir ir,
                           enum pipe_shader_type shader)
{
   assert(ir == PIPE_SHADER_IR_NIR);
   return &d3d12_screen(screen)->nir_options;
}

static uint32_t
resource_dimension(enum glsl_sampler_dim dim)
{
   switch (dim) {
   case GLSL_SAMPLER_DIM_1D:
      return RESOURCE_DIMENSION_TEXTURE1D;
   case GLSL_SAMPLER_DIM_2D:
      return RESOURCE_DIMENSION_TEXTURE2D;
   case GLSL_SAMPLER_DIM_3D:
      return RESOURCE_DIMENSION_TEXTURE3D;
   case GLSL_SAMPLER_DIM_CUBE:
      return RESOURCE_DIMENSION_TEXTURECUBE;
   default:
      return RESOURCE_DIMENSION_UNKNOWN;
   }
}

static bool
can_remove_dead_sampler(nir_variable *var, void *data)
{
   const struct glsl_type *base_type = glsl_without_array(var->type);
   return glsl_type_is_sampler(base_type) && !glsl_type_is_bare_sampler(base_type);
}

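/* Compile a NIR shader variant to DXIL bytecode: run the D3D12/DXIL-specific
 * lowering passes requested by the key, translate with nir_to_dxil(), record
 * the SRV/UAV/CBV binding ranges used by the shader, and (on Windows) run the
 * DXIL validator and optional disassembly for debugging.
 */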
static struct d3d12_shader *
compile_nir(struct d3d12_context *ctx, struct d3d12_shader_selector *sel,
            struct d3d12_shader_key *key, struct nir_shader *nir)
{
   struct d3d12_screen *screen = d3d12_screen(ctx->base.screen);
   struct d3d12_shader *shader = rzalloc(sel, d3d12_shader);
   shader->key = *key;

   if (shader->key.n_texture_states > 0) {
      shader->key.tex_wrap_states = (dxil_wrap_sampler_state*)ralloc_size(sel, sizeof(dxil_wrap_sampler_state) * shader->key.n_texture_states);
      memcpy(shader->key.tex_wrap_states, key->tex_wrap_states, sizeof(dxil_wrap_sampler_state) * shader->key.n_texture_states);
   }
   else
      shader->key.tex_wrap_states = nullptr;

   shader->nir = nir;
   sel->current = shader;

   NIR_PASS_V(nir, nir_lower_samplers);
   NIR_PASS_V(nir, dxil_nir_split_typed_samplers);

   NIR_PASS_V(nir, nir_opt_dce);
   struct nir_remove_dead_variables_options dead_var_opts = {};
   dead_var_opts.can_remove_var = can_remove_dead_sampler;
   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_uniform, &dead_var_opts);

   if (key->samples_int_textures)
      NIR_PASS_V(nir, dxil_lower_sample_to_txf_for_integer_tex,
                 key->n_texture_states, key->tex_wrap_states, key->swizzle_state,
                 screen->base.get_paramf(&screen->base, PIPE_CAPF_MAX_TEXTURE_LOD_BIAS));

   if (key->stage == PIPE_SHADER_VERTEX && key->vs.needs_format_emulation)
      dxil_nir_lower_vs_vertex_conversion(nir, key->vs.format_conversion);

   if (key->last_vertex_processing_stage) {
      if (key->invert_depth)
         NIR_PASS_V(nir, d3d12_nir_invert_depth, key->invert_depth, key->halfz);
      if (!key->halfz)
         NIR_PASS_V(nir, nir_lower_clip_halfz);
      NIR_PASS_V(nir, d3d12_lower_yflip);
   }

   NIR_PASS_V(nir, d3d12_lower_state_vars, shader);

   const struct dxil_nir_lower_loads_stores_options loads_stores_options = {};
   NIR_PASS_V(nir, dxil_nir_lower_loads_stores_to_dxil, &loads_stores_options);

   if (key->stage == PIPE_SHADER_FRAGMENT && key->fs.multisample_disabled)
      NIR_PASS_V(nir, d3d12_disable_multisampling);

   struct nir_to_dxil_options opts = {};
   opts.interpolate_at_vertex = screen->have_load_at_vertex;
   opts.lower_int16 = !screen->opts4.Native16BitShaderOpsSupported;
   opts.last_ubo_is_not_arrayed = shader->num_state_vars > 0;
   if (key->stage == PIPE_SHADER_FRAGMENT)
      opts.provoking_vertex = key->fs.provoking_vertex;
   opts.input_clip_size = key->input_clip_size;
   opts.environment = DXIL_ENVIRONMENT_GL;
   opts.shader_model_max = screen->max_shader_model;
#ifdef _WIN32
   opts.validator_version_max = dxil_get_validator_version(ctx->dxil_validator);
#endif

   struct blob tmp;
   if (!nir_to_dxil(nir, &opts, NULL, &tmp)) {
      debug_printf("D3D12: nir_to_dxil failed\n");
      return NULL;
   }

   // Non-ubo variables
   shader->begin_srv_binding = (UINT_MAX);
   nir_foreach_variable_with_modes(var, nir, nir_var_uniform) {
      auto type_no_array = glsl_without_array(var->type);
      if (glsl_type_is_texture(type_no_array)) {
         unsigned count = glsl_type_is_array(var->type) ? glsl_get_aoa_size(var->type) : 1;
         for (unsigned i = 0; i < count; ++i) {
            shader->srv_bindings[var->data.binding + i].dimension = resource_dimension(glsl_get_sampler_dim(type_no_array));
         }
         shader->begin_srv_binding = MIN2(var->data.binding, shader->begin_srv_binding);
         shader->end_srv_binding = MAX2(var->data.binding + count, shader->end_srv_binding);
      }
   }

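   /* Record the resource dimension for each image (UAV) binding, indexed by
    * driver_location, mirroring what was done for the texture SRVs above. */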
   nir_foreach_image_variable(var, nir) {
      auto type_no_array = glsl_without_array(var->type);
      unsigned count = glsl_type_is_array(var->type) ? glsl_get_aoa_size(var->type) : 1;
      for (unsigned i = 0; i < count; ++i) {
         shader->uav_bindings[var->data.driver_location + i].dimension = resource_dimension(glsl_get_sampler_dim(type_no_array));
      }
   }

   // Ubo variables
   if(nir->info.num_ubos) {
      shader->begin_ubo_binding = shader->nir->num_uniforms > 0 || !shader->nir->info.first_ubo_is_default_ubo ? 0 : 1;
      // Ignore state_vars ubo as it is bound as root constants
      shader->end_ubo_binding = nir->info.num_ubos - (shader->state_vars_used ? 1 : 0);
   }

#ifdef _WIN32
   if (ctx->dxil_validator) {
      if (!(d3d12_debug & D3D12_DEBUG_EXPERIMENTAL)) {
         char *err;
         if (!dxil_validate_module(ctx->dxil_validator, tmp.data,
                                   tmp.size, &err) && err) {
            debug_printf(
               "== VALIDATION ERROR =============================================\n"
               "%s\n"
               "== END ==========================================================\n",
               err);
            ralloc_free(err);
         }
      }

      if (d3d12_debug & D3D12_DEBUG_DISASS) {
         char *str = dxil_disasm_module(ctx->dxil_validator, tmp.data,
                                        tmp.size);
         fprintf(stderr,
                 "== BEGIN SHADER ============================================\n"
                 "%s\n"
                 "== END SHADER ==============================================\n",
               str);
         ralloc_free(str);
      }
   }
#endif

   blob_finish_get_buffer(&tmp, &shader->bytecode, &shader->bytecode_length);

   if (d3d12_debug & D3D12_DEBUG_DXIL) {
      char buf[256];
      static int i;
      snprintf(buf, sizeof(buf), "dump%02d.dxil", i++);
      FILE *fp = fopen(buf, "wb");
      fwrite(shader->bytecode, sizeof(char), shader->bytecode_length, fp);
      fclose(fp);
      fprintf(stderr, "wrote '%s'...\n", buf);
   }
   return shader;
}

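/* Per-draw state gathered before variant selection; it captures everything
 * that can force a different shader variant (point-sprite/fill-mode/cull
 * lowering, provoking vertex, depth-range handling, dual-source and
 * fragcolor lowering, variable workgroup size).
 */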
struct d3d12_selection_context {
   struct d3d12_context *ctx;
   bool needs_point_sprite_lowering;
   bool needs_vertex_reordering;
   unsigned provoking_vertex;
   bool alternate_tri;
   unsigned fill_mode_lowered;
   unsigned cull_mode_lowered;
   bool manual_depth_range;
   unsigned missing_dual_src_outputs;
   unsigned frag_result_color_lowering;
   const unsigned *variable_workgroup_size;
};

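/* When dual-source blending is enabled, return a bitmask (bits 0 and 1) of
 * the dual-source outputs the fragment shader does not write, so a variant
 * can add dummy writes for them; returns 0 when nothing is missing.
 */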
unsigned
missing_dual_src_outputs(struct d3d12_context *ctx)
{
   if (!ctx->gfx_pipeline_state.blend || !ctx->gfx_pipeline_state.blend->is_dual_src)
      return 0;

   struct d3d12_shader_selector *fs = ctx->gfx_stages[PIPE_SHADER_FRAGMENT];
   if (!fs)
      return 0;

   const nir_shader *s = fs->initial;

   unsigned indices_seen = 0;
   nir_foreach_function_impl(impl, s) {
      nir_foreach_block(block, impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;

            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
            if (intr->intrinsic != nir_intrinsic_store_deref)
               continue;

            nir_variable *var = nir_intrinsic_get_var(intr, 0);
            if (var->data.mode != nir_var_shader_out)
               continue;

            unsigned index = var->data.index;
            if (var->data.location > FRAG_RESULT_DATA0)
               index = var->data.location - FRAG_RESULT_DATA0;
            else if (var->data.location != FRAG_RESULT_COLOR &&
                     var->data.location != FRAG_RESULT_DATA0)
               continue;

            indices_seen |= 1u << index;
            if ((indices_seen & 3) == 3)
               return 0;
         }
      }
   }

   return 3 & ~indices_seen;
}

static unsigned
frag_result_color_lowering(struct d3d12_context *ctx)
{
   struct d3d12_shader_selector *fs = ctx->gfx_stages[PIPE_SHADER_FRAGMENT];
   assert(fs);

   if (fs->initial->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_COLOR))
      return ctx->fb.nr_cbufs > 1 ? ctx->fb.nr_cbufs : 0;

   return 0;
}

bool
manual_depth_range(struct d3d12_context *ctx)
{
   if (!d3d12_need_zero_one_depth_range(ctx))
      return false;

   /**
    * If we can't use the D3D12 zero-one depth-range, we might have to apply
    * depth-range ourselves.
    *
    * Because we only need to override the depth-range to zero-one range in
    * the case where we write frag-depth, we only need to apply manual
    * depth-range to gl_FragCoord.z.
    *
    * No extra care needs to be taken in the case where gl_FragDepth is
    * written conditionally, because the GLSL 4.60 spec states:
    *
    *    If a shader statically assigns a value to gl_FragDepth, and there
    *    is an execution path through the shader that does not set
    *    gl_FragDepth, then the value of the fragment’s depth may be
    *    undefined for executions of the shader that take that path. That
    *    is, if the set of linked fragment shaders statically contain a
    *    write to gl_FragDepth, then it is responsible for always writing
    *    it.
    */

   struct d3d12_shader_selector *fs = ctx->gfx_stages[PIPE_SHADER_FRAGMENT];
   return fs && fs->initial->info.inputs_read & VARYING_BIT_POS;
}

static bool
needs_edge_flag_fix(enum mesa_prim mode)
{
   return (mode == MESA_PRIM_QUADS ||
           mode == MESA_PRIM_QUAD_STRIP ||
           mode == MESA_PRIM_POLYGON);
}

static unsigned
fill_mode_lowered(struct d3d12_context *ctx, const struct pipe_draw_info *dinfo)
{
   struct d3d12_shader_selector *vs = ctx->gfx_stages[PIPE_SHADER_VERTEX];

   if ((ctx->gfx_stages[PIPE_SHADER_GEOMETRY] != NULL &&
        !ctx->gfx_stages[PIPE_SHADER_GEOMETRY]->is_variant) ||
       ctx->gfx_pipeline_state.rast == NULL ||
       (dinfo->mode != MESA_PRIM_TRIANGLES &&
        dinfo->mode != MESA_PRIM_TRIANGLE_STRIP))
      return PIPE_POLYGON_MODE_FILL;

   /* D3D12 supports line mode (wireframe) but doesn't support edge flags */
   if (((ctx->gfx_pipeline_state.rast->base.fill_front == PIPE_POLYGON_MODE_LINE &&
         ctx->gfx_pipeline_state.rast->base.cull_face != PIPE_FACE_FRONT) ||
        (ctx->gfx_pipeline_state.rast->base.fill_back == PIPE_POLYGON_MODE_LINE &&
         ctx->gfx_pipeline_state.rast->base.cull_face == PIPE_FACE_FRONT)) &&
       (vs->initial->info.outputs_written & VARYING_BIT_EDGE ||
        needs_edge_flag_fix(ctx->initial_api_prim)))
      return PIPE_POLYGON_MODE_LINE;

   if (ctx->gfx_pipeline_state.rast->base.fill_front == PIPE_POLYGON_MODE_POINT)
      return PIPE_POLYGON_MODE_POINT;

   return PIPE_POLYGON_MODE_FILL;
}

static bool
has_stream_out_for_streams(struct d3d12_context *ctx)
{
   unsigned mask = ctx->gfx_stages[PIPE_SHADER_GEOMETRY]->initial->info.gs.active_stream_mask & ~1;
   for (unsigned i = 0; i < ctx->gfx_pipeline_state.so_info.num_outputs; ++i) {
      unsigned stream = ctx->gfx_pipeline_state.so_info.output[i].stream;
      if (((1 << stream) & mask) &&
         ctx->so_buffer_views[stream].SizeInBytes)
         return true;
   }
   return false;
}

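/* Decide whether point rendering must be lowered via a geometry shader
 * variant: either a user GS emits points with a point size, or wide/sprite
 * points are being drawn without a user GS.
 */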
static bool
needs_point_sprite_lowering(struct d3d12_context *ctx, const struct pipe_draw_info *dinfo)
{
   struct d3d12_shader_selector *vs = ctx->gfx_stages[PIPE_SHADER_VERTEX];
   struct d3d12_shader_selector *gs = ctx->gfx_stages[PIPE_SHADER_GEOMETRY];

   if (gs != NULL && !gs->is_variant) {
      /* There is a user GS; check if it outputs points with PSIZE */
      return (gs->initial->info.gs.output_primitive == MESA_PRIM_POINTS &&
              (gs->initial->info.outputs_written & VARYING_BIT_PSIZ ||
                 ctx->gfx_pipeline_state.rast->base.point_size > 1.0) &&
              (gs->initial->info.gs.active_stream_mask == 1 ||
                 !has_stream_out_for_streams(ctx)));
   } else {
      /* No user GS; check if we are drawing wide points */
      return ((dinfo->mode == MESA_PRIM_POINTS ||
               fill_mode_lowered(ctx, dinfo) == PIPE_POLYGON_MODE_POINT) &&
              (ctx->gfx_pipeline_state.rast->base.point_size > 1.0 ||
               ctx->gfx_pipeline_state.rast->base.offset_point ||
               (ctx->gfx_pipeline_state.rast->base.point_size_per_vertex &&
                vs->initial->info.outputs_written & VARYING_BIT_PSIZ)) &&
              (vs->initial->info.outputs_written & VARYING_BIT_POS));
   }
}

static unsigned
cull_mode_lowered(struct d3d12_context *ctx, unsigned fill_mode)
{
   if ((ctx->gfx_stages[PIPE_SHADER_GEOMETRY] != NULL &&
        !ctx->gfx_stages[PIPE_SHADER_GEOMETRY]->is_variant) ||
       ctx->gfx_pipeline_state.rast == NULL ||
       ctx->gfx_pipeline_state.rast->base.cull_face == PIPE_FACE_NONE)
      return PIPE_FACE_NONE;

   return ctx->gfx_pipeline_state.rast->base.cull_face;
}

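/* Return the provoking vertex index within the output primitive (0 when
 * flatshade_first, otherwise the last vertex) and report via *alternate
 * whether triangle strips alternate the provoking vertex between triangles.
 */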
static unsigned
get_provoking_vertex(struct d3d12_selection_context *sel_ctx, bool *alternate, const struct pipe_draw_info *dinfo)
{
   if (dinfo->mode == GL_PATCHES) {
      *alternate = false;
      return 0;
   }

   struct d3d12_shader_selector *vs = sel_ctx->ctx->gfx_stages[PIPE_SHADER_VERTEX];
   struct d3d12_shader_selector *gs = sel_ctx->ctx->gfx_stages[PIPE_SHADER_GEOMETRY];
   struct d3d12_shader_selector *last_vertex_stage = gs && !gs->is_variant ? gs : vs;

   enum mesa_prim mode;
   switch (last_vertex_stage->stage) {
   case PIPE_SHADER_GEOMETRY:
      mode = (enum mesa_prim)last_vertex_stage->initial->info.gs.output_primitive;
      break;
   case PIPE_SHADER_VERTEX:
      mode = (enum mesa_prim)dinfo->mode;
      break;
   default:
      unreachable("Tessellation shaders are not supported");
   }

   bool flatshade_first = sel_ctx->ctx->gfx_pipeline_state.rast &&
                          sel_ctx->ctx->gfx_pipeline_state.rast->base.flatshade_first;
   *alternate = (mode == GL_TRIANGLE_STRIP || mode == GL_TRIANGLE_STRIP_ADJACENCY) &&
                (!gs || gs->is_variant ||
                 gs->initial->info.gs.vertices_out > u_prim_vertex_count(mode)->min);
   return flatshade_first ? 0 : u_prim_vertex_count(mode)->min - 1;
}

bool
has_flat_varyings(struct d3d12_context *ctx)
{
   struct d3d12_shader_selector *fs = ctx->gfx_stages[PIPE_SHADER_FRAGMENT];

   if (!fs)
      return false;

   nir_foreach_variable_with_modes(input, fs->initial,
                                   nir_var_shader_in) {
      if (input->data.interpolation == INTERP_MODE_FLAT &&
          /* Disregard sysvals */
          (input->data.location >= VARYING_SLOT_VAR0 ||
             input->data.location <= VARYING_SLOT_TEX7))
         return true;
   }

   return false;
}

static bool
needs_vertex_reordering(struct d3d12_selection_context *sel_ctx, const struct pipe_draw_info *dinfo)
{
   struct d3d12_context *ctx = sel_ctx->ctx;
   bool flat = ctx->has_flat_varyings;
   bool xfb = ctx->gfx_pipeline_state.num_so_targets > 0;

   if (fill_mode_lowered(ctx, dinfo) != PIPE_POLYGON_MODE_FILL)
      return false;

   /* TODO add support for line primitives */
   if (u_reduced_prim((mesa_prim)dinfo->mode) == MESA_PRIM_LINES)
      return false;

   /* When flat shading a triangle and the provoking vertex is not the first one, we use load_at_vertex.
      If that's not available on this adapter, or if it's a triangle strip, we need to reorder the vertices. */
   if (flat && sel_ctx->provoking_vertex >= 2 && (!d3d12_screen(ctx->base.screen)->have_load_at_vertex ||
                                                  sel_ctx->alternate_tri))
      return true;

   /* When transform feedback is enabled and the output is alternating (triangle strip or triangle
      strip with adjacency), we need to reorder vertices to get the order expected by OpenGL. This
      only works when there is no flat shading involved. In that scenario, we don't care about
      the provoking vertex. */
   if (xfb && !flat && sel_ctx->alternate_tri) {
      sel_ctx->provoking_vertex = 0;
      return true;
   }

   return false;
}

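/* Build a d3d12_varying_info describing the shader's varyings for the given
 * modes/mask, then deduplicate it through the screen-wide varying_info_set so
 * that identical layouts share a single heap-allocated instance.
 */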
static d3d12_varying_info*
fill_varyings(struct d3d12_context *ctx, const nir_shader *s,
              nir_variable_mode modes, uint64_t mask, bool patch)
{
   struct d3d12_varying_info info;

   info.max = 0;
   info.mask = 0;
   info.hash = 0;

   nir_foreach_variable_with_modes(var, s, modes) {
      unsigned slot = var->data.location;
      bool is_generic_patch = slot >= VARYING_SLOT_PATCH0;
      if (patch ^ is_generic_patch)
         continue;
      if (is_generic_patch)
         slot -= VARYING_SLOT_PATCH0;
      uint64_t slot_bit = BITFIELD64_BIT(slot);

      if (!(mask & slot_bit))
         continue;

      if ((info.mask & slot_bit) == 0) {
         memset(info.slots + slot, 0, sizeof(info.slots[0]));
         info.max = MAX2(info.max, slot);
      }

      const struct glsl_type *type = var->type;
      if (nir_is_arrayed_io(var, s->info.stage))
         type = glsl_get_array_element(type);
      info.slots[slot].types[var->data.location_frac] = type;

      info.slots[slot].patch = var->data.patch;
      auto& var_slot = info.slots[slot].vars[var->data.location_frac];
      var_slot.driver_location = var->data.driver_location;
      var_slot.interpolation = var->data.interpolation;
      var_slot.compact = var->data.compact;
      var_slot.always_active_io = var->data.always_active_io;
      info.mask |= slot_bit;
      info.slots[slot].location_frac_mask |= (1 << var->data.location_frac);
   }

   for (uint32_t i = 0; i <= info.max; ++i) {
      if (((1llu << i) & info.mask) == 0)
         memset(info.slots + i, 0, sizeof(info.slots[0]));
      else
         info.hash = _mesa_hash_data_with_seed(info.slots + i, sizeof(info.slots[0]), info.hash);
   }
   info.hash = _mesa_hash_data_with_seed(&info.mask, sizeof(info.mask), info.hash);

   struct d3d12_screen *screen = d3d12_screen(ctx->base.screen);

   mtx_lock(&screen->varying_info_mutex);
   set_entry *pentry = _mesa_set_search_pre_hashed(screen->varying_info_set, info.hash, &info);
   if (pentry != nullptr) {
      mtx_unlock(&screen->varying_info_mutex);
      return (d3d12_varying_info*)pentry->key;
   }
   else {
      d3d12_varying_info *key = MALLOC_STRUCT(d3d12_varying_info);
      *key = info;

      _mesa_set_add_pre_hashed(screen->varying_info_set, info.hash, key);

      mtx_unlock(&screen->varying_info_mutex);
      return key;
   }
}

static void
fill_flat_varyings(struct d3d12_gs_variant_key *key, d3d12_shader_selector *fs)
{
   if (!fs)
      return;

   nir_foreach_variable_with_modes(input, fs->initial,
                                   nir_var_shader_in) {
      if (input->data.interpolation == INTERP_MODE_FLAT)
         key->flat_varyings |= BITFIELD64_BIT(input->data.location);
   }
}

bool
d3d12_compare_varying_info(const d3d12_varying_info *expect, const d3d12_varying_info *have)
{
   if (expect == have)
      return true;

   if (expect == nullptr || have == nullptr)
      return false;

   if (expect->mask != have->mask
      || expect->max != have->max)
      return false;

   if (!expect->mask)
      return true;

   /* 6 is a rough (wild) guess for a bulk memcmp cross-over point.  When there
    * are a small number of slots present, individual slot compares are much faster. */
   if (util_bitcount64(expect->mask) < 6) {
      uint64_t mask = expect->mask;
      while (mask) {
         int slot = u_bit_scan64(&mask);
         if (memcmp(&expect->slots[slot], &have->slots[slot], sizeof(have->slots[slot])))
            return false;
      }

      return true;
   }

   return !memcmp(expect->slots, have->slots, sizeof(expect->slots[0]) * expect->max);
}


uint32_t varying_info_hash(const void *info) {
   return ((d3d12_varying_info*)info)->hash;
}
bool varying_info_compare(const void *a, const void *b) {
   return d3d12_compare_varying_info((d3d12_varying_info*)a, (d3d12_varying_info*)b);
}
void varying_info_entry_destroy(set_entry *entry) {
   if (entry->key)
      free((void*)entry->key);
}

void
d3d12_varying_cache_init(struct d3d12_screen *screen) {
   screen->varying_info_set = _mesa_set_create(nullptr, varying_info_hash, varying_info_compare);
}

void
d3d12_varying_cache_destroy(struct d3d12_screen *screen) {
   _mesa_set_destroy(screen->varying_info_set, varying_info_entry_destroy);
}


static void
validate_geometry_shader_variant(struct d3d12_selection_context *sel_ctx)
{
   struct d3d12_context *ctx = sel_ctx->ctx;
   d3d12_shader_selector *gs = ctx->gfx_stages[PIPE_SHADER_GEOMETRY];

   /* Nothing to do if there is a user geometry shader bound */
   if (gs != NULL && !gs->is_variant)
      return;

   d3d12_shader_selector* vs = ctx->gfx_stages[PIPE_SHADER_VERTEX];
   d3d12_shader_selector* fs = ctx->gfx_stages[PIPE_SHADER_FRAGMENT];

   struct d3d12_gs_variant_key key;
   key.all = 0;
   key.flat_varyings = 0;

   /* Fill the geometry shader variant key */
   if (sel_ctx->fill_mode_lowered != PIPE_POLYGON_MODE_FILL) {
      key.fill_mode = sel_ctx->fill_mode_lowered;
      key.cull_mode = sel_ctx->cull_mode_lowered;
      key.has_front_face = (fs->initial->info.inputs_read & VARYING_BIT_FACE) != 0;
      if (key.cull_mode != PIPE_FACE_NONE || key.has_front_face)
         key.front_ccw = ctx->gfx_pipeline_state.rast->base.front_ccw ^ (ctx->flip_y < 0);
      key.edge_flag_fix = needs_edge_flag_fix(ctx->initial_api_prim);
      fill_flat_varyings(&key, fs);
      if (key.flat_varyings != 0)
         key.flatshade_first = ctx->gfx_pipeline_state.rast->base.flatshade_first;
   } else if (sel_ctx->needs_point_sprite_lowering) {
      key.passthrough = true;
   } else if (sel_ctx->needs_vertex_reordering) {
      /* TODO support cases where flat shading (pv != 0) and xfb are enabled, or lines */
      key.provoking_vertex = sel_ctx->provoking_vertex;
      key.alternate_tri = sel_ctx->alternate_tri;
   }

   if (vs->initial_output_vars == nullptr) {
      vs->initial_output_vars = fill_varyings(sel_ctx->ctx, vs->initial, nir_var_shader_out,
                                                vs->initial->info.outputs_written, false);
   }
   key.varyings = vs->initial_output_vars;
   gs = d3d12_get_gs_variant(ctx, &key);
   ctx->gfx_stages[PIPE_SHADER_GEOMETRY] = gs;
}

static void
validate_tess_ctrl_shader_variant(struct d3d12_selection_context *sel_ctx)
{
   struct d3d12_context *ctx = sel_ctx->ctx;
   d3d12_shader_selector *tcs = ctx->gfx_stages[PIPE_SHADER_TESS_CTRL];

   /* Nothing to do if there is a user tess ctrl shader bound */
   if (tcs != NULL && !tcs->is_variant)
      return;

   d3d12_shader_selector *tes = ctx->gfx_stages[PIPE_SHADER_TESS_EVAL];
   struct d3d12_tcs_variant_key key = {0};

   bool variant_needed = tes != nullptr;

   /* Fill the variant key */
   if (variant_needed) {
      if (tes->initial_input_vars == nullptr) {
         tes->initial_input_vars = fill_varyings(sel_ctx->ctx, tes->initial, nir_var_shader_in,
                                                 tes->initial->info.inputs_read & ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER),
                                                 false);
      }
      key.varyings = tes->initial_input_vars;
      key.vertices_out = ctx->patch_vertices;
   }

   /* Find/create the proper variant and bind it */
   tcs = variant_needed ? d3d12_get_tcs_variant(ctx, &key) : NULL;
   ctx->gfx_stages[PIPE_SHADER_TESS_CTRL] = tcs;
}

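/* Shader-key equality: reject early on the precomputed hash and the
 * stage-specific fields, then compare the variable-length texture/sampler
 * and image state arrays with memcmp.
 */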
static bool
d3d12_compare_shader_keys(struct d3d12_selection_context* sel_ctx, const d3d12_shader_key *expect, const d3d12_shader_key *have)
{
   assert(expect->stage == have->stage);
   assert(expect);
   assert(have);

   if (expect->hash != have->hash)
      return false;

   switch (expect->stage) {
   case PIPE_SHADER_VERTEX:
      if (expect->vs.needs_format_emulation != have->vs.needs_format_emulation)
         return false;

      if (expect->vs.needs_format_emulation) {
         if (memcmp(expect->vs.format_conversion, have->vs.format_conversion,
            sel_ctx->ctx->gfx_pipeline_state.ves->num_elements * sizeof(enum pipe_format)))
            return false;
      }
      break;
   case PIPE_SHADER_GEOMETRY:
      if (expect->gs.all != have->gs.all)
         return false;
      break;
   case PIPE_SHADER_TESS_CTRL:
      if (expect->hs.all != have->hs.all)
         return false;
      break;
   case PIPE_SHADER_TESS_EVAL:
      if (expect->ds.tcs_vertices_out != have->ds.tcs_vertices_out ||
          expect->ds.prev_patch_outputs != have->ds.prev_patch_outputs)
         return false;
      break;
   case PIPE_SHADER_FRAGMENT:
      if (expect->fs.all != have->fs.all)
         return false;
      break;
   case PIPE_SHADER_COMPUTE:
      if (memcmp(expect->cs.workgroup_size, have->cs.workgroup_size,
                 sizeof(have->cs.workgroup_size)))
         return false;
      break;
   default:
      unreachable("invalid stage");
   }

   if (expect->n_texture_states != have->n_texture_states)
      return false;

   if (expect->n_images != have->n_images)
      return false;

   if (expect->n_texture_states > 0 &&
       memcmp(expect->tex_wrap_states, have->tex_wrap_states,
              expect->n_texture_states * sizeof(dxil_wrap_sampler_state)))
      return false;

   if (memcmp(expect->swizzle_state, have->swizzle_state,
              expect->n_texture_states * sizeof(dxil_texture_swizzle_state)))
      return false;

   if (memcmp(expect->sampler_compare_funcs, have->sampler_compare_funcs,
              expect->n_texture_states * sizeof(enum compare_func)))
      return false;

   if (memcmp(expect->image_format_conversion, have->image_format_conversion,
      expect->n_images * sizeof(struct d3d12_image_format_conversion_info)))
      return false;

   if (!(expect->next_varying_inputs == have->next_varying_inputs &&
         expect->prev_varying_outputs == have->prev_varying_outputs &&
         expect->common_all == have->common_all &&
         expect->tex_saturate_s == have->tex_saturate_s &&
         expect->tex_saturate_r == have->tex_saturate_r &&
         expect->tex_saturate_t == have->tex_saturate_t))
      return false;

   if (expect->next_has_frac_inputs &&
       expect->next_varying_frac_inputs != have->next_varying_frac_inputs &&
       memcmp(expect->next_varying_frac_inputs, have->next_varying_frac_inputs, sizeof(d3d12_shader_selector::varying_frac_inputs)))
      return false;
   if (expect->prev_has_frac_outputs &&
       expect->prev_varying_frac_outputs != have->prev_varying_frac_outputs &&
       memcmp(expect->prev_varying_frac_outputs, have->prev_varying_frac_outputs, sizeof(d3d12_shader_selector::varying_frac_outputs)))
      return false;
   return true;
}

static uint32_t
d3d12_shader_key_hash(const d3d12_shader_key *key)
{
   uint32_t hash;

   hash = (uint32_t)key->stage;

   hash += key->next_varying_inputs;
   hash += key->prev_varying_outputs;
   hash += key->common_all;
   if (key->next_has_frac_inputs)
      hash = _mesa_hash_data_with_seed(key->next_varying_frac_inputs, sizeof(d3d12_shader_selector::varying_frac_inputs), hash);
   if (key->prev_has_frac_outputs)
      hash = _mesa_hash_data_with_seed(key->prev_varying_frac_outputs, sizeof(d3d12_shader_selector::varying_frac_outputs), hash);
   switch (key->stage) {
   case PIPE_SHADER_VERTEX:
      /* (Probably) not worth the bit extraction for needs_format_emulation, and
       * the rest of the format_conversion data is large.  Don't bother
       * hashing for now until this is shown to be worthwhile. */
      break;
   case PIPE_SHADER_GEOMETRY:
      hash += key->gs.all;
      break;
   case PIPE_SHADER_FRAGMENT:
      hash += key->fs.all;
      break;
   case PIPE_SHADER_COMPUTE:
      hash = _mesa_hash_data_with_seed(&key->cs, sizeof(key->cs), hash);
      break;
   case PIPE_SHADER_TESS_CTRL:
      hash += key->hs.all;
      break;
   case PIPE_SHADER_TESS_EVAL:
      hash += key->ds.tcs_vertices_out;
      hash += key->ds.prev_patch_outputs;
      break;
   default:
      /* No type specific information to hash for other stages. */
      break;
   }

   hash += key->n_texture_states;
   hash += key->n_images;
   return hash;
}

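/* Populate the variant key for one stage: varying linkage with the previous
 * and next enabled stages, rasterizer-derived state (provoking vertex,
 * depth/halfz handling, point sprites, polygon stipple), texture wrap,
 * swizzle and shadow-compare state, vertex-format emulation, image format
 * casts and a variable compute workgroup size, then compute the key's hash.
 */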
static void
d3d12_fill_shader_key(struct d3d12_selection_context *sel_ctx,
                      d3d12_shader_key *key, d3d12_shader_selector *sel,
                      d3d12_shader_selector *prev, d3d12_shader_selector *next)
{
   pipe_shader_type stage = sel->stage;

   memset(key, 0, offsetof(d3d12_shader_key, vs));
   key->stage = stage;

   switch (stage)
   {
   case PIPE_SHADER_VERTEX:
      key->vs.needs_format_emulation = 0;
      break;
   case PIPE_SHADER_FRAGMENT:
      key->fs.all = 0;
      break;
   case PIPE_SHADER_GEOMETRY:
      key->gs.all = 0;
      break;
   case PIPE_SHADER_TESS_CTRL:
      key->hs.all = 0;
      break;
   case PIPE_SHADER_TESS_EVAL:
      key->ds.tcs_vertices_out = 0;
      key->ds.prev_patch_outputs = 0;
      break;
   case PIPE_SHADER_COMPUTE:
      memset(key->cs.workgroup_size, 0, sizeof(key->cs.workgroup_size));
      break;
   default: unreachable("Invalid stage type");
   }

   key->n_texture_states = 0;
   key->tex_wrap_states = sel_ctx->ctx->tex_wrap_states_shader_key;
   key->n_images = 0;

   if (prev) {
      key->prev_varying_outputs = prev->initial->info.outputs_written;
      key->prev_has_frac_outputs = prev->has_frac_outputs;
      key->prev_varying_frac_outputs = prev->varying_frac_outputs;

      if (stage == PIPE_SHADER_TESS_EVAL)
         key->ds.prev_patch_outputs = prev->initial->info.patch_outputs_written;

      /* Set the provoking vertex based on the previous shader output. Only set the
       * key value if the driver actually supports changing the provoking vertex though */
      if (stage == PIPE_SHADER_FRAGMENT && sel_ctx->ctx->gfx_pipeline_state.rast &&
          !sel_ctx->needs_vertex_reordering &&
          d3d12_screen(sel_ctx->ctx->base.screen)->have_load_at_vertex)
         key->fs.provoking_vertex = sel_ctx->provoking_vertex;

      /* Get the input clip distance size. The info's clip_distance_array_size corresponds
       * to the output, and in cases of TES or GS you could have differently-sized inputs
       * and outputs. For FS, there is no output, so it's repurposed to mean input.
       */
      if (stage != PIPE_SHADER_FRAGMENT)
         key->input_clip_size = prev->initial->info.clip_distance_array_size;
   }

   if (next) {
      if (stage == PIPE_SHADER_TESS_CTRL)
         key->hs.next_patch_inputs = next->initial->info.patch_outputs_read;
      key->next_varying_inputs = next->initial->info.inputs_read;
      if (BITSET_TEST(next->initial->info.system_values_read, SYSTEM_VALUE_PRIMITIVE_ID))
         key->next_varying_inputs |= VARYING_SLOT_PRIMITIVE_ID;
      key->next_has_frac_inputs = next->has_frac_inputs;
      key->next_varying_frac_inputs = next->varying_frac_inputs;
   }

   if (stage == PIPE_SHADER_GEOMETRY ||
       ((stage == PIPE_SHADER_VERTEX || stage == PIPE_SHADER_TESS_EVAL) &&
          (!next || next->stage == PIPE_SHADER_FRAGMENT))) {
      key->last_vertex_processing_stage = 1;
      key->invert_depth = sel_ctx->ctx->reverse_depth_range;
      key->halfz = sel_ctx->ctx->gfx_pipeline_state.rast ?
         sel_ctx->ctx->gfx_pipeline_state.rast->base.clip_halfz : false;
      if (sel_ctx->ctx->pstipple.enabled &&
         sel_ctx->ctx->gfx_pipeline_state.rast->base.poly_stipple_enable)
         key->next_varying_inputs |= VARYING_BIT_POS;
   }

   if (stage == PIPE_SHADER_GEOMETRY && sel_ctx->ctx->gfx_pipeline_state.rast) {
      struct pipe_rasterizer_state *rast = &sel_ctx->ctx->gfx_pipeline_state.rast->base;
      if (sel_ctx->needs_point_sprite_lowering) {
         key->gs.writes_psize = 1;
         key->gs.point_size_per_vertex = rast->point_size_per_vertex;
         key->gs.sprite_coord_enable = rast->sprite_coord_enable;
         key->gs.sprite_origin_upper_left = (rast->sprite_coord_mode != PIPE_SPRITE_COORD_LOWER_LEFT);
         if (sel_ctx->ctx->flip_y < 0)
            key->gs.sprite_origin_upper_left = !key->gs.sprite_origin_upper_left;
         key->gs.aa_point = rast->point_smooth;
         key->gs.stream_output_factor = 6;
      } else if (sel_ctx->fill_mode_lowered == PIPE_POLYGON_MODE_LINE) {
         key->gs.stream_output_factor = 2;
      } else if (sel_ctx->needs_vertex_reordering && !sel->is_variant) {
         key->gs.triangle_strip = 1;
      }

      if (sel->is_variant && next) {
         if (next->initial->info.inputs_read & VARYING_BIT_FACE)
            key->next_varying_inputs = (key->next_varying_inputs | VARYING_BIT_VAR(12)) & ~VARYING_BIT_FACE;
         if (next->initial->info.inputs_read & VARYING_BIT_PRIMITIVE_ID)
            key->gs.primitive_id = 1;
      }
   } else if (stage == PIPE_SHADER_FRAGMENT) {
      key->fs.missing_dual_src_outputs = sel_ctx->missing_dual_src_outputs;
      key->fs.frag_result_color_lowering = sel_ctx->frag_result_color_lowering;
      key->fs.manual_depth_range = sel_ctx->manual_depth_range;
      key->fs.polygon_stipple = sel_ctx->ctx->pstipple.enabled &&
         sel_ctx->ctx->gfx_pipeline_state.rast->base.poly_stipple_enable;
      key->fs.multisample_disabled = sel_ctx->ctx->gfx_pipeline_state.rast &&
         !sel_ctx->ctx->gfx_pipeline_state.rast->desc.MultisampleEnable;
      if (sel_ctx->ctx->gfx_pipeline_state.blend &&
          sel_ctx->ctx->gfx_pipeline_state.blend->desc.RenderTarget[0].LogicOpEnable &&
          !sel_ctx->ctx->gfx_pipeline_state.has_float_rtv) {
         key->fs.cast_to_uint = util_format_is_unorm(sel_ctx->ctx->fb.cbufs[0]->format);
         key->fs.cast_to_int = !key->fs.cast_to_uint;
      }
      if (sel_ctx->needs_point_sprite_lowering) {
         if (sel->initial->info.inputs_read & VARYING_BIT_FACE)
            key->prev_varying_outputs = (key->prev_varying_outputs | VARYING_BIT_VAR(12)) & ~VARYING_BIT_FACE;
         key->prev_varying_outputs |= sel->initial->info.inputs_read & (VARYING_BIT_PNTC | BITFIELD64_RANGE(VARYING_SLOT_TEX0, 8));
      }
   } else if (stage == PIPE_SHADER_TESS_CTRL) {
      if (next && next->initial->info.stage == MESA_SHADER_TESS_EVAL) {
         key->hs.primitive_mode = next->initial->info.tess._primitive_mode;
         key->hs.ccw = next->initial->info.tess.ccw;
         key->hs.point_mode = next->initial->info.tess.point_mode;
         key->hs.spacing = next->initial->info.tess.spacing;
      } else {
         key->hs.primitive_mode = TESS_PRIMITIVE_QUADS;
         key->hs.ccw = true;
         key->hs.point_mode = false;
         key->hs.spacing = TESS_SPACING_EQUAL;
      }
      key->hs.patch_vertices_in = MAX2(sel_ctx->ctx->patch_vertices, 1);
   } else if (stage == PIPE_SHADER_TESS_EVAL) {
      if (prev && prev->initial->info.stage == MESA_SHADER_TESS_CTRL)
         key->ds.tcs_vertices_out = prev->initial->info.tess.tcs_vertices_out;
      else
         key->ds.tcs_vertices_out = 32;
   }

   if (sel->samples_int_textures) {
      key->samples_int_textures = sel->samples_int_textures;
      key->n_texture_states = sel_ctx->ctx->num_sampler_views[stage];
      /* Copy only states with integer textures */
      for(int i = 0; i < key->n_texture_states; ++i) {
         auto& wrap_state = sel_ctx->ctx->tex_wrap_states[stage][i];
         if (wrap_state.is_int_sampler) {
            memcpy(&key->tex_wrap_states[i], &wrap_state, sizeof(wrap_state));
            key->swizzle_state[i] = sel_ctx->ctx->tex_swizzle_state[stage][i];
         } else {
            memset(&key->tex_wrap_states[i], 0, sizeof(key->tex_wrap_states[i]));
            key->swizzle_state[i] = { PIPE_SWIZZLE_X,  PIPE_SWIZZLE_Y,  PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W };
         }
      }
   }

   for (unsigned i = 0, e = sel_ctx->ctx->num_samplers[stage]; i < e; ++i) {
      if (!sel_ctx->ctx->samplers[stage][i] ||
          sel_ctx->ctx->samplers[stage][i]->filter == PIPE_TEX_FILTER_NEAREST)
         continue;

      if (sel_ctx->ctx->samplers[stage][i]->wrap_r == PIPE_TEX_WRAP_CLAMP)
         key->tex_saturate_r |= 1 << i;
      if (sel_ctx->ctx->samplers[stage][i]->wrap_s == PIPE_TEX_WRAP_CLAMP)
         key->tex_saturate_s |= 1 << i;
      if (sel_ctx->ctx->samplers[stage][i]->wrap_t == PIPE_TEX_WRAP_CLAMP)
         key->tex_saturate_t |= 1 << i;
   }

   if (sel->compare_with_lod_bias_grad) {
      key->n_texture_states = sel_ctx->ctx->num_sampler_views[stage];
      memcpy(key->sampler_compare_funcs, sel_ctx->ctx->tex_compare_func[stage],
             key->n_texture_states * sizeof(enum compare_func));
      memcpy(key->swizzle_state, sel_ctx->ctx->tex_swizzle_state[stage],
             key->n_texture_states * sizeof(dxil_texture_swizzle_state));
      if (!sel->samples_int_textures)
         memset(key->tex_wrap_states, 0, sizeof(key->tex_wrap_states[0]) * key->n_texture_states);
   }

   if (stage == PIPE_SHADER_VERTEX && sel_ctx->ctx->gfx_pipeline_state.ves) {
      key->vs.needs_format_emulation = sel_ctx->ctx->gfx_pipeline_state.ves->needs_format_emulation;
      if (key->vs.needs_format_emulation) {
         unsigned num_elements = sel_ctx->ctx->gfx_pipeline_state.ves->num_elements;

         memset(key->vs.format_conversion + num_elements,
                  0,
                  sizeof(key->vs.format_conversion) - (num_elements * sizeof(enum pipe_format)));

         memcpy(key->vs.format_conversion, sel_ctx->ctx->gfx_pipeline_state.ves->format_conversion,
                  num_elements * sizeof(enum pipe_format));
      }
   }

   if (stage == PIPE_SHADER_FRAGMENT &&
       sel_ctx->ctx->gfx_stages[PIPE_SHADER_GEOMETRY] &&
       sel_ctx->ctx->gfx_stages[PIPE_SHADER_GEOMETRY]->is_variant &&
       sel_ctx->ctx->gfx_stages[PIPE_SHADER_GEOMETRY]->gs_key.has_front_face) {
      key->fs.remap_front_facing = 1;
   }

   if (stage == PIPE_SHADER_COMPUTE && sel_ctx->variable_workgroup_size) {
      memcpy(key->cs.workgroup_size, sel_ctx->variable_workgroup_size, sizeof(key->cs.workgroup_size));
   }

   key->n_images = sel_ctx->ctx->num_image_views[stage];
   for (int i = 0; i < key->n_images; ++i) {
      key->image_format_conversion[i].emulated_format = sel_ctx->ctx->image_view_emulation_formats[stage][i];
      if (key->image_format_conversion[i].emulated_format != PIPE_FORMAT_NONE)
         key->image_format_conversion[i].view_format = sel_ctx->ctx->image_views[stage][i].format;
   }

   key->hash = d3d12_shader_key_hash(key);
}

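/* Pick (or build) the shader variant matching the current state: compute the
 * key, search the selector's variant list, and if nothing matches clone the
 * initial NIR, apply the key-dependent lowering passes, trim varyings against
 * the adjacent stages, compile to DXIL and prepend the result to the list.
 */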
static void
select_shader_variant(struct d3d12_selection_context *sel_ctx, d3d12_shader_selector *sel,
                     d3d12_shader_selector *prev, d3d12_shader_selector *next)
{
   struct d3d12_context *ctx = sel_ctx->ctx;
   d3d12_shader_key key;
   nir_shader *new_nir_variant;
   unsigned pstipple_binding = UINT32_MAX;

   d3d12_fill_shader_key(sel_ctx, &key, sel, prev, next);

   /* Check for an existing variant */
   for (d3d12_shader *variant = sel->first; variant;
        variant = variant->next_variant) {

      if (d3d12_compare_shader_keys(sel_ctx, &key, &variant->key)) {
         sel->current = variant;
         return;
      }
   }

   /* Clone the NIR shader */
   new_nir_variant = nir_shader_clone(sel, sel->initial);

   /* Apply any needed lowering passes */
   if (key.stage == PIPE_SHADER_GEOMETRY) {
      if (key.gs.writes_psize) {
         NIR_PASS_V(new_nir_variant, d3d12_lower_point_sprite,
                    !key.gs.sprite_origin_upper_left,
                    key.gs.point_size_per_vertex,
                    key.gs.sprite_coord_enable,
                    key.next_varying_inputs);
      }

      if (key.gs.primitive_id)
         NIR_PASS_V(new_nir_variant, d3d12_lower_primitive_id);

      if (key.gs.triangle_strip)
         NIR_PASS_V(new_nir_variant, d3d12_lower_triangle_strip);
   }
   else if (key.stage == PIPE_SHADER_FRAGMENT)
   {
      if (key.fs.polygon_stipple) {
         NIR_PASS_V(new_nir_variant, nir_lower_pstipple_fs,
                    &pstipple_binding, 0, false, nir_type_bool1);
      }

      if (key.fs.remap_front_facing)
         dxil_nir_forward_front_face(new_nir_variant);

      if (key.fs.missing_dual_src_outputs) {
         NIR_PASS_V(new_nir_variant, d3d12_add_missing_dual_src_target,
                    key.fs.missing_dual_src_outputs);
      } else if (key.fs.frag_result_color_lowering) {
         NIR_PASS_V(new_nir_variant, nir_lower_fragcolor,
                    key.fs.frag_result_color_lowering);
      }

      if (key.fs.manual_depth_range)
         NIR_PASS_V(new_nir_variant, d3d12_lower_depth_range);
   }


   if (sel->compare_with_lod_bias_grad) {
      STATIC_ASSERT(sizeof(dxil_texture_swizzle_state) ==
                    sizeof(nir_lower_tex_shadow_swizzle));

      NIR_PASS_V(new_nir_variant, nir_lower_tex_shadow, key.n_texture_states,
                 key.sampler_compare_funcs, (nir_lower_tex_shadow_swizzle *)key.swizzle_state);
   }

   if (key.stage == PIPE_SHADER_FRAGMENT) {
      if (key.fs.cast_to_uint)
         NIR_PASS_V(new_nir_variant, d3d12_lower_uint_cast, false);
      if (key.fs.cast_to_int)
         NIR_PASS_V(new_nir_variant, d3d12_lower_uint_cast, true);
   }

   if (key.n_images) {
      d3d12_image_format_conversion_info_arr image_format_arr = { key.n_images, key.image_format_conversion };
      NIR_PASS_V(new_nir_variant, d3d12_lower_image_casts, &image_format_arr);
   }

   if (key.stage == PIPE_SHADER_COMPUTE && sel->workgroup_size_variable) {
      new_nir_variant->info.workgroup_size[0] = key.cs.workgroup_size[0];
      new_nir_variant->info.workgroup_size[1] = key.cs.workgroup_size[1];
      new_nir_variant->info.workgroup_size[2] = key.cs.workgroup_size[2];
   }

   if (new_nir_variant->info.stage == MESA_SHADER_TESS_CTRL) {
      new_nir_variant->info.tess._primitive_mode = (tess_primitive_mode)key.hs.primitive_mode;
      new_nir_variant->info.tess.ccw = key.hs.ccw;
      new_nir_variant->info.tess.point_mode = key.hs.point_mode;
      new_nir_variant->info.tess.spacing = key.hs.spacing;

      NIR_PASS_V(new_nir_variant, dxil_nir_set_tcs_patches_in, key.hs.patch_vertices_in);
   } else if (new_nir_variant->info.stage == MESA_SHADER_TESS_EVAL) {
      new_nir_variant->info.tess.tcs_vertices_out = key.ds.tcs_vertices_out;
   }

   {
      struct nir_lower_tex_options tex_options = { };
      tex_options.lower_txp = ~0u; /* No equivalent for textureProj */
      tex_options.lower_rect = true;
      tex_options.lower_rect_offset = true;
      tex_options.saturate_s = key.tex_saturate_s;
      tex_options.saturate_r = key.tex_saturate_r;
      tex_options.saturate_t = key.tex_saturate_t;
      tex_options.lower_invalid_implicit_lod = true;
      tex_options.lower_tg4_offsets = true;

      NIR_PASS_V(new_nir_variant, nir_lower_tex, &tex_options);
   }

   /* Remove not-written inputs, and re-sort */
   if (prev) {
      NIR_PASS_V(new_nir_variant, dxil_nir_kill_undefined_varyings, key.prev_varying_outputs,
                 prev->initial->info.patch_outputs_written, key.prev_varying_frac_outputs);
      dxil_reassign_driver_locations(new_nir_variant, nir_var_shader_in, key.prev_varying_outputs,
                                     key.prev_varying_frac_outputs);
   }

   /* Remove not-read outputs and re-sort */
   if (next) {
      NIR_PASS_V(new_nir_variant, dxil_nir_kill_unused_outputs, key.next_varying_inputs,
                 next->initial->info.patch_inputs_read, key.next_varying_frac_inputs);
      dxil_reassign_driver_locations(new_nir_variant, nir_var_shader_out, key.next_varying_inputs,
                                     key.next_varying_frac_inputs);
   }

   nir_shader_gather_info(new_nir_variant, nir_shader_get_entrypoint(new_nir_variant));
   d3d12_shader *new_variant = compile_nir(ctx, sel, &key, new_nir_variant);
   assert(new_variant);

   /* keep track of polygon stipple texture binding */
   new_variant->pstipple_binding = pstipple_binding;

   /* prepend the new shader in the selector chain and pick it */
   new_variant->next_variant = sel->first;
   sel->current = sel->first = new_variant;
}

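/* Walk the bound graphics stages to find the nearest enabled stage before
 * (get_prev_shader) or after (get_next_shader) the given one; the switch
 * fallthroughs skip over unbound intermediate stages.
 */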
static d3d12_shader_selector *
get_prev_shader(struct d3d12_context *ctx, pipe_shader_type current)
{
   switch (current) {
   case PIPE_SHADER_VERTEX:
      return NULL;
   case PIPE_SHADER_FRAGMENT:
      if (ctx->gfx_stages[PIPE_SHADER_GEOMETRY])
         return ctx->gfx_stages[PIPE_SHADER_GEOMETRY];
      FALLTHROUGH;
   case PIPE_SHADER_GEOMETRY:
      if (ctx->gfx_stages[PIPE_SHADER_TESS_EVAL])
         return ctx->gfx_stages[PIPE_SHADER_TESS_EVAL];
      FALLTHROUGH;
   case PIPE_SHADER_TESS_EVAL:
      if (ctx->gfx_stages[PIPE_SHADER_TESS_CTRL])
         return ctx->gfx_stages[PIPE_SHADER_TESS_CTRL];
      FALLTHROUGH;
   case PIPE_SHADER_TESS_CTRL:
      return ctx->gfx_stages[PIPE_SHADER_VERTEX];
   default:
      unreachable("shader type not supported");
   }
}

static d3d12_shader_selector *
get_next_shader(struct d3d12_context *ctx, pipe_shader_type current)
{
   switch (current) {
   case PIPE_SHADER_VERTEX:
      if (ctx->gfx_stages[PIPE_SHADER_TESS_CTRL])
         return ctx->gfx_stages[PIPE_SHADER_TESS_CTRL];
      FALLTHROUGH;
   case PIPE_SHADER_TESS_CTRL:
      if (ctx->gfx_stages[PIPE_SHADER_TESS_EVAL])
         return ctx->gfx_stages[PIPE_SHADER_TESS_EVAL];
      FALLTHROUGH;
   case PIPE_SHADER_TESS_EVAL:
      if (ctx->gfx_stages[PIPE_SHADER_GEOMETRY])
         return ctx->gfx_stages[PIPE_SHADER_GEOMETRY];
      FALLTHROUGH;
   case PIPE_SHADER_GEOMETRY:
      return ctx->gfx_stages[PIPE_SHADER_FRAGMENT];
   case PIPE_SHADER_FRAGMENT:
      return NULL;
   default:
      unreachable("shader type not supported");
   }
}

enum tex_scan_flags {
   TEX_SAMPLE_INTEGER_TEXTURE = 1 << 0,
   TEX_CMP_WITH_LOD_BIAS_GRAD = 1 << 1,
   TEX_SCAN_ALL_FLAGS         = (1 << 2) - 1
};

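/* Scan every tex instruction to detect features that need workarounds:
 * sampling of integer-typed textures, and shadow compares combined with
 * explicit bias/LOD/gradients. Stops early once every flag has been seen.
 */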
static unsigned
scan_texture_use(nir_shader *nir)
{
   unsigned result = 0;
   nir_foreach_function_impl(impl, nir) {
      nir_foreach_block(block, impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type == nir_instr_type_tex) {
               auto tex = nir_instr_as_tex(instr);
               switch (tex->op) {
               case nir_texop_txb:
               case nir_texop_txl:
               case nir_texop_txd:
                  if (tex->is_shadow)
                     result |= TEX_CMP_WITH_LOD_BIAS_GRAD;
                  FALLTHROUGH;
               case nir_texop_tex:
                  if (tex->dest_type & (nir_type_int | nir_type_uint))
                     result |= TEX_SAMPLE_INTEGER_TEXTURE;
               default:
                  ;
               }
            }
            if (TEX_SCAN_ALL_FLAGS == result)
               return result;
         }
      }
   }
   return result;
}

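/* Gallium's stream-output info indexes outputs by compact slot; translate
 * those register indices back to VARYING_SLOT_* values using the shader's
 * outputs_written mask and return the mask of varyings used by streamout.
 */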
1290 static uint64_t
update_so_info(struct pipe_stream_output_info * so_info,uint64_t outputs_written)1291 update_so_info(struct pipe_stream_output_info *so_info,
1292                uint64_t outputs_written)
1293 {
1294    uint64_t so_outputs = 0;
1295    uint8_t reverse_map[64] = {0};
1296    unsigned slot = 0;
1297 
1298    while (outputs_written)
1299       reverse_map[slot++] = u_bit_scan64(&outputs_written);
1300 
1301    for (unsigned i = 0; i < so_info->num_outputs; i++) {
1302       struct pipe_stream_output *output = &so_info->output[i];
1303 
1304       /* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */
1305       output->register_index = reverse_map[output->register_index];
1306 
1307       so_outputs |= 1ull << output->register_index;
1308    }
1309 
1310    return so_outputs;
1311 }
1312 
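/* Stage-independent tail of shader creation: scan texture usage, run the
 * DXIL-oriented lowering passes (int cubemaps, subgroups, draw params,
 * double math), record per-component varying usage, and keep the resulting
 * NIR as the template from which shader variants are built. */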
1313 static struct d3d12_shader_selector *
1314 d3d12_create_shader_impl(struct d3d12_context *ctx,
1315                          struct d3d12_shader_selector *sel,
1316                          struct nir_shader *nir)
1317 {
1318    unsigned tex_scan_result = scan_texture_use(nir);
1319    sel->samples_int_textures = (tex_scan_result & TEX_SAMPLE_INTEGER_TEXTURE) != 0;
1320    sel->compare_with_lod_bias_grad = (tex_scan_result & TEX_CMP_WITH_LOD_BIAS_GRAD) != 0;
1321    sel->workgroup_size_variable = nir->info.workgroup_size_variable;
1322 
1323    /* Integer cube maps are not supported in DirectX because sampling is not supported
1324     * on integer textures and TextureLoad is not supported for cube maps, so we have to
1325     * lower integer cube maps to be handled like 2D texture arrays. */
1326    NIR_PASS_V(nir, dxil_nir_lower_int_cubemaps, true);
1327 
1328    NIR_PASS_V(nir, dxil_nir_lower_subgroup_id);
1329    NIR_PASS_V(nir, dxil_nir_lower_num_subgroups);
1330 
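   /* Lower subgroup operations: 32-bit, 4-component ballots, scalarized ops,
    * lowered subgroup masks, relative shuffles and inverse ballots; quad ops
    * are only kept for fragment and compute shaders. */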
1331    nir_lower_subgroups_options subgroup_options = {};
1332    subgroup_options.ballot_bit_size = 32;
1333    subgroup_options.ballot_components = 4;
1334    subgroup_options.lower_subgroup_masks = true;
1335    subgroup_options.lower_to_scalar = true;
1336    subgroup_options.lower_relative_shuffle = true;
1337    subgroup_options.lower_inverse_ballot = true;
1338    if (nir->info.stage != MESA_SHADER_FRAGMENT && nir->info.stage != MESA_SHADER_COMPUTE)
1339       subgroup_options.lower_quad = true;
1340    NIR_PASS_V(nir, nir_lower_subgroups, &subgroup_options);
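   /* Run boolean (1-bit) subgroup reductions, scans and quad swaps at 32 bits
    * instead. */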
1341    NIR_PASS_V(nir, nir_lower_bit_size, [](const nir_instr *instr, void *) -> unsigned {
1342       if (instr->type != nir_instr_type_intrinsic)
1343          return 0;
1344       nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1345       switch (intr->intrinsic) {
1346       case nir_intrinsic_quad_swap_horizontal:
1347       case nir_intrinsic_quad_swap_vertical:
1348       case nir_intrinsic_quad_swap_diagonal:
1349       case nir_intrinsic_reduce:
1350       case nir_intrinsic_inclusive_scan:
1351       case nir_intrinsic_exclusive_scan:
1352          return intr->def.bit_size == 1 ? 32 : 0;
1353       default:
1354          return 0;
1355       }
1356       }, NULL);
1357 
1358    // Ensure subgroup scans on bools are gone
1359    NIR_PASS_V(nir, nir_opt_dce);
1360    NIR_PASS_V(nir, dxil_nir_lower_unsupported_subgroup_scan);
1361 
1362    nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
1363 
1364    if (nir->info.stage == MESA_SHADER_COMPUTE)
1365       NIR_PASS_V(nir, d3d12_lower_compute_state_vars);
1366    NIR_PASS_V(nir, d3d12_lower_load_draw_params);
1367    NIR_PASS_V(nir, d3d12_lower_load_patch_vertices_in);
1368    NIR_PASS_V(nir, dxil_nir_lower_double_math);
1369 
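   /* Record which generic (VAR0+) inputs and outputs start at a non-zero
    * component (location_frac), one bit per slot component. */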
1370    nir_foreach_variable_with_modes(var, nir, nir_var_shader_in) {
1371       if (var->data.location >= VARYING_SLOT_VAR0 && var->data.location_frac) {
1372          sel->has_frac_inputs = 1;
1373          BITSET_SET(sel->varying_frac_inputs, (var->data.location - VARYING_SLOT_VAR0) * 4 + var->data.location_frac);
1374       }
1375    }
1376    nir_foreach_variable_with_modes(var, nir, nir_var_shader_out) {
1377       if (var->data.location >= VARYING_SLOT_VAR0 && var->data.location_frac) {
1378          sel->has_frac_outputs = 1;
1379          BITSET_SET(sel->varying_frac_outputs, (var->data.location - VARYING_SLOT_VAR0) * 4 + var->data.location_frac);
1380       }
1381    }
1382 
1383    /* Keep this initial shader as the blueprint for possible variants */
1384    sel->initial = nir;
1385    sel->initial_output_vars = nullptr;
1386    sel->initial_input_vars = nullptr;
1387    sel->gs_key.varyings = nullptr;
1388    sel->tcs_key.varyings = nullptr;
1389 
1390    return sel;
1391 }
1392 
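/* Gallium-facing constructor for graphics shaders: translate TGSI to NIR when
 * needed, capture the stream-output info, split clip/cull distances and other
 * needed varyings, make sure the tess-level patch constants exist, assign
 * driver locations, and finish in d3d12_create_shader_impl(). */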
1393 struct d3d12_shader_selector *
1394 d3d12_create_shader(struct d3d12_context *ctx,
1395                     pipe_shader_type stage,
1396                     const struct pipe_shader_state *shader)
1397 {
1398    struct d3d12_shader_selector *sel = rzalloc(nullptr, d3d12_shader_selector);
1399    sel->stage = stage;
1400 
1401    struct nir_shader *nir = NULL;
1402 
1403    if (shader->type == PIPE_SHADER_IR_NIR) {
1404       nir = (nir_shader *)shader->ir.nir;
1405    } else {
1406       assert(shader->type == PIPE_SHADER_IR_TGSI);
1407       nir = tgsi_to_nir(shader->tokens, ctx->base.screen, false);
1408    }
1409 
1410    assert(nir != NULL);
1411 
1412    nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
1413    memcpy(&sel->so_info, &shader->stream_output, sizeof(sel->so_info));
1414    update_so_info(&sel->so_info, nir->info.outputs_written);
1415 
1416    NIR_PASS_V(nir, dxil_nir_split_clip_cull_distance);
1417    NIR_PASS_V(nir, d3d12_split_needed_varyings);
1418 
1419    if (nir->info.stage == MESA_SHADER_TESS_EVAL || nir->info.stage == MESA_SHADER_TESS_CTRL) {
1420       /* D3D requires exactly-matching patch constant signatures. Since tess ctrl must write these vars,
1421        * tess eval must have them. */
1422       for (uint32_t i = 0; i < 2; ++i) {
1423          unsigned loc = i == 0 ? VARYING_SLOT_TESS_LEVEL_OUTER : VARYING_SLOT_TESS_LEVEL_INNER;
1424          nir_variable_mode mode = nir->info.stage == MESA_SHADER_TESS_EVAL ? nir_var_shader_in : nir_var_shader_out;
1425          nir_variable *var = nir_find_variable_with_location(nir, mode, loc);
1426          uint32_t arr_size = i == 0 ? 4 : 2;
1427          if (!var) {
1428             var = nir_variable_create(nir, mode, glsl_array_type(glsl_float_type(), arr_size, 0), i == 0 ? "outer" : "inner");
1429             var->data.location = loc;
1430             var->data.patch = true;
1431             var->data.compact = true;
1432 
1433             if (mode == nir_var_shader_out) {
1434                nir_builder b = nir_builder_create(nir_shader_get_entrypoint(nir));
1435                b.cursor = nir_after_impl(b.impl);
1436                for (uint32_t j = 0; j < arr_size; ++j)
1437                   nir_store_deref(&b, nir_build_deref_array_imm(&b, nir_build_deref_var(&b, var), j), nir_imm_zero(&b, 1, 32), 1);
1438             }
1439          }
1440       }
1441    }
1442 
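   /* Vertex-shader inputs keep their API order: sort them by driver_location
    * and renumber by attribute slot count.  Inputs of every other stage are
    * renumbered by dxil_reassign_driver_locations(). */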
1443    if (nir->info.stage != MESA_SHADER_VERTEX) {
1444       dxil_reassign_driver_locations(nir, nir_var_shader_in, 0, NULL);
1445    } else {
1446       dxil_sort_by_driver_location(nir, nir_var_shader_in);
1447 
1448       uint32_t driver_loc = 0;
1449       nir_foreach_variable_with_modes(var, nir, nir_var_shader_in) {
1450          var->data.driver_location = driver_loc;
1451          driver_loc += glsl_count_attribute_slots(var->type, false);
1452       }
1453    }
1454 
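   /* Fragment shaders get the gl_FragCoord w-transform and sample-position
    * lowering and have their outputs sorted; outputs of every other stage are
    * renumbered by dxil_reassign_driver_locations(). */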
1455    if (nir->info.stage != MESA_SHADER_FRAGMENT) {
1456       dxil_reassign_driver_locations(nir, nir_var_shader_out, 0, NULL);
1457    } else {
1458       NIR_PASS_V(nir, nir_lower_fragcoord_wtrans);
1459       NIR_PASS_V(nir, dxil_nir_lower_sample_pos);
1460       dxil_sort_ps_outputs(nir);
1461    }
1462 
1463    return d3d12_create_shader_impl(ctx, sel, nir);
1464 }
1465 
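/* Compute counterpart of d3d12_create_shader(): translate TGSI if necessary,
 * gather shader info and run the shared d3d12_create_shader_impl() path. */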
1466 struct d3d12_shader_selector *
1467 d3d12_create_compute_shader(struct d3d12_context *ctx,
1468                             const struct pipe_compute_state *shader)
1469 {
1470    struct d3d12_shader_selector *sel = rzalloc(nullptr, d3d12_shader_selector);
1471    sel->stage = PIPE_SHADER_COMPUTE;
1472 
1473    struct nir_shader *nir = NULL;
1474 
1475    if (shader->ir_type == PIPE_SHADER_IR_NIR) {
1476       nir = (nir_shader *)shader->prog;
1477    } else {
1478       assert(shader->ir_type == PIPE_SHADER_IR_TGSI);
1479       nir = tgsi_to_nir(shader->prog, ctx->base.screen, false);
1480    }
1481 
1482    nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
1483 
1484    return d3d12_create_shader_impl(ctx, sel, nir);
1485 }
1486 
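/* Draw-time variant selection: collect the draw/rasterizer state that can
 * require lowered variants (point sprites, fill and cull mode lowering,
 * provoking vertex, vertex reordering, missing dual-source outputs, fragment
 * color lowering, manual depth range), fix up any driver-internal GS/TCS
 * variants, then pick a variant for every bound stage, handing each stage its
 * previous and next bound stage. */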
1487 void
1488 d3d12_select_shader_variants(struct d3d12_context *ctx, const struct pipe_draw_info *dinfo)
1489 {
1490    struct d3d12_selection_context sel_ctx;
1491 
1492    sel_ctx.ctx = ctx;
1493    sel_ctx.needs_point_sprite_lowering = needs_point_sprite_lowering(ctx, dinfo);
1494    sel_ctx.fill_mode_lowered = fill_mode_lowered(ctx, dinfo);
1495    sel_ctx.cull_mode_lowered = cull_mode_lowered(ctx, sel_ctx.fill_mode_lowered);
1496    sel_ctx.provoking_vertex = get_provoking_vertex(&sel_ctx, &sel_ctx.alternate_tri, dinfo);
1497    sel_ctx.needs_vertex_reordering = needs_vertex_reordering(&sel_ctx, dinfo);
1498    sel_ctx.missing_dual_src_outputs = ctx->missing_dual_src_outputs;
1499    sel_ctx.frag_result_color_lowering = frag_result_color_lowering(ctx);
1500    sel_ctx.manual_depth_range = ctx->manual_depth_range;
1501 
1502    d3d12_shader_selector* gs = ctx->gfx_stages[PIPE_SHADER_GEOMETRY];
1503    if (gs == nullptr || gs->is_variant) {
1504       if (sel_ctx.fill_mode_lowered != PIPE_POLYGON_MODE_FILL || sel_ctx.needs_point_sprite_lowering || sel_ctx.needs_vertex_reordering)
1505          validate_geometry_shader_variant(&sel_ctx);
1506       else if (gs != nullptr) {
1507          ctx->gfx_stages[PIPE_SHADER_GEOMETRY] = NULL;
1508       }
1509    }
1510 
1511    validate_tess_ctrl_shader_variant(&sel_ctx);
1512 
1513    auto* stages = ctx->gfx_stages;
1514    d3d12_shader_selector* prev;
1515    d3d12_shader_selector* next;
1516    if (stages[PIPE_SHADER_VERTEX]) {
1517       next = get_next_shader(ctx, PIPE_SHADER_VERTEX);
1518       select_shader_variant(&sel_ctx, stages[PIPE_SHADER_VERTEX], nullptr, next);
1519    }
1520    if (stages[PIPE_SHADER_TESS_CTRL]) {
1521       prev = get_prev_shader(ctx, PIPE_SHADER_TESS_CTRL);
1522       next = get_next_shader(ctx, PIPE_SHADER_TESS_CTRL);
1523       select_shader_variant(&sel_ctx, stages[PIPE_SHADER_TESS_CTRL], prev, next);
1524    }
1525    if (stages[PIPE_SHADER_TESS_EVAL]) {
1526       prev = get_prev_shader(ctx, PIPE_SHADER_TESS_EVAL);
1527       next = get_next_shader(ctx, PIPE_SHADER_TESS_EVAL);
1528       select_shader_variant(&sel_ctx, stages[PIPE_SHADER_TESS_EVAL], prev, next);
1529    }
1530    if (stages[PIPE_SHADER_GEOMETRY]) {
1531       prev = get_prev_shader(ctx, PIPE_SHADER_GEOMETRY);
1532       next = get_next_shader(ctx, PIPE_SHADER_GEOMETRY);
1533       select_shader_variant(&sel_ctx, stages[PIPE_SHADER_GEOMETRY], prev, next);
1534    }
1535    if (stages[PIPE_SHADER_FRAGMENT]) {
1536       prev = get_prev_shader(ctx, PIPE_SHADER_FRAGMENT);
1537       select_shader_variant(&sel_ctx, stages[PIPE_SHADER_FRAGMENT], prev, nullptr);
1538    }
1539 }
1540 
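/* Return the dispatch's block dimensions when the bound compute shader uses a
 * variable workgroup size, NULL otherwise. */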
1541 static const unsigned *
1542 workgroup_size_variable(struct d3d12_context *ctx,
1543                         const struct pipe_grid_info *info)
1544 {
1545    if (ctx->compute_state->workgroup_size_variable)
1546       return info->block;
1547    return nullptr;
1548 }
1549 
1550 void
1551 d3d12_select_compute_shader_variants(struct d3d12_context *ctx, const struct pipe_grid_info *info)
1552 {
1553    struct d3d12_selection_context sel_ctx = {};
1554 
1555    sel_ctx.ctx = ctx;
1556    sel_ctx.variable_workgroup_size = workgroup_size_variable(ctx, info);
1557 
1558    select_shader_variant(&sel_ctx, ctx->compute_state, nullptr, nullptr);
1559 }
1560 
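/* Free each compiled variant's DXIL bytecode, then the initial NIR and the
 * selector itself. */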
1561 void
1562 d3d12_shader_free(struct d3d12_shader_selector *sel)
1563 {
1564    auto shader = sel->first;
1565    while (shader) {
1566       free(shader->bytecode);
1567       shader = shader->next_variant;
1568    }
1569 
1570    ralloc_free((void*)sel->initial);
1571    ralloc_free(sel);
1572 }
1573