1 /*
2 * Copyright © Microsoft Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "d3d12_compiler.h"
25 #include "d3d12_context.h"
26 #include "d3d12_debug.h"
27 #include "d3d12_screen.h"
28 #include "d3d12_nir_passes.h"
29 #include "nir_to_dxil.h"
30 #include "dxil_nir.h"
31 #include "dxil_nir_lower_int_cubemaps.h"
32
33 #include "pipe/p_state.h"
34
35 #include "nir.h"
36 #include "nir/nir_draw_helpers.h"
37 #include "nir/tgsi_to_nir.h"
38 #include "compiler/nir/nir_builder.h"
39
40 #include "util/hash_table.h"
41 #include "util/u_memory.h"
42 #include "util/u_prim.h"
43 #include "util/u_simple_shaders.h"
44 #include "util/u_dl.h"
45
46 #include <dxguids/dxguids.h>
47
48 #ifdef _WIN32
49 #include "dxil_validator.h"
50 #endif
51
52 const void *
d3d12_get_compiler_options(struct pipe_screen * screen,enum pipe_shader_ir ir,enum pipe_shader_type shader)53 d3d12_get_compiler_options(struct pipe_screen *screen,
54 enum pipe_shader_ir ir,
55 enum pipe_shader_type shader)
56 {
57 assert(ir == PIPE_SHADER_IR_NIR);
58 return &d3d12_screen(screen)->nir_options;
59 }
60
61 static uint32_t
resource_dimension(enum glsl_sampler_dim dim)62 resource_dimension(enum glsl_sampler_dim dim)
63 {
64 switch (dim) {
65 case GLSL_SAMPLER_DIM_1D:
66 return RESOURCE_DIMENSION_TEXTURE1D;
67 case GLSL_SAMPLER_DIM_2D:
68 return RESOURCE_DIMENSION_TEXTURE2D;
69 case GLSL_SAMPLER_DIM_3D:
70 return RESOURCE_DIMENSION_TEXTURE3D;
71 case GLSL_SAMPLER_DIM_CUBE:
72 return RESOURCE_DIMENSION_TEXTURECUBE;
73 default:
74 return RESOURCE_DIMENSION_UNKNOWN;
75 }
76 }
77
78 static bool
can_remove_dead_sampler(nir_variable * var,void * data)79 can_remove_dead_sampler(nir_variable *var, void *data)
80 {
81 const struct glsl_type *base_type = glsl_without_array(var->type);
82 return glsl_type_is_sampler(base_type) && !glsl_type_is_bare_sampler(base_type);
83 }
84
85 static struct d3d12_shader *
compile_nir(struct d3d12_context * ctx,struct d3d12_shader_selector * sel,struct d3d12_shader_key * key,struct nir_shader * nir)86 compile_nir(struct d3d12_context *ctx, struct d3d12_shader_selector *sel,
87 struct d3d12_shader_key *key, struct nir_shader *nir)
88 {
89 struct d3d12_screen *screen = d3d12_screen(ctx->base.screen);
90 struct d3d12_shader *shader = rzalloc(sel, d3d12_shader);
91 shader->key = *key;
92
93 if (shader->key.n_texture_states > 0) {
94 shader->key.tex_wrap_states = (dxil_wrap_sampler_state*)ralloc_size(sel, sizeof(dxil_wrap_sampler_state) * shader->key.n_texture_states);
95 memcpy(shader->key.tex_wrap_states, key->tex_wrap_states, sizeof(dxil_wrap_sampler_state) * shader->key.n_texture_states);
96 }
97 else
98 shader->key.tex_wrap_states = nullptr;
99
100 shader->nir = nir;
101 sel->current = shader;
102
103 NIR_PASS_V(nir, nir_lower_samplers);
104 NIR_PASS_V(nir, dxil_nir_split_typed_samplers);
105
106 NIR_PASS_V(nir, nir_opt_dce);
107 struct nir_remove_dead_variables_options dead_var_opts = {};
108 dead_var_opts.can_remove_var = can_remove_dead_sampler;
109 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_uniform, &dead_var_opts);
110
111 if (key->samples_int_textures)
112 NIR_PASS_V(nir, dxil_lower_sample_to_txf_for_integer_tex,
113 key->n_texture_states, key->tex_wrap_states, key->swizzle_state,
114 screen->base.get_paramf(&screen->base, PIPE_CAPF_MAX_TEXTURE_LOD_BIAS));
115
116 if (key->stage == PIPE_SHADER_VERTEX && key->vs.needs_format_emulation)
117 dxil_nir_lower_vs_vertex_conversion(nir, key->vs.format_conversion);
118
119 if (key->last_vertex_processing_stage) {
120 if (key->invert_depth)
121 NIR_PASS_V(nir, d3d12_nir_invert_depth, key->invert_depth, key->halfz);
122 if (!key->halfz)
123 NIR_PASS_V(nir, nir_lower_clip_halfz);
124 NIR_PASS_V(nir, d3d12_lower_yflip);
125 }
126
127 NIR_PASS_V(nir, d3d12_lower_state_vars, shader);
128
129 const struct dxil_nir_lower_loads_stores_options loads_stores_options = {};
130 NIR_PASS_V(nir, dxil_nir_lower_loads_stores_to_dxil, &loads_stores_options);
131
132 if (key->stage == PIPE_SHADER_FRAGMENT && key->fs.multisample_disabled)
133 NIR_PASS_V(nir, d3d12_disable_multisampling);
134
135 struct nir_to_dxil_options opts = {};
136 opts.interpolate_at_vertex = screen->have_load_at_vertex;
137 opts.lower_int16 = !screen->opts4.Native16BitShaderOpsSupported;
138 opts.last_ubo_is_not_arrayed = shader->num_state_vars > 0;
139 if (key->stage == PIPE_SHADER_FRAGMENT)
140 opts.provoking_vertex = key->fs.provoking_vertex;
141 opts.input_clip_size = key->input_clip_size;
142 opts.environment = DXIL_ENVIRONMENT_GL;
143 opts.shader_model_max = screen->max_shader_model;
144 #ifdef _WIN32
145 opts.validator_version_max = dxil_get_validator_version(ctx->dxil_validator);
146 #endif
147
148 struct blob tmp;
149 if (!nir_to_dxil(nir, &opts, NULL, &tmp)) {
150 debug_printf("D3D12: nir_to_dxil failed\n");
151 return NULL;
152 }
153
154 // Non-ubo variables
155 shader->begin_srv_binding = (UINT_MAX);
156 nir_foreach_variable_with_modes(var, nir, nir_var_uniform) {
157 auto type_no_array = glsl_without_array(var->type);
158 if (glsl_type_is_texture(type_no_array)) {
159 unsigned count = glsl_type_is_array(var->type) ? glsl_get_aoa_size(var->type) : 1;
160 for (unsigned i = 0; i < count; ++i) {
161 shader->srv_bindings[var->data.binding + i].dimension = resource_dimension(glsl_get_sampler_dim(type_no_array));
162 }
163 shader->begin_srv_binding = MIN2(var->data.binding, shader->begin_srv_binding);
164 shader->end_srv_binding = MAX2(var->data.binding + count, shader->end_srv_binding);
165 }
166 }
167
168 nir_foreach_image_variable(var, nir) {
169 auto type_no_array = glsl_without_array(var->type);
170 unsigned count = glsl_type_is_array(var->type) ? glsl_get_aoa_size(var->type) : 1;
171 for (unsigned i = 0; i < count; ++i) {
172 shader->uav_bindings[var->data.driver_location + i].dimension = resource_dimension(glsl_get_sampler_dim(type_no_array));
173 }
174 }
175
176 // Ubo variables
177 if(nir->info.num_ubos) {
178 shader->begin_ubo_binding = shader->nir->num_uniforms > 0 || !shader->nir->info.first_ubo_is_default_ubo ? 0 : 1;
179 // Ignore state_vars ubo as it is bound as root constants
180 shader->end_ubo_binding = nir->info.num_ubos - (shader->state_vars_used ? 1 : 0);
181 }
182
183 #ifdef _WIN32
184 if (ctx->dxil_validator) {
185 if (!(d3d12_debug & D3D12_DEBUG_EXPERIMENTAL)) {
186 char *err;
187 if (!dxil_validate_module(ctx->dxil_validator, tmp.data,
188 tmp.size, &err) && err) {
189 debug_printf(
190 "== VALIDATION ERROR =============================================\n"
191 "%s\n"
192 "== END ==========================================================\n",
193 err);
194 ralloc_free(err);
195 }
196 }
197
198 if (d3d12_debug & D3D12_DEBUG_DISASS) {
199 char *str = dxil_disasm_module(ctx->dxil_validator, tmp.data,
200 tmp.size);
201 fprintf(stderr,
202 "== BEGIN SHADER ============================================\n"
203 "%s\n"
204 "== END SHADER ==============================================\n",
205 str);
206 ralloc_free(str);
207 }
208 }
209 #endif
210
211 blob_finish_get_buffer(&tmp, &shader->bytecode, &shader->bytecode_length);
212
213 if (d3d12_debug & D3D12_DEBUG_DXIL) {
214 char buf[256];
215 static int i;
216 snprintf(buf, sizeof(buf), "dump%02d.dxil", i++);
217 FILE *fp = fopen(buf, "wb");
218 fwrite(shader->bytecode, sizeof(char), shader->bytecode_length, fp);
219 fclose(fp);
220 fprintf(stderr, "wrote '%s'...\n", buf);
221 }
222 return shader;
223 }
224
/* Scratch state gathered while selecting shader variants for the current
 * draw; filled once per selection pass and consulted when building the
 * per-stage shader keys. */
struct d3d12_selection_context {
   struct d3d12_context *ctx;
   /* A GS variant must be injected for point sprites / wide points
    * (see needs_point_sprite_lowering). */
   bool needs_point_sprite_lowering;
   /* Vertex order must be rewritten by a GS variant
    * (see needs_vertex_reordering). */
   bool needs_vertex_reordering;
   /* Zero-based provoking-vertex index (see get_provoking_vertex). */
   unsigned provoking_vertex;
   /* Set for triangle-strip topologies whose triangles alternate
    * (see get_provoking_vertex). */
   bool alternate_tri;
   /* PIPE_POLYGON_MODE_* that needs GS-based emulation; FILL means none
    * (see fill_mode_lowered). */
   unsigned fill_mode_lowered;
   /* PIPE_FACE_* culling applied during fill-mode lowering; NONE means none
    * (see cull_mode_lowered). */
   unsigned cull_mode_lowered;
   /* Depth range must be applied manually (see manual_depth_range). */
   bool manual_depth_range;
   /* Bitmask of dual-src output indices the FS fails to write
    * (see missing_dual_src_outputs). */
   unsigned missing_dual_src_outputs;
   /* Number of cbufs gl_FragColor must be broadcast to; 0 means no lowering
    * (see frag_result_color_lowering). */
   unsigned frag_result_color_lowering;
   /* Workgroup size for compute shaders with variable local size —
    * presumably points at the launch-time size; confirm against callers. */
   const unsigned *variable_workgroup_size;
};
238
239 unsigned
missing_dual_src_outputs(struct d3d12_context * ctx)240 missing_dual_src_outputs(struct d3d12_context *ctx)
241 {
242 if (!ctx->gfx_pipeline_state.blend || !ctx->gfx_pipeline_state.blend->is_dual_src)
243 return 0;
244
245 struct d3d12_shader_selector *fs = ctx->gfx_stages[PIPE_SHADER_FRAGMENT];
246 if (!fs)
247 return 0;
248
249 const nir_shader *s = fs->initial;
250
251 unsigned indices_seen = 0;
252 nir_foreach_function_impl(impl, s) {
253 nir_foreach_block(block, impl) {
254 nir_foreach_instr(instr, block) {
255 if (instr->type != nir_instr_type_intrinsic)
256 continue;
257
258 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
259 if (intr->intrinsic != nir_intrinsic_store_deref)
260 continue;
261
262 nir_variable *var = nir_intrinsic_get_var(intr, 0);
263 if (var->data.mode != nir_var_shader_out)
264 continue;
265
266 unsigned index = var->data.index;
267 if (var->data.location > FRAG_RESULT_DATA0)
268 index = var->data.location - FRAG_RESULT_DATA0;
269 else if (var->data.location != FRAG_RESULT_COLOR &&
270 var->data.location != FRAG_RESULT_DATA0)
271 continue;
272
273 indices_seen |= 1u << index;
274 if ((indices_seen & 3) == 3)
275 return 0;
276 }
277 }
278 }
279
280 return 3 & ~indices_seen;
281 }
282
283 static unsigned
frag_result_color_lowering(struct d3d12_context * ctx)284 frag_result_color_lowering(struct d3d12_context *ctx)
285 {
286 struct d3d12_shader_selector *fs = ctx->gfx_stages[PIPE_SHADER_FRAGMENT];
287 assert(fs);
288
289 if (fs->initial->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_COLOR))
290 return ctx->fb.nr_cbufs > 1 ? ctx->fb.nr_cbufs : 0;
291
292 return 0;
293 }
294
295 bool
manual_depth_range(struct d3d12_context * ctx)296 manual_depth_range(struct d3d12_context *ctx)
297 {
298 if (!d3d12_need_zero_one_depth_range(ctx))
299 return false;
300
301 /**
302 * If we can't use the D3D12 zero-one depth-range, we might have to apply
303 * depth-range ourselves.
304 *
305 * Because we only need to override the depth-range to zero-one range in
306 * the case where we write frag-depth, we only need to apply manual
307 * depth-range to gl_FragCoord.z.
308 *
309 * No extra care is needed to be taken in the case where gl_FragDepth is
310 * written conditionally, because the GLSL 4.60 spec states:
311 *
312 * If a shader statically assigns a value to gl_FragDepth, and there
313 * is an execution path through the shader that does not set
314 * gl_FragDepth, then the value of the fragment’s depth may be
315 * undefined for executions of the shader that take that path. That
316 * is, if the set of linked fragment shaders statically contain a
317 * write to gl_FragDepth, then it is responsible for always writing
318 * it.
319 */
320
321 struct d3d12_shader_selector *fs = ctx->gfx_stages[PIPE_SHADER_FRAGMENT];
322 return fs && fs->initial->info.inputs_read & VARYING_BIT_POS;
323 }
324
325 static bool
needs_edge_flag_fix(enum mesa_prim mode)326 needs_edge_flag_fix(enum mesa_prim mode)
327 {
328 return (mode == MESA_PRIM_QUADS ||
329 mode == MESA_PRIM_QUAD_STRIP ||
330 mode == MESA_PRIM_POLYGON);
331 }
332
/* Decide whether polygon fill mode must be emulated with a GS variant.
 * Returns the PIPE_POLYGON_MODE_* needing lowering, or
 * PIPE_POLYGON_MODE_FILL when no lowering is required. */
static unsigned
fill_mode_lowered(struct d3d12_context *ctx, const struct pipe_draw_info *dinfo)
{
   struct d3d12_shader_selector *vs = ctx->gfx_stages[PIPE_SHADER_VERTEX];

   /* Bail out when a user GS is bound (no room for a variant), when there is
    * no rasterizer state, or for non-triangle primitives. */
   if ((ctx->gfx_stages[PIPE_SHADER_GEOMETRY] != NULL &&
        !ctx->gfx_stages[PIPE_SHADER_GEOMETRY]->is_variant) ||
       ctx->gfx_pipeline_state.rast == NULL ||
       (dinfo->mode != MESA_PRIM_TRIANGLES &&
        dinfo->mode != MESA_PRIM_TRIANGLE_STRIP))
      return PIPE_POLYGON_MODE_FILL;

   /* D3D12 supports line mode (wireframe) but doesn't support edge flags,
    * so line fill mode only needs lowering when edge flags are involved:
    * either written by the VS or implied by quad/polygon primitives. */
   if (((ctx->gfx_pipeline_state.rast->base.fill_front == PIPE_POLYGON_MODE_LINE &&
         ctx->gfx_pipeline_state.rast->base.cull_face != PIPE_FACE_FRONT) ||
        (ctx->gfx_pipeline_state.rast->base.fill_back == PIPE_POLYGON_MODE_LINE &&
         ctx->gfx_pipeline_state.rast->base.cull_face == PIPE_FACE_FRONT)) &&
       (vs->initial->info.outputs_written & VARYING_BIT_EDGE ||
        needs_edge_flag_fix(ctx->initial_api_prim)))
      return PIPE_POLYGON_MODE_LINE;

   /* Point fill mode is always lowered (only fill_front is consulted here —
    * NOTE(review): fill_back is ignored for points; confirm intentional). */
   if (ctx->gfx_pipeline_state.rast->base.fill_front == PIPE_POLYGON_MODE_POINT)
      return PIPE_POLYGON_MODE_POINT;

   return PIPE_POLYGON_MODE_FILL;
}
359
360 static bool
has_stream_out_for_streams(struct d3d12_context * ctx)361 has_stream_out_for_streams(struct d3d12_context *ctx)
362 {
363 unsigned mask = ctx->gfx_stages[PIPE_SHADER_GEOMETRY]->initial->info.gs.active_stream_mask & ~1;
364 for (unsigned i = 0; i < ctx->gfx_pipeline_state.so_info.num_outputs; ++i) {
365 unsigned stream = ctx->gfx_pipeline_state.so_info.output[i].stream;
366 if (((1 << stream) & mask) &&
367 ctx->so_buffer_views[stream].SizeInBytes)
368 return true;
369 }
370 return false;
371 }
372
/* Decide whether point rendering for this draw must be handled through a
 * geometry-shader variant (point sprites / wide points). */
static bool
needs_point_sprite_lowering(struct d3d12_context *ctx, const struct pipe_draw_info *dinfo)
{
   struct d3d12_shader_selector *vs = ctx->gfx_stages[PIPE_SHADER_VERTEX];
   struct d3d12_shader_selector *gs = ctx->gfx_stages[PIPE_SHADER_GEOMETRY];

   if (gs != NULL && !gs->is_variant) {
      /* There is a user GS; check if it outputs points with PSIZE (explicitly
       * written, or implied by a wide rasterizer point size).  Skip lowering
       * when extra GS streams have stream-output bound to them. */
      return (gs->initial->info.gs.output_primitive == MESA_PRIM_POINTS &&
              (gs->initial->info.outputs_written & VARYING_BIT_PSIZ ||
               ctx->gfx_pipeline_state.rast->base.point_size > 1.0) &&
              (gs->initial->info.gs.active_stream_mask == 1 ||
               !has_stream_out_for_streams(ctx)));
   } else {
      /* No user GS; check if we are drawing wide points (directly, or via
       * lowered point fill mode) whose size comes from the rasterizer state
       * or a per-vertex PSIZE output.  The VS must produce a position. */
      return ((dinfo->mode == MESA_PRIM_POINTS ||
               fill_mode_lowered(ctx, dinfo) == PIPE_POLYGON_MODE_POINT) &&
              (ctx->gfx_pipeline_state.rast->base.point_size > 1.0 ||
               ctx->gfx_pipeline_state.rast->base.offset_point ||
               (ctx->gfx_pipeline_state.rast->base.point_size_per_vertex &&
                vs->initial->info.outputs_written & VARYING_BIT_PSIZ)) &&
              (vs->initial->info.outputs_written & VARYING_BIT_POS));
   }
}
397
/* Cull face the lowering GS variant must apply itself; PIPE_FACE_NONE when
 * no lowering applies (user GS bound, no rast state, or culling disabled).
 * NOTE(review): the fill_mode parameter is currently unused — confirm
 * whether it was intended to gate this decision. */
static unsigned
cull_mode_lowered(struct d3d12_context *ctx, unsigned fill_mode)
{
   if ((ctx->gfx_stages[PIPE_SHADER_GEOMETRY] != NULL &&
        !ctx->gfx_stages[PIPE_SHADER_GEOMETRY]->is_variant) ||
       ctx->gfx_pipeline_state.rast == NULL ||
       ctx->gfx_pipeline_state.rast->base.cull_face == PIPE_FACE_NONE)
      return PIPE_FACE_NONE;

   return ctx->gfx_pipeline_state.rast->base.cull_face;
}
409
/* Compute the zero-based provoking-vertex index for the current draw, and
 * set *alternate when the rasterized topology is a (possibly adjacent)
 * triangle strip whose triangles alternate orientation. */
static unsigned
get_provoking_vertex(struct d3d12_selection_context *sel_ctx, bool *alternate, const struct pipe_draw_info *dinfo)
{
   /* Patches have no provoking-vertex concept. */
   if (dinfo->mode == GL_PATCHES) {
      *alternate = false;
      return 0;
   }

   struct d3d12_shader_selector *vs = sel_ctx->ctx->gfx_stages[PIPE_SHADER_VERTEX];
   struct d3d12_shader_selector *gs = sel_ctx->ctx->gfx_stages[PIPE_SHADER_GEOMETRY];
   struct d3d12_shader_selector *last_vertex_stage = gs && !gs->is_variant ? gs : vs;

   /* The primitive type that reaches the rasterizer: the user GS's output
    * primitive when one is bound, otherwise the draw mode. */
   enum mesa_prim mode;
   switch (last_vertex_stage->stage) {
   case PIPE_SHADER_GEOMETRY:
      mode = (enum mesa_prim)last_vertex_stage->initial->info.gs.output_primitive;
      break;
   case PIPE_SHADER_VERTEX:
      mode = (enum mesa_prim)dinfo->mode;
      break;
   default:
      unreachable("Tesselation shaders are not supported");
   }

   bool flatshade_first = sel_ctx->ctx->gfx_pipeline_state.rast &&
                          sel_ctx->ctx->gfx_pipeline_state.rast->base.flatshade_first;
   /* Strips alternate unless a user GS emits no more vertices than a single
    * primitive needs. */
   *alternate = (mode == GL_TRIANGLE_STRIP || mode == GL_TRIANGLE_STRIP_ADJACENCY) &&
                (!gs || gs->is_variant ||
                 gs->initial->info.gs.vertices_out > u_prim_vertex_count(mode)->min);
   /* First vertex when flat-shading-first, else the primitive's last vertex. */
   return flatshade_first ? 0 : u_prim_vertex_count(mode)->min - 1;
}
441
/* True when the bound fragment shader consumes at least one
 * flat-interpolated, non-sysval varying. */
bool
has_flat_varyings(struct d3d12_context *ctx)
{
   struct d3d12_shader_selector *fs = ctx->gfx_stages[PIPE_SHADER_FRAGMENT];

   if (!fs)
      return false;

   nir_foreach_variable_with_modes(input, fs->initial,
                                   nir_var_shader_in) {
      if (input->data.interpolation == INTERP_MODE_FLAT &&
          /* Disregard sysvals: only slots <= TEX7 or >= VAR0 count; the
           * locations in between are system-value-ish inputs. */
          (input->data.location >= VARYING_SLOT_VAR0 ||
           input->data.location <= VARYING_SLOT_TEX7))
         return true;
   }

   return false;
}
461
/* Whether vertices must be reordered by a GS variant so flat shading and/or
 * transform feedback produce the results GL expects.
 * Side effect: resets sel_ctx->provoking_vertex to 0 in the xfb-only case. */
static bool
needs_vertex_reordering(struct d3d12_selection_context *sel_ctx, const struct pipe_draw_info *dinfo)
{
   struct d3d12_context *ctx = sel_ctx->ctx;
   bool flat = ctx->has_flat_varyings;
   bool xfb = ctx->gfx_pipeline_state.num_so_targets > 0;

   /* Fill-mode lowering already injects its own GS variant. */
   if (fill_mode_lowered(ctx, dinfo) != PIPE_POLYGON_MODE_FILL)
      return false;

   /* TODO add support for line primitives */
   if (u_reduced_prim((mesa_prim)dinfo->mode) == MESA_PRIM_LINES)
      return false;

   /* When flat shading a triangle and provoking vertex is not the first one, we use load_at_vertex.
      If not available for this adapter, or if it's a triangle strip, we need to reorder the vertices */
   if (flat && sel_ctx->provoking_vertex >= 2 && (!d3d12_screen(ctx->base.screen)->have_load_at_vertex ||
                                                  sel_ctx->alternate_tri))
      return true;

   /* When transform feedback is enabled and the output is alternating (triangle strip or triangle
      strip with adjacency), we need to reorder vertices to get the order expected by OpenGL. This
      only works when there is no flat shading involved. In that scenario, we don't care about
      the provoking vertex. */
   if (xfb && !flat && sel_ctx->alternate_tri) {
      sel_ctx->provoking_vertex = 0;
      return true;
   }

   return false;
}
493
/* Collect the varying layout (slot types, interpolation, driver locations)
 * used by shader `s` for the given modes and slot mask, then intern it in
 * the screen-wide varying-info set so identical layouts share one pointer.
 * `patch` selects per-patch varyings instead of regular ones.
 * Returns the interned, screen-lifetime d3d12_varying_info. */
static d3d12_varying_info*
fill_varyings(struct d3d12_context *ctx, const nir_shader *s,
              nir_variable_mode modes, uint64_t mask, bool patch)
{
   struct d3d12_varying_info info;

   info.max = 0;
   info.mask = 0;
   info.hash = 0;

   nir_foreach_variable_with_modes(var, s, modes) {
      unsigned slot = var->data.location;
      bool is_generic_patch = slot >= VARYING_SLOT_PATCH0;
      /* Keep only vars matching the requested patch-ness. */
      if (patch ^ is_generic_patch)
         continue;
      if (is_generic_patch)
         slot -= VARYING_SLOT_PATCH0;
      uint64_t slot_bit = BITFIELD64_BIT(slot);

      if (!(mask & slot_bit))
         continue;

      /* First touch of this slot: clear it (info lives on the stack and is
       * otherwise uninitialized) and grow the max used index. */
      if ((info.mask & slot_bit) == 0) {
         memset(info.slots + slot, 0, sizeof(info.slots[0]));
         info.max = MAX2(info.max, slot);
      }

      const struct glsl_type *type = var->type;
      /* Arrayed IO (per-vertex arrays in tess/geom stages) is recorded with
       * the inner element type. */
      if (nir_is_arrayed_io(var, s->info.stage))
         type = glsl_get_array_element(type);
      info.slots[slot].types[var->data.location_frac] = type;

      info.slots[slot].patch = var->data.patch;
      auto& var_slot = info.slots[slot].vars[var->data.location_frac];
      var_slot.driver_location = var->data.driver_location;
      var_slot.interpolation = var->data.interpolation;
      var_slot.compact = var->data.compact;
      var_slot.always_active_io = var->data.always_active_io;
      info.mask |= slot_bit;
      info.slots[slot].location_frac_mask |= (1 << var->data.location_frac);
   }

   /* Zero every unused slot up to max so hashing and comparison operate on
    * deterministic bytes, and fold the used slots into the hash. */
   for (uint32_t i = 0; i <= info.max; ++i) {
      if (((1llu << i) & info.mask) == 0)
         memset(info.slots + i, 0, sizeof(info.slots[0]));
      else
         info.hash = _mesa_hash_data_with_seed(info.slots + i, sizeof(info.slots[0]), info.hash);
   }
   info.hash = _mesa_hash_data_with_seed(&info.mask, sizeof(info.mask), info.hash);

   struct d3d12_screen *screen = d3d12_screen(ctx->base.screen);

   /* Intern under the screen lock: return the existing entry when an equal
    * layout is already present, otherwise insert a heap copy. */
   mtx_lock(&screen->varying_info_mutex);
   set_entry *pentry = _mesa_set_search_pre_hashed(screen->varying_info_set, info.hash, &info);
   if (pentry != nullptr) {
      mtx_unlock(&screen->varying_info_mutex);
      return (d3d12_varying_info*)pentry->key;
   }
   else {
      d3d12_varying_info *key = MALLOC_STRUCT(d3d12_varying_info);
      *key = info;

      _mesa_set_add_pre_hashed(screen->varying_info_set, info.hash, key);

      mtx_unlock(&screen->varying_info_mutex);
      return key;
   }
}
562
563 static void
fill_flat_varyings(struct d3d12_gs_variant_key * key,d3d12_shader_selector * fs)564 fill_flat_varyings(struct d3d12_gs_variant_key *key, d3d12_shader_selector *fs)
565 {
566 if (!fs)
567 return;
568
569 nir_foreach_variable_with_modes(input, fs->initial,
570 nir_var_shader_in) {
571 if (input->data.interpolation == INTERP_MODE_FLAT)
572 key->flat_varyings |= BITFIELD64_BIT(input->data.location);
573 }
574 }
575
576 bool
d3d12_compare_varying_info(const d3d12_varying_info * expect,const d3d12_varying_info * have)577 d3d12_compare_varying_info(const d3d12_varying_info *expect, const d3d12_varying_info *have)
578 {
579 if (expect == have)
580 return true;
581
582 if (expect == nullptr || have == nullptr)
583 return false;
584
585 if (expect->mask != have->mask
586 || expect->max != have->max)
587 return false;
588
589 if (!expect->mask)
590 return true;
591
592 /* 6 is a rough (wild) guess for a bulk memcmp cross-over point. When there
593 * are a small number of slots present, individual is much faster. */
594 if (util_bitcount64(expect->mask) < 6) {
595 uint64_t mask = expect->mask;
596 while (mask) {
597 int slot = u_bit_scan64(&mask);
598 if (memcmp(&expect->slots[slot], &have->slots[slot], sizeof(have->slots[slot])))
599 return false;
600 }
601
602 return true;
603 }
604
605 return !memcmp(expect->slots, have->slots, sizeof(expect->slots[0]) * expect->max);
606 }
607
608
varying_info_hash(const void * info)609 uint32_t varying_info_hash(const void *info) {
610 return ((d3d12_varying_info*)info)->hash;
611 }
varying_info_compare(const void * a,const void * b)612 bool varying_info_compare(const void *a, const void *b) {
613 return d3d12_compare_varying_info((d3d12_varying_info*)a, (d3d12_varying_info*)b);
614 }
varying_info_entry_destroy(set_entry * entry)615 void varying_info_entry_destroy(set_entry *entry) {
616 if (entry->key)
617 free((void*)entry->key);
618 }
619
/* Create the screen-wide set that interns d3d12_varying_info structs
 * (populated by fill_varyings, guarded by screen->varying_info_mutex). */
void
d3d12_varying_cache_init(struct d3d12_screen *screen) {
   screen->varying_info_set = _mesa_set_create(nullptr, varying_info_hash, varying_info_compare);
}
624
/* Tear down the interning set, freeing every stored varying info via
 * varying_info_entry_destroy. */
void
d3d12_varying_cache_destroy(struct d3d12_screen *screen) {
   _mesa_set_destroy(screen->varying_info_set, varying_info_entry_destroy);
}
629
630
/* Build the variant key for the current lowering needs (fill-mode
 * emulation, point sprites, vertex reordering) and bind the matching
 * generated GS.  No-op when a user GS is bound. */
static void
validate_geometry_shader_variant(struct d3d12_selection_context *sel_ctx)
{
   struct d3d12_context *ctx = sel_ctx->ctx;
   d3d12_shader_selector *gs = ctx->gfx_stages[PIPE_SHADER_GEOMETRY];

   /* Nothing to do if there is a user geometry shader bound */
   if (gs != NULL && !gs->is_variant)
      return;

   d3d12_shader_selector* vs = ctx->gfx_stages[PIPE_SHADER_VERTEX];
   d3d12_shader_selector* fs = ctx->gfx_stages[PIPE_SHADER_FRAGMENT];

   struct d3d12_gs_variant_key key;
   key.all = 0;
   key.flat_varyings = 0;

   /* Fill the geometry shader variant key */
   if (sel_ctx->fill_mode_lowered != PIPE_POLYGON_MODE_FILL) {
      key.fill_mode = sel_ctx->fill_mode_lowered;
      key.cull_mode = sel_ctx->cull_mode_lowered;
      /* NOTE(review): fs is dereferenced without a null check on this path —
       * presumably a fragment shader is always bound here; confirm. */
      key.has_front_face = (fs->initial->info.inputs_read & VARYING_BIT_FACE) != 0;
      if (key.cull_mode != PIPE_FACE_NONE || key.has_front_face)
         key.front_ccw = ctx->gfx_pipeline_state.rast->base.front_ccw ^ (ctx->flip_y < 0);
      key.edge_flag_fix = needs_edge_flag_fix(ctx->initial_api_prim);
      fill_flat_varyings(&key, fs);
      if (key.flat_varyings != 0)
         key.flatshade_first = ctx->gfx_pipeline_state.rast->base.flatshade_first;
   } else if (sel_ctx->needs_point_sprite_lowering) {
      key.passthrough = true;
   } else if (sel_ctx->needs_vertex_reordering) {
      /* TODO support cases where flat shading (pv != 0) and xfb are enabled, or lines */
      key.provoking_vertex = sel_ctx->provoking_vertex;
      key.alternate_tri = sel_ctx->alternate_tri;
   }

   /* Cache the VS output varyings on the selector; they describe the
    * generated variant's inputs. */
   if (vs->initial_output_vars == nullptr) {
      vs->initial_output_vars = fill_varyings(sel_ctx->ctx, vs->initial, nir_var_shader_out,
                                              vs->initial->info.outputs_written, false);
   }
   key.varyings = vs->initial_output_vars;
   gs = d3d12_get_gs_variant(ctx, &key);
   ctx->gfx_stages[PIPE_SHADER_GEOMETRY] = gs;
}
675
/* Bind a generated TCS variant when a TES is present without a user TCS,
 * or unbind the variant when no TES is bound.  No-op when a user TCS is
 * bound. */
static void
validate_tess_ctrl_shader_variant(struct d3d12_selection_context *sel_ctx)
{
   struct d3d12_context *ctx = sel_ctx->ctx;
   d3d12_shader_selector *tcs = ctx->gfx_stages[PIPE_SHADER_TESS_CTRL];

   /* Nothing to do if there is a user tess ctrl shader bound */
   if (tcs != NULL && !tcs->is_variant)
      return;

   d3d12_shader_selector *tes = ctx->gfx_stages[PIPE_SHADER_TESS_EVAL];
   struct d3d12_tcs_variant_key key = {0};

   bool variant_needed = tes != nullptr;

   /* Fill the variant key */
   if (variant_needed) {
      /* Cache the TES input varyings (minus the tess-level inputs, which
       * are excluded from the mask here) on the selector. */
      if (tes->initial_input_vars == nullptr) {
         tes->initial_input_vars = fill_varyings(sel_ctx->ctx, tes->initial, nir_var_shader_in,
                                                 tes->initial->info.inputs_read & ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER),
                                                 false);
      }
      key.varyings = tes->initial_input_vars;
      key.vertices_out = ctx->patch_vertices;
   }

   /* Find/create the proper variant and bind it */
   tcs = variant_needed ? d3d12_get_tcs_variant(ctx, &key) : NULL;
   ctx->gfx_stages[PIPE_SHADER_TESS_CTRL] = tcs;
}
706
707 static bool
d3d12_compare_shader_keys(struct d3d12_selection_context * sel_ctx,const d3d12_shader_key * expect,const d3d12_shader_key * have)708 d3d12_compare_shader_keys(struct d3d12_selection_context* sel_ctx, const d3d12_shader_key *expect, const d3d12_shader_key *have)
709 {
710 assert(expect->stage == have->stage);
711 assert(expect);
712 assert(have);
713
714 if (expect->hash != have->hash)
715 return false;
716
717 switch (expect->stage) {
718 case PIPE_SHADER_VERTEX:
719 if (expect->vs.needs_format_emulation != have->vs.needs_format_emulation)
720 return false;
721
722 if (expect->vs.needs_format_emulation) {
723 if (memcmp(expect->vs.format_conversion, have->vs.format_conversion,
724 sel_ctx->ctx->gfx_pipeline_state.ves->num_elements * sizeof(enum pipe_format)))
725 return false;
726 }
727 break;
728 case PIPE_SHADER_GEOMETRY:
729 if (expect->gs.all != have->gs.all)
730 return false;
731 break;
732 case PIPE_SHADER_TESS_CTRL:
733 if (expect->hs.all != have->hs.all)
734 return false;
735 break;
736 case PIPE_SHADER_TESS_EVAL:
737 if (expect->ds.tcs_vertices_out != have->ds.tcs_vertices_out ||
738 expect->ds.prev_patch_outputs != have->ds.prev_patch_outputs)
739 return false;
740 break;
741 case PIPE_SHADER_FRAGMENT:
742 if (expect->fs.all != have->fs.all)
743 return false;
744 break;
745 case PIPE_SHADER_COMPUTE:
746 if (memcmp(expect->cs.workgroup_size, have->cs.workgroup_size,
747 sizeof(have->cs.workgroup_size)))
748 return false;
749 break;
750 default:
751 unreachable("invalid stage");
752 }
753
754 if (expect->n_texture_states != have->n_texture_states)
755 return false;
756
757 if (expect->n_images != have->n_images)
758 return false;
759
760 if (expect->n_texture_states > 0 &&
761 memcmp(expect->tex_wrap_states, have->tex_wrap_states,
762 expect->n_texture_states * sizeof(dxil_wrap_sampler_state)))
763 return false;
764
765 if (memcmp(expect->swizzle_state, have->swizzle_state,
766 expect->n_texture_states * sizeof(dxil_texture_swizzle_state)))
767 return false;
768
769 if (memcmp(expect->sampler_compare_funcs, have->sampler_compare_funcs,
770 expect->n_texture_states * sizeof(enum compare_func)))
771 return false;
772
773 if (memcmp(expect->image_format_conversion, have->image_format_conversion,
774 expect->n_images * sizeof(struct d3d12_image_format_conversion_info)))
775 return false;
776
777 if (!(expect->next_varying_inputs == have->next_varying_inputs &&
778 expect->prev_varying_outputs == have->prev_varying_outputs &&
779 expect->common_all == have->common_all &&
780 expect->tex_saturate_s == have->tex_saturate_s &&
781 expect->tex_saturate_r == have->tex_saturate_r &&
782 expect->tex_saturate_t == have->tex_saturate_t))
783 return false;
784
785 if (expect->next_has_frac_inputs &&
786 expect->next_varying_frac_inputs != have->next_varying_frac_inputs &&
787 memcmp(expect->next_varying_frac_inputs, have->next_varying_frac_inputs, sizeof(d3d12_shader_selector::varying_frac_inputs)))
788 return false;
789 if (expect->prev_has_frac_outputs &&
790 expect->prev_varying_frac_outputs != have->prev_varying_frac_outputs &&
791 memcmp(expect->prev_varying_frac_outputs, have->prev_varying_frac_outputs, sizeof(d3d12_shader_selector::varying_frac_outputs)))
792 return false;
793 return true;
794 }
795
/* Cheap additive hash over the shader key; collisions are resolved by the
 * full d3d12_compare_shader_keys.  Must stay in sync with the fields that
 * comparison checks. */
static uint32_t
d3d12_shader_key_hash(const d3d12_shader_key *key)
{
   uint32_t hash;

   hash = (uint32_t)key->stage;

   /* Fields common to all stages. */
   hash += key->next_varying_inputs;
   hash += key->prev_varying_outputs;
   hash += key->common_all;
   if (key->next_has_frac_inputs)
      hash = _mesa_hash_data_with_seed(key->next_varying_frac_inputs, sizeof(d3d12_shader_selector::varying_frac_inputs), hash);
   if (key->prev_has_frac_outputs)
      hash = _mesa_hash_data_with_seed(key->prev_varying_frac_outputs, sizeof(d3d12_shader_selector::varying_frac_outputs), hash);
   /* Stage-specific payload. */
   switch (key->stage) {
   case PIPE_SHADER_VERTEX:
      /* (Probably) not worth the bit extraction for needs_format_emulation and
       * the rest of the the format_conversion data is large. Don't bother
       * hashing for now until this is shown to be worthwhile. */
      break;
   case PIPE_SHADER_GEOMETRY:
      hash += key->gs.all;
      break;
   case PIPE_SHADER_FRAGMENT:
      hash += key->fs.all;
      break;
   case PIPE_SHADER_COMPUTE:
      hash = _mesa_hash_data_with_seed(&key->cs, sizeof(key->cs), hash);
      break;
   case PIPE_SHADER_TESS_CTRL:
      hash += key->hs.all;
      break;
   case PIPE_SHADER_TESS_EVAL:
      hash += key->ds.tcs_vertices_out;
      hash += key->ds.prev_patch_outputs;
      break;
   default:
      /* No type specific information to hash for other stages. */
      break;
   }

   hash += key->n_texture_states;
   hash += key->n_images;
   return hash;
}
841
842 static void
d3d12_fill_shader_key(struct d3d12_selection_context * sel_ctx,d3d12_shader_key * key,d3d12_shader_selector * sel,d3d12_shader_selector * prev,d3d12_shader_selector * next)843 d3d12_fill_shader_key(struct d3d12_selection_context *sel_ctx,
844 d3d12_shader_key *key, d3d12_shader_selector *sel,
845 d3d12_shader_selector *prev, d3d12_shader_selector *next)
846 {
847 pipe_shader_type stage = sel->stage;
848
849 memset(key, 0, offsetof(d3d12_shader_key, vs));
850 key->stage = stage;
851
852 switch (stage)
853 {
854 case PIPE_SHADER_VERTEX:
855 key->vs.needs_format_emulation = 0;
856 break;
857 case PIPE_SHADER_FRAGMENT:
858 key->fs.all = 0;
859 break;
860 case PIPE_SHADER_GEOMETRY:
861 key->gs.all = 0;
862 break;
863 case PIPE_SHADER_TESS_CTRL:
864 key->hs.all = 0;
865 break;
866 case PIPE_SHADER_TESS_EVAL:
867 key->ds.tcs_vertices_out = 0;
868 key->ds.prev_patch_outputs = 0;
869 break;
870 case PIPE_SHADER_COMPUTE:
871 memset(key->cs.workgroup_size, 0, sizeof(key->cs.workgroup_size));
872 break;
873 default: unreachable("Invalid stage type");
874 }
875
876 key->n_texture_states = 0;
877 key->tex_wrap_states = sel_ctx->ctx->tex_wrap_states_shader_key;
878 key->n_images = 0;
879
880 if (prev) {
881 key->prev_varying_outputs = prev->initial->info.outputs_written;
882 key->prev_has_frac_outputs = prev->has_frac_outputs;
883 key->prev_varying_frac_outputs = prev->varying_frac_outputs;
884
885 if (stage == PIPE_SHADER_TESS_EVAL)
886 key->ds.prev_patch_outputs = prev->initial->info.patch_outputs_written;
887
888 /* Set the provoking vertex based on the previous shader output. Only set the
889 * key value if the driver actually supports changing the provoking vertex though */
890 if (stage == PIPE_SHADER_FRAGMENT && sel_ctx->ctx->gfx_pipeline_state.rast &&
891 !sel_ctx->needs_vertex_reordering &&
892 d3d12_screen(sel_ctx->ctx->base.screen)->have_load_at_vertex)
893 key->fs.provoking_vertex = sel_ctx->provoking_vertex;
894
895 /* Get the input clip distance size. The info's clip_distance_array_size corresponds
896 * to the output, and in cases of TES or GS you could have differently-sized inputs
897 * and outputs. For FS, there is no output, so it's repurposed to mean input.
898 */
899 if (stage != PIPE_SHADER_FRAGMENT)
900 key->input_clip_size = prev->initial->info.clip_distance_array_size;
901 }
902
903 if (next) {
904 if (stage == PIPE_SHADER_TESS_CTRL)
905 key->hs.next_patch_inputs = next->initial->info.patch_outputs_read;
906 key->next_varying_inputs = next->initial->info.inputs_read;
907 if (BITSET_TEST(next->initial->info.system_values_read, SYSTEM_VALUE_PRIMITIVE_ID))
908 key->next_varying_inputs |= VARYING_SLOT_PRIMITIVE_ID;
909 key->next_has_frac_inputs = next->has_frac_inputs;
910 key->next_varying_frac_inputs = next->varying_frac_inputs;
911 }
912
913 if (stage == PIPE_SHADER_GEOMETRY ||
914 ((stage == PIPE_SHADER_VERTEX || stage == PIPE_SHADER_TESS_EVAL) &&
915 (!next || next->stage == PIPE_SHADER_FRAGMENT))) {
916 key->last_vertex_processing_stage = 1;
917 key->invert_depth = sel_ctx->ctx->reverse_depth_range;
918 key->halfz = sel_ctx->ctx->gfx_pipeline_state.rast ?
919 sel_ctx->ctx->gfx_pipeline_state.rast->base.clip_halfz : false;
920 if (sel_ctx->ctx->pstipple.enabled &&
921 sel_ctx->ctx->gfx_pipeline_state.rast->base.poly_stipple_enable)
922 key->next_varying_inputs |= VARYING_BIT_POS;
923 }
924
925 if (stage == PIPE_SHADER_GEOMETRY && sel_ctx->ctx->gfx_pipeline_state.rast) {
926 struct pipe_rasterizer_state *rast = &sel_ctx->ctx->gfx_pipeline_state.rast->base;
927 if (sel_ctx->needs_point_sprite_lowering) {
928 key->gs.writes_psize = 1;
929 key->gs.point_size_per_vertex = rast->point_size_per_vertex;
930 key->gs.sprite_coord_enable = rast->sprite_coord_enable;
931 key->gs.sprite_origin_upper_left = (rast->sprite_coord_mode != PIPE_SPRITE_COORD_LOWER_LEFT);
932 if (sel_ctx->ctx->flip_y < 0)
933 key->gs.sprite_origin_upper_left = !key->gs.sprite_origin_upper_left;
934 key->gs.aa_point = rast->point_smooth;
935 key->gs.stream_output_factor = 6;
936 } else if (sel_ctx->fill_mode_lowered == PIPE_POLYGON_MODE_LINE) {
937 key->gs.stream_output_factor = 2;
938 } else if (sel_ctx->needs_vertex_reordering && !sel->is_variant) {
939 key->gs.triangle_strip = 1;
940 }
941
942 if (sel->is_variant && next) {
943 if (next->initial->info.inputs_read & VARYING_BIT_FACE)
944 key->next_varying_inputs = (key->next_varying_inputs | VARYING_BIT_VAR(12)) & ~VARYING_BIT_FACE;
945 if (next->initial->info.inputs_read & VARYING_BIT_PRIMITIVE_ID)
946 key->gs.primitive_id = 1;
947 }
948 } else if (stage == PIPE_SHADER_FRAGMENT) {
949 key->fs.missing_dual_src_outputs = sel_ctx->missing_dual_src_outputs;
950 key->fs.frag_result_color_lowering = sel_ctx->frag_result_color_lowering;
951 key->fs.manual_depth_range = sel_ctx->manual_depth_range;
952 key->fs.polygon_stipple = sel_ctx->ctx->pstipple.enabled &&
953 sel_ctx->ctx->gfx_pipeline_state.rast->base.poly_stipple_enable;
954 key->fs.multisample_disabled = sel_ctx->ctx->gfx_pipeline_state.rast &&
955 !sel_ctx->ctx->gfx_pipeline_state.rast->desc.MultisampleEnable;
956 if (sel_ctx->ctx->gfx_pipeline_state.blend &&
957 sel_ctx->ctx->gfx_pipeline_state.blend->desc.RenderTarget[0].LogicOpEnable &&
958 !sel_ctx->ctx->gfx_pipeline_state.has_float_rtv) {
959 key->fs.cast_to_uint = util_format_is_unorm(sel_ctx->ctx->fb.cbufs[0]->format);
960 key->fs.cast_to_int = !key->fs.cast_to_uint;
961 }
962 if (sel_ctx->needs_point_sprite_lowering) {
963 if (sel->initial->info.inputs_read & VARYING_BIT_FACE)
964 key->prev_varying_outputs = (key->prev_varying_outputs | VARYING_BIT_VAR(12)) & ~VARYING_BIT_FACE;
965 key->prev_varying_outputs |= sel->initial->info.inputs_read & (VARYING_BIT_PNTC | BITFIELD64_RANGE(VARYING_SLOT_TEX0, 8));
966 }
967 } else if (stage == PIPE_SHADER_TESS_CTRL) {
968 if (next && next->initial->info.stage == MESA_SHADER_TESS_EVAL) {
969 key->hs.primitive_mode = next->initial->info.tess._primitive_mode;
970 key->hs.ccw = next->initial->info.tess.ccw;
971 key->hs.point_mode = next->initial->info.tess.point_mode;
972 key->hs.spacing = next->initial->info.tess.spacing;
973 } else {
974 key->hs.primitive_mode = TESS_PRIMITIVE_QUADS;
975 key->hs.ccw = true;
976 key->hs.point_mode = false;
977 key->hs.spacing = TESS_SPACING_EQUAL;
978 }
979 key->hs.patch_vertices_in = MAX2(sel_ctx->ctx->patch_vertices, 1);
980 } else if (stage == PIPE_SHADER_TESS_EVAL) {
981 if (prev && prev->initial->info.stage == MESA_SHADER_TESS_CTRL)
982 key->ds.tcs_vertices_out = prev->initial->info.tess.tcs_vertices_out;
983 else
984 key->ds.tcs_vertices_out = 32;
985 }
986
987 if (sel->samples_int_textures) {
988 key->samples_int_textures = sel->samples_int_textures;
989 key->n_texture_states = sel_ctx->ctx->num_sampler_views[stage];
990 /* Copy only states with integer textures */
991 for(int i = 0; i < key->n_texture_states; ++i) {
992 auto& wrap_state = sel_ctx->ctx->tex_wrap_states[stage][i];
993 if (wrap_state.is_int_sampler) {
994 memcpy(&key->tex_wrap_states[i], &wrap_state, sizeof(wrap_state));
995 key->swizzle_state[i] = sel_ctx->ctx->tex_swizzle_state[stage][i];
996 } else {
997 memset(&key->tex_wrap_states[i], 0, sizeof(key->tex_wrap_states[i]));
998 key->swizzle_state[i] = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W };
999 }
1000 }
1001 }
1002
1003 for (unsigned i = 0, e = sel_ctx->ctx->num_samplers[stage]; i < e; ++i) {
1004 if (!sel_ctx->ctx->samplers[stage][i] ||
1005 sel_ctx->ctx->samplers[stage][i]->filter == PIPE_TEX_FILTER_NEAREST)
1006 continue;
1007
1008 if (sel_ctx->ctx->samplers[stage][i]->wrap_r == PIPE_TEX_WRAP_CLAMP)
1009 key->tex_saturate_r |= 1 << i;
1010 if (sel_ctx->ctx->samplers[stage][i]->wrap_s == PIPE_TEX_WRAP_CLAMP)
1011 key->tex_saturate_s |= 1 << i;
1012 if (sel_ctx->ctx->samplers[stage][i]->wrap_t == PIPE_TEX_WRAP_CLAMP)
1013 key->tex_saturate_t |= 1 << i;
1014 }
1015
1016 if (sel->compare_with_lod_bias_grad) {
1017 key->n_texture_states = sel_ctx->ctx->num_sampler_views[stage];
1018 memcpy(key->sampler_compare_funcs, sel_ctx->ctx->tex_compare_func[stage],
1019 key->n_texture_states * sizeof(enum compare_func));
1020 memcpy(key->swizzle_state, sel_ctx->ctx->tex_swizzle_state[stage],
1021 key->n_texture_states * sizeof(dxil_texture_swizzle_state));
1022 if (!sel->samples_int_textures)
1023 memset(key->tex_wrap_states, 0, sizeof(key->tex_wrap_states[0]) * key->n_texture_states);
1024 }
1025
1026 if (stage == PIPE_SHADER_VERTEX && sel_ctx->ctx->gfx_pipeline_state.ves) {
1027 key->vs.needs_format_emulation = sel_ctx->ctx->gfx_pipeline_state.ves->needs_format_emulation;
1028 if (key->vs.needs_format_emulation) {
1029 unsigned num_elements = sel_ctx->ctx->gfx_pipeline_state.ves->num_elements;
1030
1031 memset(key->vs.format_conversion + num_elements,
1032 0,
1033 sizeof(key->vs.format_conversion) - (num_elements * sizeof(enum pipe_format)));
1034
1035 memcpy(key->vs.format_conversion, sel_ctx->ctx->gfx_pipeline_state.ves->format_conversion,
1036 num_elements * sizeof(enum pipe_format));
1037 }
1038 }
1039
1040 if (stage == PIPE_SHADER_FRAGMENT &&
1041 sel_ctx->ctx->gfx_stages[PIPE_SHADER_GEOMETRY] &&
1042 sel_ctx->ctx->gfx_stages[PIPE_SHADER_GEOMETRY]->is_variant &&
1043 sel_ctx->ctx->gfx_stages[PIPE_SHADER_GEOMETRY]->gs_key.has_front_face) {
1044 key->fs.remap_front_facing = 1;
1045 }
1046
1047 if (stage == PIPE_SHADER_COMPUTE && sel_ctx->variable_workgroup_size) {
1048 memcpy(key->cs.workgroup_size, sel_ctx->variable_workgroup_size, sizeof(key->cs.workgroup_size));
1049 }
1050
1051 key->n_images = sel_ctx->ctx->num_image_views[stage];
1052 for (int i = 0; i < key->n_images; ++i) {
1053 key->image_format_conversion[i].emulated_format = sel_ctx->ctx->image_view_emulation_formats[stage][i];
1054 if (key->image_format_conversion[i].emulated_format != PIPE_FORMAT_NONE)
1055 key->image_format_conversion[i].view_format = sel_ctx->ctx->image_views[stage][i].format;
1056 }
1057
1058 key->hash = d3d12_shader_key_hash(key);
1059 }
1060
/* Pick (or build) the shader variant of 'sel' matching the current pipeline
 * state. First tries to find an already-compiled variant with an equal key;
 * on a miss, clones the original NIR, applies every key-driven lowering pass,
 * compiles it, and prepends the result to the selector's variant list.
 * 'prev'/'next' are the neighboring bound stages (may be NULL). */
static void
select_shader_variant(struct d3d12_selection_context *sel_ctx, d3d12_shader_selector *sel,
                      d3d12_shader_selector *prev, d3d12_shader_selector *next)
{
   struct d3d12_context *ctx = sel_ctx->ctx;
   d3d12_shader_key key;
   nir_shader *new_nir_variant;
   unsigned pstipple_binding = UINT32_MAX;

   d3d12_fill_shader_key(sel_ctx, &key, sel, prev, next);

   /* Check for an existing variant */
   for (d3d12_shader *variant = sel->first; variant;
        variant = variant->next_variant) {

      if (d3d12_compare_shader_keys(sel_ctx, &key, &variant->key)) {
         sel->current = variant;
         return;
      }
   }

   /* Clone the NIR shader (the selector is the ralloc parent of the clone) */
   new_nir_variant = nir_shader_clone(sel, sel->initial);

   /* Apply any needed lowering passes */
   if (key.stage == PIPE_SHADER_GEOMETRY) {
      if (key.gs.writes_psize) {
         NIR_PASS_V(new_nir_variant, d3d12_lower_point_sprite,
                    !key.gs.sprite_origin_upper_left,
                    key.gs.point_size_per_vertex,
                    key.gs.sprite_coord_enable,
                    key.next_varying_inputs);
      }

      if (key.gs.primitive_id)
         NIR_PASS_V(new_nir_variant, d3d12_lower_primitive_id);

      if (key.gs.triangle_strip)
         NIR_PASS_V(new_nir_variant, d3d12_lower_triangle_strip);
   }
   else if (key.stage == PIPE_SHADER_FRAGMENT)
   {
      if (key.fs.polygon_stipple) {
         /* pstipple_binding receives the sampler binding used by the
          * stipple texture; it is stored on the variant below. */
         NIR_PASS_V(new_nir_variant, nir_lower_pstipple_fs,
                    &pstipple_binding, 0, false, nir_type_bool1);
      }

      if (key.fs.remap_front_facing)
         dxil_nir_forward_front_face(new_nir_variant);

      if (key.fs.missing_dual_src_outputs) {
         NIR_PASS_V(new_nir_variant, d3d12_add_missing_dual_src_target,
                    key.fs.missing_dual_src_outputs);
      } else if (key.fs.frag_result_color_lowering) {
         NIR_PASS_V(new_nir_variant, nir_lower_fragcolor,
                    key.fs.frag_result_color_lowering);
      }

      if (key.fs.manual_depth_range)
         NIR_PASS_V(new_nir_variant, d3d12_lower_depth_range);
   }


   if (sel->compare_with_lod_bias_grad) {
      /* The cast below relies on these two layouts matching. */
      STATIC_ASSERT(sizeof(dxil_texture_swizzle_state) ==
                    sizeof(nir_lower_tex_shadow_swizzle));

      NIR_PASS_V(new_nir_variant, nir_lower_tex_shadow, key.n_texture_states,
                 key.sampler_compare_funcs, (nir_lower_tex_shadow_swizzle *)key.swizzle_state);
   }

   if (key.stage == PIPE_SHADER_FRAGMENT) {
      if (key.fs.cast_to_uint)
         NIR_PASS_V(new_nir_variant, d3d12_lower_uint_cast, false);
      if (key.fs.cast_to_int)
         NIR_PASS_V(new_nir_variant, d3d12_lower_uint_cast, true);
   }

   if (key.n_images) {
      d3d12_image_format_conversion_info_arr image_format_arr = { key.n_images, key.image_format_conversion };
      NIR_PASS_V(new_nir_variant, d3d12_lower_image_casts, &image_format_arr);
   }

   /* Bake the dispatch's workgroup size into variants of shaders that were
    * compiled with a variable workgroup size. */
   if (key.stage == PIPE_SHADER_COMPUTE && sel->workgroup_size_variable) {
      new_nir_variant->info.workgroup_size[0] = key.cs.workgroup_size[0];
      new_nir_variant->info.workgroup_size[1] = key.cs.workgroup_size[1];
      new_nir_variant->info.workgroup_size[2] = key.cs.workgroup_size[2];
   }

   /* Propagate tess state from the key into the cloned shader's info. */
   if (new_nir_variant->info.stage == MESA_SHADER_TESS_CTRL) {
      new_nir_variant->info.tess._primitive_mode = (tess_primitive_mode)key.hs.primitive_mode;
      new_nir_variant->info.tess.ccw = key.hs.ccw;
      new_nir_variant->info.tess.point_mode = key.hs.point_mode;
      new_nir_variant->info.tess.spacing = key.hs.spacing;

      NIR_PASS_V(new_nir_variant, dxil_nir_set_tcs_patches_in, key.hs.patch_vertices_in);
   } else if (new_nir_variant->info.stage == MESA_SHADER_TESS_EVAL) {
      new_nir_variant->info.tess.tcs_vertices_out = key.ds.tcs_vertices_out;
   }

   {
      struct nir_lower_tex_options tex_options = { };
      tex_options.lower_txp = ~0u; /* No equivalent for textureProj */
      tex_options.lower_rect = true;
      tex_options.lower_rect_offset = true;
      tex_options.saturate_s = key.tex_saturate_s;
      tex_options.saturate_r = key.tex_saturate_r;
      tex_options.saturate_t = key.tex_saturate_t;
      tex_options.lower_invalid_implicit_lod = true;
      tex_options.lower_tg4_offsets = true;

      NIR_PASS_V(new_nir_variant, nir_lower_tex, &tex_options);
   }

   /* Remove not-written inputs, and re-sort */
   if (prev) {
      NIR_PASS_V(new_nir_variant, dxil_nir_kill_undefined_varyings, key.prev_varying_outputs,
                 prev->initial->info.patch_outputs_written, key.prev_varying_frac_outputs);
      dxil_reassign_driver_locations(new_nir_variant, nir_var_shader_in, key.prev_varying_outputs,
                                     key.prev_varying_frac_outputs);
   }

   /* Remove not-read outputs and re-sort */
   if (next) {
      NIR_PASS_V(new_nir_variant, dxil_nir_kill_unused_outputs, key.next_varying_inputs,
                 next->initial->info.patch_inputs_read, key.next_varying_frac_inputs);
      dxil_reassign_driver_locations(new_nir_variant, nir_var_shader_out, key.next_varying_inputs,
                                     key.next_varying_frac_inputs);
   }

   nir_shader_gather_info(new_nir_variant, nir_shader_get_entrypoint(new_nir_variant));
   d3d12_shader *new_variant = compile_nir(ctx, sel, &key, new_nir_variant);
   assert(new_variant);

   /* keep track of polygon stipple texture binding */
   new_variant->pstipple_binding = pstipple_binding;

   /* prepend the new shader in the selector chain and pick it */
   new_variant->next_variant = sel->first;
   sel->current = sel->first = new_variant;
}
1202
1203 static d3d12_shader_selector *
get_prev_shader(struct d3d12_context * ctx,pipe_shader_type current)1204 get_prev_shader(struct d3d12_context *ctx, pipe_shader_type current)
1205 {
1206 switch (current) {
1207 case PIPE_SHADER_VERTEX:
1208 return NULL;
1209 case PIPE_SHADER_FRAGMENT:
1210 if (ctx->gfx_stages[PIPE_SHADER_GEOMETRY])
1211 return ctx->gfx_stages[PIPE_SHADER_GEOMETRY];
1212 FALLTHROUGH;
1213 case PIPE_SHADER_GEOMETRY:
1214 if (ctx->gfx_stages[PIPE_SHADER_TESS_EVAL])
1215 return ctx->gfx_stages[PIPE_SHADER_TESS_EVAL];
1216 FALLTHROUGH;
1217 case PIPE_SHADER_TESS_EVAL:
1218 if (ctx->gfx_stages[PIPE_SHADER_TESS_CTRL])
1219 return ctx->gfx_stages[PIPE_SHADER_TESS_CTRL];
1220 FALLTHROUGH;
1221 case PIPE_SHADER_TESS_CTRL:
1222 return ctx->gfx_stages[PIPE_SHADER_VERTEX];
1223 default:
1224 unreachable("shader type not supported");
1225 }
1226 }
1227
1228 static d3d12_shader_selector *
get_next_shader(struct d3d12_context * ctx,pipe_shader_type current)1229 get_next_shader(struct d3d12_context *ctx, pipe_shader_type current)
1230 {
1231 switch (current) {
1232 case PIPE_SHADER_VERTEX:
1233 if (ctx->gfx_stages[PIPE_SHADER_TESS_CTRL])
1234 return ctx->gfx_stages[PIPE_SHADER_TESS_CTRL];
1235 FALLTHROUGH;
1236 case PIPE_SHADER_TESS_CTRL:
1237 if (ctx->gfx_stages[PIPE_SHADER_TESS_EVAL])
1238 return ctx->gfx_stages[PIPE_SHADER_TESS_EVAL];
1239 FALLTHROUGH;
1240 case PIPE_SHADER_TESS_EVAL:
1241 if (ctx->gfx_stages[PIPE_SHADER_GEOMETRY])
1242 return ctx->gfx_stages[PIPE_SHADER_GEOMETRY];
1243 FALLTHROUGH;
1244 case PIPE_SHADER_GEOMETRY:
1245 return ctx->gfx_stages[PIPE_SHADER_FRAGMENT];
1246 case PIPE_SHADER_FRAGMENT:
1247 return NULL;
1248 default:
1249 unreachable("shader type not supported");
1250 }
1251 }
1252
/* Bitmask of texture-usage properties collected by scan_texture_use(). */
enum tex_scan_flags {
   TEX_SAMPLE_INTEGER_TEXTURE = 1 << 0,  /* samples an int/uint-typed texture */
   TEX_CMP_WITH_LOD_BIAS_GRAD = 1 << 1,  /* shadow compare with explicit lod/bias/grad */
   TEX_SCAN_ALL_FLAGS = (1 << 2) - 1     /* all of the above; used for early exit */
};
1258
1259 static unsigned
scan_texture_use(nir_shader * nir)1260 scan_texture_use(nir_shader *nir)
1261 {
1262 unsigned result = 0;
1263 nir_foreach_function_impl(impl, nir) {
1264 nir_foreach_block(block, impl) {
1265 nir_foreach_instr(instr, block) {
1266 if (instr->type == nir_instr_type_tex) {
1267 auto tex = nir_instr_as_tex(instr);
1268 switch (tex->op) {
1269 case nir_texop_txb:
1270 case nir_texop_txl:
1271 case nir_texop_txd:
1272 if (tex->is_shadow)
1273 result |= TEX_CMP_WITH_LOD_BIAS_GRAD;
1274 FALLTHROUGH;
1275 case nir_texop_tex:
1276 if (tex->dest_type & (nir_type_int | nir_type_uint))
1277 result |= TEX_SAMPLE_INTEGER_TEXTURE;
1278 default:
1279 ;
1280 }
1281 }
1282 if (TEX_SCAN_ALL_FLAGS == result)
1283 return result;
1284 }
1285 }
1286 }
1287 return result;
1288 }
1289
1290 static uint64_t
update_so_info(struct pipe_stream_output_info * so_info,uint64_t outputs_written)1291 update_so_info(struct pipe_stream_output_info *so_info,
1292 uint64_t outputs_written)
1293 {
1294 uint64_t so_outputs = 0;
1295 uint8_t reverse_map[64] = {0};
1296 unsigned slot = 0;
1297
1298 while (outputs_written)
1299 reverse_map[slot++] = u_bit_scan64(&outputs_written);
1300
1301 for (unsigned i = 0; i < so_info->num_outputs; i++) {
1302 struct pipe_stream_output *output = &so_info->output[i];
1303
1304 /* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */
1305 output->register_index = reverse_map[output->register_index];
1306
1307 so_outputs |= 1ull << output->register_index;
1308 }
1309
1310 return so_outputs;
1311 }
1312
/* Shared tail of shader-selector creation (graphics and compute): scans the
 * NIR for texture features, runs the stage-independent lowering passes, and
 * records the result on 'sel' as the blueprint ('initial') from which
 * state-dependent variants are later cloned. Takes ownership of 'nir'. */
static struct d3d12_shader_selector *
d3d12_create_shader_impl(struct d3d12_context *ctx,
                         struct d3d12_shader_selector *sel,
                         struct nir_shader *nir)
{
   unsigned tex_scan_result = scan_texture_use(nir);
   sel->samples_int_textures = (tex_scan_result & TEX_SAMPLE_INTEGER_TEXTURE) != 0;
   sel->compare_with_lod_bias_grad = (tex_scan_result & TEX_CMP_WITH_LOD_BIAS_GRAD) != 0;
   sel->workgroup_size_variable = nir->info.workgroup_size_variable;

   /* Integer cube maps are not supported in DirectX because sampling is not supported
    * on integer textures and TextureLoad is not supported for cube maps, so we have to
    * lower integer cube maps to be handled like 2D textures arrays*/
   NIR_PASS_V(nir, dxil_nir_lower_int_cubemaps, true);

   NIR_PASS_V(nir, dxil_nir_lower_subgroup_id);
   NIR_PASS_V(nir, dxil_nir_lower_num_subgroups);

   nir_lower_subgroups_options subgroup_options = {};
   subgroup_options.ballot_bit_size = 32;
   subgroup_options.ballot_components = 4;
   subgroup_options.lower_subgroup_masks = true;
   subgroup_options.lower_to_scalar = true;
   subgroup_options.lower_relative_shuffle = true;
   subgroup_options.lower_inverse_ballot = true;
   /* Quad ops are only meaningful in FS/CS; lower them elsewhere. */
   if (nir->info.stage != MESA_SHADER_FRAGMENT && nir->info.stage != MESA_SHADER_COMPUTE)
      subgroup_options.lower_quad = true;
   NIR_PASS_V(nir, nir_lower_subgroups, &subgroup_options);
   /* Widen 1-bit (bool) quad/scan/reduce intrinsics to 32 bits. */
   NIR_PASS_V(nir, nir_lower_bit_size, [](const nir_instr *instr, void *) -> unsigned {
      if (instr->type != nir_instr_type_intrinsic)
         return 0;
      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
      switch (intr->intrinsic) {
      case nir_intrinsic_quad_swap_horizontal:
      case nir_intrinsic_quad_swap_vertical:
      case nir_intrinsic_quad_swap_diagonal:
      case nir_intrinsic_reduce:
      case nir_intrinsic_inclusive_scan:
      case nir_intrinsic_exclusive_scan:
         return intr->def.bit_size == 1 ? 32 : 0;
      default:
         return 0;
      }
   }, NULL);

   // Ensure subgroup scans on bools are gone
   NIR_PASS_V(nir, nir_opt_dce);
   NIR_PASS_V(nir, dxil_nir_lower_unsupported_subgroup_scan);

   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));

   if (nir->info.stage == MESA_SHADER_COMPUTE)
      NIR_PASS_V(nir, d3d12_lower_compute_state_vars);
   NIR_PASS_V(nir, d3d12_lower_load_draw_params);
   NIR_PASS_V(nir, d3d12_lower_load_patch_vertices_in);
   NIR_PASS_V(nir, dxil_nir_lower_double_math);

   /* Record which generic varyings use a non-zero component offset
    * (location_frac); the variant key needs this to match stages up. */
   nir_foreach_variable_with_modes(var, nir, nir_var_shader_in) {
      if (var->data.location >= VARYING_SLOT_VAR0 && var->data.location_frac) {
         sel->has_frac_inputs = 1;
         BITSET_SET(sel->varying_frac_inputs, (var->data.location - VARYING_SLOT_VAR0) * 4 + var->data.location_frac);
      }
   }
   nir_foreach_variable_with_modes(var, nir, nir_var_shader_out) {
      if (var->data.location >= VARYING_SLOT_VAR0 && var->data.location_frac) {
         sel->has_frac_outputs = 1;
         BITSET_SET(sel->varying_frac_outputs, (var->data.location - VARYING_SLOT_VAR0) * 4 + var->data.location_frac);
      }
   }

   /* Keep this initial shader as the blue print for possible variants */
   sel->initial = nir;
   sel->initial_output_vars = nullptr;
   sel->initial_input_vars = nullptr;
   sel->gs_key.varyings = nullptr;
   sel->tcs_key.varyings = nullptr;

   return sel;
}
1392
1393 struct d3d12_shader_selector *
d3d12_create_shader(struct d3d12_context * ctx,pipe_shader_type stage,const struct pipe_shader_state * shader)1394 d3d12_create_shader(struct d3d12_context *ctx,
1395 pipe_shader_type stage,
1396 const struct pipe_shader_state *shader)
1397 {
1398 struct d3d12_shader_selector *sel = rzalloc(nullptr, d3d12_shader_selector);
1399 sel->stage = stage;
1400
1401 struct nir_shader *nir = NULL;
1402
1403 if (shader->type == PIPE_SHADER_IR_NIR) {
1404 nir = (nir_shader *)shader->ir.nir;
1405 } else {
1406 assert(shader->type == PIPE_SHADER_IR_TGSI);
1407 nir = tgsi_to_nir(shader->tokens, ctx->base.screen, false);
1408 }
1409
1410 nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
1411 memcpy(&sel->so_info, &shader->stream_output, sizeof(sel->so_info));
1412 update_so_info(&sel->so_info, nir->info.outputs_written);
1413
1414 assert(nir != NULL);
1415
1416 NIR_PASS_V(nir, dxil_nir_split_clip_cull_distance);
1417 NIR_PASS_V(nir, d3d12_split_needed_varyings);
1418
1419 if (nir->info.stage == MESA_SHADER_TESS_EVAL || nir->info.stage == MESA_SHADER_TESS_CTRL) {
1420 /* D3D requires exactly-matching patch constant signatures. Since tess ctrl must write these vars,
1421 * tess eval must have them. */
1422 for (uint32_t i = 0; i < 2; ++i) {
1423 unsigned loc = i == 0 ? VARYING_SLOT_TESS_LEVEL_OUTER : VARYING_SLOT_TESS_LEVEL_INNER;
1424 nir_variable_mode mode = nir->info.stage == MESA_SHADER_TESS_EVAL ? nir_var_shader_in : nir_var_shader_out;
1425 nir_variable *var = nir_find_variable_with_location(nir, mode, loc);
1426 uint32_t arr_size = i == 0 ? 4 : 2;
1427 if (!var) {
1428 var = nir_variable_create(nir, mode, glsl_array_type(glsl_float_type(), arr_size, 0), i == 0 ? "outer" : "inner");
1429 var->data.location = loc;
1430 var->data.patch = true;
1431 var->data.compact = true;
1432
1433 if (mode == nir_var_shader_out) {
1434 nir_builder b = nir_builder_create(nir_shader_get_entrypoint(nir));
1435 b.cursor = nir_after_impl(b.impl);
1436 for (uint32_t j = 0; j < arr_size; ++j)
1437 nir_store_deref(&b, nir_build_deref_array_imm(&b, nir_build_deref_var(&b, var), j), nir_imm_zero(&b, 1, 32), 1);
1438 }
1439 }
1440 }
1441 }
1442
1443 if (nir->info.stage != MESA_SHADER_VERTEX) {
1444 dxil_reassign_driver_locations(nir, nir_var_shader_in, 0, NULL);
1445 } else {
1446 dxil_sort_by_driver_location(nir, nir_var_shader_in);
1447
1448 uint32_t driver_loc = 0;
1449 nir_foreach_variable_with_modes(var, nir, nir_var_shader_in) {
1450 var->data.driver_location = driver_loc;
1451 driver_loc += glsl_count_attribute_slots(var->type, false);
1452 }
1453 }
1454
1455 if (nir->info.stage != MESA_SHADER_FRAGMENT) {
1456 dxil_reassign_driver_locations(nir, nir_var_shader_out, 0, NULL);
1457 } else {
1458 NIR_PASS_V(nir, nir_lower_fragcoord_wtrans);
1459 NIR_PASS_V(nir, dxil_nir_lower_sample_pos);
1460 dxil_sort_ps_outputs(nir);
1461 }
1462
1463 return d3d12_create_shader_impl(ctx, sel, nir);
1464 }
1465
1466 struct d3d12_shader_selector *
d3d12_create_compute_shader(struct d3d12_context * ctx,const struct pipe_compute_state * shader)1467 d3d12_create_compute_shader(struct d3d12_context *ctx,
1468 const struct pipe_compute_state *shader)
1469 {
1470 struct d3d12_shader_selector *sel = rzalloc(nullptr, d3d12_shader_selector);
1471 sel->stage = PIPE_SHADER_COMPUTE;
1472
1473 struct nir_shader *nir = NULL;
1474
1475 if (shader->ir_type == PIPE_SHADER_IR_NIR) {
1476 nir = (nir_shader *)shader->prog;
1477 } else {
1478 assert(shader->ir_type == PIPE_SHADER_IR_TGSI);
1479 nir = tgsi_to_nir(shader->prog, ctx->base.screen, false);
1480 }
1481
1482 nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
1483
1484 return d3d12_create_shader_impl(ctx, sel, nir);
1485 }
1486
1487 void
d3d12_select_shader_variants(struct d3d12_context * ctx,const struct pipe_draw_info * dinfo)1488 d3d12_select_shader_variants(struct d3d12_context *ctx, const struct pipe_draw_info *dinfo)
1489 {
1490 struct d3d12_selection_context sel_ctx;
1491
1492 sel_ctx.ctx = ctx;
1493 sel_ctx.needs_point_sprite_lowering = needs_point_sprite_lowering(ctx, dinfo);
1494 sel_ctx.fill_mode_lowered = fill_mode_lowered(ctx, dinfo);
1495 sel_ctx.cull_mode_lowered = cull_mode_lowered(ctx, sel_ctx.fill_mode_lowered);
1496 sel_ctx.provoking_vertex = get_provoking_vertex(&sel_ctx, &sel_ctx.alternate_tri, dinfo);
1497 sel_ctx.needs_vertex_reordering = needs_vertex_reordering(&sel_ctx, dinfo);
1498 sel_ctx.missing_dual_src_outputs = ctx->missing_dual_src_outputs;
1499 sel_ctx.frag_result_color_lowering = frag_result_color_lowering(ctx);
1500 sel_ctx.manual_depth_range = ctx->manual_depth_range;
1501
1502 d3d12_shader_selector* gs = ctx->gfx_stages[PIPE_SHADER_GEOMETRY];
1503 if (gs == nullptr || gs->is_variant) {
1504 if (sel_ctx.fill_mode_lowered != PIPE_POLYGON_MODE_FILL || sel_ctx.needs_point_sprite_lowering || sel_ctx.needs_vertex_reordering)
1505 validate_geometry_shader_variant(&sel_ctx);
1506 else if (gs != nullptr) {
1507 ctx->gfx_stages[PIPE_SHADER_GEOMETRY] = NULL;
1508 }
1509 }
1510
1511 validate_tess_ctrl_shader_variant(&sel_ctx);
1512
1513 auto* stages = ctx->gfx_stages;
1514 d3d12_shader_selector* prev;
1515 d3d12_shader_selector* next;
1516 if (stages[PIPE_SHADER_VERTEX]) {
1517 next = get_next_shader(ctx, PIPE_SHADER_VERTEX);
1518 select_shader_variant(&sel_ctx, stages[PIPE_SHADER_VERTEX], nullptr, next);
1519 }
1520 if (stages[PIPE_SHADER_TESS_CTRL]) {
1521 prev = get_prev_shader(ctx, PIPE_SHADER_TESS_CTRL);
1522 next = get_next_shader(ctx, PIPE_SHADER_TESS_CTRL);
1523 select_shader_variant(&sel_ctx, stages[PIPE_SHADER_TESS_CTRL], prev, next);
1524 }
1525 if (stages[PIPE_SHADER_TESS_EVAL]) {
1526 prev = get_prev_shader(ctx, PIPE_SHADER_TESS_EVAL);
1527 next = get_next_shader(ctx, PIPE_SHADER_TESS_EVAL);
1528 select_shader_variant(&sel_ctx, stages[PIPE_SHADER_TESS_EVAL], prev, next);
1529 }
1530 if (stages[PIPE_SHADER_GEOMETRY]) {
1531 prev = get_prev_shader(ctx, PIPE_SHADER_GEOMETRY);
1532 next = get_next_shader(ctx, PIPE_SHADER_GEOMETRY);
1533 select_shader_variant(&sel_ctx, stages[PIPE_SHADER_GEOMETRY], prev, next);
1534 }
1535 if (stages[PIPE_SHADER_FRAGMENT]) {
1536 prev = get_prev_shader(ctx, PIPE_SHADER_FRAGMENT);
1537 select_shader_variant(&sel_ctx, stages[PIPE_SHADER_FRAGMENT], prev, nullptr);
1538 }
1539 }
1540
1541 static const unsigned *
workgroup_size_variable(struct d3d12_context * ctx,const struct pipe_grid_info * info)1542 workgroup_size_variable(struct d3d12_context *ctx,
1543 const struct pipe_grid_info *info)
1544 {
1545 if (ctx->compute_state->workgroup_size_variable)
1546 return info->block;
1547 return nullptr;
1548 }
1549
1550 void
d3d12_select_compute_shader_variants(struct d3d12_context * ctx,const struct pipe_grid_info * info)1551 d3d12_select_compute_shader_variants(struct d3d12_context *ctx, const struct pipe_grid_info *info)
1552 {
1553 struct d3d12_selection_context sel_ctx = {};
1554
1555 sel_ctx.ctx = ctx;
1556 sel_ctx.variable_workgroup_size = workgroup_size_variable(ctx, info);
1557
1558 select_shader_variant(&sel_ctx, ctx->compute_state, nullptr, nullptr);
1559 }
1560
1561 void
d3d12_shader_free(struct d3d12_shader_selector * sel)1562 d3d12_shader_free(struct d3d12_shader_selector *sel)
1563 {
1564 auto shader = sel->first;
1565 while (shader) {
1566 free(shader->bytecode);
1567 shader = shader->next_variant;
1568 }
1569
1570 ralloc_free((void*)sel->initial);
1571 ralloc_free(sel);
1572 }
1573