/*
 * Copyright © Microsoft Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "d3d12_nir_passes.h"
#include "d3d12_compiler.h"
#include "nir_builder.h"
#include "nir_builtin_builder.h"
#include "nir_deref.h"
#include "nir_format_convert.h"
#include "program/prog_instruction.h"
#include "dxil_nir.h"

/**
 * Lower Y Flip:
 *
 * We can't do a Y flip simply by negating the viewport height,
 * so we need to lower the flip into the NIR shader.
 */
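
/* Each gl_Position store gets rewritten as (a sketch; d3d12_FlipY is an
 * internal state variable uploaded by the driver):
 *
 *    pos = vec4(pos.x, pos.y * d3d12_FlipY, pos.z, pos.w);
 */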
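/* Loads an internal driver state variable, lazily creating the hidden
 * uniform (tagged with STATE_INTERNAL_DRIVER tokens) and caching it in
 * *out_var on first use.
 */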
nir_def *
d3d12_get_state_var(nir_builder *b,
                    enum d3d12_state_var var_enum,
                    const char *var_name,
                    const struct glsl_type *var_type,
                    nir_variable **out_var)
{
   const gl_state_index16 tokens[STATE_LENGTH] = { STATE_INTERNAL_DRIVER, var_enum };
   if (*out_var == NULL) {
      nir_variable *var = nir_state_variable_create(b->shader, var_type,
                                                    var_name, tokens);
      var->data.how_declared = nir_var_hidden;
      *out_var = var;
   }
   return nir_load_var(b, *out_var);
}

static void
lower_pos_write(nir_builder *b, struct nir_instr *instr, nir_variable **flip)
{
   if (instr->type != nir_instr_type_intrinsic)
      return;

   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic != nir_intrinsic_store_deref)
      return;

   nir_variable *var = nir_intrinsic_get_var(intr, 0);
   if (var->data.mode != nir_var_shader_out ||
       var->data.location != VARYING_SLOT_POS)
      return;

   b->cursor = nir_before_instr(&intr->instr);

   nir_def *pos = intr->src[1].ssa;
   nir_def *flip_y = d3d12_get_state_var(b, D3D12_STATE_VAR_Y_FLIP, "d3d12_FlipY",
                                         glsl_float_type(), flip);
   nir_def *def = nir_vec4(b,
                           nir_channel(b, pos, 0),
                           nir_fmul(b, nir_channel(b, pos, 1), flip_y),
                           nir_channel(b, pos, 2),
                           nir_channel(b, pos, 3));
   nir_src_rewrite(intr->src + 1, def);
}

void
d3d12_lower_yflip(nir_shader *nir)
{
   nir_variable *flip = NULL;

   if (nir->info.stage != MESA_SHADER_VERTEX &&
       nir->info.stage != MESA_SHADER_TESS_EVAL &&
       nir->info.stage != MESA_SHADER_GEOMETRY)
      return;

   nir_foreach_function_impl(impl, nir) {
      nir_builder b = nir_builder_create(impl);

      nir_foreach_block(block, impl) {
         nir_foreach_instr_safe(instr, block) {
            lower_pos_write(&b, instr, &flip);
         }
      }

      nir_metadata_preserve(impl, nir_metadata_control_flow);
   }
}

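/* Lower Depth Range:
 *
 * Remaps the depth value read back through gl_FragCoord.z with a
 * driver-provided linear transform (a sketch):
 *
 *    z' = z * d3d12_DepthTransform.x + d3d12_DepthTransform.y
 */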
static void
lower_pos_read(nir_builder *b, struct nir_instr *instr,
               nir_variable **depth_transform_var)
{
   if (instr->type != nir_instr_type_intrinsic)
      return;

   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic != nir_intrinsic_load_deref)
      return;

   nir_variable *var = nir_intrinsic_get_var(intr, 0);
   if (var->data.mode != nir_var_shader_in ||
       var->data.location != VARYING_SLOT_POS)
      return;

   b->cursor = nir_after_instr(instr);

   nir_def *pos = nir_instr_def(instr);
   nir_def *depth = nir_channel(b, pos, 2);

   assert(depth_transform_var);
   nir_def *depth_transform = d3d12_get_state_var(b, D3D12_STATE_VAR_DEPTH_TRANSFORM,
                                                  "d3d12_DepthTransform",
                                                  glsl_vec_type(2),
                                                  depth_transform_var);
   depth = nir_fmad(b, depth, nir_channel(b, depth_transform, 0),
                    nir_channel(b, depth_transform, 1));

   pos = nir_vector_insert_imm(b, pos, depth, 2);

   nir_def_rewrite_uses_after(&intr->def, pos,
                              pos->parent_instr);
}

void
d3d12_lower_depth_range(nir_shader *nir)
{
   assert(nir->info.stage == MESA_SHADER_FRAGMENT);
   nir_variable *depth_transform = NULL;
   nir_foreach_function_impl(impl, nir) {
      nir_builder b = nir_builder_create(impl);

      nir_foreach_block(block, impl) {
         nir_foreach_instr_safe(instr, block) {
            lower_pos_read(&b, instr, &depth_transform);
         }
      }

      nir_metadata_preserve(impl, nir_metadata_control_flow);
   }
}

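/* Lower Compute State Vars:
 *
 * Replaces compute intrinsics that have no direct D3D12 equivalent with
 * loads from internal state variables; currently only load_num_workgroups,
 * which becomes a read of d3d12_NumWorkgroups.
 */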
struct compute_state_vars {
   nir_variable *num_workgroups;
};

static bool
lower_compute_state_vars(nir_builder *b, nir_instr *instr, void *_state)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;

   b->cursor = nir_after_instr(instr);
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   struct compute_state_vars *vars = _state;
   nir_def *result = NULL;
   switch (intr->intrinsic) {
   case nir_intrinsic_load_num_workgroups:
      result = d3d12_get_state_var(b, D3D12_STATE_VAR_NUM_WORKGROUPS, "d3d12_NumWorkgroups",
                                   glsl_vec_type(3), &vars->num_workgroups);
      break;
   default:
      return false;
   }

   nir_def_rewrite_uses(&intr->def, result);
   nir_instr_remove(instr);
   return true;
}

bool
d3d12_lower_compute_state_vars(nir_shader *nir)
{
   assert(nir->info.stage == MESA_SHADER_COMPUTE);
   struct compute_state_vars vars = { 0 };
   return nir_shader_instructions_pass(nir, lower_compute_state_vars,
                                       nir_metadata_control_flow, &vars);
}

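/* Lower UINT Cast:
 *
 * Re-encodes float fragment color writes as 8-bit (s)norm-encoded integer
 * values. For the signed case, negative snorm results are wrapped into the
 * unsigned range by adding 2^8 (a sketch of the per-channel conversion):
 *
 *    unsigned: v = float_to_unorm8(color)
 *    signed:   v = float_to_snorm8(color); if (v < 0) v += 256;
 */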
static bool
is_color_output(nir_variable *var)
{
   return (var->data.mode == nir_var_shader_out &&
           (var->data.location == FRAG_RESULT_COLOR ||
            var->data.location >= FRAG_RESULT_DATA0));
}

static void
lower_uint_color_write(nir_builder *b, struct nir_instr *instr, bool is_signed)
{
   const unsigned NUM_BITS = 8;
   const unsigned bits[4] = { NUM_BITS, NUM_BITS, NUM_BITS, NUM_BITS };

   if (instr->type != nir_instr_type_intrinsic)
      return;

   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic != nir_intrinsic_store_deref)
      return;

   nir_variable *var = nir_intrinsic_get_var(intr, 0);
   if (!is_color_output(var))
      return;

   b->cursor = nir_before_instr(&intr->instr);

   nir_def *col = intr->src[1].ssa;
   nir_def *def = is_signed ? nir_format_float_to_snorm(b, col, bits) :
                              nir_format_float_to_unorm(b, col, bits);
   if (is_signed)
      def = nir_bcsel(b, nir_ilt_imm(b, def, 0),
                      nir_iadd_imm(b, def, 1ull << NUM_BITS),
                      def);
   nir_src_rewrite(intr->src + 1, def);
}

void
d3d12_lower_uint_cast(nir_shader *nir, bool is_signed)
{
   if (nir->info.stage != MESA_SHADER_FRAGMENT)
      return;

   nir_foreach_function_impl(impl, nir) {
      nir_builder b = nir_builder_create(impl);

      nir_foreach_block(block, impl) {
         nir_foreach_instr_safe(instr, block) {
            lower_uint_color_write(&b, instr, is_signed);
         }
      }

      nir_metadata_preserve(impl, nir_metadata_control_flow);
   }
}

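/* Lower Load Draw Params:
 *
 * The draw parameters are provided as a single uvec4 state variable,
 * d3d12_DrawParams, packed as:
 *
 *    x: first_vertex   y: base_instance   z: draw_id   w: is_indexed_draw
 */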
static bool
lower_load_draw_params(nir_builder *b, nir_intrinsic_instr *intr,
                       void *draw_params)
{
   if (intr->intrinsic != nir_intrinsic_load_first_vertex &&
       intr->intrinsic != nir_intrinsic_load_base_instance &&
       intr->intrinsic != nir_intrinsic_load_draw_id &&
       intr->intrinsic != nir_intrinsic_load_is_indexed_draw)
      return false;

   b->cursor = nir_before_instr(&intr->instr);

   nir_def *load = d3d12_get_state_var(b, D3D12_STATE_VAR_DRAW_PARAMS, "d3d12_DrawParams",
                                       glsl_uvec4_type(), draw_params);
   unsigned channel = intr->intrinsic == nir_intrinsic_load_first_vertex ? 0 :
      intr->intrinsic == nir_intrinsic_load_base_instance ? 1 :
      intr->intrinsic == nir_intrinsic_load_draw_id ? 2 : 3;
   nir_def_replace(&intr->def, nir_channel(b, load, channel));

   return true;
}

bool
d3d12_lower_load_draw_params(struct nir_shader *nir)
{
   nir_variable *draw_params = NULL;
   if (nir->info.stage != MESA_SHADER_VERTEX)
      return false;

   return nir_shader_intrinsics_pass(nir, lower_load_draw_params,
                                     nir_metadata_control_flow,
                                     &draw_params);
}

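/* Lower Load Patch Vertices In:
 *
 * In the hull (TCS) stage gl_PatchVerticesIn is read from an internal
 * state variable; in the domain (TES) stage it is the compile-time
 * constant tcs_vertices_out.
 */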
static bool
lower_load_patch_vertices_in(nir_builder *b, nir_intrinsic_instr *intr,
                             void *_state)
{
   if (intr->intrinsic != nir_intrinsic_load_patch_vertices_in)
      return false;

   b->cursor = nir_before_instr(&intr->instr);
   nir_def *load = b->shader->info.stage == MESA_SHADER_TESS_CTRL ?
      d3d12_get_state_var(b, D3D12_STATE_VAR_PATCH_VERTICES_IN, "d3d12_FirstVertex", glsl_uint_type(), _state) :
      nir_imm_int(b, b->shader->info.tess.tcs_vertices_out);
   nir_def_replace(&intr->def, load);
   return true;
}

bool
d3d12_lower_load_patch_vertices_in(struct nir_shader *nir)
{
   nir_variable *var = NULL;

   if (nir->info.stage != MESA_SHADER_TESS_CTRL &&
       nir->info.stage != MESA_SHADER_TESS_EVAL)
      return false;

   return nir_shader_intrinsics_pass(nir, lower_load_patch_vertices_in,
                                     nir_metadata_control_flow,
                                     &var);
}

struct invert_depth_state
{
   unsigned viewport_mask;
   bool clip_halfz;
   nir_def *viewport_index;
   nir_instr *store_pos_instr;
};

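/* Tracks the latest gl_Position (and, if any, gl_ViewportIndex) store while
 * walking the shader, so the depth inversion can be applied right before the
 * next emit_vertex (geometry shaders) or once at the end of the shader.
 */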
static void
invert_depth_impl(nir_builder *b, struct invert_depth_state *state)
{
   assert(state->store_pos_instr);

   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(state->store_pos_instr);
   if (state->viewport_index) {
      /* Cursor is assigned before calling. Make sure that storing pos comes
       * after computing the viewport.
       */
      nir_instr_move(b->cursor, &intr->instr);
   }

   b->cursor = nir_before_instr(&intr->instr);

   nir_def *pos = intr->src[1].ssa;

   if (state->viewport_index) {
      nir_push_if(b, nir_test_mask(b, nir_ishl(b, nir_imm_int(b, 1), state->viewport_index), state->viewport_mask));
   }
   nir_def *old_depth = nir_channel(b, pos, 2);
   nir_def *new_depth = nir_fneg(b, old_depth);
   if (state->clip_halfz)
      new_depth = nir_fadd_imm(b, new_depth, 1.0);
   nir_def *def = nir_vec4(b,
                           nir_channel(b, pos, 0),
                           nir_channel(b, pos, 1),
                           new_depth,
                           nir_channel(b, pos, 3));
   if (state->viewport_index) {
      nir_pop_if(b, NULL);
      def = nir_if_phi(b, def, pos);
   }
   nir_src_rewrite(intr->src + 1, def);

   state->viewport_index = NULL;
   state->store_pos_instr = NULL;
}

static void
invert_depth_instr(nir_builder *b, struct nir_instr *instr, struct invert_depth_state *state)
{
   if (instr->type != nir_instr_type_intrinsic)
      return;

   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic == nir_intrinsic_store_deref) {
      nir_variable *var = nir_intrinsic_get_var(intr, 0);
      if (var->data.mode != nir_var_shader_out)
         return;

      if (var->data.location == VARYING_SLOT_VIEWPORT)
         state->viewport_index = intr->src[1].ssa;
      if (var->data.location == VARYING_SLOT_POS)
         state->store_pos_instr = instr;
   } else if (intr->intrinsic == nir_intrinsic_emit_vertex) {
      b->cursor = nir_before_instr(instr);
      invert_depth_impl(b, state);
   }
}

/* In OpenGL the window-space depth value z_w is evaluated as "s * z_d + b"
 * with "s = (far - near) / 2" (depth clip mode minus_one_to_one)
 * [OpenGL 3.3, 2.13.1]. When we swap the far and near values to satisfy the
 * DirectX requirements, we have to compensate by inverting "z_d' = -z_d"
 * with this lowering pass. When the depth clip mode is zero_to_one, we
 * compensate with "z_d' = 1.0f - z_d" instead.
 */
void
d3d12_nir_invert_depth(nir_shader *shader, unsigned viewport_mask, bool clip_halfz)
{
   if (shader->info.stage != MESA_SHADER_VERTEX &&
       shader->info.stage != MESA_SHADER_TESS_EVAL &&
       shader->info.stage != MESA_SHADER_GEOMETRY)
      return;

   struct invert_depth_state state = { viewport_mask, clip_halfz };
   nir_foreach_function_impl(impl, shader) {
      nir_builder b = nir_builder_create(impl);

      nir_foreach_block(block, impl) {
         nir_foreach_instr_safe(instr, block) {
            invert_depth_instr(&b, instr, &state);
         }
      }

      if (state.store_pos_instr) {
         b.cursor = nir_after_block(impl->end_block);
         invert_depth_impl(&b, &state);
      }

      nir_metadata_preserve(impl, nir_metadata_control_flow);
   }
}


/**
 * Lower State Vars:
 *
 * All uniforms backed by internal D3D12 state variables are condensed
 * into a single UBO, which is appended after the shader's existing ones.
 */
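
/* Each state variable occupies one vec4 slot in that UBO, so a shader using
 * e.g. d3d12_FlipY and d3d12_DrawParams would end up with (a sketch):
 *
 *    d3d12_state_vars {
 *       vec4 data[2];   // data[0] = FlipY slot, data[1] = DrawParams slot
 *    };
 */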

static unsigned
get_state_var_offset(struct d3d12_shader *shader, enum d3d12_state_var var)
{
   for (unsigned i = 0; i < shader->num_state_vars; ++i) {
      if (shader->state_vars[i].var == var)
         return shader->state_vars[i].offset;
   }

   unsigned offset = shader->state_vars_size;
   shader->state_vars[shader->num_state_vars].offset = offset;
   shader->state_vars[shader->num_state_vars].var = var;
   shader->state_vars_size += 4; /* Use 4-word slots regardless of the variable size */
   shader->num_state_vars++;

   return offset;
}

static bool
lower_instr(nir_intrinsic_instr *instr, nir_builder *b,
            struct d3d12_shader *shader, unsigned binding)
{
   nir_variable *variable = NULL;
   nir_deref_instr *deref = NULL;

   b->cursor = nir_before_instr(&instr->instr);

   if (instr->intrinsic == nir_intrinsic_load_uniform) {
      nir_foreach_variable_with_modes(var, b->shader, nir_var_uniform) {
         if (var->data.driver_location == nir_intrinsic_base(instr)) {
            variable = var;
            break;
         }
      }
   } else if (instr->intrinsic == nir_intrinsic_load_deref) {
      deref = nir_src_as_deref(instr->src[0]);
      variable = nir_intrinsic_get_var(instr, 0);
   }

   if (variable == NULL ||
       variable->num_state_slots != 1 ||
       variable->state_slots[0].tokens[0] != STATE_INTERNAL_DRIVER)
      return false;

   enum d3d12_state_var var = variable->state_slots[0].tokens[1];
   nir_def *ubo_idx = nir_imm_int(b, binding);
   nir_def *ubo_offset = nir_imm_int(b, get_state_var_offset(shader, var) * 4);
   nir_def *load =
      nir_load_ubo(b, instr->num_components, instr->def.bit_size,
                   ubo_idx, ubo_offset,
                   .align_mul = 16,
                   .align_offset = 0,
                   .range_base = 0,
                   .range = ~0,
                   );

   nir_def_replace(&instr->def, load);
   for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d)) {
      /* If anyone is using this deref, leave it alone */
      if (!list_is_empty(&d->def.uses))
         break;

      nir_instr_remove(&d->instr);
   }

   return true;
}

bool
d3d12_lower_state_vars(nir_shader *nir, struct d3d12_shader *shader)
{
   bool progress = false;

   /* The state-var UBO is added after all the other UBOs; if it already
    * exists, it is replaced using the same binding. If there are no other
    * UBOs, use binding slot 1 to be consistent with other non-default
    * UBOs. */
   unsigned binding = MAX2(nir->info.num_ubos, nir->info.first_ubo_is_default_ubo ? 1 : 0);

   nir_foreach_variable_with_modes_safe(var, nir, nir_var_uniform) {
      if (var->num_state_slots == 1 &&
          var->state_slots[0].tokens[0] == STATE_INTERNAL_DRIVER) {
         if (var->data.mode == nir_var_mem_ubo) {
            binding = var->data.binding;
         }
      }
   }

   nir_foreach_function_impl(impl, nir) {
      nir_builder builder = nir_builder_create(impl);
      nir_foreach_block(block, impl) {
         nir_foreach_instr_safe(instr, block) {
            if (instr->type == nir_instr_type_intrinsic)
               progress |= lower_instr(nir_instr_as_intrinsic(instr),
                                       &builder,
                                       shader,
                                       binding);
         }
      }

      nir_metadata_preserve(impl, nir_metadata_control_flow);
   }

   if (progress) {
      assert(shader->num_state_vars > 0);

      shader->state_vars_used = true;

      /* Remove state variables */
      nir_foreach_variable_with_modes_safe(var, nir, nir_var_uniform) {
         if (var->num_state_slots == 1 &&
             var->state_slots[0].tokens[0] == STATE_INTERNAL_DRIVER) {
            exec_node_remove(&var->node);
            nir->num_uniforms--;
         }
      }

      const gl_state_index16 tokens[STATE_LENGTH] = { STATE_INTERNAL_DRIVER };
      const struct glsl_type *type = glsl_array_type(glsl_vec4_type(),
                                                     shader->state_vars_size / 4, 0);
      nir_variable *ubo = nir_variable_create(nir, nir_var_mem_ubo, type,
                                              "d3d12_state_vars");
      if (binding >= nir->info.num_ubos)
         nir->info.num_ubos = binding + 1;
      ubo->data.binding = binding;
      ubo->num_state_slots = 1;
      ubo->state_slots = ralloc_array(ubo, nir_state_slot, 1);
      memcpy(ubo->state_slots[0].tokens, tokens,
             sizeof(ubo->state_slots[0].tokens));

      struct glsl_struct_field field = {
         .type = type,
         .name = "data",
         .location = -1,
      };
      ubo->interface_type =
         glsl_interface_type(&field, 1, GLSL_INTERFACE_PACKING_STD430,
                             false, "__d3d12_state_vars_interface");
   }

   return progress;
}

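/* Add Missing Dual Source Target:
 *
 * With dual-source blending, if the fragment shader writes only one of the
 * two color sources, create the missing output and write zero to it so both
 * sources are always present.
 */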
void
d3d12_add_missing_dual_src_target(struct nir_shader *s,
                                  unsigned missing_mask)
{
   assert(missing_mask != 0);
   nir_builder b;
   nir_function_impl *impl = nir_shader_get_entrypoint(s);
   b = nir_builder_at(nir_before_impl(impl));

   nir_def *zero = nir_imm_zero(&b, 4, 32);
   for (unsigned i = 0; i < 2; ++i) {

      if (!(missing_mask & (1u << i)))
         continue;

      const char *name = i == 0 ? "gl_FragData[0]" :
                                  "gl_SecondaryFragDataEXT[0]";
      nir_variable *out = nir_variable_create(s, nir_var_shader_out,
                                              glsl_vec4_type(), name);
      out->data.location = FRAG_RESULT_DATA0;
      out->data.driver_location = i;
      out->data.index = i;

      nir_store_var(&b, out, zero, 0xf);
   }
   nir_metadata_preserve(impl, nir_metadata_control_flow);
}

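/* Lower Primitive ID:
 *
 * Adds a flat VARYING_SLOT_PRIMITIVE_ID output to the geometry shader and
 * stores the primitive-ID system value to it before every EmitVertex(), so
 * later stages can read it as an ordinary varying.
 */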
void
d3d12_lower_primitive_id(nir_shader *shader)
{
   nir_builder b;
   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
   nir_def *primitive_id;
   b = nir_builder_create(impl);

   nir_variable *primitive_id_var = nir_variable_create(shader, nir_var_shader_out,
                                                        glsl_uint_type(), "primitive_id");
   primitive_id_var->data.location = VARYING_SLOT_PRIMITIVE_ID;
   primitive_id_var->data.interpolation = INTERP_MODE_FLAT;

   nir_foreach_block(block, impl) {
      b.cursor = nir_before_block(block);
      primitive_id = nir_load_primitive_id(&b);

      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic ||
             nir_instr_as_intrinsic(instr)->intrinsic != nir_intrinsic_emit_vertex)
            continue;

         b.cursor = nir_before_instr(instr);
         nir_store_var(&b, primitive_id_var, primitive_id, 0x1);
      }
   }

   nir_metadata_preserve(impl, nir_metadata_none);
}

static void
lower_triangle_strip_store(nir_builder *b, nir_intrinsic_instr *intr,
                           nir_variable *vertex_count_var,
                           struct hash_table *varyings)
{
   /**
    * tmp_varying[slot][min(vertex_count, 2)] = src
    */
   nir_def *vertex_count = nir_load_var(b, vertex_count_var);
   nir_def *index = nir_imin(b, vertex_count, nir_imm_int(b, 2));
   nir_variable *var = nir_intrinsic_get_var(intr, 0);

   if (var->data.mode != nir_var_shader_out)
      return;

   nir_deref_instr *deref = nir_build_deref_array(b, nir_build_deref_var(b, _mesa_hash_table_search(varyings, var)->data), index);
   nir_def *value = intr->src[1].ssa;
   nir_store_deref(b, deref, value, 0xf);
   nir_instr_remove(&intr->instr);
}

static void
lower_triangle_strip_emit_vertex(nir_builder *b, nir_intrinsic_instr *intr,
                                 nir_variable *vertex_count_var,
                                 struct hash_table *varyings)
{
   // TODO xfb + flat shading + last_pv
   /**
    * if (vertex_count >= 2) {
    *    for (i = 0; i < 3; i++) {
    *       foreach(slot)
    *          out[slot] = tmp_varying[slot][i];
    *       EmitVertex();
    *    }
    *    EndPrimitive();
    *    foreach(slot)
    *       tmp_varying[slot][vertex_count % 2] = tmp_varying[slot][2];
    * }
    * vertex_count++;
    */

   nir_def *two = nir_imm_int(b, 2);
   nir_def *vertex_count = nir_load_var(b, vertex_count_var);
   nir_def *count_cmp = nir_uge(b, vertex_count, two);
   nir_if *count_check = nir_push_if(b, count_cmp);

   for (int j = 0; j < 3; ++j) {
      nir_foreach_shader_out_variable(var, b->shader) {
         nir_copy_deref(b, nir_build_deref_var(b, var),
                        nir_build_deref_array_imm(b, nir_build_deref_var(b, _mesa_hash_table_search(varyings, var)->data), j));
      }
      nir_emit_vertex(b, 0);
   }

   nir_foreach_shader_out_variable(var, b->shader) {
      nir_variable *varying = _mesa_hash_table_search(varyings, var)->data;
      nir_copy_deref(b, nir_build_deref_array(b, nir_build_deref_var(b, varying), nir_umod(b, vertex_count, two)),
                     nir_build_deref_array(b, nir_build_deref_var(b, varying), two));
   }

   nir_end_primitive(b, .stream_id = 0);

   nir_pop_if(b, count_check);

   vertex_count = nir_iadd_imm(b, vertex_count, 1);
   nir_store_var(b, vertex_count_var, vertex_count, 0x1);

   nir_instr_remove(&intr->instr);
}

static void
lower_triangle_strip_end_primitive(nir_builder *b, nir_intrinsic_instr *intr,
                                   nir_variable *vertex_count_var)
{
   /**
    * vertex_count = 0;
    */
   nir_store_var(b, vertex_count_var, nir_imm_int(b, 0), 0x1);
   nir_instr_remove(&intr->instr);
}

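/* Lower Triangle Strip:
 *
 * Turns a triangle-strip geometry shader into one that emits independent
 * triangles: per-vertex outputs are buffered into 3-element local arrays,
 * and each EmitVertex() after the first two replays all three buffered
 * vertices followed by EndPrimitive(). Hence vertices_out grows from n
 * strip vertices to (n - 2) * 3 list vertices.
 */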
void
d3d12_lower_triangle_strip(nir_shader *shader)
{
   nir_builder b;
   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
   struct hash_table *tmp_vars = _mesa_pointer_hash_table_create(NULL);
   b = nir_builder_create(impl);

   shader->info.gs.vertices_out = (shader->info.gs.vertices_out - 2) * 3;

   nir_variable *vertex_count_var =
      nir_local_variable_create(impl, glsl_uint_type(), "vertex_count");

   nir_block *first = nir_start_block(impl);
   b.cursor = nir_before_block(first);
   nir_foreach_variable_with_modes(var, shader, nir_var_shader_out) {
      const struct glsl_type *type = glsl_array_type(var->type, 3, 0);
      _mesa_hash_table_insert(tmp_vars, var, nir_local_variable_create(impl, type, "tmp_var"));
   }
   nir_store_var(&b, vertex_count_var, nir_imm_int(&b, 0), 1);

   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         switch (intrin->intrinsic) {
         case nir_intrinsic_store_deref:
            b.cursor = nir_before_instr(instr);
            lower_triangle_strip_store(&b, intrin, vertex_count_var, tmp_vars);
            break;
         case nir_intrinsic_emit_vertex_with_counter:
         case nir_intrinsic_emit_vertex:
            b.cursor = nir_before_instr(instr);
            lower_triangle_strip_emit_vertex(&b, intrin, vertex_count_var, tmp_vars);
            break;
         case nir_intrinsic_end_primitive:
         case nir_intrinsic_end_primitive_with_counter:
            b.cursor = nir_before_instr(instr);
            lower_triangle_strip_end_primitive(&b, intrin, vertex_count_var);
            break;
         default:
            break;
         }
      }
   }

   _mesa_hash_table_destroy(tmp_vars, NULL);
   nir_metadata_preserve(impl, nir_metadata_none);
   NIR_PASS_V(shader, nir_lower_var_copies);
}

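/* Disable Multisampling:
 *
 * Strips per-sample state from a fragment shader: gl_SampleMask writes are
 * dropped, load_sample_id becomes 0, and load_sample_mask_in is collapsed
 * to a 0/1 coverage value.
 */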
static bool
is_multisampling_instr(const nir_instr *instr, const void *_data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic == nir_intrinsic_store_output) {
      nir_io_semantics semantics = nir_intrinsic_io_semantics(intr);
      return semantics.location == FRAG_RESULT_SAMPLE_MASK;
   } else if (intr->intrinsic == nir_intrinsic_store_deref) {
      nir_variable *var = nir_intrinsic_get_var(intr, 0);
      return var->data.location == FRAG_RESULT_SAMPLE_MASK;
   } else if (intr->intrinsic == nir_intrinsic_load_sample_id ||
              intr->intrinsic == nir_intrinsic_load_sample_mask_in)
      return true;
   return false;
}

static nir_def *
lower_multisampling_instr(nir_builder *b, nir_instr *instr, void *_data)
{
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   switch (intr->intrinsic) {
   case nir_intrinsic_store_output:
   case nir_intrinsic_store_deref:
      return NIR_LOWER_INSTR_PROGRESS_REPLACE;
   case nir_intrinsic_load_sample_id:
      return nir_imm_int(b, 0);
   case nir_intrinsic_load_sample_mask_in:
      return nir_b2i32(b, nir_ine_imm(b, &intr->def, 0));
   default:
      unreachable("Invalid intrinsic");
   }
}

bool
d3d12_disable_multisampling(nir_shader *s)
{
   if (s->info.stage != MESA_SHADER_FRAGMENT)
      return false;
   bool progress = nir_shader_lower_instructions(s, is_multisampling_instr, lower_multisampling_instr, NULL);

   nir_foreach_variable_with_modes_safe(var, s, nir_var_shader_out) {
      if (var->data.location == FRAG_RESULT_SAMPLE_MASK) {
         exec_node_remove(&var->node);
         s->info.outputs_written &= ~(1ull << FRAG_RESULT_SAMPLE_MASK);
         progress = true;
      }
   }
   nir_foreach_variable_with_modes_safe(var, s, nir_var_system_value) {
      if (var->data.location == SYSTEM_VALUE_SAMPLE_MASK_IN ||
          var->data.location == SYSTEM_VALUE_SAMPLE_ID) {
         exec_node_remove(&var->node);
         progress = true;
      }
      var->data.sample = false;
   }
   BITSET_CLEAR(s->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID);
   s->info.fs.uses_sample_qualifier = false;
   s->info.fs.uses_sample_shading = false;
   return progress;
}

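/* Split Needed Varyings:
 *
 * Splits a varying into several variables in two cases: a packed GS output
 * whose components belong to different streams becomes one variable per
 * stream, and a 64-bit varying with 3 or 4 components is split into a
 * two-component part plus the remainder in the next location, since a
 * single location only holds two 64-bit components.
 */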
struct var_split_subvar_state {
   nir_variable *var;
   uint8_t stream;
   uint8_t num_components;
};
struct var_split_var_state {
   unsigned num_subvars;
   struct var_split_subvar_state subvars[4];
};
struct var_split_state {
   struct var_split_var_state vars[2][VARYING_SLOT_MAX];
};

static bool
split_varying_accesses(nir_builder *b, nir_intrinsic_instr *intr,
                       void *_state)
{
   if (intr->intrinsic != nir_intrinsic_store_deref &&
       intr->intrinsic != nir_intrinsic_load_deref)
      return false;

   nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
   if (!nir_deref_mode_is(deref, nir_var_shader_out) &&
       !nir_deref_mode_is(deref, nir_var_shader_in))
      return false;

   nir_variable *var = nir_deref_instr_get_variable(deref);
   if (!var)
      return false;

   uint32_t mode_index = deref->modes == nir_var_shader_out ? 0 : 1;

   struct var_split_state *state = _state;
   struct var_split_var_state *var_state = &state->vars[mode_index][var->data.location];
   if (var_state->num_subvars <= 1)
      return false;

   nir_deref_path path;
   nir_deref_path_init(&path, deref, b->shader);
   assert(path.path[0]->deref_type == nir_deref_type_var && path.path[0]->var == var);

   unsigned first_channel = 0;
   nir_def *loads[2];
   for (unsigned subvar = 0; subvar < var_state->num_subvars; ++subvar) {
      b->cursor = nir_after_instr(&path.path[0]->instr);
      nir_deref_instr *new_path = nir_build_deref_var(b, var_state->subvars[subvar].var);

      for (unsigned i = 1; path.path[i]; ++i) {
         b->cursor = nir_after_instr(&path.path[i]->instr);
         new_path = nir_build_deref_follower(b, new_path, path.path[i]);
      }

      b->cursor = nir_before_instr(&intr->instr);
      if (intr->intrinsic == nir_intrinsic_store_deref) {
         unsigned mask_num_channels = (1 << var_state->subvars[subvar].num_components) - 1;
         unsigned orig_write_mask = nir_intrinsic_write_mask(intr);
         nir_def *sub_value = nir_channels(b, intr->src[1].ssa, mask_num_channels << first_channel);

         unsigned new_write_mask = (orig_write_mask >> first_channel) & mask_num_channels;
         nir_build_store_deref(b, &new_path->def, sub_value, new_write_mask, nir_intrinsic_access(intr));

         first_channel += var_state->subvars[subvar].num_components;
      } else {
         /* The load path only handles splitting dvec3/dvec4 */
         assert(subvar == 0 || subvar == 1);
         assert(intr->def.num_components >= 3);
         loads[subvar] = nir_build_load_deref(b, var_state->subvars[subvar].num_components, intr->def.bit_size, &new_path->def);
      }
   }

   nir_deref_path_finish(&path);
   if (intr->intrinsic == nir_intrinsic_load_deref) {
      nir_def *result = nir_extract_bits(b, loads, 2, 0, intr->def.num_components, intr->def.bit_size);
      nir_def_rewrite_uses(&intr->def, result);
   }
   nir_instr_free_and_dce(&intr->instr);
   return true;
}

bool
d3d12_split_needed_varyings(nir_shader *s)
{
   struct var_split_state state;
   memset(&state, 0, sizeof(state));

   bool progress = false;
   nir_foreach_variable_with_modes_safe(var, s, nir_var_shader_out | nir_var_shader_in) {
      uint32_t mode_index = var->data.mode == nir_var_shader_out ? 0 : 1;
      struct var_split_var_state *var_state = &state.vars[mode_index][var->data.location];
      struct var_split_subvar_state *subvars = var_state->subvars;
      if ((var->data.stream & NIR_STREAM_PACKED) != 0 &&
          s->info.stage == MESA_SHADER_GEOMETRY &&
          var->data.mode == nir_var_shader_out) {
         for (unsigned i = 0; i < glsl_get_vector_elements(var->type); ++i) {
            unsigned stream = (var->data.stream >> (2 * (i + var->data.location_frac))) & 0x3;
            if (var_state->num_subvars == 0 || stream != subvars[var_state->num_subvars - 1].stream) {
               subvars[var_state->num_subvars].stream = stream;
               subvars[var_state->num_subvars].num_components = 1;
               var_state->num_subvars++;
            } else {
               subvars[var_state->num_subvars - 1].num_components++;
            }
         }

         var->data.stream = subvars[0].stream;
         if (var_state->num_subvars == 1)
            continue;

         progress = true;

         subvars[0].var = var;
         var->type = glsl_vector_type(glsl_get_base_type(var->type), subvars[0].num_components);
         unsigned location_frac = var->data.location_frac + subvars[0].num_components;
         for (unsigned subvar = 1; subvar < var_state->num_subvars; ++subvar) {
            char *name = ralloc_asprintf(s, "unpacked:%s_stream%d", var->name, subvars[subvar].stream);
            nir_variable *new_var = nir_variable_create(s, nir_var_shader_out,
                                                        glsl_vector_type(glsl_get_base_type(var->type), subvars[subvar].num_components),
                                                        name);

            new_var->data = var->data;
            new_var->data.stream = subvars[subvar].stream;
            new_var->data.location_frac = location_frac;
            location_frac += subvars[subvar].num_components;
            subvars[subvar].var = new_var;
         }
      } else if (glsl_type_is_64bit(glsl_without_array(var->type)) &&
                 glsl_get_components(glsl_without_array(var->type)) >= 3) {
         progress = true;
         assert(var->data.location_frac == 0);
         uint32_t components = glsl_get_components(glsl_without_array(var->type));
         var_state->num_subvars = 2;
         subvars[0].var = var;
         subvars[0].num_components = 2;
         subvars[0].stream = var->data.stream;
         const struct glsl_type *base_type = glsl_without_array(var->type);
         var->type = glsl_type_wrap_in_arrays(glsl_vector_type(glsl_get_base_type(base_type), 2), var->type);

         subvars[1].var = nir_variable_clone(var, s);
         subvars[1].num_components = components - 2;
         subvars[1].stream = var->data.stream;
         exec_node_insert_after(&var->node, &subvars[1].var->node);
         subvars[1].var->type = glsl_type_wrap_in_arrays(glsl_vector_type(glsl_get_base_type(base_type), components - 2), var->type);
         subvars[1].var->data.location++;
         subvars[1].var->data.driver_location++;
      }
   }

   if (progress) {
      nir_shader_intrinsics_pass(s, split_varying_accesses,
                                 nir_metadata_control_flow,
                                 &state);
   } else {
      nir_shader_preserve_all_metadata(s);
   }

   return progress;
}

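/* Write 0 to New Varying:
 *
 * Recursively stores zero to every leaf of a newly added varying. In a
 * geometry shader the store is emitted before every EmitVertex(); in other
 * stages a single store at the start of the entry block is enough.
 */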
static void
write_0(nir_builder *b, nir_deref_instr *deref)
{
   if (glsl_type_is_array_or_matrix(deref->type)) {
      for (unsigned i = 0; i < glsl_get_length(deref->type); ++i)
         write_0(b, nir_build_deref_array_imm(b, deref, i));
   } else if (glsl_type_is_struct(deref->type)) {
      for (unsigned i = 0; i < glsl_get_length(deref->type); ++i)
         write_0(b, nir_build_deref_struct(b, deref, i));
   } else {
      nir_def *scalar = nir_imm_intN_t(b, 0, glsl_get_bit_size(deref->type));
      nir_def *scalar_arr[NIR_MAX_VEC_COMPONENTS];
      unsigned num_comps = glsl_get_components(deref->type);
      unsigned writemask = (1 << num_comps) - 1;
      for (unsigned i = 0; i < num_comps; ++i)
         scalar_arr[i] = scalar;
      nir_def *zero_val = nir_vec(b, scalar_arr, num_comps);
      nir_store_deref(b, deref, zero_val, writemask);
   }
}

void
d3d12_write_0_to_new_varying(nir_shader *s, nir_variable *var)
{
   /* Skip per-vertex HS outputs */
   if (s->info.stage == MESA_SHADER_TESS_CTRL && !var->data.patch)
      return;

   nir_foreach_function_impl(impl, s) {
      nir_builder b = nir_builder_create(impl);

      nir_foreach_block(block, impl) {
         b.cursor = nir_before_block(block);
         if (s->info.stage != MESA_SHADER_GEOMETRY) {
            write_0(&b, nir_build_deref_var(&b, var));
            break;
         }

         nir_foreach_instr_safe(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;
            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
            if (intr->intrinsic != nir_intrinsic_emit_vertex)
               continue;

            b.cursor = nir_before_instr(instr);
            write_0(&b, nir_build_deref_var(&b, var));
         }
      }

      nir_metadata_preserve(impl, nir_metadata_control_flow);
   }
}