/*
 * Copyright © Microsoft Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "d3d12_nir_passes.h"
#include "d3d12_compiler.h"
#include "nir_builder.h"
#include "nir_builtin_builder.h"
#include "nir_deref.h"
#include "nir_format_convert.h"
#include "program/prog_instruction.h"
#include "dxil_nir.h"

/**
 * Lower Y Flip:
 *
 * We can't do a Y flip simply by negating the viewport height,
 * so we need to lower the flip into the NIR shader.
 */

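/*
 * Returns a load of the internal driver state variable identified by
 * var_enum. The hidden uniform is created on first use (keyed by the
 * STATE_INTERNAL_DRIVER tokens) and cached in *out_var; loads of these
 * uniforms are later rewritten into UBO loads by d3d12_lower_state_vars(),
 * and the driver is presumably responsible for uploading the current values
 * into that UBO at draw time.
 */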
nir_def *
d3d12_get_state_var(nir_builder *b,
                    enum d3d12_state_var var_enum,
                    const char *var_name,
                    const struct glsl_type *var_type,
                    nir_variable **out_var)
{
   const gl_state_index16 tokens[STATE_LENGTH] = { STATE_INTERNAL_DRIVER, var_enum };
   if (*out_var == NULL) {
      nir_variable *var = nir_state_variable_create(b->shader, var_type,
                                                    var_name, tokens);
      var->data.how_declared = nir_var_hidden;
      *out_var = var;
   }
   return nir_load_var(b, *out_var);
}

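/*
 * Rewrites every store to gl_Position so that the Y component is multiplied
 * by the d3d12_FlipY state variable. In effect (illustrative GLSL, not the
 * literal NIR), a write such as
 *
 *    gl_Position = p;
 *
 * becomes
 *
 *    gl_Position = vec4(p.x, p.y * d3d12_FlipY, p.z, p.w);
 */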
static void
lower_pos_write(nir_builder *b, struct nir_instr *instr, nir_variable **flip)
{
   if (instr->type != nir_instr_type_intrinsic)
      return;

   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic != nir_intrinsic_store_deref)
      return;

   nir_variable *var = nir_intrinsic_get_var(intr, 0);
   if (var->data.mode != nir_var_shader_out ||
       var->data.location != VARYING_SLOT_POS)
      return;

   b->cursor = nir_before_instr(&intr->instr);

   nir_def *pos = intr->src[1].ssa;
   nir_def *flip_y = d3d12_get_state_var(b, D3D12_STATE_VAR_Y_FLIP, "d3d12_FlipY",
                                             glsl_float_type(), flip);
   nir_def *def = nir_vec4(b,
                               nir_channel(b, pos, 0),
                               nir_fmul(b, nir_channel(b, pos, 1), flip_y),
                               nir_channel(b, pos, 2),
                               nir_channel(b, pos, 3));
   nir_src_rewrite(intr->src + 1, def);
}

void
d3d12_lower_yflip(nir_shader *nir)
{
   nir_variable *flip = NULL;

   if (nir->info.stage != MESA_SHADER_VERTEX &&
       nir->info.stage != MESA_SHADER_TESS_EVAL &&
       nir->info.stage != MESA_SHADER_GEOMETRY)
      return;

   nir_foreach_function_impl(impl, nir) {
      nir_builder b = nir_builder_create(impl);

      nir_foreach_block(block, impl) {
         nir_foreach_instr_safe(instr, block) {
            lower_pos_write(&b, instr, &flip);
         }
      }

      nir_metadata_preserve(impl, nir_metadata_control_flow);
   }
}

static void
lower_pos_read(nir_builder *b, struct nir_instr *instr,
               nir_variable **depth_transform_var)
{
   if (instr->type != nir_instr_type_intrinsic)
      return;

   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic != nir_intrinsic_load_deref)
      return;

   nir_variable *var = nir_intrinsic_get_var(intr, 0);
   if (var->data.mode != nir_var_shader_in ||
       var->data.location != VARYING_SLOT_POS)
      return;

   b->cursor = nir_after_instr(instr);

   nir_def *pos = nir_instr_def(instr);
   nir_def *depth = nir_channel(b, pos, 2);

   assert(depth_transform_var);
   nir_def *depth_transform = d3d12_get_state_var(b, D3D12_STATE_VAR_DEPTH_TRANSFORM,
                                                      "d3d12_DepthTransform",
                                                      glsl_vec_type(2),
                                                      depth_transform_var);
   depth = nir_fmad(b, depth, nir_channel(b, depth_transform, 0),
                              nir_channel(b, depth_transform, 1));

   pos = nir_vector_insert_imm(b, pos, depth, 2);

   nir_def_rewrite_uses_after(&intr->def, pos,
                                  pos->parent_instr);
}

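/*
 * Fragment-shader counterpart of the position lowering above: every read of
 * gl_FragCoord gets its Z channel remapped as
 *
 *    z' = z * d3d12_DepthTransform.x + d3d12_DepthTransform.y
 *
 * with the transform supplied by the driver through the state-variable UBO,
 * presumably to undo the depth-range remapping applied on the way in.
 */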
void
d3d12_lower_depth_range(nir_shader *nir)
{
   assert(nir->info.stage == MESA_SHADER_FRAGMENT);
   nir_variable *depth_transform = NULL;
   nir_foreach_function_impl(impl, nir) {
      nir_builder b = nir_builder_create(impl);

      nir_foreach_block(block, impl) {
         nir_foreach_instr_safe(instr, block) {
            lower_pos_read(&b, instr, &depth_transform);
         }
      }

      nir_metadata_preserve(impl, nir_metadata_control_flow);
   }
}

struct compute_state_vars {
   nir_variable *num_workgroups;
};

static bool
lower_compute_state_vars(nir_builder *b, nir_instr *instr, void *_state)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;

   b->cursor = nir_after_instr(instr);
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   struct compute_state_vars *vars = _state;
   nir_def *result = NULL;
   switch (intr->intrinsic) {
   case nir_intrinsic_load_num_workgroups:
      result = d3d12_get_state_var(b, D3D12_STATE_VAR_NUM_WORKGROUPS, "d3d12_NumWorkgroups",
         glsl_vec_type(3), &vars->num_workgroups);
      break;
   default:
      return false;
   }

   nir_def_rewrite_uses(&intr->def, result);
   nir_instr_remove(instr);
   return true;
}

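/*
 * Replaces load_num_workgroups with a read of the d3d12_NumWorkgroups state
 * variable; the dispatch dimensions are expected to be provided by the
 * driver through the state-variable UBO rather than by a system value.
 */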
bool
d3d12_lower_compute_state_vars(nir_shader *nir)
{
   assert(nir->info.stage == MESA_SHADER_COMPUTE);
   struct compute_state_vars vars = { 0 };
   return nir_shader_instructions_pass(nir, lower_compute_state_vars,
      nir_metadata_control_flow, &vars);
}

static bool
is_color_output(nir_variable *var)
{
   return (var->data.mode == nir_var_shader_out &&
           (var->data.location == FRAG_RESULT_COLOR ||
            var->data.location >= FRAG_RESULT_DATA0));
}

static void
lower_uint_color_write(nir_builder *b, struct nir_instr *instr, bool is_signed)
{
   const unsigned NUM_BITS = 8;
   const unsigned bits[4] = { NUM_BITS, NUM_BITS, NUM_BITS, NUM_BITS };

   if (instr->type != nir_instr_type_intrinsic)
      return;

   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic != nir_intrinsic_store_deref)
      return;

   nir_variable *var = nir_intrinsic_get_var(intr, 0);
   if (!is_color_output(var))
      return;

   b->cursor = nir_before_instr(&intr->instr);

   nir_def *col = intr->src[1].ssa;
   nir_def *def = is_signed ? nir_format_float_to_snorm(b, col, bits) :
                                  nir_format_float_to_unorm(b, col, bits);
   if (is_signed)
      def = nir_bcsel(b, nir_ilt_imm(b, def, 0),
                      nir_iadd_imm(b, def, 1ull << NUM_BITS),
                      def);
   nir_src_rewrite(intr->src + 1, def);
}

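/*
 * Converts float color writes into 8-bit unorm/snorm encodings; for the
 * signed case, negative values are wrapped by adding 2^8 so the result fits
 * an unsigned 8-bit pattern. This is presumably used when a float-typed
 * color output has to be written through a uint render-target cast.
 */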
void
d3d12_lower_uint_cast(nir_shader *nir, bool is_signed)
{
   if (nir->info.stage != MESA_SHADER_FRAGMENT)
      return;

   nir_foreach_function_impl(impl, nir) {
      nir_builder b = nir_builder_create(impl);

      nir_foreach_block(block, impl) {
         nir_foreach_instr_safe(instr, block) {
            lower_uint_color_write(&b, instr, is_signed);
         }
      }

      nir_metadata_preserve(impl, nir_metadata_control_flow);
   }
}

static bool
lower_load_draw_params(nir_builder *b, nir_intrinsic_instr *intr,
                       void *draw_params)
{
   if (intr->intrinsic != nir_intrinsic_load_first_vertex &&
       intr->intrinsic != nir_intrinsic_load_base_instance &&
       intr->intrinsic != nir_intrinsic_load_draw_id &&
       intr->intrinsic != nir_intrinsic_load_is_indexed_draw)
      return false;

   b->cursor = nir_before_instr(&intr->instr);

   nir_def *load = d3d12_get_state_var(b, D3D12_STATE_VAR_DRAW_PARAMS, "d3d12_DrawParams",
                                           glsl_uvec4_type(), draw_params);
   unsigned channel = intr->intrinsic == nir_intrinsic_load_first_vertex ? 0 :
      intr->intrinsic == nir_intrinsic_load_base_instance ? 1 :
      intr->intrinsic == nir_intrinsic_load_draw_id ? 2 : 3;
   nir_def_replace(&intr->def, nir_channel(b, load, channel));

   return true;
}

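/*
 * Vertex shaders source their draw parameters from the four channels of the
 * d3d12_DrawParams state variable: x = first_vertex, y = base_instance,
 * z = draw_id, w = is_indexed_draw.
 */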
bool
d3d12_lower_load_draw_params(struct nir_shader *nir)
{
   nir_variable *draw_params = NULL;
   if (nir->info.stage != MESA_SHADER_VERTEX)
      return false;

   return nir_shader_intrinsics_pass(nir, lower_load_draw_params,
                                     nir_metadata_control_flow,
                                     &draw_params);
}

static bool
lower_load_patch_vertices_in(nir_builder *b, nir_intrinsic_instr *intr,
                             void *_state)
{
   if (intr->intrinsic != nir_intrinsic_load_patch_vertices_in)
      return false;

   b->cursor = nir_before_instr(&intr->instr);
   nir_def *load = b->shader->info.stage == MESA_SHADER_TESS_CTRL ?
      d3d12_get_state_var(b, D3D12_STATE_VAR_PATCH_VERTICES_IN, "d3d12_FirstVertex", glsl_uint_type(), _state) :
      nir_imm_int(b, b->shader->info.tess.tcs_vertices_out);
   nir_def_replace(&intr->def, load);
   return true;
}

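/*
 * gl_PatchVerticesIn becomes a state-variable read in the TCS, where the
 * input patch size depends on the draw, and a compile-time constant in the
 * TES, where it equals the TCS's declared tcs_vertices_out.
 */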
bool
d3d12_lower_load_patch_vertices_in(struct nir_shader *nir)
{
   nir_variable *var = NULL;

   if (nir->info.stage != MESA_SHADER_TESS_CTRL &&
       nir->info.stage != MESA_SHADER_TESS_EVAL)
      return false;

   return nir_shader_intrinsics_pass(nir, lower_load_patch_vertices_in,
                                     nir_metadata_control_flow,
                                     &var);
}

struct invert_depth_state
{
   unsigned viewport_mask;
   bool clip_halfz;
   nir_def *viewport_index;
   nir_instr *store_pos_instr;
};

static void
invert_depth_impl(nir_builder *b, struct invert_depth_state *state)
{
   assert(state->store_pos_instr);

   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(state->store_pos_instr);
   if (state->viewport_index) {
      /* Cursor is assigned before calling. Make sure that storing pos comes
       * after computing the viewport.
       */
      nir_instr_move(b->cursor, &intr->instr);
   }

   b->cursor = nir_before_instr(&intr->instr);

   nir_def *pos = intr->src[1].ssa;

   if (state->viewport_index) {
      nir_push_if(b, nir_test_mask(b, nir_ishl(b, nir_imm_int(b, 1), state->viewport_index), state->viewport_mask));
   }
   nir_def *old_depth = nir_channel(b, pos, 2);
   nir_def *new_depth = nir_fneg(b, old_depth);
   if (state->clip_halfz)
      new_depth = nir_fadd_imm(b, new_depth, 1.0);
   nir_def *def = nir_vec4(b,
                               nir_channel(b, pos, 0),
                               nir_channel(b, pos, 1),
                               new_depth,
                               nir_channel(b, pos, 3));
   if (state->viewport_index) {
      nir_pop_if(b, NULL);
      def = nir_if_phi(b, def, pos);
   }
   nir_src_rewrite(intr->src + 1, def);

   state->viewport_index = NULL;
   state->store_pos_instr = NULL;
}

static void
invert_depth_instr(nir_builder *b, struct nir_instr *instr, struct invert_depth_state *state)
{
   if (instr->type != nir_instr_type_intrinsic)
      return;

   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic == nir_intrinsic_store_deref) {
      nir_variable *var = nir_intrinsic_get_var(intr, 0);
      if (var->data.mode != nir_var_shader_out)
         return;

      if (var->data.location == VARYING_SLOT_VIEWPORT)
         state->viewport_index = intr->src[1].ssa;
      if (var->data.location == VARYING_SLOT_POS)
         state->store_pos_instr = instr;
   } else if (intr->intrinsic == nir_intrinsic_emit_vertex) {
      b->cursor = nir_before_instr(instr);
      invert_depth_impl(b, state);
   }
}

/* In OpenGL the window-space depth value z_w is evaluated as "s * z_d + b",
 * with "s = (far - near) / 2" for the minus_one_to_one depth-clip mode
 * [OpenGL 3.3, 2.13.1]. When we swap the far and near values to satisfy
 * DirectX requirements, we have to compensate by inverting "z_d' = -z_d"
 * with this lowering pass. When the depth-clip mode is zero_to_one, we
 * compensate with "z_d' = 1.0f - z_d" instead.
 */
void
d3d12_nir_invert_depth(nir_shader *shader, unsigned viewport_mask, bool clip_halfz)
{
   if (shader->info.stage != MESA_SHADER_VERTEX &&
       shader->info.stage != MESA_SHADER_TESS_EVAL &&
       shader->info.stage != MESA_SHADER_GEOMETRY)
      return;

   struct invert_depth_state state = { viewport_mask, clip_halfz };
   nir_foreach_function_impl(impl, shader) {
      nir_builder b = nir_builder_create(impl);

      nir_foreach_block(block, impl) {
         nir_foreach_instr_safe(instr, block) {
            invert_depth_instr(&b, instr, &state);
         }
      }

      if (state.store_pos_instr) {
         b.cursor = nir_after_block(impl->end_block);
         invert_depth_impl(&b, &state);
      }

      nir_metadata_preserve(impl, nir_metadata_control_flow);
   }
}


/**
 * Lower State Vars:
 *
 * All uniforms related to internal D3D12 variables are
 * condensed into a UBO that is appended at the end of the
 * current ones.
 */
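/*
 * Each state variable gets a fixed vec4-sized slot in that UBO, assigned in
 * order of first use (see get_state_var_offset() below). Illustratively, a
 * shader that uses the Y-flip and depth-transform variables would end up
 * reading:
 *
 *    d3d12_state_vars[0]   // D3D12_STATE_VAR_Y_FLIP
 *    d3d12_state_vars[1]   // D3D12_STATE_VAR_DEPTH_TRANSFORM
 */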

static unsigned
get_state_var_offset(struct d3d12_shader *shader, enum d3d12_state_var var)
{
   for (unsigned i = 0; i < shader->num_state_vars; ++i) {
      if (shader->state_vars[i].var == var)
         return shader->state_vars[i].offset;
   }

   unsigned offset = shader->state_vars_size;
   shader->state_vars[shader->num_state_vars].offset = offset;
   shader->state_vars[shader->num_state_vars].var = var;
   shader->state_vars_size += 4; /* Use 4-word slots regardless of the variable size */
   shader->num_state_vars++;

   return offset;
}

static bool
lower_instr(nir_intrinsic_instr *instr, nir_builder *b,
            struct d3d12_shader *shader, unsigned binding)
{
   nir_variable *variable = NULL;
   nir_deref_instr *deref = NULL;

   b->cursor = nir_before_instr(&instr->instr);

   if (instr->intrinsic == nir_intrinsic_load_uniform) {
      nir_foreach_variable_with_modes(var, b->shader, nir_var_uniform) {
         if (var->data.driver_location == nir_intrinsic_base(instr)) {
            variable = var;
            break;
         }
      }
   } else if (instr->intrinsic == nir_intrinsic_load_deref) {
      deref = nir_src_as_deref(instr->src[0]);
      variable = nir_intrinsic_get_var(instr, 0);
   }

   if (variable == NULL ||
       variable->num_state_slots != 1 ||
       variable->state_slots[0].tokens[0] != STATE_INTERNAL_DRIVER)
      return false;

   enum d3d12_state_var var = variable->state_slots[0].tokens[1];
   nir_def *ubo_idx = nir_imm_int(b, binding);
   nir_def *ubo_offset = nir_imm_int(b, get_state_var_offset(shader, var) * 4);
   nir_def *load =
      nir_load_ubo(b, instr->num_components, instr->def.bit_size,
                   ubo_idx, ubo_offset,
                   .align_mul = 16,
                   .align_offset = 0,
                   .range_base = 0,
                   .range = ~0,
                   );

   nir_def_replace(&instr->def, load);
   for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d)) {
      /* If anyone is using this deref, leave it alone */
      if (!list_is_empty(&d->def.uses))
         break;

      nir_instr_remove(&d->instr);
   }

   return true;
}

bool
d3d12_lower_state_vars(nir_shader *nir, struct d3d12_shader *shader)
{
   bool progress = false;

   /* The state-var UBO is added after all the other UBOs; if it already
    * exists it will be replaced by reusing the same binding.
    * In the event there are no other UBOs, use binding slot 1 to
    * be consistent with other non-default UBOs. */
   unsigned binding = MAX2(nir->info.num_ubos, nir->info.first_ubo_is_default_ubo ? 1 : 0);

   nir_foreach_variable_with_modes_safe(var, nir, nir_var_uniform) {
      if (var->num_state_slots == 1 &&
          var->state_slots[0].tokens[0] == STATE_INTERNAL_DRIVER) {
         if (var->data.mode == nir_var_mem_ubo) {
            binding = var->data.binding;
         }
      }
   }

   nir_foreach_function_impl(impl, nir) {
      nir_builder builder = nir_builder_create(impl);
      nir_foreach_block(block, impl) {
         nir_foreach_instr_safe(instr, block) {
            if (instr->type == nir_instr_type_intrinsic)
               progress |= lower_instr(nir_instr_as_intrinsic(instr),
                                       &builder,
                                       shader,
                                       binding);
         }
      }

      nir_metadata_preserve(impl, nir_metadata_control_flow);
   }

   if (progress) {
      assert(shader->num_state_vars > 0);

      shader->state_vars_used = true;

      /* Remove state variables */
      nir_foreach_variable_with_modes_safe(var, nir, nir_var_uniform) {
         if (var->num_state_slots == 1 &&
             var->state_slots[0].tokens[0] == STATE_INTERNAL_DRIVER) {
            exec_node_remove(&var->node);
            nir->num_uniforms--;
         }
      }

      const gl_state_index16 tokens[STATE_LENGTH] = { STATE_INTERNAL_DRIVER };
      const struct glsl_type *type = glsl_array_type(glsl_vec4_type(),
                                                     shader->state_vars_size / 4, 0);
      nir_variable *ubo = nir_variable_create(nir, nir_var_mem_ubo, type,
                                                  "d3d12_state_vars");
      if (binding >= nir->info.num_ubos)
         nir->info.num_ubos = binding + 1;
      ubo->data.binding = binding;
      ubo->num_state_slots = 1;
      ubo->state_slots = ralloc_array(ubo, nir_state_slot, 1);
      memcpy(ubo->state_slots[0].tokens, tokens,
              sizeof(ubo->state_slots[0].tokens));

      struct glsl_struct_field field = {
          .type = type,
          .name = "data",
          .location = -1,
      };
      ubo->interface_type =
              glsl_interface_type(&field, 1, GLSL_INTERFACE_PACKING_STD430,
                                  false, "__d3d12_state_vars_interface");
   }

   return progress;
}

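/*
 * Dual-source blending needs both color outputs to be present. When one of
 * them is missing (missing_mask), create the corresponding gl_FragData[0] /
 * gl_SecondaryFragDataEXT[0] variable and store zero to it at the top of the
 * entrypoint, presumably so the translated shader always exposes a complete
 * pair of dual-source outputs.
 */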
void
d3d12_add_missing_dual_src_target(struct nir_shader *s,
                                  unsigned missing_mask)
{
   assert(missing_mask != 0);
   nir_builder b;
   nir_function_impl *impl = nir_shader_get_entrypoint(s);
   b = nir_builder_at(nir_before_impl(impl));

   nir_def *zero = nir_imm_zero(&b, 4, 32);
   for (unsigned i = 0; i < 2; ++i) {

      if (!(missing_mask & (1u << i)))
         continue;

      const char *name = i == 0 ? "gl_FragData[0]" :
                                  "gl_SecondaryFragDataEXT[0]";
      nir_variable *out = nir_variable_create(s, nir_var_shader_out,
                                              glsl_vec4_type(), name);
      out->data.location = FRAG_RESULT_DATA0;
      out->data.driver_location = i;
      out->data.index = i;

      nir_store_var(&b, out, zero, 0xf);
   }
   nir_metadata_preserve(impl, nir_metadata_control_flow);
}

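/*
 * Geometry-shader pass that forwards gl_PrimitiveID to a dedicated flat
 * output variable: the ID is loaded at the top of each block and stored to
 * the new varying right before every EmitVertex(), so a downstream stage can
 * consume it as a regular input.
 */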
void
d3d12_lower_primitive_id(nir_shader *shader)
{
   nir_builder b;
   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
   nir_def *primitive_id;
   b = nir_builder_create(impl);

   nir_variable *primitive_id_var = nir_variable_create(shader, nir_var_shader_out,
                                                        glsl_uint_type(), "primitive_id");
   primitive_id_var->data.location = VARYING_SLOT_PRIMITIVE_ID;
   primitive_id_var->data.interpolation = INTERP_MODE_FLAT;

   nir_foreach_block(block, impl) {
      b.cursor = nir_before_block(block);
      primitive_id = nir_load_primitive_id(&b);

      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic ||
             nir_instr_as_intrinsic(instr)->intrinsic != nir_intrinsic_emit_vertex)
            continue;

         b.cursor = nir_before_instr(instr);
         nir_store_var(&b, primitive_id_var, primitive_id, 0x1);
      }
   }

   nir_metadata_preserve(impl, nir_metadata_none);
}

static void
lower_triangle_strip_store(nir_builder *b, nir_intrinsic_instr *intr,
                           nir_variable *vertex_count_var,
                           struct hash_table *varyings)
{
   /**
    * tmp_varying[slot][min(vertex_count, 2)] = src
    */
   nir_def *vertex_count = nir_load_var(b, vertex_count_var);
   nir_def *index = nir_imin(b, vertex_count, nir_imm_int(b, 2));
   nir_variable *var = nir_intrinsic_get_var(intr, 0);

   if (var->data.mode != nir_var_shader_out)
      return;

   nir_deref_instr *deref = nir_build_deref_array(b, nir_build_deref_var(b, _mesa_hash_table_search(varyings, var)->data), index);
   nir_def *value = intr->src[1].ssa;
   nir_store_deref(b, deref, value, 0xf);
   nir_instr_remove(&intr->instr);
}

static void
lower_triangle_strip_emit_vertex(nir_builder *b, nir_intrinsic_instr *intr,
                                 nir_variable *vertex_count_var,
                                 struct hash_table *varyings)
{
   // TODO xfb + flat shading + last_pv
   /**
    * if (vertex_count >= 2) {
    *    for (i = 0; i < 3; i++) {
    *       foreach(slot)
    *          out[slot] = tmp_varying[slot][i];
    *       EmitVertex();
    *    }
    *    EndPrimitive();
    *    foreach(slot)
    *       tmp_varying[slot][vertex_count % 2] = tmp_varying[slot][2];
    * }
    * vertex_count++;
    */

   nir_def *two = nir_imm_int(b, 2);
   nir_def *vertex_count = nir_load_var(b, vertex_count_var);
   nir_def *count_cmp = nir_uge(b, vertex_count, two);
   nir_if *count_check = nir_push_if(b, count_cmp);

   for (int j = 0; j < 3; ++j) {
      nir_foreach_shader_out_variable(var, b->shader) {
         nir_copy_deref(b, nir_build_deref_var(b, var),
                        nir_build_deref_array_imm(b, nir_build_deref_var(b, _mesa_hash_table_search(varyings, var)->data), j));
      }
      nir_emit_vertex(b, 0);
   }

   nir_foreach_shader_out_variable(var, b->shader) {
      nir_variable *varying = _mesa_hash_table_search(varyings, var)->data;
      nir_copy_deref(b, nir_build_deref_array(b, nir_build_deref_var(b, varying), nir_umod(b, vertex_count, two)),
                        nir_build_deref_array(b, nir_build_deref_var(b, varying), two));
   }

   nir_end_primitive(b, .stream_id = 0);

   nir_pop_if(b, count_check);

   vertex_count = nir_iadd_imm(b, vertex_count, 1);
   nir_store_var(b, vertex_count_var, vertex_count, 0x1);

   nir_instr_remove(&intr->instr);
}

static void
lower_triangle_strip_end_primitive(nir_builder *b, nir_intrinsic_instr *intr,
                                   nir_variable *vertex_count_var)
{
   /**
    * vertex_count = 0;
    */
   nir_store_var(b, vertex_count_var, nir_imm_int(b, 0), 0x1);
   nir_instr_remove(&intr->instr);
}

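/*
 * Rewrites a triangle-strip geometry shader to emit independent triangles,
 * following the pseudo code in the helpers above: outputs are buffered in a
 * three-entry history per varying, every vertex past the second re-emits a
 * full triangle, and EndPrimitive() resets the vertex counter. The declared
 * vertices_out is scaled to (n - 2) * 3 to match the expanded output.
 */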
void
d3d12_lower_triangle_strip(nir_shader *shader)
{
   nir_builder b;
   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
   struct hash_table *tmp_vars = _mesa_pointer_hash_table_create(NULL);
   b = nir_builder_create(impl);

   shader->info.gs.vertices_out = (shader->info.gs.vertices_out - 2) * 3;

   nir_variable *vertex_count_var =
      nir_local_variable_create(impl, glsl_uint_type(), "vertex_count");

   nir_block *first = nir_start_block(impl);
   b.cursor = nir_before_block(first);
   nir_foreach_variable_with_modes(var, shader, nir_var_shader_out) {
      const struct glsl_type *type = glsl_array_type(var->type, 3, 0);
      _mesa_hash_table_insert(tmp_vars, var, nir_local_variable_create(impl, type, "tmp_var"));
   }
   nir_store_var(&b, vertex_count_var, nir_imm_int(&b, 0), 1);

   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         switch (intrin->intrinsic) {
         case nir_intrinsic_store_deref:
            b.cursor = nir_before_instr(instr);
            lower_triangle_strip_store(&b, intrin, vertex_count_var, tmp_vars);
            break;
         case nir_intrinsic_emit_vertex_with_counter:
         case nir_intrinsic_emit_vertex:
            b.cursor = nir_before_instr(instr);
            lower_triangle_strip_emit_vertex(&b, intrin, vertex_count_var, tmp_vars);
            break;
         case nir_intrinsic_end_primitive:
         case nir_intrinsic_end_primitive_with_counter:
            b.cursor = nir_before_instr(instr);
            lower_triangle_strip_end_primitive(&b, intrin, vertex_count_var);
            break;
         default:
            break;
         }
      }
   }

   _mesa_hash_table_destroy(tmp_vars, NULL);
   nir_metadata_preserve(impl, nir_metadata_none);
   NIR_PASS_V(shader, nir_lower_var_copies);
}

static bool
is_multisampling_instr(const nir_instr *instr, const void *_data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic == nir_intrinsic_store_output) {
      nir_io_semantics semantics = nir_intrinsic_io_semantics(intr);
      return semantics.location == FRAG_RESULT_SAMPLE_MASK;
   } else if (intr->intrinsic == nir_intrinsic_store_deref) {
      nir_variable *var = nir_intrinsic_get_var(intr, 0);
      return var->data.location == FRAG_RESULT_SAMPLE_MASK;
   } else if (intr->intrinsic == nir_intrinsic_load_sample_id ||
              intr->intrinsic == nir_intrinsic_load_sample_mask_in)
      return true;
   return false;
}

static nir_def *
lower_multisampling_instr(nir_builder *b, nir_instr *instr, void *_data)
{
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   switch (intr->intrinsic) {
   case nir_intrinsic_store_output:
   case nir_intrinsic_store_deref:
      return NIR_LOWER_INSTR_PROGRESS_REPLACE;
   case nir_intrinsic_load_sample_id:
      return nir_imm_int(b, 0);
   case nir_intrinsic_load_sample_mask_in:
      return nir_b2i32(b, nir_ine_imm(b, &intr->def, 0));
   default:
      unreachable("Invalid intrinsic");
   }
}

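/*
 * Strips per-sample state from a fragment shader: sample-mask stores are
 * removed, gl_SampleID becomes 0, gl_SampleMaskIn is clamped to 0/1, and the
 * corresponding variables and shader-info bits are cleared. Presumably used
 * when the shader is recompiled for single-sample rendering.
 */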
bool
d3d12_disable_multisampling(nir_shader *s)
{
   if (s->info.stage != MESA_SHADER_FRAGMENT)
      return false;
   bool progress = nir_shader_lower_instructions(s, is_multisampling_instr, lower_multisampling_instr, NULL);

   nir_foreach_variable_with_modes_safe(var, s, nir_var_shader_out) {
      if (var->data.location == FRAG_RESULT_SAMPLE_MASK) {
         exec_node_remove(&var->node);
         s->info.outputs_written &= ~(1ull << FRAG_RESULT_SAMPLE_MASK);
         progress = true;
      }
   }
   nir_foreach_variable_with_modes_safe(var, s, nir_var_system_value) {
      if (var->data.location == SYSTEM_VALUE_SAMPLE_MASK_IN ||
          var->data.location == SYSTEM_VALUE_SAMPLE_ID) {
         exec_node_remove(&var->node);
         progress = true;
      }
      var->data.sample = false;
   }
   BITSET_CLEAR(s->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID);
   s->info.fs.uses_sample_qualifier = false;
   s->info.fs.uses_sample_shading = false;
   return progress;
}

struct var_split_subvar_state {
   nir_variable *var;
   uint8_t stream;
   uint8_t num_components;
};
struct var_split_var_state {
   unsigned num_subvars;
   struct var_split_subvar_state subvars[4];
};
struct var_split_state {
   struct var_split_var_state vars[2][VARYING_SLOT_MAX];
};

static bool
split_varying_accesses(nir_builder *b, nir_intrinsic_instr *intr,
                                 void *_state)
{
   if (intr->intrinsic != nir_intrinsic_store_deref &&
       intr->intrinsic != nir_intrinsic_load_deref)
      return false;

   nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
   if (!nir_deref_mode_is(deref, nir_var_shader_out) &&
       !nir_deref_mode_is(deref, nir_var_shader_in))
      return false;

   nir_variable *var = nir_deref_instr_get_variable(deref);
   if (!var)
      return false;

   uint32_t mode_index = deref->modes == nir_var_shader_out ? 0 : 1;

   struct var_split_state *state = _state;
   struct var_split_var_state *var_state = &state->vars[mode_index][var->data.location];
   if (var_state->num_subvars <= 1)
      return false;

   nir_deref_path path;
   nir_deref_path_init(&path, deref, b->shader);
   assert(path.path[0]->deref_type == nir_deref_type_var && path.path[0]->var == var);

   unsigned first_channel = 0;
   nir_def *loads[2];
   for (unsigned subvar = 0; subvar < var_state->num_subvars; ++subvar) {
      b->cursor = nir_after_instr(&path.path[0]->instr);
      nir_deref_instr *new_path = nir_build_deref_var(b, var_state->subvars[subvar].var);

      for (unsigned i = 1; path.path[i]; ++i) {
         b->cursor = nir_after_instr(&path.path[i]->instr);
         new_path = nir_build_deref_follower(b, new_path, path.path[i]);
      }

      b->cursor = nir_before_instr(&intr->instr);
      if (intr->intrinsic == nir_intrinsic_store_deref) {
         unsigned mask_num_channels = (1 << var_state->subvars[subvar].num_components) - 1;
         unsigned orig_write_mask = nir_intrinsic_write_mask(intr);
         nir_def *sub_value = nir_channels(b, intr->src[1].ssa, mask_num_channels << first_channel);

         unsigned new_write_mask = (orig_write_mask >> first_channel) & mask_num_channels;
         nir_build_store_deref(b, &new_path->def, sub_value, new_write_mask, nir_intrinsic_access(intr));

         first_channel += var_state->subvars[subvar].num_components;
      } else {
         /* The load path only handles splitting dvec3/dvec4 */
         assert(subvar == 0 || subvar == 1);
         assert(intr->def.num_components >= 3);
         loads[subvar] = nir_build_load_deref(b, var_state->subvars[subvar].num_components, intr->def.bit_size, &new_path->def);
      }
   }

   nir_deref_path_finish(&path);
   if (intr->intrinsic == nir_intrinsic_load_deref) {
      nir_def *result = nir_extract_bits(b, loads, 2, 0, intr->def.num_components, intr->def.bit_size);
      nir_def_rewrite_uses(&intr->def, result);
   }
   nir_instr_free_and_dce(&intr->instr);
   return true;
}

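/*
 * Splits varyings that need more than one signature element: geometry-shader
 * outputs whose components belong to different streams (the packed
 * NIR_STREAM_PACKED encoding) become one variable per stream, and 64-bit
 * varyings with three or four components are split into a two-component part
 * plus the remainder in the next location. split_varying_accesses() above
 * then rewrites the affected load/store derefs.
 */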
bool
d3d12_split_needed_varyings(nir_shader *s)
{
   struct var_split_state state;
   memset(&state, 0, sizeof(state));

   bool progress = false;
   nir_foreach_variable_with_modes_safe(var, s, nir_var_shader_out | nir_var_shader_in) {
      uint32_t mode_index = var->data.mode == nir_var_shader_out ? 0 : 1;
      struct var_split_var_state *var_state = &state.vars[mode_index][var->data.location];
      struct var_split_subvar_state *subvars = var_state->subvars;
      if ((var->data.stream & NIR_STREAM_PACKED) != 0 &&
          s->info.stage == MESA_SHADER_GEOMETRY &&
          var->data.mode == nir_var_shader_out) {
         for (unsigned i = 0; i < glsl_get_vector_elements(var->type); ++i) {
            unsigned stream = (var->data.stream >> (2 * (i + var->data.location_frac))) & 0x3;
            if (var_state->num_subvars == 0 || stream != subvars[var_state->num_subvars - 1].stream) {
               subvars[var_state->num_subvars].stream = stream;
               subvars[var_state->num_subvars].num_components = 1;
               var_state->num_subvars++;
            } else {
               subvars[var_state->num_subvars - 1].num_components++;
            }
         }

         var->data.stream = subvars[0].stream;
         if (var_state->num_subvars == 1)
            continue;

         progress = true;

         subvars[0].var = var;
         var->type = glsl_vector_type(glsl_get_base_type(var->type), subvars[0].num_components);
         unsigned location_frac = var->data.location_frac + subvars[0].num_components;
         for (unsigned subvar = 1; subvar < var_state->num_subvars; ++subvar) {
            char *name = ralloc_asprintf(s, "unpacked:%s_stream%d", var->name, subvars[subvar].stream);
            nir_variable *new_var = nir_variable_create(s, nir_var_shader_out,
                                                        glsl_vector_type(glsl_get_base_type(var->type), subvars[subvar].num_components),
                                                        name);

            new_var->data = var->data;
            new_var->data.stream = subvars[subvar].stream;
            new_var->data.location_frac = location_frac;
            location_frac += subvars[subvar].num_components;
            subvars[subvar].var = new_var;
         }
      } else if (glsl_type_is_64bit(glsl_without_array(var->type)) &&
                 glsl_get_components(glsl_without_array(var->type)) >= 3) {
         progress = true;
         assert(var->data.location_frac == 0);
         uint32_t components = glsl_get_components(glsl_without_array(var->type));
         var_state->num_subvars = 2;
         subvars[0].var = var;
         subvars[0].num_components = 2;
         subvars[0].stream = var->data.stream;
         const struct glsl_type *base_type = glsl_without_array(var->type);
         var->type = glsl_type_wrap_in_arrays(glsl_vector_type(glsl_get_base_type(base_type), 2), var->type);

         subvars[1].var = nir_variable_clone(var, s);
         subvars[1].num_components = components - 2;
         subvars[1].stream = var->data.stream;
         exec_node_insert_after(&var->node, &subvars[1].var->node);
         subvars[1].var->type = glsl_type_wrap_in_arrays(glsl_vector_type(glsl_get_base_type(base_type), components - 2), var->type);
         subvars[1].var->data.location++;
         subvars[1].var->data.driver_location++;
      }
   }

   if (progress) {
      nir_shader_intrinsics_pass(s, split_varying_accesses,
                                 nir_metadata_control_flow,
                                 &state);
   } else {
      nir_shader_preserve_all_metadata(s);
   }

   return progress;
}

static void
write_0(nir_builder *b, nir_deref_instr *deref)
{
   if (glsl_type_is_array_or_matrix(deref->type)) {
      for (unsigned i = 0; i < glsl_get_length(deref->type); ++i)
         write_0(b, nir_build_deref_array_imm(b, deref, i));
   } else if (glsl_type_is_struct(deref->type)) {
      for (unsigned i = 0; i < glsl_get_length(deref->type); ++i)
         write_0(b, nir_build_deref_struct(b, deref, i));
   } else {
      nir_def *scalar = nir_imm_intN_t(b, 0, glsl_get_bit_size(deref->type));
      nir_def *scalar_arr[NIR_MAX_VEC_COMPONENTS];
      unsigned num_comps = glsl_get_components(deref->type);
      unsigned writemask = (1 << num_comps) - 1;
      for (unsigned i = 0; i < num_comps; ++i)
         scalar_arr[i] = scalar;
      nir_def *zero_val = nir_vec(b, scalar_arr, num_comps);
      nir_store_deref(b, deref, zero_val, writemask);
   }
}

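/*
 * Stores zeros to a freshly added varying (recursing through arrays,
 * matrices and structs via write_0() above): once at the top of each
 * function implementation for most stages, or before every EmitVertex() in a
 * geometry shader. Per-vertex HS outputs are skipped.
 */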
void
d3d12_write_0_to_new_varying(nir_shader *s, nir_variable *var)
{
   /* Skip per-vertex HS outputs */
   if (s->info.stage == MESA_SHADER_TESS_CTRL && !var->data.patch)
      return;

   nir_foreach_function_impl(impl, s) {
      nir_builder b = nir_builder_create(impl);

      nir_foreach_block(block, impl) {
         b.cursor = nir_before_block(block);
         if (s->info.stage != MESA_SHADER_GEOMETRY) {
            write_0(&b, nir_build_deref_var(&b, var));
            break;
         }

         nir_foreach_instr_safe(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;
            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
            if (intr->intrinsic != nir_intrinsic_emit_vertex)
               continue;

            b.cursor = nir_before_instr(instr);
            write_0(&b, nir_build_deref_var(&b, var));
         }
      }

      nir_metadata_preserve(impl, nir_metadata_control_flow);
   }
}
1036