xref: /aosp_15_r20/external/mesa3d/src/microsoft/vulkan/dzn_nir.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © Microsoft Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "dzn_nir.h"
25 
26 #include "spirv_to_dxil.h"
27 #include "nir_to_dxil.h"
28 #include "nir_builder.h"
29 #include "nir_builtin_builder.h"
30 #include "dxil_nir.h"
31 #include "vk_nir_convert_ycbcr.h"
32 
33 static nir_def *
dzn_nir_create_bo_desc(nir_builder * b,nir_variable_mode mode,uint32_t desc_set,uint32_t binding,const char * name,unsigned access)34 dzn_nir_create_bo_desc(nir_builder *b,
35                        nir_variable_mode mode,
36                        uint32_t desc_set,
37                        uint32_t binding,
38                        const char *name,
39                        unsigned access)
40 {
41    struct glsl_struct_field field = {
42       .type = mode == nir_var_mem_ubo ?
43               glsl_array_type(glsl_uint_type(), 4096, 4) :
44               glsl_uint_type(),
45       .name = "dummy_int",
46    };
47    const struct glsl_type *dummy_type =
48       glsl_struct_type(&field, 1, "dummy_type", false);
49 
50    nir_variable *var =
51       nir_variable_create(b->shader, mode, dummy_type, name);
52    var->data.descriptor_set = desc_set;
53    var->data.binding = binding;
54    var->data.access = access;
55 
56    assert(mode == nir_var_mem_ubo || mode == nir_var_mem_ssbo);
57    if (mode == nir_var_mem_ubo)
58       b->shader->info.num_ubos++;
59    else
60       b->shader->info.num_ssbos++;
61 
62    VkDescriptorType desc_type =
63       var->data.mode == nir_var_mem_ubo ?
64       VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER :
65       VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
66    nir_address_format addr_format = nir_address_format_32bit_index_offset;
67    nir_def *index =
68       nir_vulkan_resource_index(b,
69                                 nir_address_format_num_components(addr_format),
70                                 nir_address_format_bit_size(addr_format),
71                                 nir_imm_int(b, 0),
72                                 .desc_set = desc_set,
73                                 .binding = binding,
74                                 .desc_type = desc_type);
75 
76    nir_def *desc =
77       nir_load_vulkan_descriptor(b,
78                                  nir_address_format_num_components(addr_format),
79                                  nir_address_format_bit_size(addr_format),
80                                  index,
81                                  .desc_type = desc_type);
82 
83    return nir_channel(b, desc, 0);
84 }
85 
86 nir_shader *
dzn_nir_indirect_draw_shader(struct dzn_indirect_draw_type type)87 dzn_nir_indirect_draw_shader(struct dzn_indirect_draw_type type)
88 {
89    nir_builder b =
90       nir_builder_init_simple_shader(MESA_SHADER_COMPUTE,
91                                      dxil_get_base_nir_compiler_options(),
92                                      "dzn_meta_indirect_%sdraw%s%s%s()",
93                                      type.indexed ? "indexed_" : "",
94                                      type.indirect_count ? "_count" : "",
95                                      type.triangle_fan ? "_triangle_fan" : "",
96                                      type.triangle_fan_primitive_restart ? "_primitive_restart" : "");
97    b.shader->info.internal = true;
98 
99    nir_def *params_desc =
100       dzn_nir_create_bo_desc(&b, nir_var_mem_ubo, 0, 0, "params", 0);
101    nir_def *draw_buf_desc =
102       dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 1, "draw_buf", ACCESS_NON_WRITEABLE);
103    nir_def *exec_buf_desc =
104       dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 2, "exec_buf", ACCESS_NON_READABLE);
105 
106    unsigned params_size = 0;
107    if (type.triangle_fan)
108       params_size = sizeof(struct dzn_indirect_draw_triangle_fan_rewrite_params);
109    else
110       params_size = sizeof(struct dzn_indirect_draw_rewrite_params);
111 
112    nir_def *params =
113       nir_load_ubo(&b, params_size / 4, 32,
114                    params_desc, nir_imm_int(&b, 0),
115                    .align_mul = 4, .align_offset = 0, .range_base = 0, .range = ~0);
116 
117    uint32_t exec_stride_imm = 0;
118    uint32_t draw_args_offset = 0;
119    if (type.triangle_fan)
120       exec_stride_imm += sizeof(D3D12_INDEX_BUFFER_VIEW);
121    if (type.draw_params)
122       exec_stride_imm += sizeof(uint32_t) * 2;
123    if (type.draw_id)
124       exec_stride_imm += sizeof(uint32_t);
125    draw_args_offset = exec_stride_imm;
126    exec_stride_imm += (type.indexed || type.triangle_fan) ?
127       sizeof(D3D12_DRAW_INDEXED_ARGUMENTS) : sizeof(D3D12_DRAW_ARGUMENTS);
128 
129    nir_def *draw_stride = nir_channel(&b, params, 0);
130    nir_def *exec_stride = nir_imm_int(&b, exec_stride_imm);
131    nir_def *index =
132       nir_channel(&b, nir_load_global_invocation_id(&b, 32), 0);
133 
134    if (type.indirect_count) {
135       nir_def *count_buf_desc =
136          dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 3, "count_buf", ACCESS_NON_WRITEABLE);
137 
138       nir_def *draw_count =
139          nir_load_ssbo(&b, 1, 32, count_buf_desc, nir_imm_int(&b, 0), .align_mul = 4);
140 
141       nir_push_if(&b, nir_ieq_imm(&b, index, 0));
142       nir_store_ssbo(&b, draw_count, exec_buf_desc, nir_imm_int(&b, 0),
143                     .write_mask = 0x1, .access = ACCESS_NON_READABLE,
144                     .align_mul = 16);
145       nir_pop_if(&b, NULL);
146 
147       nir_push_if(&b, nir_ult(&b, index, draw_count));
148    }
149 
150    nir_def *draw_offset = nir_imul(&b, draw_stride, index);
151 
152    /* The first entry contains the indirect count */
153    nir_def *exec_offset =
154       type.indirect_count ?
155       nir_imul(&b, exec_stride, nir_iadd_imm(&b, index, 1)) :
156       nir_imul(&b, exec_stride, index);
157 
158    nir_def *draw_info1 =
159       nir_load_ssbo(&b, 4, 32, draw_buf_desc, draw_offset, .align_mul = 4);
160    nir_def *draw_info2 =
161       type.indexed ?
162       nir_load_ssbo(&b, 1, 32, draw_buf_desc,
163                     nir_iadd_imm(&b, draw_offset, 16), .align_mul = 4) :
164       nir_imm_int(&b, 0);
165 
166    nir_def *first_vertex = nir_channel(&b, draw_info1, type.indexed ? 3 : 2);
167    nir_def *base_instance =
168       type.indexed ? draw_info2 : nir_channel(&b, draw_info1, 3);
169 
170    uint32_t exec_val_idx = 0;
171    nir_def *exec_vals[8] = { NULL };
172    if (type.draw_params) {
173       exec_vals[exec_val_idx++] = first_vertex;
174       exec_vals[exec_val_idx++] = base_instance;
175    }
176    if (type.draw_id)
177       exec_vals[exec_val_idx++] = index;
178 
179    if (type.triangle_fan) {
180       /* Patch {vertex,index}_count and first_index */
181       nir_def *triangle_count =
182          nir_usub_sat(&b, nir_channel(&b, draw_info1, 0), nir_imm_int(&b, 2));
183       exec_vals[exec_val_idx++] = nir_imul_imm(&b, triangle_count, 3);
184       exec_vals[exec_val_idx++] = nir_channel(&b, draw_info1, 1);
185       exec_vals[exec_val_idx++] = nir_imm_int(&b, 0);
186       exec_vals[exec_val_idx++] = first_vertex;
187       exec_vals[exec_val_idx++] = base_instance;
188 
189       nir_def *triangle_fan_exec_buf_desc =
190          dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 4,
191                                 "triangle_fan_exec_buf",
192                                 ACCESS_NON_READABLE);
193       nir_def *triangle_fan_index_buf_stride = nir_channel(&b, params, 1);
194       nir_def *triangle_fan_index_buf_addr_lo =
195          nir_iadd(&b, nir_channel(&b, params, 2),
196                   nir_imul(&b, triangle_fan_index_buf_stride, index));
197 
198       nir_def *triangle_fan_exec_vals[9] = { 0 };
199       uint32_t triangle_fan_exec_param_count = 0;
200       nir_def *addr_lo_overflow =
201          nir_ult(&b, triangle_fan_index_buf_addr_lo, nir_channel(&b, params, 2));
202       nir_def *triangle_fan_index_buf_addr_hi =
203          nir_iadd(&b, nir_channel(&b, params, 3),
204                   nir_bcsel(&b, addr_lo_overflow, nir_imm_int(&b, 1), nir_imm_int(&b, 0)));
205 
206       triangle_fan_exec_vals[triangle_fan_exec_param_count++] = triangle_fan_index_buf_addr_lo;
207       triangle_fan_exec_vals[triangle_fan_exec_param_count++] = triangle_fan_index_buf_addr_hi;
208 
209       if (type.triangle_fan_primitive_restart) {
210          triangle_fan_exec_vals[triangle_fan_exec_param_count++] = nir_channel(&b, draw_info1, 2);
211          triangle_fan_exec_vals[triangle_fan_exec_param_count++] = nir_channel(&b, draw_info1, 0);
212          uint32_t index_count_offset = draw_args_offset +
213             offsetof(D3D12_DRAW_INDEXED_ARGUMENTS, IndexCountPerInstance);
214          nir_def *exec_buf_start =
215             nir_load_ubo(&b, 2, 32,
216                          params_desc, nir_imm_int(&b, 16),
217                          .align_mul = 4, .align_offset = 0, .range_base = 0, .range = ~0);
218          nir_def *exec_buf_start_lo =
219             nir_iadd(&b, nir_imm_int(&b, index_count_offset),
220                      nir_iadd(&b, nir_channel(&b, exec_buf_start, 0),
221                               nir_imul(&b, exec_stride, index)));
222          addr_lo_overflow = nir_ult(&b, exec_buf_start_lo, nir_channel(&b, exec_buf_start, 0));
223          nir_def *exec_buf_start_hi =
224             nir_iadd(&b, nir_channel(&b, exec_buf_start, 0),
225                      nir_bcsel(&b, addr_lo_overflow, nir_imm_int(&b, 1), nir_imm_int(&b, 0)));
226          triangle_fan_exec_vals[triangle_fan_exec_param_count++] = exec_buf_start_lo;
227          triangle_fan_exec_vals[triangle_fan_exec_param_count++] = exec_buf_start_hi;
228          triangle_fan_exec_vals[triangle_fan_exec_param_count++] = nir_imm_int(&b, 1);
229       } else {
230          triangle_fan_exec_vals[triangle_fan_exec_param_count++] =
231             type.indexed ? nir_channel(&b, draw_info1, 2) : nir_imm_int(&b, 0);
232          triangle_fan_exec_vals[triangle_fan_exec_param_count++] =
233             triangle_count;
234       }
235       triangle_fan_exec_vals[triangle_fan_exec_param_count++] = nir_imm_int(&b, 1);
236       triangle_fan_exec_vals[triangle_fan_exec_param_count++] = nir_imm_int(&b, 1);
237 
238       unsigned rewrite_index_exec_params =
239          type.triangle_fan_primitive_restart ?
240          sizeof(struct dzn_indirect_triangle_fan_prim_restart_rewrite_index_exec_params) :
241          sizeof(struct dzn_indirect_triangle_fan_rewrite_index_exec_params);
242       nir_def *triangle_fan_exec_stride =
243          nir_imm_int(&b, rewrite_index_exec_params);
244       nir_def *triangle_fan_exec_offset =
245          nir_imul(&b, triangle_fan_exec_stride, index);
246 
247       for (uint32_t i = 0; i < triangle_fan_exec_param_count; i += 4) {
248          unsigned comps = MIN2(triangle_fan_exec_param_count - i, 4);
249          uint32_t mask = (1 << comps) - 1;
250 
251          nir_store_ssbo(&b, nir_vec(&b, &triangle_fan_exec_vals[i], comps),
252                         triangle_fan_exec_buf_desc,
253                         nir_iadd_imm(&b, triangle_fan_exec_offset, i * 4),
254                         .write_mask = mask, .access = ACCESS_NON_READABLE, .align_mul = 4);
255       }
256 
257       nir_def *ibview_vals[] = {
258          triangle_fan_index_buf_addr_lo,
259          triangle_fan_index_buf_addr_hi,
260          triangle_fan_index_buf_stride,
261          nir_imm_int(&b, DXGI_FORMAT_R32_UINT),
262       };
263 
264       nir_store_ssbo(&b, nir_vec(&b, ibview_vals, ARRAY_SIZE(ibview_vals)),
265                      exec_buf_desc, exec_offset,
266                      .write_mask = 0xf, .access = ACCESS_NON_READABLE, .align_mul = 16);
267       exec_offset = nir_iadd_imm(&b, exec_offset, ARRAY_SIZE(ibview_vals) * 4);
268    } else {
269       exec_vals[exec_val_idx++] = nir_channel(&b, draw_info1, 0);
270       exec_vals[exec_val_idx++] = nir_channel(&b, draw_info1, 1);
271       exec_vals[exec_val_idx++] = nir_channel(&b, draw_info1, 2);
272       exec_vals[exec_val_idx++] = nir_channel(&b, draw_info1, 3);
273       if (type.indexed)
274          exec_vals[exec_val_idx++] = draw_info2;
275    }
276 
277    nir_store_ssbo(&b, nir_vec(&b, exec_vals, MIN2(exec_val_idx, 4)),
278                   exec_buf_desc, exec_offset,
279                   .write_mask = ((1 << exec_val_idx) - 1) & 0xf, .access = ACCESS_NON_READABLE, .align_mul = 16);
280    if (exec_val_idx > 4) {
281       nir_store_ssbo(&b, nir_vec(&b, &exec_vals[4], exec_val_idx - 4),
282                      exec_buf_desc, nir_iadd_imm(&b, exec_offset, 16),
283                      .write_mask = ((1 << (exec_val_idx - 4)) - 1) & 0xf, .access = ACCESS_NON_READABLE, .align_mul = 16);
284    }
285 
286    if (type.indirect_count)
287       nir_pop_if(&b, NULL);
288 
289    return b.shader;
290 }
291 
292 nir_shader *
dzn_nir_triangle_fan_prim_restart_rewrite_index_shader(uint8_t old_index_size)293 dzn_nir_triangle_fan_prim_restart_rewrite_index_shader(uint8_t old_index_size)
294 {
295    assert(old_index_size == 2 || old_index_size == 4);
296 
297    nir_builder b =
298       nir_builder_init_simple_shader(MESA_SHADER_COMPUTE,
299                                      dxil_get_base_nir_compiler_options(),
300                                      "dzn_meta_triangle_prim_rewrite_index(old_index_size=%d)",
301                                      old_index_size);
302    b.shader->info.internal = true;
303 
304    nir_def *params_desc =
305       dzn_nir_create_bo_desc(&b, nir_var_mem_ubo, 0, 0, "params", 0);
306    nir_def *new_index_buf_desc =
307       dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 1,
308                              "new_index_buf", ACCESS_NON_READABLE);
309    nir_def *old_index_buf_desc =
310       dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 2,
311                              "old_index_buf", ACCESS_NON_WRITEABLE);
312    nir_def *new_index_count_ptr_desc =
313       dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 3,
314                              "new_index_count_ptr", ACCESS_NON_READABLE);
315 
316    nir_def *params =
317       nir_load_ubo(&b, sizeof(struct dzn_triangle_fan_prim_restart_rewrite_index_params) / 4, 32,
318                    params_desc, nir_imm_int(&b, 0),
319                    .align_mul = 4, .align_offset = 0, .range_base = 0, .range = ~0);
320 
321    nir_def *prim_restart_val =
322       nir_imm_int(&b, old_index_size == 2 ? 0xffff : 0xffffffff);
323    nir_variable *old_index_ptr_var =
324       nir_local_variable_create(b.impl, glsl_uint_type(), "old_index_ptr_var");
325    nir_def *old_index_ptr = nir_channel(&b, params, 0);
326    nir_store_var(&b, old_index_ptr_var, old_index_ptr, 1);
327    nir_variable *new_index_ptr_var =
328       nir_local_variable_create(b.impl, glsl_uint_type(), "new_index_ptr_var");
329    nir_store_var(&b, new_index_ptr_var, nir_imm_int(&b, 0), 1);
330    nir_def *old_index_count = nir_channel(&b, params, 1);
331    nir_variable *index0_var =
332       nir_local_variable_create(b.impl, glsl_uint_type(), "index0_var");
333    nir_store_var(&b, index0_var, prim_restart_val, 1);
334 
335    /*
336     * Filter out all primitive-restart magic values, and generate a triangle list
337     * from the triangle fan definition.
338     *
339     * Basically:
340     *
341     * new_index_ptr = 0;
342     * index0 = restart_prim_value; // 0xffff or 0xffffffff
343     * for (old_index_ptr = firstIndex; old_index_ptr < indexCount;) {
344     *    // If we have no starting-point we need at least 3 vertices,
345     *    // otherwise we can do with two. If there's not enough vertices
346     *    // to form a primitive, we just bail out.
347     *    min_indices = index0 == restart_prim_value ? 3 : 2;
348     *    if (old_index_ptr + min_indices > firstIndex + indexCount)
349     *       break;
350     *
351     *    if (index0 == restart_prim_value) {
352     *       // No starting point, skip all entries until we have a
353     *       // non-primitive-restart value
354     *       index0 = old_index_buf[old_index_ptr++];
355     *       continue;
356     *    }
357     *
358     *    // If at least one index contains the primitive-restart pattern,
359          // ignore this triangle, and skip the unused entries
360     *    if (old_index_buf[old_index_ptr + 1] == restart_prim_value) {
361     *       old_index_ptr += 2;
362     *       continue;
363     *    }
364     *    if (old_index_buf[old_index_ptr] == restart_prim_value) {
365     *       old_index_ptr++;
366     *       continue;
367     *    }
368     *
369     *    // We have a valid primitive, queue it to the new index buffer
370     *    new_index_buf[new_index_ptr++] = old_index_buf[old_index_ptr];
371     *    new_index_buf[new_index_ptr++] = old_index_buf[old_index_ptr + 1];
372     *    new_index_buf[new_index_ptr++] = index0;
373     * }
374     *
375     * expressed in NIR, which admitedly is not super easy to grasp with.
376     * TODO: Might be a good thing to use use the CL compiler we have and turn
377     * those shaders into CL kernels.
378     */
379    nir_push_loop(&b);
380 
381    old_index_ptr = nir_load_var(&b, old_index_ptr_var);
382    nir_def *index0 = nir_load_var(&b, index0_var);
383 
384    nir_def *read_index_count =
385       nir_bcsel(&b, nir_ieq(&b, index0, prim_restart_val),
386                 nir_imm_int(&b, 3), nir_imm_int(&b, 2));
387    nir_push_if(&b, nir_ult(&b, old_index_count, nir_iadd(&b, old_index_ptr, read_index_count)));
388    nir_jump(&b, nir_jump_break);
389    nir_pop_if(&b, NULL);
390 
391    nir_def *old_index_offset =
392       nir_imul_imm(&b, old_index_ptr, old_index_size);
393 
394    nir_push_if(&b, nir_ieq(&b, index0, prim_restart_val));
395    nir_def *index_val =
396       nir_load_ssbo(&b, 1, 32, old_index_buf_desc,
397                     old_index_size == 2 ? nir_iand_imm(&b, old_index_offset, ~3ULL) : old_index_offset,
398                     .align_mul = 4);
399    if (old_index_size == 2) {
400      index_val = nir_bcsel(&b, nir_test_mask(&b, old_index_offset, 0x2),
401                            nir_ushr_imm(&b, index_val, 16),
402                            nir_iand_imm(&b, index_val, 0xffff));
403    }
404 
405    nir_store_var(&b, index0_var, index_val, 1);
406    nir_store_var(&b, old_index_ptr_var, nir_iadd_imm(&b, old_index_ptr, 1), 1);
407    nir_jump(&b, nir_jump_continue);
408    nir_pop_if(&b, NULL);
409 
410    nir_def *index12 =
411       nir_load_ssbo(&b, 2, 32, old_index_buf_desc,
412                     old_index_size == 2 ? nir_iand_imm(&b, old_index_offset, ~3ULL) : old_index_offset,
413                     .align_mul = 4);
414    if (old_index_size == 2) {
415       nir_def *indices[] = {
416          nir_iand_imm(&b, nir_channel(&b, index12, 0), 0xffff),
417          nir_ushr_imm(&b, nir_channel(&b, index12, 0), 16),
418          nir_iand_imm(&b, nir_channel(&b, index12, 1), 0xffff),
419       };
420 
421       index12 = nir_bcsel(&b, nir_test_mask(&b, old_index_offset, 0x2),
422                           nir_vec2(&b, indices[1], indices[2]),
423                           nir_vec2(&b, indices[0], indices[1]));
424    }
425 
426    nir_push_if(&b, nir_ieq(&b, nir_channel(&b, index12, 1), prim_restart_val));
427    nir_store_var(&b, old_index_ptr_var, nir_iadd_imm(&b, old_index_ptr, 2), 1);
428    nir_store_var(&b, index0_var, prim_restart_val, 1);
429    nir_jump(&b, nir_jump_continue);
430    nir_push_else(&b, NULL);
431    nir_store_var(&b, old_index_ptr_var, nir_iadd_imm(&b, old_index_ptr, 1), 1);
432    nir_push_if(&b, nir_ieq(&b, nir_channel(&b, index12, 0), prim_restart_val));
433    nir_store_var(&b, index0_var, prim_restart_val, 1);
434    nir_jump(&b, nir_jump_continue);
435    nir_push_else(&b, NULL);
436    nir_def *new_indices =
437       nir_vec3(&b, nir_channel(&b, index12, 0), nir_channel(&b, index12, 1), index0);
438    nir_def *new_index_ptr = nir_load_var(&b, new_index_ptr_var);
439    nir_def *new_index_offset = nir_imul_imm(&b, new_index_ptr, sizeof(uint32_t));
440    nir_store_ssbo(&b, new_indices, new_index_buf_desc,
441                   new_index_offset,
442                   .write_mask = 7, .access = ACCESS_NON_READABLE, .align_mul = 4);
443    nir_store_var(&b, new_index_ptr_var, nir_iadd_imm(&b, new_index_ptr, 3), 1);
444    nir_pop_if(&b, NULL);
445    nir_pop_if(&b, NULL);
446    nir_pop_loop(&b, NULL);
447 
448    nir_store_ssbo(&b, nir_load_var(&b, new_index_ptr_var),
449                   new_index_count_ptr_desc, nir_imm_int(&b, 0),
450                   .write_mask = 1, .access = ACCESS_NON_READABLE, .align_mul = 4);
451 
452    return b.shader;
453 }
454 
455 nir_shader *
dzn_nir_triangle_fan_rewrite_index_shader(uint8_t old_index_size)456 dzn_nir_triangle_fan_rewrite_index_shader(uint8_t old_index_size)
457 {
458    assert(old_index_size == 0 || old_index_size == 2 || old_index_size == 4);
459 
460    nir_builder b =
461       nir_builder_init_simple_shader(MESA_SHADER_COMPUTE,
462                                      dxil_get_base_nir_compiler_options(),
463                                      "dzn_meta_triangle_rewrite_index(old_index_size=%d)",
464                                      old_index_size);
465    b.shader->info.internal = true;
466 
467    nir_def *params_desc =
468       dzn_nir_create_bo_desc(&b, nir_var_mem_ubo, 0, 0, "params", 0);
469    nir_def *new_index_buf_desc =
470       dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 1,
471                              "new_index_buf", ACCESS_NON_READABLE);
472 
473    nir_def *old_index_buf_desc = NULL;
474    if (old_index_size > 0) {
475       old_index_buf_desc =
476          dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 2,
477                                 "old_index_buf", ACCESS_NON_WRITEABLE);
478    }
479 
480    nir_def *params =
481       nir_load_ubo(&b, sizeof(struct dzn_triangle_fan_rewrite_index_params) / 4, 32,
482                    params_desc, nir_imm_int(&b, 0),
483                    .align_mul = 4, .align_offset = 0, .range_base = 0, .range = ~0);
484 
485    nir_def *triangle = nir_channel(&b, nir_load_global_invocation_id(&b, 32), 0);
486    nir_def *new_indices;
487 
488    if (old_index_size > 0) {
489       nir_def *old_first_index = nir_channel(&b, params, 0);
490       nir_def *old_index0_offset =
491          nir_imul_imm(&b, old_first_index, old_index_size);
492       nir_def *old_index1_offset =
493          nir_imul_imm(&b, nir_iadd(&b, nir_iadd_imm(&b, triangle, 1), old_first_index),
494                       old_index_size);
495 
496       nir_def *old_index0 =
497          nir_load_ssbo(&b, 1, 32, old_index_buf_desc,
498                        old_index_size == 2 ? nir_iand_imm(&b, old_index0_offset, ~3ULL) : old_index0_offset,
499                        .align_mul = 4);
500 
501       if (old_index_size == 2) {
502         old_index0 = nir_bcsel(&b, nir_test_mask(&b, old_index0_offset, 0x2),
503                                nir_ushr_imm(&b, old_index0, 16),
504                                nir_iand_imm(&b, old_index0, 0xffff));
505       }
506 
507       nir_def *old_index12 =
508          nir_load_ssbo(&b, 2, 32, old_index_buf_desc,
509                        old_index_size == 2 ? nir_iand_imm(&b, old_index1_offset, ~3ULL) : old_index1_offset,
510                        .align_mul = 4);
511       if (old_index_size == 2) {
512          nir_def *indices[] = {
513             nir_iand_imm(&b, nir_channel(&b, old_index12, 0), 0xffff),
514             nir_ushr_imm(&b, nir_channel(&b, old_index12, 0), 16),
515             nir_iand_imm(&b, nir_channel(&b, old_index12, 1), 0xffff),
516          };
517 
518          old_index12 = nir_bcsel(&b, nir_test_mask(&b, old_index1_offset, 0x2),
519                                  nir_vec2(&b, indices[1], indices[2]),
520                                  nir_vec2(&b, indices[0], indices[1]));
521       }
522 
523       /* TODO: VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT */
524       new_indices =
525          nir_vec3(&b, nir_channel(&b, old_index12, 0),
526                   nir_channel(&b, old_index12, 1), old_index0);
527    } else {
528       new_indices =
529          nir_vec3(&b,
530                   nir_iadd_imm(&b, triangle, 1),
531                   nir_iadd_imm(&b, triangle, 2),
532                   nir_imm_int(&b, 0));
533    }
534 
535    nir_def *new_index_offset =
536       nir_imul_imm(&b, triangle, 4 * 3);
537 
538    nir_store_ssbo(&b, new_indices, new_index_buf_desc,
539                   new_index_offset,
540                   .write_mask = 7, .access = ACCESS_NON_READABLE, .align_mul = 4);
541 
542    return b.shader;
543 }
544 
545 nir_shader *
dzn_nir_blit_vs(void)546 dzn_nir_blit_vs(void)
547 {
548    nir_builder b =
549       nir_builder_init_simple_shader(MESA_SHADER_VERTEX,
550                                      dxil_get_base_nir_compiler_options(),
551                                      "dzn_meta_blit_vs()");
552    b.shader->info.internal = true;
553 
554    nir_def *params_desc =
555       dzn_nir_create_bo_desc(&b, nir_var_mem_ubo, 0, 0, "params", 0);
556 
557    nir_variable *out_pos =
558       nir_variable_create(b.shader, nir_var_shader_out, glsl_vec4_type(),
559                           "gl_Position");
560    out_pos->data.location = VARYING_SLOT_POS;
561    out_pos->data.driver_location = 0;
562 
563    nir_variable *out_coords =
564       nir_variable_create(b.shader, nir_var_shader_out, glsl_vec_type(3),
565                           "coords");
566    out_coords->data.location = VARYING_SLOT_TEX0;
567    out_coords->data.driver_location = 1;
568 
569    nir_def *vertex = nir_load_vertex_id(&b);
570    nir_def *coords_arr[4] = {
571       nir_load_ubo(&b, 4, 32, params_desc, nir_imm_int(&b, 0),
572                    .align_mul = 16, .align_offset = 0, .range_base = 0, .range = ~0),
573       nir_load_ubo(&b, 4, 32, params_desc, nir_imm_int(&b, 16),
574                    .align_mul = 16, .align_offset = 0, .range_base = 0, .range = ~0),
575       nir_load_ubo(&b, 4, 32, params_desc, nir_imm_int(&b, 32),
576                    .align_mul = 16, .align_offset = 0, .range_base = 0, .range = ~0),
577       nir_load_ubo(&b, 4, 32, params_desc, nir_imm_int(&b, 48),
578                    .align_mul = 16, .align_offset = 0, .range_base = 0, .range = ~0),
579    };
580    nir_def *coords =
581       nir_bcsel(&b, nir_ieq_imm(&b, vertex, 0), coords_arr[0],
582                 nir_bcsel(&b, nir_ieq_imm(&b, vertex, 1), coords_arr[1],
583                           nir_bcsel(&b, nir_ieq_imm(&b, vertex, 2), coords_arr[2], coords_arr[3])));
584    nir_def *pos =
585       nir_vec4(&b, nir_channel(&b, coords, 0), nir_channel(&b, coords, 1),
586                nir_imm_float(&b, 0.0), nir_imm_float(&b, 1.0));
587    nir_def *z_coord =
588       nir_load_ubo(&b, 1, 32, params_desc, nir_imm_int(&b, 4 * 4 * sizeof(float)),
589                    .align_mul = 64, .align_offset = 0, .range_base = 0, .range = ~0);
590    coords = nir_vec3(&b, nir_channel(&b, coords, 2), nir_channel(&b, coords, 3), z_coord);
591 
592    nir_store_var(&b, out_pos, pos, 0xf);
593    nir_store_var(&b, out_coords, coords, 0x7);
594    return b.shader;
595 }
596 
597 nir_shader *
dzn_nir_blit_fs(const struct dzn_nir_blit_info * info)598 dzn_nir_blit_fs(const struct dzn_nir_blit_info *info)
599 {
600    bool ms = info->src_samples > 1;
601    nir_alu_type nir_out_type =
602       nir_get_nir_type_for_glsl_base_type(info->out_type);
603    uint32_t coord_comps =
604       glsl_get_sampler_dim_coordinate_components(info->sampler_dim) +
605       info->src_is_array;
606 
607    nir_builder b =
608       nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT,
609                                      dxil_get_base_nir_compiler_options(),
610                                      "dzn_meta_blit_fs()");
611    b.shader->info.internal = true;
612 
613    const struct glsl_type *tex_type =
614       glsl_texture_type(info->sampler_dim, info->src_is_array, info->out_type);
615    nir_variable *tex_var =
616       nir_variable_create(b.shader, nir_var_uniform, tex_type, "texture");
617    nir_deref_instr *tex_deref = nir_build_deref_var(&b, tex_var);
618 
619    nir_variable *pos_var =
620       nir_variable_create(b.shader, nir_var_shader_in,
621                           glsl_vector_type(GLSL_TYPE_FLOAT, 4),
622                           "gl_FragCoord");
623    pos_var->data.location = VARYING_SLOT_POS;
624    pos_var->data.driver_location = 0;
625 
626    nir_variable *coord_var =
627       nir_variable_create(b.shader, nir_var_shader_in,
628                           glsl_vector_type(GLSL_TYPE_FLOAT, 3),
629                           "coord");
630    coord_var->data.location = VARYING_SLOT_TEX0;
631    coord_var->data.driver_location = 1;
632    nir_def *coord =
633       nir_trim_vector(&b, nir_load_var(&b, coord_var), coord_comps);
634 
635    uint32_t out_comps =
636       (info->loc == FRAG_RESULT_DEPTH || info->loc == FRAG_RESULT_STENCIL) ? 1 : 4;
637    nir_variable *out = NULL;
638    if (!info->stencil_fallback) {
639       out = nir_variable_create(b.shader, nir_var_shader_out,
640                                 glsl_vector_type(info->out_type, out_comps),
641                                 "out");
642       out->data.location = info->loc;
643    }
644 
645    nir_def *res = NULL;
646 
647    if (info->resolve_mode != dzn_blit_resolve_none) {
648       enum dzn_blit_resolve_mode resolve_mode = info->resolve_mode;
649 
650       nir_op resolve_op = nir_op_mov;
651       switch (resolve_mode) {
652       case dzn_blit_resolve_average:
653          /* When resolving a float type, we need to calculate the average of all
654           * samples. For integer resolve, Vulkan says that one sample should be
655           * chosen without telling which. Let's just pick the first one in that
656           * case.
657           */
658          if (info->out_type == GLSL_TYPE_FLOAT)
659             resolve_op = nir_op_fadd;
660          else
661             resolve_mode = dzn_blit_resolve_sample_zero;
662          break;
663       case dzn_blit_resolve_min:
664          switch (info->out_type) {
665          case GLSL_TYPE_FLOAT: resolve_op = nir_op_fmin; break;
666          case GLSL_TYPE_INT: resolve_op = nir_op_imin; break;
667          case GLSL_TYPE_UINT: resolve_op = nir_op_umin; break;
668          }
669          break;
670       case dzn_blit_resolve_max:
671          switch (info->out_type) {
672          case GLSL_TYPE_FLOAT: resolve_op = nir_op_fmax; break;
673          case GLSL_TYPE_INT: resolve_op = nir_op_imax; break;
674          case GLSL_TYPE_UINT: resolve_op = nir_op_umax; break;
675          }
676          break;
677       case dzn_blit_resolve_none:
678       case dzn_blit_resolve_sample_zero:
679          break;
680       }
681 
682       unsigned nsamples = resolve_mode == dzn_blit_resolve_sample_zero ?
683                           1 : info->src_samples;
684       for (unsigned s = 0; s < nsamples; s++) {
685          nir_tex_instr *tex = nir_tex_instr_create(b.shader, 4);
686 
687          tex->op = nir_texop_txf_ms;
688          tex->dest_type = nir_out_type;
689          tex->texture_index = 0;
690          tex->is_array = info->src_is_array;
691          tex->sampler_dim = info->sampler_dim;
692 
693          tex->src[0] = nir_tex_src_for_ssa(nir_tex_src_coord,
694                                            nir_f2i32(&b, coord));
695          tex->coord_components = coord_comps;
696 
697          tex->src[1] = nir_tex_src_for_ssa(nir_tex_src_ms_index,
698                                            nir_imm_int(&b, s));
699 
700          tex->src[2] = nir_tex_src_for_ssa(nir_tex_src_lod,
701                                            nir_imm_int(&b, 0));
702 
703          tex->src[3] = nir_tex_src_for_ssa(nir_tex_src_texture_deref,
704                                            &tex_deref->def);
705 
706          nir_def_init(&tex->instr, &tex->def, 4, 32);
707 
708          nir_builder_instr_insert(&b, &tex->instr);
709          res = res ? nir_build_alu2(&b, resolve_op, res, &tex->def) : &tex->def;
710       }
711 
712       if (resolve_mode == dzn_blit_resolve_average)
713          res = nir_fmul_imm(&b, res, 1.0f / nsamples);
714    } else {
715       nir_tex_instr *tex =
716          nir_tex_instr_create(b.shader, ms ? 4 : 3);
717 
718       tex->dest_type = nir_out_type;
719       tex->is_array = info->src_is_array;
720       tex->sampler_dim = info->sampler_dim;
721 
722       if (ms) {
723          tex->op = nir_texop_txf_ms;
724 
725          tex->src[0] = nir_tex_src_for_ssa(nir_tex_src_coord,
726                                            nir_f2i32(&b, coord));
727          tex->coord_components = coord_comps;
728 
729          tex->src[1] = nir_tex_src_for_ssa(nir_tex_src_ms_index,
730                                            nir_load_sample_id(&b));
731 
732          tex->src[2] = nir_tex_src_for_ssa(nir_tex_src_lod,
733                                            nir_imm_int(&b, 0));
734 
735          tex->src[3] = nir_tex_src_for_ssa(nir_tex_src_texture_deref,
736                                            &tex_deref->def);
737       } else {
738          nir_variable *sampler_var =
739             nir_variable_create(b.shader, nir_var_uniform, glsl_bare_sampler_type(), "sampler");
740          nir_deref_instr *sampler_deref = nir_build_deref_var(&b, sampler_var);
741 
742          tex->op = nir_texop_tex;
743          tex->sampler_index = 0;
744 
745          tex->src[0] = nir_tex_src_for_ssa(nir_tex_src_coord, coord);
746          tex->coord_components = coord_comps;
747 
748          tex->src[1] = nir_tex_src_for_ssa(nir_tex_src_texture_deref,
749                                            &tex_deref->def);
750 
751          tex->src[2] = nir_tex_src_for_ssa(nir_tex_src_sampler_deref,
752                                            &sampler_deref->def);
753       }
754 
755       nir_def_init(&tex->instr, &tex->def, 4, 32);
756       nir_builder_instr_insert(&b, &tex->instr);
757       res = &tex->def;
758    }
759 
760    if (info->stencil_fallback) {
761       nir_def *mask_desc =
762          dzn_nir_create_bo_desc(&b, nir_var_mem_ubo, 0, 0, "mask", 0);
763       nir_def *mask = nir_load_ubo(&b, 1, 32, mask_desc, nir_imm_int(&b, 0),
764          .align_mul = 16, .align_offset = 0, .range_base = 0, .range = ~0);
765       nir_def *fail = nir_ieq_imm(&b, nir_iand(&b, nir_channel(&b, res, 0), mask), 0);
766       nir_discard_if(&b, fail);
767    } else {
768       nir_store_var(&b, out, nir_trim_vector(&b, res, out_comps), 0xf);
769    }
770 
771    return b.shader;
772 }
773 
774 static nir_def *
cull_face(nir_builder * b,nir_variable * vertices,bool ccw)775 cull_face(nir_builder *b, nir_variable *vertices, bool ccw)
776 {
777    nir_def *v0 =
778       nir_load_deref(b, nir_build_deref_array(b, nir_build_deref_var(b, vertices), nir_imm_int(b, 0)));
779    nir_def *v1 =
780       nir_load_deref(b, nir_build_deref_array(b, nir_build_deref_var(b, vertices), nir_imm_int(b, 1)));
781    nir_def *v2 =
782       nir_load_deref(b, nir_build_deref_array(b, nir_build_deref_var(b, vertices), nir_imm_int(b, 2)));
783 
784    nir_def *dir = nir_fdot(b, nir_cross4(b, nir_fsub(b, v1, v0),
785                                                 nir_fsub(b, v2, v0)),
786                                nir_imm_vec4(b, 0.0, 0.0, -1.0, 0.0));
787    if (ccw)
788       return nir_fle_imm(b, dir, 0.0f);
789    else
790       return nir_fgt_imm(b, dir, 0.0f);
791 }
792 
793 static void
copy_vars(nir_builder * b,nir_deref_instr * dst,nir_deref_instr * src)794 copy_vars(nir_builder *b, nir_deref_instr *dst, nir_deref_instr *src)
795 {
796    assert(glsl_get_bare_type(dst->type) == glsl_get_bare_type(src->type));
797    if (glsl_type_is_struct(dst->type)) {
798       for (unsigned i = 0; i < glsl_get_length(dst->type); ++i) {
799          copy_vars(b, nir_build_deref_struct(b, dst, i), nir_build_deref_struct(b, src, i));
800       }
801    } else if (glsl_type_is_array_or_matrix(dst->type)) {
802       copy_vars(b, nir_build_deref_array_wildcard(b, dst), nir_build_deref_array_wildcard(b, src));
803    } else {
804       nir_copy_deref(b, dst, src);
805    }
806 }
807 
808 static nir_def *
load_dynamic_depth_bias(nir_builder * b,struct dzn_nir_point_gs_info * info)809 load_dynamic_depth_bias(nir_builder *b, struct dzn_nir_point_gs_info *info)
810 {
811    nir_address_format ubo_format = nir_address_format_32bit_index_offset;
812    unsigned offset = offsetof(struct dxil_spirv_vertex_runtime_data, depth_bias);
813 
814    nir_def *index = nir_vulkan_resource_index(
815       b, nir_address_format_num_components(ubo_format),
816       nir_address_format_bit_size(ubo_format),
817       nir_imm_int(b, 0),
818       .desc_set = info->runtime_data_cbv.register_space,
819       .binding = info->runtime_data_cbv.base_shader_register,
820       .desc_type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER);
821 
822    nir_def *load_desc = nir_load_vulkan_descriptor(
823       b, nir_address_format_num_components(ubo_format),
824       nir_address_format_bit_size(ubo_format),
825       index, .desc_type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER);
826 
827    return nir_load_ubo(
828       b, 1, 32,
829       nir_channel(b, load_desc, 0),
830       nir_imm_int(b, offset),
831       .align_mul = 256,
832       .align_offset = offset);
833 }
834 
835 nir_shader *
dzn_nir_polygon_point_mode_gs(const nir_shader * previous_shader,struct dzn_nir_point_gs_info * info)836 dzn_nir_polygon_point_mode_gs(const nir_shader *previous_shader, struct dzn_nir_point_gs_info *info)
837 {
838    nir_builder builder;
839    nir_builder *b = &builder;
840    nir_variable *pos_var = NULL;
841 
842    unsigned num_vars = 0;
843    nir_variable *in[VARYING_SLOT_MAX];
844    nir_variable *out[VARYING_SLOT_MAX];
845 
846 
847    builder = nir_builder_init_simple_shader(MESA_SHADER_GEOMETRY,
848                                             dxil_get_base_nir_compiler_options(),
849                                             "implicit_gs");
850 
851    nir_shader *nir = b->shader;
852    nir->info.inputs_read = nir->info.outputs_written = previous_shader->info.outputs_written;
853    nir->info.outputs_written |= (1ull << VARYING_SLOT_VAR12);
854    nir->info.gs.input_primitive = MESA_PRIM_TRIANGLES;
855    nir->info.gs.output_primitive = MESA_PRIM_POINTS;
856    nir->info.gs.vertices_in = 3;
857    nir->info.gs.vertices_out = 3;
858    nir->info.gs.invocations = 1;
859    nir->info.gs.active_stream_mask = 1;
860 
861    nir_foreach_shader_out_variable(var, previous_shader) {
862       char tmp[100];
863       snprintf(tmp, ARRAY_SIZE(tmp), "in_%d", num_vars);
864       in[num_vars] = nir_variable_create(nir,
865                                          nir_var_shader_in,
866                                          glsl_array_type(var->type, 3, 0),
867                                          tmp);
868       in[num_vars]->data = var->data;
869       in[num_vars]->data.mode = nir_var_shader_in;
870 
871       if (var->data.location == VARYING_SLOT_POS)
872          pos_var = in[num_vars];
873 
874       snprintf(tmp, ARRAY_SIZE(tmp), "out_%d", num_vars);
875       out[num_vars] = nir_variable_create(nir, nir_var_shader_out, var->type, tmp);
876       out[num_vars]->data = var->data;
877 
878       num_vars++;
879    }
880 
881    nir_variable *front_facing_var = nir_variable_create(nir,
882                                                         nir_var_shader_out,
883                                                         glsl_uint_type(),
884                                                         "gl_FrontFacing");
885    front_facing_var->data.location = VARYING_SLOT_VAR12;
886    front_facing_var->data.driver_location = num_vars;
887    front_facing_var->data.interpolation = INTERP_MODE_FLAT;
888 
889    nir_def *depth_bias_scale = NULL;
890    if (info->depth_bias) {
891       switch (info->ds_fmt) {
892       case DXGI_FORMAT_D16_UNORM:
893          depth_bias_scale = nir_imm_float(b, 1.0f / (1 << 16));
894          break;
895       case DXGI_FORMAT_D24_UNORM_S8_UINT:
896          depth_bias_scale = nir_imm_float(b, 1.0f / (1 << 24));
897          break;
898       case DXGI_FORMAT_D32_FLOAT:
899       case DXGI_FORMAT_D32_FLOAT_S8X24_UINT: {
900          nir_deref_instr *deref_pos = nir_build_deref_var(b, pos_var);
901          nir_def *max_z = NULL;
902          for (uint32_t i = 0; i < 3; ++i) {
903             nir_def *pos = nir_load_deref(b, nir_build_deref_array_imm(b, deref_pos, i));
904             nir_def *z = nir_iand_imm(b, nir_channel(b, pos, 2), 0x7fffffff);
905             max_z = i == 0 ? z : nir_imax(b, z, max_z);
906          }
907          nir_def *exponent = nir_ishr_imm(b, nir_iand_imm(b, max_z, 0x7f800000), 23);
908          depth_bias_scale = nir_fexp2(b, nir_i2f32(b, nir_iadd_imm(b, exponent, -23)));
909          break;
910       }
911       default:
912          depth_bias_scale = nir_imm_float(b, 0.0f);
913       }
914    }
915 
916    /* Temporary variable "loop_index" to loop over input vertices */
917    nir_function_impl *impl = nir_shader_get_entrypoint(nir);
918    nir_variable *loop_index_var =
919       nir_local_variable_create(impl, glsl_uint_type(), "loop_index");
920    nir_deref_instr *loop_index_deref = nir_build_deref_var(b, loop_index_var);
921    nir_store_deref(b, loop_index_deref, nir_imm_int(b, 0), 1);
922 
923    nir_def *cull_pass = nir_imm_true(b);
924    nir_def *front_facing;
925    assert(info->cull_mode != VK_CULL_MODE_FRONT_AND_BACK);
926    if (info->cull_mode == VK_CULL_MODE_FRONT_BIT) {
927       cull_pass = cull_face(b, pos_var, info->front_ccw);
928       front_facing = nir_b2i32(b, cull_pass);
929    } else if (info->cull_mode == VK_CULL_MODE_BACK_BIT) {
930       cull_pass = cull_face(b, pos_var, !info->front_ccw);
931       front_facing = nir_inot(b, nir_b2i32(b, cull_pass));
932    } else
933       front_facing = nir_i2i32(b, cull_face(b, pos_var, info->front_ccw));
934 
935    /**
936     *  if (cull_pass) {
937     *     while {
938     *        if (loop_index >= 3)
939     *           break;
940     */
941    nir_if *cull_check = nir_push_if(b, cull_pass);
942    nir_loop *loop = nir_push_loop(b);
943 
944    nir_def *loop_index = nir_load_deref(b, loop_index_deref);
945    nir_def *cmp = nir_ige(b, loop_index,
946                               nir_imm_int(b, 3));
947    nir_if *loop_check = nir_push_if(b, cmp);
948    nir_jump(b, nir_jump_break);
949    nir_pop_if(b, loop_check);
950 
951    /**
952     *        [...] // Copy all variables
953     *        EmitVertex();
954     */
955    for (unsigned i = 0; i < num_vars; ++i) {
956       nir_def *index = loop_index;
957       nir_deref_instr *in_value = nir_build_deref_array(b, nir_build_deref_var(b, in[i]), index);
958       if (in[i] == pos_var && info->depth_bias) {
959          nir_def *bias_val;
960          if (info->depth_bias_dynamic) {
961             bias_val = load_dynamic_depth_bias(b, info);
962          } else {
963             assert(info->slope_scaled_depth_bias == 0.0f);
964             bias_val = nir_imm_float(b, info->constant_depth_bias);
965          }
966          bias_val = nir_fmul(b, bias_val, depth_bias_scale);
967          nir_def *old_val = nir_load_deref(b, in_value);
968          nir_def *new_val = nir_vector_insert_imm(b, old_val,
969                                                       nir_fadd(b, nir_channel(b, old_val, 2), bias_val),
970                                                       2);
971          nir_store_var(b, out[i], new_val, 0xf);
972       } else {
973          copy_vars(b, nir_build_deref_var(b, out[i]), in_value);
974       }
975    }
976    nir_store_var(b, front_facing_var, front_facing, 0x1);
977    nir_emit_vertex(b, 0);
978 
979    /**
980     *        loop_index++;
981     *     }
982     *  }
983     */
984    nir_store_deref(b, loop_index_deref, nir_iadd_imm(b, loop_index, 1), 1);
985    nir_pop_loop(b, loop);
986    nir_pop_if(b, cull_check);
987 
988    nir_validate_shader(nir, "in dzn_nir_polygon_point_mode_gs");
989 
990    NIR_PASS_V(nir, nir_lower_var_copies);
991    return b->shader;
992 }
993