1 /*
2 * Copyright © Microsoft Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "dzn_nir.h"
25
26 #include "spirv_to_dxil.h"
27 #include "nir_to_dxil.h"
28 #include "nir_builder.h"
29 #include "nir_builtin_builder.h"
30 #include "dxil_nir.h"
31 #include "vk_nir_convert_ycbcr.h"
32
/*
 * Create a UBO/SSBO binding and return a descriptor handle usable with
 * nir_load_ubo()/nir_load_ssbo()/nir_store_ssbo().
 *
 * The variable is given a dummy struct type (a 4096-entry uint array for
 * UBOs, a single uint for SSBOs) because only the binding information
 * matters: accesses go through the index+offset address format below, not
 * through derefs of this type.
 *
 * b:        the shader being constructed
 * mode:     nir_var_mem_ubo or nir_var_mem_ssbo (asserted below)
 * desc_set: Vulkan descriptor set the binding lives in
 * binding:  binding slot within the set
 * name:     debug name for the NIR variable
 * access:   ACCESS_NON_READABLE/ACCESS_NON_WRITEABLE qualifiers (0 = none)
 *
 * Returns the first component of the loaded Vulkan descriptor, which is the
 * buffer index expected by the 32bit_index_offset addressing mode.
 */
static nir_def *
dzn_nir_create_bo_desc(nir_builder *b,
                       nir_variable_mode mode,
                       uint32_t desc_set,
                       uint32_t binding,
                       const char *name,
                       unsigned access)
{
   struct glsl_struct_field field = {
      .type = mode == nir_var_mem_ubo ?
              glsl_array_type(glsl_uint_type(), 4096, 4) :
              glsl_uint_type(),
      .name = "dummy_int",
   };
   const struct glsl_type *dummy_type =
      glsl_struct_type(&field, 1, "dummy_type", false);

   nir_variable *var =
      nir_variable_create(b->shader, mode, dummy_type, name);
   var->data.descriptor_set = desc_set;
   var->data.binding = binding;
   var->data.access = access;

   assert(mode == nir_var_mem_ubo || mode == nir_var_mem_ssbo);
   /* Keep the shader-info resource counts in sync with the variables we
    * create, since nothing else will do it for us here.
    */
   if (mode == nir_var_mem_ubo)
      b->shader->info.num_ubos++;
   else
      b->shader->info.num_ssbos++;

   VkDescriptorType desc_type =
      var->data.mode == nir_var_mem_ubo ?
      VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER :
      VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
   nir_address_format addr_format = nir_address_format_32bit_index_offset;
   /* Standard two-step Vulkan descriptor access: resolve (set, binding) to a
    * resource index, then load the descriptor for that index.
    */
   nir_def *index =
      nir_vulkan_resource_index(b,
                                nir_address_format_num_components(addr_format),
                                nir_address_format_bit_size(addr_format),
                                nir_imm_int(b, 0),
                                .desc_set = desc_set,
                                .binding = binding,
                                .desc_type = desc_type);

   nir_def *desc =
      nir_load_vulkan_descriptor(b,
                                 nir_address_format_num_components(addr_format),
                                 nir_address_format_bit_size(addr_format),
                                 index,
                                 .desc_type = desc_type);

   /* Component 0 is the buffer index; the offset part is supplied at each
    * load/store site.
    */
   return nir_channel(b, desc, 0);
}
85
86 nir_shader *
dzn_nir_indirect_draw_shader(struct dzn_indirect_draw_type type)87 dzn_nir_indirect_draw_shader(struct dzn_indirect_draw_type type)
88 {
89 nir_builder b =
90 nir_builder_init_simple_shader(MESA_SHADER_COMPUTE,
91 dxil_get_base_nir_compiler_options(),
92 "dzn_meta_indirect_%sdraw%s%s%s()",
93 type.indexed ? "indexed_" : "",
94 type.indirect_count ? "_count" : "",
95 type.triangle_fan ? "_triangle_fan" : "",
96 type.triangle_fan_primitive_restart ? "_primitive_restart" : "");
97 b.shader->info.internal = true;
98
99 nir_def *params_desc =
100 dzn_nir_create_bo_desc(&b, nir_var_mem_ubo, 0, 0, "params", 0);
101 nir_def *draw_buf_desc =
102 dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 1, "draw_buf", ACCESS_NON_WRITEABLE);
103 nir_def *exec_buf_desc =
104 dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 2, "exec_buf", ACCESS_NON_READABLE);
105
106 unsigned params_size = 0;
107 if (type.triangle_fan)
108 params_size = sizeof(struct dzn_indirect_draw_triangle_fan_rewrite_params);
109 else
110 params_size = sizeof(struct dzn_indirect_draw_rewrite_params);
111
112 nir_def *params =
113 nir_load_ubo(&b, params_size / 4, 32,
114 params_desc, nir_imm_int(&b, 0),
115 .align_mul = 4, .align_offset = 0, .range_base = 0, .range = ~0);
116
117 uint32_t exec_stride_imm = 0;
118 uint32_t draw_args_offset = 0;
119 if (type.triangle_fan)
120 exec_stride_imm += sizeof(D3D12_INDEX_BUFFER_VIEW);
121 if (type.draw_params)
122 exec_stride_imm += sizeof(uint32_t) * 2;
123 if (type.draw_id)
124 exec_stride_imm += sizeof(uint32_t);
125 draw_args_offset = exec_stride_imm;
126 exec_stride_imm += (type.indexed || type.triangle_fan) ?
127 sizeof(D3D12_DRAW_INDEXED_ARGUMENTS) : sizeof(D3D12_DRAW_ARGUMENTS);
128
129 nir_def *draw_stride = nir_channel(&b, params, 0);
130 nir_def *exec_stride = nir_imm_int(&b, exec_stride_imm);
131 nir_def *index =
132 nir_channel(&b, nir_load_global_invocation_id(&b, 32), 0);
133
134 if (type.indirect_count) {
135 nir_def *count_buf_desc =
136 dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 3, "count_buf", ACCESS_NON_WRITEABLE);
137
138 nir_def *draw_count =
139 nir_load_ssbo(&b, 1, 32, count_buf_desc, nir_imm_int(&b, 0), .align_mul = 4);
140
141 nir_push_if(&b, nir_ieq_imm(&b, index, 0));
142 nir_store_ssbo(&b, draw_count, exec_buf_desc, nir_imm_int(&b, 0),
143 .write_mask = 0x1, .access = ACCESS_NON_READABLE,
144 .align_mul = 16);
145 nir_pop_if(&b, NULL);
146
147 nir_push_if(&b, nir_ult(&b, index, draw_count));
148 }
149
150 nir_def *draw_offset = nir_imul(&b, draw_stride, index);
151
152 /* The first entry contains the indirect count */
153 nir_def *exec_offset =
154 type.indirect_count ?
155 nir_imul(&b, exec_stride, nir_iadd_imm(&b, index, 1)) :
156 nir_imul(&b, exec_stride, index);
157
158 nir_def *draw_info1 =
159 nir_load_ssbo(&b, 4, 32, draw_buf_desc, draw_offset, .align_mul = 4);
160 nir_def *draw_info2 =
161 type.indexed ?
162 nir_load_ssbo(&b, 1, 32, draw_buf_desc,
163 nir_iadd_imm(&b, draw_offset, 16), .align_mul = 4) :
164 nir_imm_int(&b, 0);
165
166 nir_def *first_vertex = nir_channel(&b, draw_info1, type.indexed ? 3 : 2);
167 nir_def *base_instance =
168 type.indexed ? draw_info2 : nir_channel(&b, draw_info1, 3);
169
170 uint32_t exec_val_idx = 0;
171 nir_def *exec_vals[8] = { NULL };
172 if (type.draw_params) {
173 exec_vals[exec_val_idx++] = first_vertex;
174 exec_vals[exec_val_idx++] = base_instance;
175 }
176 if (type.draw_id)
177 exec_vals[exec_val_idx++] = index;
178
179 if (type.triangle_fan) {
180 /* Patch {vertex,index}_count and first_index */
181 nir_def *triangle_count =
182 nir_usub_sat(&b, nir_channel(&b, draw_info1, 0), nir_imm_int(&b, 2));
183 exec_vals[exec_val_idx++] = nir_imul_imm(&b, triangle_count, 3);
184 exec_vals[exec_val_idx++] = nir_channel(&b, draw_info1, 1);
185 exec_vals[exec_val_idx++] = nir_imm_int(&b, 0);
186 exec_vals[exec_val_idx++] = first_vertex;
187 exec_vals[exec_val_idx++] = base_instance;
188
189 nir_def *triangle_fan_exec_buf_desc =
190 dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 4,
191 "triangle_fan_exec_buf",
192 ACCESS_NON_READABLE);
193 nir_def *triangle_fan_index_buf_stride = nir_channel(&b, params, 1);
194 nir_def *triangle_fan_index_buf_addr_lo =
195 nir_iadd(&b, nir_channel(&b, params, 2),
196 nir_imul(&b, triangle_fan_index_buf_stride, index));
197
198 nir_def *triangle_fan_exec_vals[9] = { 0 };
199 uint32_t triangle_fan_exec_param_count = 0;
200 nir_def *addr_lo_overflow =
201 nir_ult(&b, triangle_fan_index_buf_addr_lo, nir_channel(&b, params, 2));
202 nir_def *triangle_fan_index_buf_addr_hi =
203 nir_iadd(&b, nir_channel(&b, params, 3),
204 nir_bcsel(&b, addr_lo_overflow, nir_imm_int(&b, 1), nir_imm_int(&b, 0)));
205
206 triangle_fan_exec_vals[triangle_fan_exec_param_count++] = triangle_fan_index_buf_addr_lo;
207 triangle_fan_exec_vals[triangle_fan_exec_param_count++] = triangle_fan_index_buf_addr_hi;
208
209 if (type.triangle_fan_primitive_restart) {
210 triangle_fan_exec_vals[triangle_fan_exec_param_count++] = nir_channel(&b, draw_info1, 2);
211 triangle_fan_exec_vals[triangle_fan_exec_param_count++] = nir_channel(&b, draw_info1, 0);
212 uint32_t index_count_offset = draw_args_offset +
213 offsetof(D3D12_DRAW_INDEXED_ARGUMENTS, IndexCountPerInstance);
214 nir_def *exec_buf_start =
215 nir_load_ubo(&b, 2, 32,
216 params_desc, nir_imm_int(&b, 16),
217 .align_mul = 4, .align_offset = 0, .range_base = 0, .range = ~0);
218 nir_def *exec_buf_start_lo =
219 nir_iadd(&b, nir_imm_int(&b, index_count_offset),
220 nir_iadd(&b, nir_channel(&b, exec_buf_start, 0),
221 nir_imul(&b, exec_stride, index)));
222 addr_lo_overflow = nir_ult(&b, exec_buf_start_lo, nir_channel(&b, exec_buf_start, 0));
223 nir_def *exec_buf_start_hi =
224 nir_iadd(&b, nir_channel(&b, exec_buf_start, 0),
225 nir_bcsel(&b, addr_lo_overflow, nir_imm_int(&b, 1), nir_imm_int(&b, 0)));
226 triangle_fan_exec_vals[triangle_fan_exec_param_count++] = exec_buf_start_lo;
227 triangle_fan_exec_vals[triangle_fan_exec_param_count++] = exec_buf_start_hi;
228 triangle_fan_exec_vals[triangle_fan_exec_param_count++] = nir_imm_int(&b, 1);
229 } else {
230 triangle_fan_exec_vals[triangle_fan_exec_param_count++] =
231 type.indexed ? nir_channel(&b, draw_info1, 2) : nir_imm_int(&b, 0);
232 triangle_fan_exec_vals[triangle_fan_exec_param_count++] =
233 triangle_count;
234 }
235 triangle_fan_exec_vals[triangle_fan_exec_param_count++] = nir_imm_int(&b, 1);
236 triangle_fan_exec_vals[triangle_fan_exec_param_count++] = nir_imm_int(&b, 1);
237
238 unsigned rewrite_index_exec_params =
239 type.triangle_fan_primitive_restart ?
240 sizeof(struct dzn_indirect_triangle_fan_prim_restart_rewrite_index_exec_params) :
241 sizeof(struct dzn_indirect_triangle_fan_rewrite_index_exec_params);
242 nir_def *triangle_fan_exec_stride =
243 nir_imm_int(&b, rewrite_index_exec_params);
244 nir_def *triangle_fan_exec_offset =
245 nir_imul(&b, triangle_fan_exec_stride, index);
246
247 for (uint32_t i = 0; i < triangle_fan_exec_param_count; i += 4) {
248 unsigned comps = MIN2(triangle_fan_exec_param_count - i, 4);
249 uint32_t mask = (1 << comps) - 1;
250
251 nir_store_ssbo(&b, nir_vec(&b, &triangle_fan_exec_vals[i], comps),
252 triangle_fan_exec_buf_desc,
253 nir_iadd_imm(&b, triangle_fan_exec_offset, i * 4),
254 .write_mask = mask, .access = ACCESS_NON_READABLE, .align_mul = 4);
255 }
256
257 nir_def *ibview_vals[] = {
258 triangle_fan_index_buf_addr_lo,
259 triangle_fan_index_buf_addr_hi,
260 triangle_fan_index_buf_stride,
261 nir_imm_int(&b, DXGI_FORMAT_R32_UINT),
262 };
263
264 nir_store_ssbo(&b, nir_vec(&b, ibview_vals, ARRAY_SIZE(ibview_vals)),
265 exec_buf_desc, exec_offset,
266 .write_mask = 0xf, .access = ACCESS_NON_READABLE, .align_mul = 16);
267 exec_offset = nir_iadd_imm(&b, exec_offset, ARRAY_SIZE(ibview_vals) * 4);
268 } else {
269 exec_vals[exec_val_idx++] = nir_channel(&b, draw_info1, 0);
270 exec_vals[exec_val_idx++] = nir_channel(&b, draw_info1, 1);
271 exec_vals[exec_val_idx++] = nir_channel(&b, draw_info1, 2);
272 exec_vals[exec_val_idx++] = nir_channel(&b, draw_info1, 3);
273 if (type.indexed)
274 exec_vals[exec_val_idx++] = draw_info2;
275 }
276
277 nir_store_ssbo(&b, nir_vec(&b, exec_vals, MIN2(exec_val_idx, 4)),
278 exec_buf_desc, exec_offset,
279 .write_mask = ((1 << exec_val_idx) - 1) & 0xf, .access = ACCESS_NON_READABLE, .align_mul = 16);
280 if (exec_val_idx > 4) {
281 nir_store_ssbo(&b, nir_vec(&b, &exec_vals[4], exec_val_idx - 4),
282 exec_buf_desc, nir_iadd_imm(&b, exec_offset, 16),
283 .write_mask = ((1 << (exec_val_idx - 4)) - 1) & 0xf, .access = ACCESS_NON_READABLE, .align_mul = 16);
284 }
285
286 if (type.indirect_count)
287 nir_pop_if(&b, NULL);
288
289 return b.shader;
290 }
291
/*
 * Build the compute shader that converts a triangle-fan index buffer
 * containing primitive-restart values into a triangle-list index buffer,
 * filtering the restart entries out.
 *
 * Bindings (set 0): 0 = params UBO (old first index, old index count),
 * 1 = new_index_buf SSBO (write-only, 32-bit indices), 2 = old_index_buf
 * SSBO (read-only, old_index_size bytes per index), 3 = a single uint the
 * final index count is written to.
 *
 * old_index_size must be 2 or 4 (bytes per source index).
 */
nir_shader *
dzn_nir_triangle_fan_prim_restart_rewrite_index_shader(uint8_t old_index_size)
{
   assert(old_index_size == 2 || old_index_size == 4);

   nir_builder b =
      nir_builder_init_simple_shader(MESA_SHADER_COMPUTE,
                                     dxil_get_base_nir_compiler_options(),
                                     "dzn_meta_triangle_prim_rewrite_index(old_index_size=%d)",
                                     old_index_size);
   b.shader->info.internal = true;

   nir_def *params_desc =
      dzn_nir_create_bo_desc(&b, nir_var_mem_ubo, 0, 0, "params", 0);
   nir_def *new_index_buf_desc =
      dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 1,
                             "new_index_buf", ACCESS_NON_READABLE);
   nir_def *old_index_buf_desc =
      dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 2,
                             "old_index_buf", ACCESS_NON_WRITEABLE);
   nir_def *new_index_count_ptr_desc =
      dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 3,
                             "new_index_count_ptr", ACCESS_NON_READABLE);

   nir_def *params =
      nir_load_ubo(&b, sizeof(struct dzn_triangle_fan_prim_restart_rewrite_index_params) / 4, 32,
                   params_desc, nir_imm_int(&b, 0),
                   .align_mul = 4, .align_offset = 0, .range_base = 0, .range = ~0);

   /* Restart sentinel matching the source index width. */
   nir_def *prim_restart_val =
      nir_imm_int(&b, old_index_size == 2 ? 0xffff : 0xffffffff);
   /* Loop state lives in local variables so it survives the NIR loop. */
   nir_variable *old_index_ptr_var =
      nir_local_variable_create(b.impl, glsl_uint_type(), "old_index_ptr_var");
   nir_def *old_index_ptr = nir_channel(&b, params, 0);
   nir_store_var(&b, old_index_ptr_var, old_index_ptr, 1);
   nir_variable *new_index_ptr_var =
      nir_local_variable_create(b.impl, glsl_uint_type(), "new_index_ptr_var");
   nir_store_var(&b, new_index_ptr_var, nir_imm_int(&b, 0), 1);
   nir_def *old_index_count = nir_channel(&b, params, 1);
   /* index0 doubles as the fan's hub vertex and as a "no hub yet" flag when
    * it holds the restart sentinel.
    */
   nir_variable *index0_var =
      nir_local_variable_create(b.impl, glsl_uint_type(), "index0_var");
   nir_store_var(&b, index0_var, prim_restart_val, 1);

   /*
    * Filter out all primitive-restart magic values, and generate a triangle list
    * from the triangle fan definition.
    *
    * Basically:
    *
    * new_index_ptr = 0;
    * index0 = restart_prim_value; // 0xffff or 0xffffffff
    * for (old_index_ptr = firstIndex; old_index_ptr < indexCount;) {
    *    // If we have no starting-point we need at least 3 vertices,
    *    // otherwise we can do with two. If there's not enough vertices
    *    // to form a primitive, we just bail out.
    *    min_indices = index0 == restart_prim_value ? 3 : 2;
    *    if (old_index_ptr + min_indices > firstIndex + indexCount)
    *       break;
    *
    *    if (index0 == restart_prim_value) {
    *       // No starting point, skip all entries until we have a
    *       // non-primitive-restart value
    *       index0 = old_index_buf[old_index_ptr++];
    *       continue;
    *    }
    *
    *    // If at least one index contains the primitive-restart pattern,
    *    // ignore this triangle, and skip the unused entries
    *    if (old_index_buf[old_index_ptr + 1] == restart_prim_value) {
    *       old_index_ptr += 2;
    *       continue;
    *    }
    *    if (old_index_buf[old_index_ptr] == restart_prim_value) {
    *       old_index_ptr++;
    *       continue;
    *    }
    *
    *    // We have a valid primitive, queue it to the new index buffer
    *    new_index_buf[new_index_ptr++] = old_index_buf[old_index_ptr];
    *    new_index_buf[new_index_ptr++] = old_index_buf[old_index_ptr + 1];
    *    new_index_buf[new_index_ptr++] = index0;
    * }
    *
    * expressed in NIR, which admittedly is not super easy to grasp with.
    * TODO: Might be a good thing to use the CL compiler we have and turn
    * those shaders into CL kernels.
    */
   nir_push_loop(&b);

   old_index_ptr = nir_load_var(&b, old_index_ptr_var);
   nir_def *index0 = nir_load_var(&b, index0_var);

   /* Need 3 indices to start a fan, 2 to continue one. */
   nir_def *read_index_count =
      nir_bcsel(&b, nir_ieq(&b, index0, prim_restart_val),
                nir_imm_int(&b, 3), nir_imm_int(&b, 2));
   nir_push_if(&b, nir_ult(&b, old_index_count, nir_iadd(&b, old_index_ptr, read_index_count)));
   nir_jump(&b, nir_jump_break);
   nir_pop_if(&b, NULL);

   nir_def *old_index_offset =
      nir_imul_imm(&b, old_index_ptr, old_index_size);

   /* No hub vertex yet: consume one index to (try to) become the hub. */
   nir_push_if(&b, nir_ieq(&b, index0, prim_restart_val));
   /* 16-bit indices: SSBO loads are dword-based, so load the containing
    * dword and extract the right half below.
    */
   nir_def *index_val =
      nir_load_ssbo(&b, 1, 32, old_index_buf_desc,
                    old_index_size == 2 ? nir_iand_imm(&b, old_index_offset, ~3ULL) : old_index_offset,
                    .align_mul = 4);
   if (old_index_size == 2) {
      index_val = nir_bcsel(&b, nir_test_mask(&b, old_index_offset, 0x2),
                            nir_ushr_imm(&b, index_val, 16),
                            nir_iand_imm(&b, index_val, 0xffff));
   }

   nir_store_var(&b, index0_var, index_val, 1);
   nir_store_var(&b, old_index_ptr_var, nir_iadd_imm(&b, old_index_ptr, 1), 1);
   nir_jump(&b, nir_jump_continue);
   nir_pop_if(&b, NULL);

   /* Load the next two fan indices (again dword-based for 16-bit). */
   nir_def *index12 =
      nir_load_ssbo(&b, 2, 32, old_index_buf_desc,
                    old_index_size == 2 ? nir_iand_imm(&b, old_index_offset, ~3ULL) : old_index_offset,
                    .align_mul = 4);
   if (old_index_size == 2) {
      /* Unpack three 16-bit values and pick the pair matching the offset's
       * dword alignment.
       */
      nir_def *indices[] = {
         nir_iand_imm(&b, nir_channel(&b, index12, 0), 0xffff),
         nir_ushr_imm(&b, nir_channel(&b, index12, 0), 16),
         nir_iand_imm(&b, nir_channel(&b, index12, 1), 0xffff),
      };

      index12 = nir_bcsel(&b, nir_test_mask(&b, old_index_offset, 0x2),
                          nir_vec2(&b, indices[1], indices[2]),
                          nir_vec2(&b, indices[0], indices[1]));
   }

   /* Restart in the second slot ends the fan: skip both and reset the hub. */
   nir_push_if(&b, nir_ieq(&b, nir_channel(&b, index12, 1), prim_restart_val));
   nir_store_var(&b, old_index_ptr_var, nir_iadd_imm(&b, old_index_ptr, 2), 1);
   nir_store_var(&b, index0_var, prim_restart_val, 1);
   nir_jump(&b, nir_jump_continue);
   nir_push_else(&b, NULL);
   nir_store_var(&b, old_index_ptr_var, nir_iadd_imm(&b, old_index_ptr, 1), 1);
   /* Restart in the first slot: drop it and reset the hub. */
   nir_push_if(&b, nir_ieq(&b, nir_channel(&b, index12, 0), prim_restart_val));
   nir_store_var(&b, index0_var, prim_restart_val, 1);
   nir_jump(&b, nir_jump_continue);
   nir_push_else(&b, NULL);
   /* Valid triangle: append (i1, i2, hub) to the new index buffer. */
   nir_def *new_indices =
      nir_vec3(&b, nir_channel(&b, index12, 0), nir_channel(&b, index12, 1), index0);
   nir_def *new_index_ptr = nir_load_var(&b, new_index_ptr_var);
   nir_def *new_index_offset = nir_imul_imm(&b, new_index_ptr, sizeof(uint32_t));
   nir_store_ssbo(&b, new_indices, new_index_buf_desc,
                  new_index_offset,
                  .write_mask = 7, .access = ACCESS_NON_READABLE, .align_mul = 4);
   nir_store_var(&b, new_index_ptr_var, nir_iadd_imm(&b, new_index_ptr, 3), 1);
   nir_pop_if(&b, NULL);
   nir_pop_if(&b, NULL);
   nir_pop_loop(&b, NULL);

   /* Publish the final index count for the indirect draw to consume. */
   nir_store_ssbo(&b, nir_load_var(&b, new_index_ptr_var),
                  new_index_count_ptr_desc, nir_imm_int(&b, 0),
                  .write_mask = 1, .access = ACCESS_NON_READABLE, .align_mul = 4);

   return b.shader;
}
454
/*
 * Build the compute shader that expands a triangle fan into a triangle
 * list. One invocation emits one triangle (three 32-bit indices).
 *
 * With old_index_size == 0 the source draw is non-indexed and the output
 * indices are generated directly (0, t+1, t+2 reordered below); otherwise
 * the source indices (2 or 4 bytes each) are fetched from old_index_buf.
 *
 * Bindings (set 0): 0 = params UBO (old first index), 1 = new_index_buf
 * SSBO (write-only), 2 = old_index_buf SSBO (read-only, only when
 * old_index_size > 0).
 */
nir_shader *
dzn_nir_triangle_fan_rewrite_index_shader(uint8_t old_index_size)
{
   assert(old_index_size == 0 || old_index_size == 2 || old_index_size == 4);

   nir_builder b =
      nir_builder_init_simple_shader(MESA_SHADER_COMPUTE,
                                     dxil_get_base_nir_compiler_options(),
                                     "dzn_meta_triangle_rewrite_index(old_index_size=%d)",
                                     old_index_size);
   b.shader->info.internal = true;

   nir_def *params_desc =
      dzn_nir_create_bo_desc(&b, nir_var_mem_ubo, 0, 0, "params", 0);
   nir_def *new_index_buf_desc =
      dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 1,
                             "new_index_buf", ACCESS_NON_READABLE);

   nir_def *old_index_buf_desc = NULL;
   if (old_index_size > 0) {
      old_index_buf_desc =
         dzn_nir_create_bo_desc(&b, nir_var_mem_ssbo, 0, 2,
                                "old_index_buf", ACCESS_NON_WRITEABLE);
   }

   nir_def *params =
      nir_load_ubo(&b, sizeof(struct dzn_triangle_fan_rewrite_index_params) / 4, 32,
                   params_desc, nir_imm_int(&b, 0),
                   .align_mul = 4, .align_offset = 0, .range_base = 0, .range = ~0);

   /* One invocation per output triangle. */
   nir_def *triangle = nir_channel(&b, nir_load_global_invocation_id(&b, 32), 0);
   nir_def *new_indices;

   if (old_index_size > 0) {
      nir_def *old_first_index = nir_channel(&b, params, 0);
      /* Fan triangle t uses source indices [first], [first+t+1],
       * [first+t+2]; compute the byte offsets of the hub and of the pair.
       */
      nir_def *old_index0_offset =
         nir_imul_imm(&b, old_first_index, old_index_size);
      nir_def *old_index1_offset =
         nir_imul_imm(&b, nir_iadd(&b, nir_iadd_imm(&b, triangle, 1), old_first_index),
                      old_index_size);

      /* 16-bit indices: SSBO loads are dword-based, so load the containing
       * dword(s) and extract the right halves below.
       */
      nir_def *old_index0 =
         nir_load_ssbo(&b, 1, 32, old_index_buf_desc,
                       old_index_size == 2 ? nir_iand_imm(&b, old_index0_offset, ~3ULL) : old_index0_offset,
                       .align_mul = 4);

      if (old_index_size == 2) {
         old_index0 = nir_bcsel(&b, nir_test_mask(&b, old_index0_offset, 0x2),
                                nir_ushr_imm(&b, old_index0, 16),
                                nir_iand_imm(&b, old_index0, 0xffff));
      }

      nir_def *old_index12 =
         nir_load_ssbo(&b, 2, 32, old_index_buf_desc,
                       old_index_size == 2 ? nir_iand_imm(&b, old_index1_offset, ~3ULL) : old_index1_offset,
                       .align_mul = 4);
      if (old_index_size == 2) {
         /* Unpack three 16-bit values and pick the pair matching the
          * offset's dword alignment.
          */
         nir_def *indices[] = {
            nir_iand_imm(&b, nir_channel(&b, old_index12, 0), 0xffff),
            nir_ushr_imm(&b, nir_channel(&b, old_index12, 0), 16),
            nir_iand_imm(&b, nir_channel(&b, old_index12, 1), 0xffff),
         };

         old_index12 = nir_bcsel(&b, nir_test_mask(&b, old_index1_offset, 0x2),
                                 nir_vec2(&b, indices[1], indices[2]),
                                 nir_vec2(&b, indices[0], indices[1]));
      }

      /* TODO: VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT */
      new_indices =
         nir_vec3(&b, nir_channel(&b, old_index12, 0),
                  nir_channel(&b, old_index12, 1), old_index0);
   } else {
      /* Non-indexed fan: triangle t is (t+1, t+2, 0). */
      new_indices =
         nir_vec3(&b,
                  nir_iadd_imm(&b, triangle, 1),
                  nir_iadd_imm(&b, triangle, 2),
                  nir_imm_int(&b, 0));
   }

   /* Three 4-byte indices per triangle. */
   nir_def *new_index_offset =
      nir_imul_imm(&b, triangle, 4 * 3);

   nir_store_ssbo(&b, new_indices, new_index_buf_desc,
                  new_index_offset,
                  .write_mask = 7, .access = ACCESS_NON_READABLE, .align_mul = 4);

   return b.shader;
}
544
/*
 * Build the vertex shader used by the meta blit path.
 *
 * The four quad corners are read from a UBO (binding 0/0) as four vec4s of
 * the form (pos.x, pos.y, coord.x, coord.y), selected by gl_VertexID, and a
 * trailing float supplies the texture z coordinate (layer/depth). Outputs:
 * gl_Position and a vec3 "coords" varying on VARYING_SLOT_TEX0.
 */
nir_shader *
dzn_nir_blit_vs(void)
{
   nir_builder b =
      nir_builder_init_simple_shader(MESA_SHADER_VERTEX,
                                     dxil_get_base_nir_compiler_options(),
                                     "dzn_meta_blit_vs()");
   b.shader->info.internal = true;

   nir_def *params_desc =
      dzn_nir_create_bo_desc(&b, nir_var_mem_ubo, 0, 0, "params", 0);

   nir_variable *out_pos =
      nir_variable_create(b.shader, nir_var_shader_out, glsl_vec4_type(),
                          "gl_Position");
   out_pos->data.location = VARYING_SLOT_POS;
   out_pos->data.driver_location = 0;

   nir_variable *out_coords =
      nir_variable_create(b.shader, nir_var_shader_out, glsl_vec_type(3),
                          "coords");
   out_coords->data.location = VARYING_SLOT_TEX0;
   out_coords->data.driver_location = 1;

   /* Load all four corner records, then select by vertex id with a bcsel
    * chain (no indirect UBO indexing needed).
    */
   nir_def *vertex = nir_load_vertex_id(&b);
   nir_def *coords_arr[4] = {
      nir_load_ubo(&b, 4, 32, params_desc, nir_imm_int(&b, 0),
                   .align_mul = 16, .align_offset = 0, .range_base = 0, .range = ~0),
      nir_load_ubo(&b, 4, 32, params_desc, nir_imm_int(&b, 16),
                   .align_mul = 16, .align_offset = 0, .range_base = 0, .range = ~0),
      nir_load_ubo(&b, 4, 32, params_desc, nir_imm_int(&b, 32),
                   .align_mul = 16, .align_offset = 0, .range_base = 0, .range = ~0),
      nir_load_ubo(&b, 4, 32, params_desc, nir_imm_int(&b, 48),
                   .align_mul = 16, .align_offset = 0, .range_base = 0, .range = ~0),
   };
   nir_def *coords =
      nir_bcsel(&b, nir_ieq_imm(&b, vertex, 0), coords_arr[0],
                nir_bcsel(&b, nir_ieq_imm(&b, vertex, 1), coords_arr[1],
                          nir_bcsel(&b, nir_ieq_imm(&b, vertex, 2), coords_arr[2], coords_arr[3])));
   /* Components 0/1 are clip-space xy; z=0, w=1. */
   nir_def *pos =
      nir_vec4(&b, nir_channel(&b, coords, 0), nir_channel(&b, coords, 1),
               nir_imm_float(&b, 0.0), nir_imm_float(&b, 1.0));
   /* The z texture coordinate lives right after the 4 vec4 corners. */
   nir_def *z_coord =
      nir_load_ubo(&b, 1, 32, params_desc, nir_imm_int(&b, 4 * 4 * sizeof(float)),
                   .align_mul = 64, .align_offset = 0, .range_base = 0, .range = ~0);
   coords = nir_vec3(&b, nir_channel(&b, coords, 2), nir_channel(&b, coords, 3), z_coord);

   nir_store_var(&b, out_pos, pos, 0xf);
   nir_store_var(&b, out_coords, coords, 0x7);
   return b.shader;
}
596
/*
 * Build the fragment shader used by the meta blit/resolve path.
 *
 * Samples the source texture at the interpolated "coords" varying and
 * either writes the result to the output location described by info->loc,
 * or — for the stencil fallback path — discards fragments whose sampled
 * value doesn't intersect the bit mask read from a UBO.
 *
 * When info->resolve_mode is set, all (or one) of the source samples are
 * combined according to the resolve mode; otherwise a single fetch/sample
 * is performed (txf_ms per-sample for MS sources, regular tex otherwise).
 */
nir_shader *
dzn_nir_blit_fs(const struct dzn_nir_blit_info *info)
{
   bool ms = info->src_samples > 1;
   nir_alu_type nir_out_type =
      nir_get_nir_type_for_glsl_base_type(info->out_type);
   /* Array sources get one extra coordinate for the layer. */
   uint32_t coord_comps =
      glsl_get_sampler_dim_coordinate_components(info->sampler_dim) +
      info->src_is_array;

   nir_builder b =
      nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT,
                                     dxil_get_base_nir_compiler_options(),
                                     "dzn_meta_blit_fs()");
   b.shader->info.internal = true;

   const struct glsl_type *tex_type =
      glsl_texture_type(info->sampler_dim, info->src_is_array, info->out_type);
   nir_variable *tex_var =
      nir_variable_create(b.shader, nir_var_uniform, tex_type, "texture");
   nir_deref_instr *tex_deref = nir_build_deref_var(&b, tex_var);

   nir_variable *pos_var =
      nir_variable_create(b.shader, nir_var_shader_in,
                          glsl_vector_type(GLSL_TYPE_FLOAT, 4),
                          "gl_FragCoord");
   pos_var->data.location = VARYING_SLOT_POS;
   pos_var->data.driver_location = 0;

   /* Texture coordinates produced by dzn_nir_blit_vs(). */
   nir_variable *coord_var =
      nir_variable_create(b.shader, nir_var_shader_in,
                          glsl_vector_type(GLSL_TYPE_FLOAT, 3),
                          "coord");
   coord_var->data.location = VARYING_SLOT_TEX0;
   coord_var->data.driver_location = 1;
   nir_def *coord =
      nir_trim_vector(&b, nir_load_var(&b, coord_var), coord_comps);

   /* Depth/stencil results are scalar; color results are vec4. */
   uint32_t out_comps =
      (info->loc == FRAG_RESULT_DEPTH || info->loc == FRAG_RESULT_STENCIL) ? 1 : 4;
   nir_variable *out = NULL;
   if (!info->stencil_fallback) {
      out = nir_variable_create(b.shader, nir_var_shader_out,
                                glsl_vector_type(info->out_type, out_comps),
                                "out");
      out->data.location = info->loc;
   }

   nir_def *res = NULL;

   if (info->resolve_mode != dzn_blit_resolve_none) {
      enum dzn_blit_resolve_mode resolve_mode = info->resolve_mode;

      /* Pick the ALU op used to combine successive samples. */
      nir_op resolve_op = nir_op_mov;
      switch (resolve_mode) {
      case dzn_blit_resolve_average:
         /* When resolving a float type, we need to calculate the average of all
          * samples. For integer resolve, Vulkan says that one sample should be
          * chosen without telling which. Let's just pick the first one in that
          * case.
          */
         if (info->out_type == GLSL_TYPE_FLOAT)
            resolve_op = nir_op_fadd;
         else
            resolve_mode = dzn_blit_resolve_sample_zero;
         break;
      case dzn_blit_resolve_min:
         switch (info->out_type) {
         case GLSL_TYPE_FLOAT: resolve_op = nir_op_fmin; break;
         case GLSL_TYPE_INT: resolve_op = nir_op_imin; break;
         case GLSL_TYPE_UINT: resolve_op = nir_op_umin; break;
         }
         break;
      case dzn_blit_resolve_max:
         switch (info->out_type) {
         case GLSL_TYPE_FLOAT: resolve_op = nir_op_fmax; break;
         case GLSL_TYPE_INT: resolve_op = nir_op_imax; break;
         case GLSL_TYPE_UINT: resolve_op = nir_op_umax; break;
         }
         break;
      case dzn_blit_resolve_none:
      case dzn_blit_resolve_sample_zero:
         break;
      }

      /* sample_zero only fetches sample 0; the other modes fold every
       * source sample into res with resolve_op.
       */
      unsigned nsamples = resolve_mode == dzn_blit_resolve_sample_zero ?
                          1 : info->src_samples;
      for (unsigned s = 0; s < nsamples; s++) {
         nir_tex_instr *tex = nir_tex_instr_create(b.shader, 4);

         tex->op = nir_texop_txf_ms;
         tex->dest_type = nir_out_type;
         tex->texture_index = 0;
         tex->is_array = info->src_is_array;
         tex->sampler_dim = info->sampler_dim;

         tex->src[0] = nir_tex_src_for_ssa(nir_tex_src_coord,
                                           nir_f2i32(&b, coord));
         tex->coord_components = coord_comps;

         tex->src[1] = nir_tex_src_for_ssa(nir_tex_src_ms_index,
                                           nir_imm_int(&b, s));

         tex->src[2] = nir_tex_src_for_ssa(nir_tex_src_lod,
                                           nir_imm_int(&b, 0));

         tex->src[3] = nir_tex_src_for_ssa(nir_tex_src_texture_deref,
                                           &tex_deref->def);

         nir_def_init(&tex->instr, &tex->def, 4, 32);

         nir_builder_instr_insert(&b, &tex->instr);
         res = res ? nir_build_alu2(&b, resolve_op, res, &tex->def) : &tex->def;
      }

      /* Average resolve: accumulated with fadd above, divide by the sample
       * count here.
       */
      if (resolve_mode == dzn_blit_resolve_average)
         res = nir_fmul_imm(&b, res, 1.0f / nsamples);
   } else {
      nir_tex_instr *tex =
         nir_tex_instr_create(b.shader, ms ? 4 : 3);

      tex->dest_type = nir_out_type;
      tex->is_array = info->src_is_array;
      tex->sampler_dim = info->sampler_dim;

      if (ms) {
         /* MS source, no resolve: fetch the sample matching the current
          * fragment's sample id (per-sample shading).
          */
         tex->op = nir_texop_txf_ms;

         tex->src[0] = nir_tex_src_for_ssa(nir_tex_src_coord,
                                           nir_f2i32(&b, coord));
         tex->coord_components = coord_comps;

         tex->src[1] = nir_tex_src_for_ssa(nir_tex_src_ms_index,
                                           nir_load_sample_id(&b));

         tex->src[2] = nir_tex_src_for_ssa(nir_tex_src_lod,
                                           nir_imm_int(&b, 0));

         tex->src[3] = nir_tex_src_for_ssa(nir_tex_src_texture_deref,
                                           &tex_deref->def);
      } else {
         /* Single-sampled: a regular filtered sample through a bare
          * sampler (filtering state comes from the pipeline's sampler).
          */
         nir_variable *sampler_var =
            nir_variable_create(b.shader, nir_var_uniform, glsl_bare_sampler_type(), "sampler");
         nir_deref_instr *sampler_deref = nir_build_deref_var(&b, sampler_var);

         tex->op = nir_texop_tex;
         tex->sampler_index = 0;

         tex->src[0] = nir_tex_src_for_ssa(nir_tex_src_coord, coord);
         tex->coord_components = coord_comps;

         tex->src[1] = nir_tex_src_for_ssa(nir_tex_src_texture_deref,
                                           &tex_deref->def);

         tex->src[2] = nir_tex_src_for_ssa(nir_tex_src_sampler_deref,
                                           &sampler_deref->def);
      }

      nir_def_init(&tex->instr, &tex->def, 4, 32);
      nir_builder_instr_insert(&b, &tex->instr);
      res = &tex->def;
   }

   if (info->stencil_fallback) {
      /* Stencil-write fallback: kill the fragment unless the sampled
       * stencil value has a bit in common with the mask UBO.
       */
      nir_def *mask_desc =
         dzn_nir_create_bo_desc(&b, nir_var_mem_ubo, 0, 0, "mask", 0);
      nir_def *mask = nir_load_ubo(&b, 1, 32, mask_desc, nir_imm_int(&b, 0),
                                   .align_mul = 16, .align_offset = 0, .range_base = 0, .range = ~0);
      nir_def *fail = nir_ieq_imm(&b, nir_iand(&b, nir_channel(&b, res, 0), mask), 0);
      nir_discard_if(&b, fail);
   } else {
      nir_store_var(&b, out, nir_trim_vector(&b, res, out_comps), 0xf);
   }

   return b.shader;
}
773
774 static nir_def *
cull_face(nir_builder * b,nir_variable * vertices,bool ccw)775 cull_face(nir_builder *b, nir_variable *vertices, bool ccw)
776 {
777 nir_def *v0 =
778 nir_load_deref(b, nir_build_deref_array(b, nir_build_deref_var(b, vertices), nir_imm_int(b, 0)));
779 nir_def *v1 =
780 nir_load_deref(b, nir_build_deref_array(b, nir_build_deref_var(b, vertices), nir_imm_int(b, 1)));
781 nir_def *v2 =
782 nir_load_deref(b, nir_build_deref_array(b, nir_build_deref_var(b, vertices), nir_imm_int(b, 2)));
783
784 nir_def *dir = nir_fdot(b, nir_cross4(b, nir_fsub(b, v1, v0),
785 nir_fsub(b, v2, v0)),
786 nir_imm_vec4(b, 0.0, 0.0, -1.0, 0.0));
787 if (ccw)
788 return nir_fle_imm(b, dir, 0.0f);
789 else
790 return nir_fgt_imm(b, dir, 0.0f);
791 }
792
793 static void
copy_vars(nir_builder * b,nir_deref_instr * dst,nir_deref_instr * src)794 copy_vars(nir_builder *b, nir_deref_instr *dst, nir_deref_instr *src)
795 {
796 assert(glsl_get_bare_type(dst->type) == glsl_get_bare_type(src->type));
797 if (glsl_type_is_struct(dst->type)) {
798 for (unsigned i = 0; i < glsl_get_length(dst->type); ++i) {
799 copy_vars(b, nir_build_deref_struct(b, dst, i), nir_build_deref_struct(b, src, i));
800 }
801 } else if (glsl_type_is_array_or_matrix(dst->type)) {
802 copy_vars(b, nir_build_deref_array_wildcard(b, dst), nir_build_deref_array_wildcard(b, src));
803 } else {
804 nir_copy_deref(b, dst, src);
805 }
806 }
807
808 static nir_def *
load_dynamic_depth_bias(nir_builder * b,struct dzn_nir_point_gs_info * info)809 load_dynamic_depth_bias(nir_builder *b, struct dzn_nir_point_gs_info *info)
810 {
811 nir_address_format ubo_format = nir_address_format_32bit_index_offset;
812 unsigned offset = offsetof(struct dxil_spirv_vertex_runtime_data, depth_bias);
813
814 nir_def *index = nir_vulkan_resource_index(
815 b, nir_address_format_num_components(ubo_format),
816 nir_address_format_bit_size(ubo_format),
817 nir_imm_int(b, 0),
818 .desc_set = info->runtime_data_cbv.register_space,
819 .binding = info->runtime_data_cbv.base_shader_register,
820 .desc_type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER);
821
822 nir_def *load_desc = nir_load_vulkan_descriptor(
823 b, nir_address_format_num_components(ubo_format),
824 nir_address_format_bit_size(ubo_format),
825 index, .desc_type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER);
826
827 return nir_load_ubo(
828 b, 1, 32,
829 nir_channel(b, load_desc, 0),
830 nir_imm_int(b, offset),
831 .align_mul = 256,
832 .align_offset = offset);
833 }
834
/* Build a geometry shader implementing VK_POLYGON_MODE_POINT on D3D12:
 * each input triangle is re-emitted as (up to) three point primitives.
 *
 * Because culling and depth bias are handled by the D3D12 rasterizer on
 * the primitives it actually sees (points here, not the original
 * triangles), this GS has to do both itself:
 *  - front/back-face culling is evaluated from the triangle's winding
 *    (via cull_face()) before any point is emitted;
 *  - depth bias is added to each vertex's position directly, scaled
 *    according to the depth-buffer format.
 * The computed facedness is also forwarded to the fragment shader through
 * VARYING_SLOT_VAR12, since gl_FrontFacing would otherwise be lost for
 * point primitives.
 *
 * previous_shader is the stage feeding this GS; its outputs become this
 * shader's inputs and are passed through unchanged (except for the
 * position channel when depth bias applies). info carries cull mode,
 * winding, depth-bias state and the runtime-data CBV location.
 */
nir_shader *
dzn_nir_polygon_point_mode_gs(const nir_shader *previous_shader, struct dzn_nir_point_gs_info *info)
{
   nir_builder builder;
   nir_builder *b = &builder;
   nir_variable *pos_var = NULL;

   unsigned num_vars = 0;
   nir_variable *in[VARYING_SLOT_MAX];
   nir_variable *out[VARYING_SLOT_MAX];


   builder = nir_builder_init_simple_shader(MESA_SHADER_GEOMETRY,
                                            dxil_get_base_nir_compiler_options(),
                                            "implicit_gs");

   /* Pass-through GS: inputs mirror the previous stage's outputs; the
    * extra VAR12 slot carries the front-facing flag. */
   nir_shader *nir = b->shader;
   nir->info.inputs_read = nir->info.outputs_written = previous_shader->info.outputs_written;
   nir->info.outputs_written |= (1ull << VARYING_SLOT_VAR12);
   nir->info.gs.input_primitive = MESA_PRIM_TRIANGLES;
   nir->info.gs.output_primitive = MESA_PRIM_POINTS;
   nir->info.gs.vertices_in = 3;
   nir->info.gs.vertices_out = 3;
   nir->info.gs.invocations = 1;
   nir->info.gs.active_stream_mask = 1;

   /* Declare one in/out pair per varying of the previous stage. GS inputs
    * are arrays of 3 (one entry per triangle vertex). */
   nir_foreach_shader_out_variable(var, previous_shader) {
      char tmp[100];
      snprintf(tmp, ARRAY_SIZE(tmp), "in_%d", num_vars);
      in[num_vars] = nir_variable_create(nir,
                                         nir_var_shader_in,
                                         glsl_array_type(var->type, 3, 0),
                                         tmp);
      in[num_vars]->data = var->data;
      in[num_vars]->data.mode = nir_var_shader_in;

      /* Remember the position input; it's needed for culling and bias. */
      if (var->data.location == VARYING_SLOT_POS)
         pos_var = in[num_vars];

      snprintf(tmp, ARRAY_SIZE(tmp), "out_%d", num_vars);
      out[num_vars] = nir_variable_create(nir, nir_var_shader_out, var->type, tmp);
      out[num_vars]->data = var->data;

      num_vars++;
   }

   /* Extra flat output feeding the FS a substitute for gl_FrontFacing. */
   nir_variable *front_facing_var = nir_variable_create(nir,
                                                        nir_var_shader_out,
                                                        glsl_uint_type(),
                                                        "gl_FrontFacing");
   front_facing_var->data.location = VARYING_SLOT_VAR12;
   front_facing_var->data.driver_location = num_vars;
   front_facing_var->data.interpolation = INTERP_MODE_FLAT;

   /* Compute the factor converting the API depth-bias constant into a
    * depth-value delta, following the D3D12 depth-bias definition:
    * 2^-n for n-bit UNORM buffers, and 2^(e-23) for float buffers, where
    * e is the max exponent of the triangle's z values. */
   nir_def *depth_bias_scale = NULL;
   if (info->depth_bias) {
      switch (info->ds_fmt) {
      case DXGI_FORMAT_D16_UNORM:
         depth_bias_scale = nir_imm_float(b, 1.0f / (1 << 16));
         break;
      case DXGI_FORMAT_D24_UNORM_S8_UINT:
         depth_bias_scale = nir_imm_float(b, 1.0f / (1 << 24));
         break;
      case DXGI_FORMAT_D32_FLOAT:
      case DXGI_FORMAT_D32_FLOAT_S8X24_UINT: {
         nir_deref_instr *deref_pos = nir_build_deref_var(b, pos_var);
         nir_def *max_z = NULL;
         for (uint32_t i = 0; i < 3; ++i) {
            nir_def *pos = nir_load_deref(b, nir_build_deref_array_imm(b, deref_pos, i));
            /* Mask off the sign bit so the compare is on |z| bit patterns. */
            nir_def *z = nir_iand_imm(b, nir_channel(b, pos, 2), 0x7fffffff);
            max_z = i == 0 ? z : nir_imax(b, z, max_z);
         }
         /* Extract the float32 exponent field and form 2^(e - 23). */
         nir_def *exponent = nir_ishr_imm(b, nir_iand_imm(b, max_z, 0x7f800000), 23);
         depth_bias_scale = nir_fexp2(b, nir_i2f32(b, nir_iadd_imm(b, exponent, -23)));
         break;
      }
      default:
         depth_bias_scale = nir_imm_float(b, 0.0f);
      }
   }

   /* Temporary variable "loop_index" to loop over input vertices */
   nir_function_impl *impl = nir_shader_get_entrypoint(nir);
   nir_variable *loop_index_var =
      nir_local_variable_create(impl, glsl_uint_type(), "loop_index");
   nir_deref_instr *loop_index_deref = nir_build_deref_var(b, loop_index_var);
   nir_store_deref(b, loop_index_deref, nir_imm_int(b, 0), 1);

   /* Decide whether the triangle survives culling, and what to report as
    * front-facing. NOTE(review): the b2i32 path yields 0/1 while the
    * i2i32 path sign-extends a 1-bit bool to 0/~0 — presumably the FS
    * only tests non-zero, so both encodings work; confirm against the
    * consumer of VARYING_SLOT_VAR12. */
   nir_def *cull_pass = nir_imm_true(b);
   nir_def *front_facing;
   assert(info->cull_mode != VK_CULL_MODE_FRONT_AND_BACK);
   if (info->cull_mode == VK_CULL_MODE_FRONT_BIT) {
      /* Keep only back faces: the kept primitives are non-front-facing. */
      cull_pass = cull_face(b, pos_var, info->front_ccw);
      front_facing = nir_b2i32(b, cull_pass);
   } else if (info->cull_mode == VK_CULL_MODE_BACK_BIT) {
      cull_pass = cull_face(b, pos_var, !info->front_ccw);
      front_facing = nir_inot(b, nir_b2i32(b, cull_pass));
   } else
      front_facing = nir_i2i32(b, cull_face(b, pos_var, info->front_ccw));

   /**
    * if (cull_pass) {
    *    while {
    *       if (loop_index >= 3)
    *          break;
    */
   nir_if *cull_check = nir_push_if(b, cull_pass);
   nir_loop *loop = nir_push_loop(b);

   nir_def *loop_index = nir_load_deref(b, loop_index_deref);
   nir_def *cmp = nir_ige(b, loop_index,
                          nir_imm_int(b, 3));
   nir_if *loop_check = nir_push_if(b, cmp);
   nir_jump(b, nir_jump_break);
   nir_pop_if(b, loop_check);

   /**
    *       [...] // Copy all variables
    *       EmitVertex();
    */
   for (unsigned i = 0; i < num_vars; ++i) {
      nir_def *index = loop_index;
      nir_deref_instr *in_value = nir_build_deref_array(b, nir_build_deref_var(b, in[i]), index);
      if (in[i] == pos_var && info->depth_bias) {
         /* Apply depth bias to position.z before emitting the point. */
         nir_def *bias_val;
         if (info->depth_bias_dynamic) {
            bias_val = load_dynamic_depth_bias(b, info);
         } else {
            /* Slope-scaled bias isn't implemented for this path. */
            assert(info->slope_scaled_depth_bias == 0.0f);
            bias_val = nir_imm_float(b, info->constant_depth_bias);
         }
         bias_val = nir_fmul(b, bias_val, depth_bias_scale);
         nir_def *old_val = nir_load_deref(b, in_value);
         nir_def *new_val = nir_vector_insert_imm(b, old_val,
                                                  nir_fadd(b, nir_channel(b, old_val, 2), bias_val),
                                                  2);
         nir_store_var(b, out[i], new_val, 0xf);
      } else {
         /* Plain pass-through for every other varying. */
         copy_vars(b, nir_build_deref_var(b, out[i]), in_value);
      }
   }
   nir_store_var(b, front_facing_var, front_facing, 0x1);
   nir_emit_vertex(b, 0);

   /**
    *       loop_index++;
    *    }
    * }
    */
   nir_store_deref(b, loop_index_deref, nir_iadd_imm(b, loop_index, 1), 1);
   nir_pop_loop(b, loop);
   nir_pop_if(b, cull_check);

   nir_validate_shader(nir, "in dzn_nir_polygon_point_mode_gs");

   /* Expand the wildcard copy_deref's emitted by copy_vars(). */
   NIR_PASS_V(nir, nir_lower_var_copies);
   return b->shader;
}
993