/*
 * Copyright 2023 Advanced Micro Devices, Inc.
 *
 * SPDX-License-Identifier: MIT
 */

#include <gtest/gtest.h>

#include "nir.h"
#include "nir_builder.h"

namespace {

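/* Interpolation modes exercised by the tests: INTERP_FLAT loads plain
 * (flat) inputs, the PERSP/LINEAR/COLOR variants select the FS barycentric
 * intrinsic (pixel/centroid/sample/at_offset) and interpolation mode,
 * INTERP_CONVERGENT is used for values that are equal for all vertices of
 * a primitive, and the INTERP_TES_TRIANGLE_* variants select how manual
 * TES interpolation is expressed (UVW vs. WUV tess-coord order, fadd vs.
 * ffma form).
 */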
enum {
   INTERP_FLAT,
   INTERP_PERSP_PIXEL,
   INTERP_PERSP_CENTROID,
   INTERP_PERSP_SAMPLE,
   INTERP_PERSP_AT_OFFSET,
   INTERP_LINEAR_PIXEL,
   INTERP_LINEAR_CENTROID,
   INTERP_LINEAR_SAMPLE,
   INTERP_LINEAR_AT_OFFSET,
   INTERP_COLOR_PIXEL,
   INTERP_COLOR_CENTROID,
   INTERP_COLOR_SAMPLE,
   INTERP_COLOR_AT_OFFSET,
   INTERP_CONVERGENT,
   INTERP_TES_TRIANGLE,
   INTERP_TES_TRIANGLE_UVW_FADD,
   INTERP_TES_TRIANGLE_WUV_FADD,
   INTERP_TES_TRIANGLE_UVW_FFMA,
   INTERP_TES_TRIANGLE_WUV_FFMA,
};

static inline bool
is_interp_at_offset(unsigned interp)
{
   return interp == INTERP_PERSP_AT_OFFSET ||
          interp == INTERP_LINEAR_AT_OFFSET ||
          interp == INTERP_COLOR_AT_OFFSET;
}

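/* Test fixture for nir_opt_varyings: builds a producer/consumer shader
 * pair (b1 = producer, b2 = consumer) plus 32-bit and 16-bit vec4 uniform
 * and UBO variables on the producer side, so tests can build uniform
 * expressions that the pass may move between stages. Both shaders are
 * printed when a test fails.
 */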
class nir_opt_varyings_test : public ::testing::Test {
protected:
   nir_opt_varyings_test()
   {
      glsl_type_singleton_init_or_ref();

      b1 = &_producer_builder;
      b2 = &_consumer_builder;

      memset(&options, 0, sizeof(options));
      options.varying_expression_max_cost = varying_expression_max_cost;
      options.io_options = nir_io_16bit_input_output_support;
   }

   virtual ~nir_opt_varyings_test()
   {
      if (HasFailure()) {
         printf("\nPRODUCER:\n");
         nir_print_shader(b1->shader, stdout);
         printf("CONSUMER:\n");
         nir_print_shader(b2->shader, stdout);
      }

      ralloc_free(b1->shader);
      ralloc_free(b2->shader);
      glsl_type_singleton_decref();
   }

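   /* Compiler-option callback: never limit the cost of expressions that
    * nir_opt_varyings is allowed to move between shaders.
    */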
   static inline unsigned
   varying_expression_max_cost(struct nir_shader *consumer,
                               struct nir_shader *producer)
   {
      return UINT_MAX;
   }

   void create_shaders(gl_shader_stage producer_stage,
                       gl_shader_stage consumer_stage)
   {
      _producer_builder =
         nir_builder_init_simple_shader(producer_stage, &options,
                                        "producer_shader");
      _consumer_builder =
         nir_builder_init_simple_shader(consumer_stage, &options,
                                        "consumer_shader");

      const struct glsl_type *hvec4 = glsl_vector_type(GLSL_TYPE_FLOAT16, 4);

      prod_uniform_vec4_32 =
         nir_variable_create(b1->shader, nir_var_uniform,
                             glsl_vec4_type(), "prod_uniform_vec4_32");
      prod_uniform_vec4_16 =
         nir_variable_create(b1->shader, nir_var_uniform,
                             hvec4, "prod_uniform_vec4_16");

      prod_ubo_vec4_32 =
         nir_variable_create(b1->shader, nir_var_mem_ubo,
                             glsl_array_type(glsl_vec4_type(), 256, 0),
                             "prod_ubo_vec4_32");
      prod_ubo_vec4_32->interface_type = prod_ubo_vec4_32->type;

      prod_ubo_vec4_16 =
         nir_variable_create(b1->shader, nir_var_mem_ubo,
                             glsl_array_type(hvec4, 256, 0),
                             "prod_ubo_vec4_16");
      prod_ubo_vec4_16->interface_type = prod_ubo_vec4_16->type;
   }

   nir_variable *get_uniform(nir_builder *b, unsigned bit_size)
   {
      if (b == b1) {
         return bit_size == 16 ? prod_uniform_vec4_16 :
                bit_size == 32 ? prod_uniform_vec4_32 : NULL;
      }

      return NULL;
   }

   nir_variable *get_ubo(nir_builder *b, unsigned bit_size)
   {
      if (b == b1) {
         return bit_size == 16 ? prod_ubo_vec4_16 :
                bit_size == 32 ? prod_ubo_vec4_32 : NULL;
      }

      return NULL;
   }

   nir_def *load_uniform(nir_builder *b, unsigned bit_size, unsigned index)
   {
      if (b == b1) {
         nir_variable *var = get_uniform(b, bit_size);
         nir_deref_instr *deref = nir_build_deref_var(b, var);

         /* Load vec4, but use only 1 component. */
         return nir_channel(b, nir_load_deref(b, deref), index);
      }

      return NULL;
   }

   nir_def *load_ubo(nir_builder *b, unsigned bit_size, unsigned index)
   {
      if (b == b1) {
         nir_variable *var = get_ubo(b, bit_size);
         nir_deref_instr *deref =
            nir_build_deref_array(b, nir_build_deref_var(b, var),
                                  nir_imm_int(b, 16 + index));

         /* Load vec4, but use only 1 component. */
         return nir_channel(b, nir_load_deref(b, deref), 1);
      }

      return NULL;
   }

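   /* Build sqrt(uniform[index] * 3.14 + ubo): an expression made only of
    * convergent (non-varying) values, matched later by
    * shader_contains_uniform_expr().
    */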
   nir_def *build_uniform_expr(nir_builder *b, unsigned bit_size,
                               unsigned index)
   {
      return nir_fsqrt(b, nir_ffma(b, load_uniform(b, bit_size, index),
                                   nir_imm_floatN_t(b, 3.14, bit_size),
                                   load_ubo(b, bit_size, index)));
   }

   bool shader_contains_uniform(nir_builder *target_b, unsigned bit_size,
                                unsigned index)
   {
      nir_builder *src_b = target_b == b1 ? b2 : b1;
      nir_shader *target = target_b->shader;
      nir_variable *var = get_uniform(src_b, bit_size);

      nir_foreach_uniform_variable(it, target) {
         if (!strcmp(it->name, var->name))
            return true;
      }

      return false;
   }

   bool shader_contains_ubo(nir_builder *target_b, unsigned bit_size,
                            unsigned index)
   {
      nir_builder *src_b = target_b == b1 ? b2 : b1;
      nir_shader *target = target_b->shader;
      nir_variable *var = get_ubo(src_b, bit_size);

      nir_foreach_variable_with_modes(it, target, nir_var_mem_ubo) {
         if (!strcmp(it->name, var->name))
            return true;
      }

      return false;
   }

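   /* Return whether def has any use other than as an IO offset or arrayed
    * vertex index, so that address constants don't count as hits when
    * scanning for immediates.
    */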
   static bool
   has_non_io_offset_non_vertex_index_use(nir_builder *b, nir_def *def)
   {
      nir_foreach_use(src, def) {
         nir_instr *instr = nir_src_parent_instr(src);

         if (instr->type == nir_instr_type_intrinsic) {
            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
            nir_src *offset_src = nir_get_io_offset_src(intr);
            nir_src *index_src = nir_get_io_arrayed_index_src(intr);

            if (src == offset_src || src == index_src)
               continue;
         }
         return true;
      }
      return false;
   }

   static bool
   shader_contains_const_float(nir_builder *b, float f, unsigned bit_size)
   {
      if (bit_size == 16)
         f = _mesa_half_to_float(_mesa_float_to_half(f));

      nir_foreach_block(block, b->impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type == nir_instr_type_load_const) {
               nir_load_const_instr *lc = nir_instr_as_load_const(instr);

               if (lc->def.num_components == 1 &&
                   lc->def.bit_size == bit_size &&
                   nir_const_value_as_float(lc->value[0], lc->def.bit_size) == f &&
                   has_non_io_offset_non_vertex_index_use(b, &lc->def))
                  return true;
            }
         }
      }
      return false;
   }

   static bool
   shader_contains_alu_op(nir_builder *b, nir_op op, unsigned bit_size)
   {
      nir_foreach_block(block, b->impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type == nir_instr_type_alu) {
               if (nir_instr_as_alu(instr)->op == op)
                  return true;
            }
         }
      }
      return false;
   }

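   /* Check that all pieces of the expression built by build_uniform_expr()
    * (the uniform, the UBO, ffma, fsqrt, and the 3.14 immediate) are present
    * in the shader, or that none of them are.
    */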
   bool shader_contains_uniform_expr(nir_builder *b, unsigned bit_size,
                                     unsigned index, bool contains)
   {
      if (contains) {
         return shader_contains_uniform(b, bit_size, index) &&
                shader_contains_ubo(b, bit_size, index) &&
                shader_contains_alu_op(b, nir_op_ffma, bit_size) &&
                shader_contains_alu_op(b, nir_op_fsqrt, bit_size) &&
                shader_contains_const_float(b, 3.14, bit_size);
      } else {
         return !shader_contains_uniform(b, bit_size, index) &&
                !shader_contains_ubo(b, bit_size, index) &&
                !shader_contains_alu_op(b, nir_op_ffma, bit_size) &&
                !shader_contains_alu_op(b, nir_op_fsqrt, bit_size) &&
                !shader_contains_const_float(b, 3.14, bit_size);
      }
   }

   void optimize()
   {
      NIR_PASS(_, b1->shader, nir_copy_prop);
      NIR_PASS(_, b1->shader, nir_opt_dce);
      NIR_PASS(_, b1->shader, nir_opt_cse);

      NIR_PASS(_, b2->shader, nir_copy_prop);
      NIR_PASS(_, b2->shader, nir_opt_dce);
      NIR_PASS(_, b2->shader, nir_opt_cse);
   }

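   /* Clean up both shaders, run nir_opt_varyings (the pass under test) with
    * validation, then clean up and re-gather shader info so tests can
    * inspect the result.
    */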
   nir_opt_varyings_progress opt_varyings()
   {
      optimize();

      if (debug_get_bool_option("PRINT_BEFORE", false)) {
         printf("\nPRODUCER:\n");
         nir_print_shader(b1->shader, stdout);
         printf("CONSUMER:\n");
         nir_print_shader(b2->shader, stdout);
      }

      nir_opt_varyings_progress progress =
         nir_opt_varyings(b1->shader, b2->shader, true, 4096, 15);
      nir_validate_shader(b1->shader, "validate producer shader");
      nir_validate_shader(b2->shader, "validate consumer shader");

      optimize();
      nir_shader_gather_info(b1->shader, b1->impl);
      nir_shader_gather_info(b2->shader, b2->impl);
      return progress;
   }

   nir_shader_compiler_options options;
   nir_builder *b1;
   nir_builder *b2;
   nir_builder _consumer_builder;
   nir_builder _producer_builder;
   nir_variable *prod_uniform_vec4_32;
   nir_variable *prod_uniform_vec4_16;
   nir_variable *prod_ubo_vec4_32;
   nir_variable *prod_ubo_vec4_16;
};

static inline bool
shader_contains_instr(nir_builder *b, nir_instr *i)
{
   nir_foreach_block(block, b->impl) {
      nir_foreach_instr(instr, block) {
         if (instr == i)
            return true;
      }
   }
   return false;
}

static inline bool
shader_contains_def(nir_builder *b, nir_def *def)
{
   return shader_contains_instr(b, def->parent_instr);
}

static inline bool
shader_contains_undef(nir_builder *b, unsigned bit_size)
{
   nir_foreach_block(block, b->impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type == nir_instr_type_undef &&
             nir_instr_as_undef(instr)->def.bit_size == bit_size &&
             nir_instr_as_undef(instr)->def.num_components == 1)
            return true;
      }
   }
   return false;
}

static inline bool
is_patch(gl_varying_slot slot)
{
   return slot == VARYING_SLOT_TESS_LEVEL_INNER ||
          slot == VARYING_SLOT_TESS_LEVEL_OUTER ||
          (slot >= VARYING_SLOT_PATCH0 && slot <= VARYING_SLOT_PATCH31);
}

static inline bool
is_color(nir_builder *b, gl_varying_slot slot)
{
   return b->shader->info.stage == MESA_SHADER_FRAGMENT &&
          (slot == VARYING_SLOT_COL0 || slot == VARYING_SLOT_COL1 ||
           slot == VARYING_SLOT_BFC0 || slot == VARYING_SLOT_BFC1);
}

static inline bool
is_texcoord(nir_builder *b, gl_varying_slot slot)
{
   return b->shader->info.stage == MESA_SHADER_FRAGMENT &&
          slot >= VARYING_SLOT_TEX0 && slot <= VARYING_SLOT_TEX7;
}

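/* Whether IO at this slot is arrayed (per-vertex): all TCS IO, TES and GS
 * inputs, and mesh shader outputs, except patch slots.
 */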
static inline bool
is_per_vertex(nir_builder *b, gl_varying_slot slot, bool is_input)
{
   return !is_patch(slot) &&
          (b->shader->info.stage == MESA_SHADER_TESS_CTRL ||
           (is_input && (b->shader->info.stage == MESA_SHADER_TESS_EVAL ||
                         b->shader->info.stage == MESA_SHADER_GEOMETRY)) ||
           (!is_input && b->shader->info.stage == MESA_SHADER_MESH));
}

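/* Emit a single-component, single-slot input or output load at the given
 * slot/component, using the per-vertex variant (with an immediate vertex
 * index) where the stage requires arrayed IO.
 */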
static inline nir_def *
load_input_output(nir_builder *b, gl_varying_slot slot, unsigned component,
                  nir_alu_type type, unsigned vertex_index, bool output)
{
   unsigned bit_size = type & ~(nir_type_float | nir_type_int | nir_type_uint);
   nir_def *zero = nir_imm_int(b, 0);
   nir_def *def;

   if (is_per_vertex(b, slot, true)) {
      if (output) {
         def = nir_load_per_vertex_output(b, 1, bit_size,
                                          nir_imm_int(b, vertex_index), zero);
      } else {
         def = nir_load_per_vertex_input(b, 1, bit_size,
                                         nir_imm_int(b, vertex_index), zero);
      }
   } else {
      if (output)
         def = nir_load_output(b, 1, bit_size, zero);
      else
         def = nir_load_input(b, 1, bit_size, zero);
   }

   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(def->parent_instr);
   nir_intrinsic_set_base(intr, 0); /* we don't care */
   nir_intrinsic_set_range(intr, 1);
   nir_intrinsic_set_component(intr, component);
   nir_intrinsic_set_dest_type(intr, type);

   nir_io_semantics sem;
   memset(&sem, 0, sizeof(sem));
   sem.location = slot;
   sem.num_slots = 1;
   nir_intrinsic_set_io_semantics(intr, sem);

   return def;
}

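/* Emit an FS load_interpolated_input: pick the barycentric intrinsic from
 * the pixel/centroid/sample/at_offset variant and set its interpolation
 * mode (smooth, noperspective, or none for colors).
 */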
static inline nir_def *
load_input_interp(nir_builder *b, gl_varying_slot slot, unsigned component,
                  nir_alu_type type, unsigned interp)
{
   assert(b->shader->info.stage == MESA_SHADER_FRAGMENT &&
          interp != INTERP_FLAT && interp != INTERP_CONVERGENT &&
          interp < INTERP_TES_TRIANGLE);
   assert(type & nir_type_float);

   unsigned bit_size = type & ~nir_type_float;
   nir_def *zero = nir_imm_int(b, 0);
   nir_def *baryc;

   switch (interp) {
   case INTERP_PERSP_PIXEL:
   case INTERP_LINEAR_PIXEL:
   case INTERP_COLOR_PIXEL:
      baryc = nir_load_barycentric_pixel(b, 32);
      break;
   case INTERP_PERSP_CENTROID:
   case INTERP_LINEAR_CENTROID:
   case INTERP_COLOR_CENTROID:
      baryc = nir_load_barycentric_centroid(b, 32);
      break;
   case INTERP_PERSP_SAMPLE:
   case INTERP_LINEAR_SAMPLE:
   case INTERP_COLOR_SAMPLE:
      baryc = nir_load_barycentric_sample(b, 32);
      break;
   case INTERP_PERSP_AT_OFFSET:
   case INTERP_LINEAR_AT_OFFSET:
   case INTERP_COLOR_AT_OFFSET:
      baryc = nir_load_barycentric_at_offset(b, 32, nir_imm_ivec2(b, 1, 2));
      break;
   default:
      unreachable("invalid interp mode");
   }

   switch (interp) {
   case INTERP_PERSP_PIXEL:
   case INTERP_PERSP_CENTROID:
   case INTERP_PERSP_SAMPLE:
   case INTERP_PERSP_AT_OFFSET:
      nir_intrinsic_set_interp_mode(nir_instr_as_intrinsic(baryc->parent_instr),
                                    INTERP_MODE_SMOOTH);
      break;
   case INTERP_LINEAR_PIXEL:
   case INTERP_LINEAR_CENTROID:
   case INTERP_LINEAR_SAMPLE:
   case INTERP_LINEAR_AT_OFFSET:
      nir_intrinsic_set_interp_mode(nir_instr_as_intrinsic(baryc->parent_instr),
                                    INTERP_MODE_NOPERSPECTIVE);
      break;
   case INTERP_COLOR_PIXEL:
   case INTERP_COLOR_CENTROID:
   case INTERP_COLOR_SAMPLE:
   case INTERP_COLOR_AT_OFFSET:
      nir_intrinsic_set_interp_mode(nir_instr_as_intrinsic(baryc->parent_instr),
                                    INTERP_MODE_NONE);
      break;
   default:
      unreachable("invalid interp mode");
   }

   nir_def *def = nir_load_interpolated_input(b, 1, bit_size, baryc, zero);

   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(def->parent_instr);
   nir_intrinsic_set_base(intr, 0); /* we don't care */
   nir_intrinsic_set_component(intr, component);
   nir_intrinsic_set_dest_type(intr, type);

   nir_io_semantics sem;
   memset(&sem, 0, sizeof(sem));
   sem.location = slot;
   sem.num_slots = 1;
   nir_intrinsic_set_io_semantics(intr, sem);

   return def;
}

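/* Interpolate a per-vertex TES input manually as
 * v0 * c0 + v1 * c1 + v2 * c2 over the tess coords, either as two fadds of
 * fmuls or as an fmul followed by two ffmas, with the tess-coord components
 * taken in UVW or WUV order. These are the expression shapes the pass is
 * expected to recognize as TES interpolation.
 */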
static inline nir_def *
load_interpolated_input_tes(nir_builder *b, gl_varying_slot slot,
                            unsigned component, nir_alu_type type,
                            unsigned interp)
{
   assert(b->shader->info.stage == MESA_SHADER_TESS_EVAL && !is_patch(slot));
   assert(type & nir_type_float);
   unsigned bit_size = type & ~nir_type_float;
   nir_def *zero = nir_imm_int(b, 0);
   nir_def *tesscoord = nir_load_tess_coord(b);
   nir_def *def[3];

   if (bit_size != 32)
      tesscoord = nir_f2fN(b, tesscoord, bit_size);

   unsigned remap_uvw[3] = {0, 1, 2};
   unsigned remap_wuv[3] = {2, 0, 1};
   unsigned *remap;

   switch (interp) {
   case INTERP_TES_TRIANGLE_UVW_FADD:
   case INTERP_TES_TRIANGLE_UVW_FFMA:
      remap = remap_uvw;
      break;
   case INTERP_TES_TRIANGLE_WUV_FADD:
   case INTERP_TES_TRIANGLE_WUV_FFMA:
      remap = remap_wuv;
      break;
   default:
      unreachable("unexpected TES interp mode");
   }

   bool use_ffma = interp == INTERP_TES_TRIANGLE_UVW_FFMA ||
                   interp == INTERP_TES_TRIANGLE_WUV_FFMA;

   for (unsigned i = 0; i < 3; i++) {
      def[i] = nir_load_per_vertex_input(b, 1, bit_size, nir_imm_int(b, i),
                                         zero);

      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(def[i]->parent_instr);
      nir_intrinsic_set_base(intr, 0); /* we don't care */
      nir_intrinsic_set_range(intr, 1);
      nir_intrinsic_set_component(intr, component);
      nir_intrinsic_set_dest_type(intr, type);

      nir_io_semantics sem;
      memset(&sem, 0, sizeof(sem));
      sem.location = slot;
      sem.num_slots = 1;
      nir_intrinsic_set_io_semantics(intr, sem);

      if (use_ffma) {
         if (i == 0)
            def[i] = nir_fmul(b, def[i], nir_channel(b, tesscoord, remap[i]));
         else
            def[i] = nir_ffma(b, def[i], nir_channel(b, tesscoord, remap[i]),
                              def[i - 1]);
      } else {
         def[i] = nir_fmul(b, def[i], nir_channel(b, tesscoord, remap[i]));
      }
   }

   if (use_ffma)
      return def[2];
   else
      return nir_fadd(b, nir_fadd(b, def[0], def[1]), def[2]);
}

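/* Dispatch to the right load helper for the stage: interpolated FS inputs,
 * manually interpolated TES inputs, or plain (flat) loads.
 */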
static inline nir_def *
load_input(nir_builder *b, gl_varying_slot slot, unsigned component,
           nir_alu_type type, unsigned vertex_index, unsigned interp)
{
   if (b->shader->info.stage == MESA_SHADER_FRAGMENT && interp != INTERP_FLAT) {
      return load_input_interp(b, slot, component, type, interp);
   } else if (b->shader->info.stage == MESA_SHADER_TESS_EVAL &&
              interp >= INTERP_TES_TRIANGLE) {
      return load_interpolated_input_tes(b, slot, component, type, interp);
   } else {
      assert(interp == INTERP_FLAT);
      return load_input_output(b, slot, component, type, vertex_index, false);
   }
}

static inline nir_def *
load_output(nir_builder *b, gl_varying_slot slot, unsigned component,
            nir_alu_type type, unsigned vertex_index)
{
   return load_input_output(b, slot, component, type, vertex_index, true);
}

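/* Emit a single-component output store. A negative vertex_index selects the
 * TCS invocation ID as the per-vertex index; otherwise an immediate index
 * is used.
 */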
static inline nir_intrinsic_instr *
store_output(nir_builder *b, gl_varying_slot slot, unsigned component,
             nir_alu_type type, nir_def *src, int vertex_index)
{
   nir_def *zero = nir_imm_int(b, 0);
   nir_intrinsic_instr *intr;

   if (is_per_vertex(b, slot, false)) {
      assert(b->shader->info.stage == MESA_SHADER_TESS_CTRL ||
             vertex_index >= 0);
      nir_def *index = vertex_index >= 0 ? nir_imm_int(b, vertex_index) :
                                           nir_load_invocation_id(b);
      intr = nir_store_per_vertex_output(b, src, index, zero);
   } else {
      intr = nir_store_output(b, src, zero);
   }

   nir_intrinsic_set_base(intr, 0); /* we don't care */
   nir_intrinsic_set_write_mask(intr, 0x1);
   nir_intrinsic_set_component(intr, component);
   nir_intrinsic_set_src_type(intr, type);

   nir_io_semantics sem;
   memset(&sem, 0, sizeof(sem));
   sem.location = slot;
   sem.num_slots = 1;
   nir_intrinsic_set_io_semantics(intr, sem);

   return intr;
}

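/* Store a value to an SSBO. Tests use this to give a value a use that
 * nir_opt_varyings cannot remove.
 */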
static inline nir_intrinsic_instr *
store_ssbo(nir_builder *b, nir_def *src)
{
   return nir_store_ssbo(b, src, nir_imm_int(b, 0), nir_imm_int(b, 0));
}

/* See can_move_alu_across_interp. */
static inline bool
movable_across_interp(nir_builder *b, nir_op op, unsigned interp[3],
                      bool divergent[3], unsigned bit_size)
{
   if ((interp[0] == INTERP_FLAT || !divergent[0]) &&
       (interp[1] == INTERP_FLAT || !divergent[1]) &&
       (interp[2] == INTERP_FLAT || !divergent[2]))
      return true;

   /* nir_opt_varyings doesn't have an equation for:
    * v0 * f2f16(u) + v1 * f2f16(v) + v2 * f2f16(w)
    */
   if (b->shader->info.stage == MESA_SHADER_TESS_EVAL && bit_size == 16)
      return false;

   switch (op) {
   case nir_op_fadd:
   case nir_op_fsub:
   case nir_op_fneg:
   case nir_op_mov:
      return true;

   case nir_op_fmul:
   case nir_op_fmulz:
   case nir_op_ffma:
   case nir_op_ffmaz:
      return !divergent[0] || !divergent[1];

   case nir_op_fdiv:
      return !divergent[1];

   case nir_op_flrp:
      return (!divergent[0] && !divergent[1]) || !divergent[2];

   default:
      return false;
   }
}

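/* A minimal usage sketch of the fixture (illustrative only, not one of the
 * upstream test cases; the exact transformation the pass applies may
 * differ): the producer writes a uniform expression to a varying, the
 * consumer reads it flat, and nir_opt_varyings is expected to replace the
 * varying with the expression recomputed in the consumer.
 */
TEST_F(nir_opt_varyings_test, example_move_uniform_expression)
{
   create_shaders(MESA_SHADER_VERTEX, MESA_SHADER_FRAGMENT);

   /* Producer: store sqrt(uniform * 3.14 + ubo) to VAR0.x. */
   store_output(b1, VARYING_SLOT_VAR0, 0, nir_type_float32,
                build_uniform_expr(b1, 32, 0), 0);

   /* Consumer: load VAR0.x flat and keep it alive with an SSBO store. */
   store_ssbo(b2, load_input(b2, VARYING_SLOT_VAR0, 0, nir_type_float32,
                             0, INTERP_FLAT));

   opt_varyings();

   /* The expression should now exist in the consumer. */
   EXPECT_TRUE(shader_contains_uniform_expr(b2, 32, 0, true));
}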