xref: /aosp_15_r20/external/mesa3d/src/compiler/nir/tests/nir_opt_varyings_test.h (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright 2023 Advanced Micro Devices, Inc.
3  *
4  * SPDX-License-Identifier: MIT
5  */
6 
7 #include <gtest/gtest.h>
8 
9 #include "nir.h"
10 #include "nir_builder.h"
11 
12 namespace {
13 
/* Interpolation modes understood by the test helpers in this file.
 *
 * The order of the enumerators matters: the helpers compare against
 * INTERP_TES_TRIANGLE to tell fragment-shader modes (everything before it)
 * from tessellation-evaluation modes (it and everything after it).
 */
enum {
   /* No interpolation; load_input() emits a plain input load. */
   INTERP_FLAT,
   /* PERSP_*: barycentrics loaded with INTERP_MODE_SMOOTH. */
   INTERP_PERSP_PIXEL,
   INTERP_PERSP_CENTROID,
   INTERP_PERSP_SAMPLE,
   INTERP_PERSP_AT_OFFSET,
   /* LINEAR_*: barycentrics loaded with INTERP_MODE_NOPERSPECTIVE. */
   INTERP_LINEAR_PIXEL,
   INTERP_LINEAR_CENTROID,
   INTERP_LINEAR_SAMPLE,
   INTERP_LINEAR_AT_OFFSET,
   /* COLOR_*: barycentrics loaded with INTERP_MODE_NONE. */
   INTERP_COLOR_PIXEL,
   INTERP_COLOR_CENTROID,
   INTERP_COLOR_SAMPLE,
   INTERP_COLOR_AT_OFFSET,
   /* Rejected by load_input_interp(), like INTERP_FLAT. */
   INTERP_CONVERGENT,
   /* First TES mode; used as the boundary value in comparisons. */
   INTERP_TES_TRIANGLE,
   /* TES modes: UVW/WUV select the tess-coord channel order, FADD/FFMA
    * select how the three weighted per-vertex values are summed.
    */
   INTERP_TES_TRIANGLE_UVW_FADD,
   INTERP_TES_TRIANGLE_WUV_FADD,
   INTERP_TES_TRIANGLE_UVW_FFMA,
   INTERP_TES_TRIANGLE_WUV_FFMA,
};
35 
36 static inline bool
is_interp_at_offset(unsigned interp)37 is_interp_at_offset(unsigned interp)
38 {
39    return interp == INTERP_PERSP_AT_OFFSET ||
40           interp == INTERP_LINEAR_AT_OFFSET ||
41           interp == INTERP_COLOR_AT_OFFSET;
42 }
43 
44 class nir_opt_varyings_test : public ::testing::Test {
45 protected:
nir_opt_varyings_test()46    nir_opt_varyings_test()
47    {
48       glsl_type_singleton_init_or_ref();
49 
50       b1 = &_producer_builder;
51       b2 = &_consumer_builder;
52 
53       memset(&options, 0, sizeof(options));
54       options.varying_expression_max_cost = varying_expression_max_cost;
55       options.io_options = nir_io_16bit_input_output_support;
56    }
57 
~nir_opt_varyings_test()58    virtual ~nir_opt_varyings_test()
59    {
60       if (HasFailure()) {
61          printf("\nPRODUCER:\n");
62          nir_print_shader(b1->shader, stdout);
63          printf("CONSUMER:\n");
64          nir_print_shader(b2->shader, stdout);
65       }
66 
67       ralloc_free(b1->shader);
68       ralloc_free(b2->shader);
69       glsl_type_singleton_decref();
70    }
71 
72    static inline unsigned
varying_expression_max_cost(struct nir_shader * consumer,struct nir_shader * producer)73    varying_expression_max_cost(struct nir_shader *consumer,
74                                struct nir_shader *producer)
75    {
76       return UINT_MAX;
77    }
78 
create_shaders(gl_shader_stage producer_stage,gl_shader_stage consumer_stage)79    void create_shaders(gl_shader_stage producer_stage,
80                        gl_shader_stage consumer_stage)
81    {
82       _producer_builder =
83          nir_builder_init_simple_shader(producer_stage, &options,
84                                         "producer_shader");
85       _consumer_builder =
86          nir_builder_init_simple_shader(consumer_stage, &options,
87                                         "consumer_shader");
88 
89       const struct glsl_type *hvec4 = glsl_vector_type(GLSL_TYPE_FLOAT16, 4);
90 
91       prod_uniform_vec4_32 =
92          nir_variable_create(b1->shader, nir_var_uniform,
93                              glsl_vec4_type(), "prod_uniform_vec4_32");
94       prod_uniform_vec4_16 =
95          nir_variable_create(b1->shader, nir_var_uniform,
96                              hvec4, "prod_uniform_vec4_16");
97 
98       prod_ubo_vec4_32 =
99          nir_variable_create(b1->shader, nir_var_mem_ubo,
100                              glsl_array_type(glsl_vec4_type(), 256, 0),
101                              "prod_ubo_vec4_32");
102       prod_ubo_vec4_32->interface_type = prod_ubo_vec4_32->type;
103 
104       prod_ubo_vec4_16 =
105          nir_variable_create(b1->shader, nir_var_mem_ubo,
106                              glsl_array_type(hvec4, 256, 0),
107                              "prod_ubo_vec4_16");
108       prod_ubo_vec4_16->interface_type = prod_ubo_vec4_16->type;
109    }
110 
get_uniform(nir_builder * b,unsigned bit_size)111    nir_variable *get_uniform(nir_builder *b, unsigned bit_size)
112    {
113       if (b == b1) {
114          return bit_size == 16 ? prod_uniform_vec4_16 :
115                 bit_size == 32 ? prod_uniform_vec4_32 : NULL;
116       }
117 
118       return NULL;
119    }
120 
get_ubo(nir_builder * b,unsigned bit_size)121    nir_variable *get_ubo(nir_builder *b, unsigned bit_size)
122    {
123       if (b == b1) {
124          return bit_size == 16 ? prod_ubo_vec4_16 :
125                 bit_size == 32 ? prod_ubo_vec4_32 : NULL;
126       }
127 
128       return NULL;
129    }
130 
load_uniform(nir_builder * b,unsigned bit_size,unsigned index)131    nir_def *load_uniform(nir_builder *b, unsigned bit_size, unsigned index)
132    {
133       if (b == b1) {
134          nir_variable *var = get_uniform(b, bit_size);
135          nir_deref_instr *deref = nir_build_deref_var(b, var);
136 
137          /* Load vec4, but use only 1 component. */
138          return nir_channel(b, nir_load_deref(b, deref), index);
139       }
140 
141       return NULL;
142    }
143 
load_ubo(nir_builder * b,unsigned bit_size,unsigned index)144    nir_def *load_ubo(nir_builder *b, unsigned bit_size, unsigned index)
145    {
146       if (b == b1) {
147          nir_variable *var = get_ubo(b, bit_size);
148          nir_deref_instr *deref =
149             nir_build_deref_array(b, nir_build_deref_var(b, var),
150                                   nir_imm_int(b, 16 + index));
151 
152          /* Load vec4, but use only 1 component. */
153          return nir_channel(b, nir_load_deref(b, deref), 1);
154       }
155 
156       return NULL;
157    }
158 
build_uniform_expr(nir_builder * b,unsigned bit_size,unsigned index)159    nir_def *build_uniform_expr(nir_builder *b, unsigned bit_size, unsigned index)
160    {
161       return nir_fsqrt(b, nir_ffma(b, load_uniform(b, bit_size, index),
162                                    nir_imm_floatN_t(b, 3.14, bit_size),
163                                    load_ubo(b, bit_size, index)));
164    }
165 
shader_contains_uniform(nir_builder * target_b,unsigned bit_size,unsigned index)166    bool shader_contains_uniform(nir_builder *target_b, unsigned bit_size,
167                                 unsigned index)
168    {
169       nir_builder *src_b = target_b == b1 ? b2 : b1;
170       nir_shader *target = target_b->shader;
171       nir_variable *var = get_uniform(src_b, bit_size);
172 
173       nir_foreach_uniform_variable(it, target) {
174          if (!strcmp(it->name, var->name))
175             return true;
176       }
177 
178       return false;
179    }
180 
shader_contains_ubo(nir_builder * target_b,unsigned bit_size,unsigned index)181    bool shader_contains_ubo(nir_builder *target_b, unsigned bit_size,
182                             unsigned index)
183    {
184       nir_builder *src_b = target_b == b1 ? b2 : b1;
185       nir_shader *target = target_b->shader;
186       nir_variable *var = get_ubo(src_b, bit_size);
187 
188       nir_foreach_variable_with_modes(it, target, nir_var_mem_ubo) {
189          if (!strcmp(it->name, var->name))
190             return true;
191       }
192 
193       return false;
194    }
195 
196    static bool
has_non_io_offset_non_vertex_index_use(nir_builder * b,nir_def * def)197    has_non_io_offset_non_vertex_index_use(nir_builder *b, nir_def *def)
198    {
199       nir_foreach_use(src, def) {
200          nir_instr *instr = nir_src_parent_instr(src);
201 
202          if (instr->type == nir_instr_type_intrinsic) {
203             nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
204             nir_src *offset_src = nir_get_io_offset_src(intr);
205             nir_src *index_src = nir_get_io_arrayed_index_src(intr);
206 
207             if (src == offset_src || src == index_src)
208                continue;
209          }
210          return true;
211       }
212       return false;
213    }
214 
215    static bool
shader_contains_const_float(nir_builder * b,float f,unsigned bit_size)216    shader_contains_const_float(nir_builder *b, float f, unsigned bit_size)
217    {
218       if (bit_size == 16)
219          f = _mesa_half_to_float(_mesa_float_to_half(f));
220 
221       nir_foreach_block(block, b->impl) {
222          nir_foreach_instr(instr, block) {
223             if (instr->type == nir_instr_type_load_const) {
224                nir_load_const_instr *lc = nir_instr_as_load_const(instr);
225 
226                if (lc->def.num_components == 1 &&
227                    lc->def.bit_size == bit_size &&
228                    nir_const_value_as_float(lc->value[0], lc->def.bit_size) == f &&
229                    has_non_io_offset_non_vertex_index_use(b, &lc->def))
230                   return true;
231             }
232          }
233       }
234       return false;
235    }
236 
237    static bool
shader_contains_alu_op(nir_builder * b,nir_op op,unsigned bit_size)238    shader_contains_alu_op(nir_builder *b, nir_op op, unsigned bit_size)
239    {
240       nir_foreach_block(block, b->impl) {
241          nir_foreach_instr(instr, block) {
242             if (instr->type == nir_instr_type_alu) {
243                if (nir_instr_as_alu(instr)->op == op)
244                   return true;
245             }
246          }
247       }
248       return false;
249    }
250 
shader_contains_uniform_expr(nir_builder * b,unsigned bit_size,unsigned index,bool contains)251    bool shader_contains_uniform_expr(nir_builder *b, unsigned bit_size,
252                                      unsigned index, bool contains)
253    {
254       if (contains) {
255          return shader_contains_uniform(b, bit_size, index) &&
256                 shader_contains_ubo(b, bit_size, index) &&
257                 shader_contains_alu_op(b, nir_op_ffma, bit_size) &&
258                 shader_contains_alu_op(b, nir_op_fsqrt, bit_size) &&
259                 shader_contains_const_float(b, 3.14, bit_size);
260       } else {
261          return !shader_contains_uniform(b, bit_size, index) &&
262                 !shader_contains_ubo(b, bit_size, index) &&
263                 !shader_contains_alu_op(b, nir_op_ffma, bit_size) &&
264                 !shader_contains_alu_op(b, nir_op_fsqrt, bit_size) &&
265                 !shader_contains_const_float(b, 3.14, bit_size);
266       }
267    }
268 
optimize()269    void optimize()
270    {
271       NIR_PASS(_, b1->shader, nir_copy_prop);
272       NIR_PASS(_, b1->shader, nir_opt_dce);
273       NIR_PASS(_, b1->shader, nir_opt_cse);
274 
275       NIR_PASS(_, b2->shader, nir_copy_prop);
276       NIR_PASS(_, b2->shader, nir_opt_dce);
277       NIR_PASS(_, b2->shader, nir_opt_cse);
278    }
279 
opt_varyings()280    nir_opt_varyings_progress opt_varyings()
281    {
282       optimize();
283 
284       if (debug_get_bool_option("PRINT_BEFORE", false)) {
285          printf("\nPRODUCER:\n");
286          nir_print_shader(b1->shader, stdout);
287          printf("CONSUMER:\n");
288          nir_print_shader(b2->shader, stdout);
289       }
290 
291       nir_opt_varyings_progress progress =
292          nir_opt_varyings(b1->shader, b2->shader, true, 4096, 15);
293       nir_validate_shader(b1->shader, "validate producer shader");
294       nir_validate_shader(b2->shader, "validate consumer shader");
295 
296       optimize();
297       nir_shader_gather_info(b1->shader, b1->impl);
298       nir_shader_gather_info(b2->shader, b2->impl);
299       return progress;
300    }
301 
302    nir_shader_compiler_options options;
303    nir_builder *b1;
304    nir_builder *b2;
305    nir_builder _consumer_builder;
306    nir_builder _producer_builder;
307    nir_variable *prod_uniform_vec4_32;
308    nir_variable *prod_uniform_vec4_16;
309    nir_variable *prod_ubo_vec4_32;
310    nir_variable *prod_ubo_vec4_16;
311 };
312 
313 static inline bool
shader_contains_instr(nir_builder * b,nir_instr * i)314 shader_contains_instr(nir_builder *b, nir_instr *i)
315 {
316    nir_foreach_block(block, b->impl) {
317       nir_foreach_instr(instr, block) {
318          if (instr == i)
319             return true;
320       }
321    }
322    return false;
323 }
324 
325 static inline bool
shader_contains_def(nir_builder * b,nir_def * def)326 shader_contains_def(nir_builder *b, nir_def *def)
327 {
328    return shader_contains_instr(b, def->parent_instr);
329 }
330 
331 static inline bool
shader_contains_undef(nir_builder * b,unsigned bit_size)332 shader_contains_undef(nir_builder *b, unsigned bit_size)
333 {
334    nir_foreach_block(block, b->impl) {
335       nir_foreach_instr(instr, block) {
336          if (instr->type == nir_instr_type_undef &&
337              nir_instr_as_undef(instr)->def.bit_size == bit_size &&
338              nir_instr_as_undef(instr)->def.num_components == 1)
339             return true;
340       }
341    }
342    return false;
343 }
344 
345 static inline bool
is_patch(gl_varying_slot slot)346 is_patch(gl_varying_slot slot)
347 {
348    return slot == VARYING_SLOT_TESS_LEVEL_INNER ||
349           slot == VARYING_SLOT_TESS_LEVEL_OUTER ||
350           (slot >= VARYING_SLOT_PATCH0 && slot <= VARYING_SLOT_PATCH31);
351 }
352 
353 static inline bool
is_color(nir_builder * b,gl_varying_slot slot)354 is_color(nir_builder *b, gl_varying_slot slot)
355 {
356    return b->shader->info.stage == MESA_SHADER_FRAGMENT &&
357           (slot == VARYING_SLOT_COL0 || slot == VARYING_SLOT_COL1 ||
358            slot == VARYING_SLOT_BFC0 || slot == VARYING_SLOT_BFC0);
359 }
360 
361 static inline bool
is_texcoord(nir_builder * b,gl_varying_slot slot)362 is_texcoord(nir_builder *b, gl_varying_slot slot)
363 {
364    return b->shader->info.stage == MESA_SHADER_FRAGMENT &&
365           slot >= VARYING_SLOT_TEX0 && slot <= VARYING_SLOT_TEX7;
366 }
367 
368 static inline bool
is_per_vertex(nir_builder * b,gl_varying_slot slot,bool is_input)369 is_per_vertex(nir_builder *b, gl_varying_slot slot, bool is_input)
370 {
371    return !is_patch(slot) &&
372           (b->shader->info.stage == MESA_SHADER_TESS_CTRL ||
373            (is_input && (b->shader->info.stage == MESA_SHADER_TESS_EVAL ||
374                          b->shader->info.stage == MESA_SHADER_GEOMETRY)) ||
375            (!is_input && b->shader->info.stage == MESA_SHADER_MESH));
376 }
377 
378 static inline nir_def *
load_input_output(nir_builder * b,gl_varying_slot slot,unsigned component,nir_alu_type type,unsigned vertex_index,bool output)379 load_input_output(nir_builder *b, gl_varying_slot slot, unsigned component,
380                   nir_alu_type type, unsigned vertex_index, bool output)
381 {
382    unsigned bit_size = type & ~(nir_type_float | nir_type_int | nir_type_uint);
383    nir_def *zero = nir_imm_int(b, 0);
384    nir_def *def;
385 
386    if (is_per_vertex(b, slot, true)) {
387       if (output) {
388          def = nir_load_per_vertex_output(b, 1, bit_size,
389                                           nir_imm_int(b, vertex_index), zero);
390       } else {
391          def = nir_load_per_vertex_input(b, 1, bit_size,
392                                          nir_imm_int(b, vertex_index), zero);
393       }
394    } else {
395       if (output)
396          def = nir_load_output(b, 1, bit_size, zero);
397       else
398          def = nir_load_input(b, 1, bit_size, zero);
399    }
400 
401    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(def->parent_instr);
402    nir_intrinsic_set_base(intr, 0); /* we don't care */
403    nir_intrinsic_set_range(intr, 1);
404    nir_intrinsic_set_component(intr, component);
405    nir_intrinsic_set_dest_type(intr, type);
406 
407    nir_io_semantics sem;
408    memset(&sem, 0, sizeof(sem));
409    sem.location = slot;
410    sem.num_slots = 1;
411    nir_intrinsic_set_io_semantics(intr, sem);
412 
413    return def;
414 }
415 
416 static inline nir_def *
load_input_interp(nir_builder * b,gl_varying_slot slot,unsigned component,nir_alu_type type,unsigned interp)417 load_input_interp(nir_builder *b, gl_varying_slot slot, unsigned component,
418                   nir_alu_type type, unsigned interp)
419 {
420    assert(b->shader->info.stage == MESA_SHADER_FRAGMENT &&
421           interp != INTERP_FLAT && interp != INTERP_CONVERGENT &&
422           interp < INTERP_TES_TRIANGLE);
423    assert(type & nir_type_float);
424 
425    unsigned bit_size = type & ~nir_type_float;
426    nir_def *zero = nir_imm_int(b, 0);
427    nir_def *baryc;
428 
429    switch (interp) {
430    case INTERP_PERSP_PIXEL:
431    case INTERP_LINEAR_PIXEL:
432    case INTERP_COLOR_PIXEL:
433       baryc = nir_load_barycentric_pixel(b, 32);
434       break;
435    case INTERP_PERSP_CENTROID:
436    case INTERP_LINEAR_CENTROID:
437    case INTERP_COLOR_CENTROID:
438       baryc = nir_load_barycentric_centroid(b, 32);
439       break;
440    case INTERP_PERSP_SAMPLE:
441    case INTERP_LINEAR_SAMPLE:
442    case INTERP_COLOR_SAMPLE:
443       baryc = nir_load_barycentric_sample(b, 32);
444       break;
445    case INTERP_PERSP_AT_OFFSET:
446    case INTERP_LINEAR_AT_OFFSET:
447    case INTERP_COLOR_AT_OFFSET:
448       baryc = nir_load_barycentric_at_offset(b, 32, nir_imm_ivec2(b, 1, 2));
449       break;
450    default:
451       unreachable("invalid interp mode");
452    }
453 
454    switch (interp) {
455    case INTERP_PERSP_PIXEL:
456    case INTERP_PERSP_CENTROID:
457    case INTERP_PERSP_SAMPLE:
458    case INTERP_PERSP_AT_OFFSET:
459       nir_intrinsic_set_interp_mode(nir_instr_as_intrinsic(baryc->parent_instr),
460                                     INTERP_MODE_SMOOTH);
461       break;
462    case INTERP_LINEAR_PIXEL:
463    case INTERP_LINEAR_CENTROID:
464    case INTERP_LINEAR_SAMPLE:
465    case INTERP_LINEAR_AT_OFFSET:
466       nir_intrinsic_set_interp_mode(nir_instr_as_intrinsic(baryc->parent_instr),
467                                     INTERP_MODE_NOPERSPECTIVE);
468       break;
469    case INTERP_COLOR_PIXEL:
470    case INTERP_COLOR_CENTROID:
471    case INTERP_COLOR_SAMPLE:
472    case INTERP_COLOR_AT_OFFSET:
473       nir_intrinsic_set_interp_mode(nir_instr_as_intrinsic(baryc->parent_instr),
474                                     INTERP_MODE_NONE);
475       break;
476    default:
477       unreachable("invalid interp mode");
478    }
479 
480    nir_def *def = nir_load_interpolated_input(b, 1, bit_size, baryc, zero);
481 
482    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(def->parent_instr);
483    nir_intrinsic_set_base(intr, 0); /* we don't care */
484    nir_intrinsic_set_component(intr, component);
485    nir_intrinsic_set_dest_type(intr, type);
486 
487    nir_io_semantics sem;
488    memset(&sem, 0, sizeof(sem));
489    sem.location = slot;
490    sem.num_slots = 1;
491    nir_intrinsic_set_io_semantics(intr, sem);
492 
493    return def;
494 }
495 
496 static inline nir_def *
load_interpolated_input_tes(nir_builder * b,gl_varying_slot slot,unsigned component,nir_alu_type type,unsigned interp)497 load_interpolated_input_tes(nir_builder *b, gl_varying_slot slot,
498                             unsigned component, nir_alu_type type,
499                             unsigned interp)
500 {
501    assert(b->shader->info.stage == MESA_SHADER_TESS_EVAL && !is_patch(slot));
502    assert(type & nir_type_float);
503    unsigned bit_size = type & ~nir_type_float;
504    nir_def *zero = nir_imm_int(b, 0);
505    nir_def *tesscoord = nir_load_tess_coord(b);
506    nir_def *def[3];
507 
508    if (bit_size != 32)
509       tesscoord = nir_f2fN(b, tesscoord, bit_size);
510 
511    unsigned remap_uvw[3] = {0, 1, 2};
512    unsigned remap_wuv[3] = {2, 0, 1};
513    unsigned *remap;
514 
515    switch (interp) {
516    case INTERP_TES_TRIANGLE_UVW_FADD:
517    case INTERP_TES_TRIANGLE_UVW_FFMA:
518       remap = remap_uvw;
519       break;
520    case INTERP_TES_TRIANGLE_WUV_FADD:
521    case INTERP_TES_TRIANGLE_WUV_FFMA:
522       remap = remap_wuv;
523       break;
524    default:
525       unreachable("unexpected TES interp mode");
526    }
527 
528    bool use_ffma = interp == INTERP_TES_TRIANGLE_UVW_FFMA ||
529                    interp == INTERP_TES_TRIANGLE_WUV_FFMA;
530 
531    for (unsigned i = 0; i < 3; i++) {
532       def[i] = nir_load_per_vertex_input(b, 1, bit_size, nir_imm_int(b, i),
533                                          zero);
534 
535       nir_intrinsic_instr *intr = nir_instr_as_intrinsic(def[i]->parent_instr);
536       nir_intrinsic_set_base(intr, 0); /* we don't care */
537       nir_intrinsic_set_range(intr, 1);
538       nir_intrinsic_set_component(intr, component);
539       nir_intrinsic_set_dest_type(intr, type);
540 
541       nir_io_semantics sem;
542       memset(&sem, 0, sizeof(sem));
543       sem.location = slot;
544       sem.num_slots = 1;
545       nir_intrinsic_set_io_semantics(intr, sem);
546 
547       if (use_ffma) {
548          if (i == 0)
549             def[i] = nir_fmul(b, def[i], nir_channel(b, tesscoord, remap[i]));
550          else
551             def[i] = nir_ffma(b, def[i], nir_channel(b, tesscoord, remap[i]),
552                               def[i - 1]);
553       } else {
554          def[i] = nir_fmul(b, def[i], nir_channel(b, tesscoord, remap[i]));
555       }
556    }
557 
558    if (use_ffma)
559       return def[2];
560    else
561       return nir_fadd(b, nir_fadd(b, def[0], def[1]), def[2]);
562 }
563 
564 static inline nir_def *
load_input(nir_builder * b,gl_varying_slot slot,unsigned component,nir_alu_type type,unsigned vertex_index,unsigned interp)565 load_input(nir_builder *b, gl_varying_slot slot, unsigned component,
566            nir_alu_type type, unsigned vertex_index, unsigned interp)
567 {
568    if (b->shader->info.stage == MESA_SHADER_FRAGMENT && interp != INTERP_FLAT) {
569       return load_input_interp(b, slot, component, type, interp);
570    } else if (b->shader->info.stage == MESA_SHADER_TESS_EVAL &&
571               interp >= INTERP_TES_TRIANGLE) {
572       return load_interpolated_input_tes(b, slot, component, type, interp);
573    } else {
574       assert(interp == INTERP_FLAT);
575       return load_input_output(b, slot, component, type, vertex_index, false);
576    }
577 }
578 
579 static inline nir_def *
load_output(nir_builder * b,gl_varying_slot slot,unsigned component,nir_alu_type type,unsigned vertex_index)580 load_output(nir_builder *b, gl_varying_slot slot, unsigned component,
581             nir_alu_type type, unsigned vertex_index)
582 {
583    return load_input_output(b, slot, component, type, vertex_index, true);
584 }
585 
586 static inline nir_intrinsic_instr *
store_output(nir_builder * b,gl_varying_slot slot,unsigned component,nir_alu_type type,nir_def * src,int vertex_index)587 store_output(nir_builder *b, gl_varying_slot slot, unsigned component,
588              nir_alu_type type, nir_def *src, int vertex_index)
589 {
590    nir_def *zero = nir_imm_int(b, 0);
591    nir_intrinsic_instr *intr;
592 
593    if (is_per_vertex(b, slot, false)) {
594       assert(b->shader->info.stage == MESA_SHADER_TESS_CTRL ||
595              vertex_index >= 0);
596       nir_def *index = vertex_index >= 0 ? nir_imm_int(b, vertex_index) :
597                                            nir_load_invocation_id(b);
598       intr = nir_store_per_vertex_output(b, src, index, zero);
599    } else {
600       intr = nir_store_output(b, src, zero);
601    }
602 
603    nir_intrinsic_set_base(intr, 0); /* we don't care */
604    nir_intrinsic_set_write_mask(intr, 0x1);
605    nir_intrinsic_set_component(intr, component);
606    nir_intrinsic_set_src_type(intr, type);
607 
608    nir_io_semantics sem;
609    memset(&sem, 0, sizeof(sem));
610    sem.location = slot;
611    sem.num_slots = 1;
612    nir_intrinsic_set_io_semantics(intr, sem);
613 
614    return intr;
615 }
616 
617 static inline nir_intrinsic_instr *
store_ssbo(nir_builder * b,nir_def * src)618 store_ssbo(nir_builder *b, nir_def *src)
619 {
620    return nir_store_ssbo(b, src, nir_imm_int(b, 0), nir_imm_int(b, 0));
621 }
622 
623 /* See can_move_alu_across_interp. */
624 static inline bool
movable_across_interp(nir_builder * b,nir_op op,unsigned interp[3],bool divergent[3],unsigned bit_size)625 movable_across_interp(nir_builder *b, nir_op op, unsigned interp[3],
626                       bool divergent[3], unsigned bit_size)
627 {
628    if ((interp[0] == INTERP_FLAT || !divergent[0]) &&
629        (interp[1] == INTERP_FLAT || !divergent[1]) &&
630        (interp[2] == INTERP_FLAT || !divergent[2]))
631       return true;
632 
633    /* nir_opt_varyings doesn't have an equation for:
634     *    v0 * f2f16(u) + v1 * f2f16(v) + v2 * f2f16(w)
635     */
636    if (b->shader->info.stage == MESA_SHADER_TESS_EVAL && bit_size == 16)
637       return false;
638 
639    switch (op) {
640    case nir_op_fadd:
641    case nir_op_fsub:
642    case nir_op_fneg:
643    case nir_op_mov:
644       return true;
645 
646    case nir_op_fmul:
647    case nir_op_fmulz:
648    case nir_op_ffma:
649    case nir_op_ffmaz:
650       return !divergent[0] || !divergent[1];
651 
652    case nir_op_fdiv:
653       return !divergent[1];
654 
655    case nir_op_flrp:
656       return (!divergent[0] && !divergent[1]) || !divergent[2];
657 
658    default:
659       return false;
660    }
661 }
662