/* xref: /aosp_15_r20/external/mesa3d/src/compiler/nir/tests/opt_varyings_tests_bicm_binary_alu.cpp
 * (revision 6104692788411f58d303aa86923a9ff6ecaded22)
 */
/*
 * Copyright 2023 Advanced Micro Devices, Inc.
 *
 * SPDX-License-Identifier: MIT
 */

/* Tests for Backward Inter-Shader Code Motion. */

#include "nir_opt_varyings_test.h"
11 class nir_opt_varyings_test_bicm_binary_alu : public nir_opt_varyings_test
12 {};
13 
/* Instantiate one test: the producer writes two values to slot1/slot2, the
 * consumer applies a binary ALU opcode to the corresponding inputs and stores
 * the result, and the test checks whether nir_opt_varyings moves the ALU op
 * backward into the producer (and under which interpolation/divergence
 * combinations that motion is legal). */
#define TEST_ALU_BINARY(producer_stage, consumer_stage, type, bitsize, slot1, slot2, interp1, interp2, alu) \
TEST_F(nir_opt_varyings_test_bicm_binary_alu, \
       alu##_##type##bitsize##_##producer_stage##_##consumer_stage##_##slot1##_##slot2##_##interp1##_##interp2) \
{ \
   unsigned pslot[2] = {VARYING_SLOT_##slot1, VARYING_SLOT_##slot2}; \
   unsigned cslot[2] = {VARYING_SLOT_##slot1, VARYING_SLOT_##slot2}; \
   /* NOTE(review): interp[] and divergent[] are declared with 3 elements but \
    * only 2 are ever initialized/used — confirm the extra slot is intentional. */ \
   unsigned interp[3] = {INTERP_##interp1, INTERP_##interp2}; \
   bool divergent[3] = {interp[0] != INTERP_CONVERGENT, interp[1] != INTERP_CONVERGENT}; \
   \
   /* Choose a random TES interpolation mode, but it must be the same for both inputs. */ \
   if (interp[0] == INTERP_TES_TRIANGLE && interp[1] == INTERP_TES_TRIANGLE) \
      interp[0] = interp[1] = INTERP_TES_TRIANGLE + 1 + rand() % 4; \
   if (interp[0] == INTERP_TES_TRIANGLE) \
      interp[0] = INTERP_TES_TRIANGLE + 1 + rand() % 4; \
   if (interp[1] == INTERP_TES_TRIANGLE) \
      interp[1] = INTERP_TES_TRIANGLE + 1 + rand() % 4; \
   \
   /* Convergent inputs are loaded as uniforms below; give them a fixed mode. */ \
   if (!divergent[0]) \
      interp[0] = INTERP_LINEAR_CENTROID; \
   if (!divergent[1]) \
      interp[1] = INTERP_LINEAR_CENTROID; \
   \
   options.varying_expression_max_cost = NULL; /* don't propagate uniforms */ \
   create_shaders(MESA_SHADER_##producer_stage, MESA_SHADER_##consumer_stage); \
   \
   /* Producer (b1): store the two source values as outputs. */ \
   nir_intrinsic_instr *store[2] = {NULL}; \
   for (unsigned s = 0; s < 2; s++) { \
      nir_def *input; \
      if (!divergent[s]) \
         input = load_uniform(b1, bitsize, 0); \
      else \
         input = load_input(b1, (gl_varying_slot)0, s, nir_type_##type##bitsize, 0, 0); \
      store[s] = store_output(b1, (gl_varying_slot)pslot[s], s, nir_type_##type##bitsize, input, -1); \
   } \
   \
   /* Consumer (b2): load both inputs, combine them with the ALU op. */ \
   nir_def *load[2] = {NULL}; \
   for (unsigned s = 0; s < 2; s++) \
      load[s] = load_input(b2, (gl_varying_slot)cslot[s], s, nir_type_##type##bitsize, 0, interp[s]); \
   \
   nir_def *value = nir_##alu(b2, load[0], load[1]); \
   /* Comparison opcodes produce 1-bit booleans; widen so they can be stored. */ \
   if (value->bit_size == 1) \
      value = nir_u2u##bitsize(b2, value); \
   \
   store_output(b2, VARYING_SLOT_VAR0, 0, nir_type_##type##bitsize, value, 0); \
   \
   /* Patch varyings are convergent regardless of the requested interp mode. */ \
   divergent[0] &= !is_patch((gl_varying_slot)pslot[0]); \
   divergent[1] &= !is_patch((gl_varying_slot)pslot[1]); \
   \
   if ((INTERP_##interp1 == INTERP_##interp2 || !divergent[0] || !divergent[1]) &&\
       movable_across_interp(b2, nir_op_##alu, interp, divergent, bitsize)) { \
      /* Expected: the ALU op is moved into the producer and one varying dies. */ \
      ASSERT_EQ(opt_varyings(), (nir_progress_producer | nir_progress_consumer)); \
      /* An opcode with a convergent non-float result isn't moved into */ \
      /* the previous shader because a non-float result can't be interpolated. */ \
      if (!divergent[0] && !divergent[1] && interp[0] != INTERP_FLAT && interp[1] != INTERP_FLAT && \
          !(nir_op_infos[nir_op_##alu].output_type & nir_type_float)) { \
         ASSERT_TRUE(!shader_contains_alu_op(b1, nir_op_##alu, bitsize)); \
         ASSERT_TRUE(shader_contains_alu_op(b2, nir_op_##alu, bitsize)); \
      } else { \
         ASSERT_TRUE(shader_contains_alu_op(b1, nir_op_##alu, bitsize)); \
         /* TES uses fadd and fmul for interpolation, so it's always present. */ \
         if (MESA_SHADER_##consumer_stage != MESA_SHADER_TESS_EVAL || \
             (nir_op_##alu != nir_op_fadd && nir_op_##alu != nir_op_fmul && \
              nir_op_##alu != nir_op_ffma)) { \
            ASSERT_TRUE(!shader_contains_alu_op(b2, nir_op_##alu, bitsize)); \
         } \
      } \
      /* The first store now carries the combined value; the second is gone, */ \
      /* and both consumer loads have been removed. */ \
      ASSERT_TRUE(shader_contains_instr(b1, &store[0]->instr)); \
      ASSERT_TRUE(!shader_contains_instr(b1, &store[1]->instr)); \
      ASSERT_TRUE(!shader_contains_def(b2, load[0])); \
      ASSERT_TRUE(!shader_contains_def(b2, load[1])); \
   } else { \
      /* Expected: no motion — everything stays where it was built. */ \
      ASSERT_EQ(opt_varyings(), 0); \
      ASSERT_TRUE(!shader_contains_alu_op(b1, nir_op_##alu, bitsize)); \
      ASSERT_TRUE(shader_contains_alu_op(b2, nir_op_##alu, bitsize)); \
      ASSERT_TRUE(shader_contains_instr(b1, &store[0]->instr)); \
      ASSERT_TRUE(shader_contains_instr(b1, &store[1]->instr)); \
      ASSERT_TRUE(shader_contains_def(b2, load[0])); \
      ASSERT_TRUE(shader_contains_def(b2, load[1])); \
   } \
}
94 
/* Instantiate the floating-point binary opcodes for one stage/slot/interp
 * combination (fadd is also exercised at 16 bits; the rest at 32). */
#define TEST_ALU_BINARY_FP_OPS(producer_stage, consumer_stage, slot1, slot2, interp1, interp2) \
   TEST_ALU_BINARY(producer_stage, consumer_stage, float, 16, slot1, slot2, interp1, interp2, fadd) \
   TEST_ALU_BINARY(producer_stage, consumer_stage, float, 32, slot1, slot2, interp1, interp2, fadd) \
   TEST_ALU_BINARY(producer_stage, consumer_stage, float, 32, slot1, slot2, interp1, interp2, fdiv) \
   TEST_ALU_BINARY(producer_stage, consumer_stage, float, 32, slot1, slot2, interp1, interp2, feq) \
   TEST_ALU_BINARY(producer_stage, consumer_stage, float, 32, slot1, slot2, interp1, interp2, fge) \
   TEST_ALU_BINARY(producer_stage, consumer_stage, float, 32, slot1, slot2, interp1, interp2, fmin) \
   TEST_ALU_BINARY(producer_stage, consumer_stage, float, 32, slot1, slot2, interp1, interp2, fmax) \
   TEST_ALU_BINARY(producer_stage, consumer_stage, float, 32, slot1, slot2, interp1, interp2, fmod) \
   TEST_ALU_BINARY(producer_stage, consumer_stage, float, 32, slot1, slot2, interp1, interp2, fmul) \
   TEST_ALU_BINARY(producer_stage, consumer_stage, float, 32, slot1, slot2, interp1, interp2, fmulz) \
   TEST_ALU_BINARY(producer_stage, consumer_stage, float, 32, slot1, slot2, interp1, interp2, fneu) \
   TEST_ALU_BINARY(producer_stage, consumer_stage, float, 32, slot1, slot2, interp1, interp2, fpow) \
   TEST_ALU_BINARY(producer_stage, consumer_stage, float, 32, slot1, slot2, interp1, interp2, frem) \
   TEST_ALU_BINARY(producer_stage, consumer_stage, float, 32, slot1, slot2, interp1, interp2, fsub) \
   TEST_ALU_BINARY(producer_stage, consumer_stage, float, 32, slot1, slot2, interp1, interp2, seq) \
   TEST_ALU_BINARY(producer_stage, consumer_stage, float, 32, slot1, slot2, interp1, interp2, sge)
112 
/* Instantiate the integer binary opcodes for one stage/slot combination.
 * Integer varyings are always FLAT (iadd is also exercised at 16 bits). */
#define TEST_ALU_BINARY_INT_OPS(producer_stage, consumer_stage, slot1, slot2) \
   TEST_ALU_BINARY(producer_stage, consumer_stage, int, 16, slot1, slot2, FLAT, FLAT, iadd) \
   TEST_ALU_BINARY(producer_stage, consumer_stage, int, 32, slot1, slot2, FLAT, FLAT, iadd) \
   TEST_ALU_BINARY(producer_stage, consumer_stage, int, 32, slot1, slot2, FLAT, FLAT, iand) \
   TEST_ALU_BINARY(producer_stage, consumer_stage, int, 32, slot1, slot2, FLAT, FLAT, idiv) \
   TEST_ALU_BINARY(producer_stage, consumer_stage, int, 32, slot1, slot2, FLAT, FLAT, ieq) \
   TEST_ALU_BINARY(producer_stage, consumer_stage, int, 32, slot1, slot2, FLAT, FLAT, ige) \
   TEST_ALU_BINARY(producer_stage, consumer_stage, int, 32, slot1, slot2, FLAT, FLAT, imax) \
   TEST_ALU_BINARY(producer_stage, consumer_stage, int, 32, slot1, slot2, FLAT, FLAT, ishl) \
   TEST_ALU_BINARY(producer_stage, consumer_stage, int, 32, slot1, slot2, FLAT, FLAT, udiv) \
   TEST_ALU_BINARY(producer_stage, consumer_stage, int, 32, slot1, slot2, FLAT, FLAT, uge) \
   TEST_ALU_BINARY(producer_stage, consumer_stage, int, 32, slot1, slot2, FLAT, FLAT, umin) \
   TEST_ALU_BINARY(producer_stage, consumer_stage, int, 32, slot1, slot2, FLAT, FLAT, umul_high)
126 
/* All integer ops plus the FP ops with FLAT interpolation on both inputs. */
#define TEST_ALU_BINARY_OPS(producer_stage, consumer_stage, slot1, slot2) \
   TEST_ALU_BINARY_INT_OPS(producer_stage, consumer_stage, slot1, slot2) \
   TEST_ALU_BINARY_FP_OPS(producer_stage, consumer_stage, slot1, slot2, FLAT, FLAT)
130 
/* For FS consumers: all ops with FLAT interpolation, plus the FP ops across
 * the interesting FLAT/PERSP/CONVERGENT interpolation-qualifier pairings. */
#define TEST_ALU_BINARY_OPS_FS_INTERP(producer_stage, consumer_stage, slot1, slot2) \
   TEST_ALU_BINARY_OPS(producer_stage, consumer_stage, slot1, slot2) \
   TEST_ALU_BINARY_FP_OPS(producer_stage, consumer_stage, slot1, slot2, FLAT, PERSP_PIXEL) \
   TEST_ALU_BINARY_FP_OPS(producer_stage, consumer_stage, slot1, slot2, FLAT, CONVERGENT) \
   TEST_ALU_BINARY_FP_OPS(producer_stage, consumer_stage, slot1, slot2, CONVERGENT, FLAT) \
   TEST_ALU_BINARY_FP_OPS(producer_stage, consumer_stage, slot1, slot2, PERSP_PIXEL, PERSP_PIXEL) \
   TEST_ALU_BINARY_FP_OPS(producer_stage, consumer_stage, slot1, slot2, PERSP_PIXEL, PERSP_CENTROID) \
   TEST_ALU_BINARY_FP_OPS(producer_stage, consumer_stage, slot1, slot2, PERSP_PIXEL, CONVERGENT) \
   TEST_ALU_BINARY_FP_OPS(producer_stage, consumer_stage, slot1, slot2, CONVERGENT, PERSP_PIXEL) \
   TEST_ALU_BINARY_FP_OPS(producer_stage, consumer_stage, slot1, slot2, CONVERGENT, CONVERGENT)
141 
142 TEST_ALU_BINARY_FP_OPS(VERTEX, TESS_EVAL, VAR0, VAR1, TES_TRIANGLE, TES_TRIANGLE)
143 TEST_ALU_BINARY_FP_OPS(TESS_CTRL, TESS_EVAL, VAR0, VAR1, TES_TRIANGLE, TES_TRIANGLE)
144 TEST_ALU_BINARY_FP_OPS(TESS_CTRL, TESS_EVAL, VAR0, PATCH0, TES_TRIANGLE, FLAT)
145 TEST_ALU_BINARY_OPS(TESS_CTRL, TESS_EVAL, PATCH0, PATCH1)
146 
147 TEST_ALU_BINARY_OPS_FS_INTERP(VERTEX, FRAGMENT, VAR0, VAR1)
148 TEST_ALU_BINARY_OPS_FS_INTERP(TESS_EVAL, FRAGMENT, VAR0, VAR1)
149 
// TODO: unary/ternary, uniform/UBO load/constant
/* NOTE(review): the extracted source ended with an unmatched '}' that closes
 * no visible scope in this file; it was removed as an extraction artifact —
 * confirm against the upstream file at this revision. */
153