/*
 * Copyright 2023 Advanced Micro Devices, Inc.
 *
 * SPDX-License-Identifier: MIT
 */

/* Tests for Backward Inter-Shader Code Motion. */

#include "nir_opt_varyings_test.h"

class nir_opt_varyings_test_bicm_binary_alu : public nir_opt_varyings_test
{};

#define TEST_ALU_BINARY(producer_stage, consumer_stage, type, bitsize, slot1, slot2, interp1, interp2, alu) \
TEST_F(nir_opt_varyings_test_bicm_binary_alu, \
       alu##_##type##bitsize##_##producer_stage##_##consumer_stage##_##slot1##_##slot2##_##interp1##_##interp2) \
{ \
   unsigned pslot[2] = {VARYING_SLOT_##slot1, VARYING_SLOT_##slot2}; \
   unsigned cslot[2] = {VARYING_SLOT_##slot1, VARYING_SLOT_##slot2}; \
   unsigned interp[3] = {INTERP_##interp1, INTERP_##interp2}; \
   bool divergent[3] = {interp[0] != INTERP_CONVERGENT, interp[1] != INTERP_CONVERGENT}; \
   \
   /* Choose a random TES interpolation mode, but it must be the same for both inputs. */ \
   if (interp[0] == INTERP_TES_TRIANGLE && interp[1] == INTERP_TES_TRIANGLE) \
      interp[0] = interp[1] = INTERP_TES_TRIANGLE + 1 + rand() % 4; \
   if (interp[0] == INTERP_TES_TRIANGLE) \
      interp[0] = INTERP_TES_TRIANGLE + 1 + rand() % 4; \
   if (interp[1] == INTERP_TES_TRIANGLE) \
      interp[1] = INTERP_TES_TRIANGLE + 1 + rand() % 4; \
   \
   if (!divergent[0]) \
      interp[0] = INTERP_LINEAR_CENTROID; \
   if (!divergent[1]) \
      interp[1] = INTERP_LINEAR_CENTROID; \
   \
   options.varying_expression_max_cost = NULL; /* don't propagate uniforms */ \
   create_shaders(MESA_SHADER_##producer_stage, MESA_SHADER_##consumer_stage); \
   \
   nir_intrinsic_instr *store[2] = {NULL}; \
   for (unsigned s = 0; s < 2; s++) { \
      nir_def *input; \
      if (!divergent[s]) \
         input = load_uniform(b1, bitsize, 0); \
      else \
         input = load_input(b1, (gl_varying_slot)0, s, nir_type_##type##bitsize, 0, 0); \
      store[s] = store_output(b1, (gl_varying_slot)pslot[s], s, nir_type_##type##bitsize, input, -1); \
   } \
   \
   nir_def *load[2] = {NULL}; \
   for (unsigned s = 0; s < 2; s++) \
      load[s] = load_input(b2, (gl_varying_slot)cslot[s], s, nir_type_##type##bitsize, 0, interp[s]); \
   \
   nir_def *value = nir_##alu(b2, load[0], load[1]); \
   if (value->bit_size == 1) \
      value = nir_u2u##bitsize(b2, value); \
   \
   store_output(b2, VARYING_SLOT_VAR0, 0, nir_type_##type##bitsize, value, 0); \
   \
   divergent[0] &= !is_patch((gl_varying_slot)pslot[0]); \
   divergent[1] &= !is_patch((gl_varying_slot)pslot[1]); \
   \
   if ((INTERP_##interp1 == INTERP_##interp2 || !divergent[0] || !divergent[1]) && \
       movable_across_interp(b2, nir_op_##alu, interp, divergent, bitsize)) { \
      ASSERT_EQ(opt_varyings(), (nir_progress_producer | nir_progress_consumer)); \
      /* An opcode with a convergent non-float result isn't moved into */ \
      /* the previous shader because a non-float result can't be interpolated. */ \
      if (!divergent[0] && !divergent[1] && interp[0] != INTERP_FLAT && interp[1] != INTERP_FLAT && \
          !(nir_op_infos[nir_op_##alu].output_type & nir_type_float)) { \
         ASSERT_TRUE(!shader_contains_alu_op(b1, nir_op_##alu, bitsize)); \
         ASSERT_TRUE(shader_contains_alu_op(b2, nir_op_##alu, bitsize)); \
      } else { \
         ASSERT_TRUE(shader_contains_alu_op(b1, nir_op_##alu, bitsize)); \
         /* TES uses fadd and fmul for interpolation, so it's always present. */ \
         if (MESA_SHADER_##consumer_stage != MESA_SHADER_TESS_EVAL || \
             (nir_op_##alu != nir_op_fadd && nir_op_##alu != nir_op_fmul && \
              nir_op_##alu != nir_op_ffma)) { \
            ASSERT_TRUE(!shader_contains_alu_op(b2, nir_op_##alu, bitsize)); \
         } \
      } \
      ASSERT_TRUE(shader_contains_instr(b1, &store[0]->instr)); \
      ASSERT_TRUE(!shader_contains_instr(b1, &store[1]->instr)); \
      ASSERT_TRUE(!shader_contains_def(b2, load[0])); \
      ASSERT_TRUE(!shader_contains_def(b2, load[1])); \
   } else { \
      ASSERT_EQ(opt_varyings(), 0); \
      ASSERT_TRUE(!shader_contains_alu_op(b1, nir_op_##alu, bitsize)); \
      ASSERT_TRUE(shader_contains_alu_op(b2, nir_op_##alu, bitsize)); \
      ASSERT_TRUE(shader_contains_instr(b1, &store[0]->instr)); \
      ASSERT_TRUE(shader_contains_instr(b1, &store[1]->instr)); \
      ASSERT_TRUE(shader_contains_def(b2, load[0])); \
      ASSERT_TRUE(shader_contains_def(b2, load[1])); \
   } \
}
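
/* For illustration: an instantiation such as
 *
 *    TEST_ALU_BINARY(VERTEX, FRAGMENT, float, 32, VAR0, VAR1, FLAT, FLAT, fadd)
 *
 * token-pastes the gtest case
 *
 *    nir_opt_varyings_test_bicm_binary_alu.fadd_float32_VERTEX_FRAGMENT_VAR0_VAR1_FLAT_FLAT
 *
 * which builds a VS that writes two inputs to VAR0/VAR1 and an FS that loads
 * both with flat interpolation and computes nir_fadd on them. When the
 * expression is movable across the given interpolation modes, the test
 * expects opt_varyings() to report progress on both shaders and the ALU op
 * to end up in the producer: present in b1, absent from b2, with store[1]
 * and both loads eliminated and store[0] kept as the single combined
 * varying. Otherwise it expects no progress and both shaders left intact. */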

#define TEST_ALU_BINARY_FP_OPS(producer_stage, consumer_stage, slot1, slot2, interp1, interp2) \
   TEST_ALU_BINARY(producer_stage, consumer_stage, float, 16, slot1, slot2, interp1, interp2, fadd) \
   TEST_ALU_BINARY(producer_stage, consumer_stage, float, 32, slot1, slot2, interp1, interp2, fadd) \
   TEST_ALU_BINARY(producer_stage, consumer_stage, float, 32, slot1, slot2, interp1, interp2, fdiv) \
   TEST_ALU_BINARY(producer_stage, consumer_stage, float, 32, slot1, slot2, interp1, interp2, feq) \
   TEST_ALU_BINARY(producer_stage, consumer_stage, float, 32, slot1, slot2, interp1, interp2, fge) \
   TEST_ALU_BINARY(producer_stage, consumer_stage, float, 32, slot1, slot2, interp1, interp2, fmin) \
   TEST_ALU_BINARY(producer_stage, consumer_stage, float, 32, slot1, slot2, interp1, interp2, fmax) \
   TEST_ALU_BINARY(producer_stage, consumer_stage, float, 32, slot1, slot2, interp1, interp2, fmod) \
   TEST_ALU_BINARY(producer_stage, consumer_stage, float, 32, slot1, slot2, interp1, interp2, fmul) \
   TEST_ALU_BINARY(producer_stage, consumer_stage, float, 32, slot1, slot2, interp1, interp2, fmulz) \
   TEST_ALU_BINARY(producer_stage, consumer_stage, float, 32, slot1, slot2, interp1, interp2, fneu) \
   TEST_ALU_BINARY(producer_stage, consumer_stage, float, 32, slot1, slot2, interp1, interp2, fpow) \
   TEST_ALU_BINARY(producer_stage, consumer_stage, float, 32, slot1, slot2, interp1, interp2, frem) \
   TEST_ALU_BINARY(producer_stage, consumer_stage, float, 32, slot1, slot2, interp1, interp2, fsub) \
   TEST_ALU_BINARY(producer_stage, consumer_stage, float, 32, slot1, slot2, interp1, interp2, seq) \
   TEST_ALU_BINARY(producer_stage, consumer_stage, float, 32, slot1, slot2, interp1, interp2, sge)

#define TEST_ALU_BINARY_INT_OPS(producer_stage, consumer_stage, slot1, slot2) \
   TEST_ALU_BINARY(producer_stage, consumer_stage, int, 16, slot1, slot2, FLAT, FLAT, iadd) \
   TEST_ALU_BINARY(producer_stage, consumer_stage, int, 32, slot1, slot2, FLAT, FLAT, iadd) \
   TEST_ALU_BINARY(producer_stage, consumer_stage, int, 32, slot1, slot2, FLAT, FLAT, iand) \
   TEST_ALU_BINARY(producer_stage, consumer_stage, int, 32, slot1, slot2, FLAT, FLAT, idiv) \
   TEST_ALU_BINARY(producer_stage, consumer_stage, int, 32, slot1, slot2, FLAT, FLAT, ieq) \
   TEST_ALU_BINARY(producer_stage, consumer_stage, int, 32, slot1, slot2, FLAT, FLAT, ige) \
   TEST_ALU_BINARY(producer_stage, consumer_stage, int, 32, slot1, slot2, FLAT, FLAT, imax) \
   TEST_ALU_BINARY(producer_stage, consumer_stage, int, 32, slot1, slot2, FLAT, FLAT, ishl) \
   TEST_ALU_BINARY(producer_stage, consumer_stage, int, 32, slot1, slot2, FLAT, FLAT, udiv) \
   TEST_ALU_BINARY(producer_stage, consumer_stage, int, 32, slot1, slot2, FLAT, FLAT, uge) \
   TEST_ALU_BINARY(producer_stage, consumer_stage, int, 32, slot1, slot2, FLAT, FLAT, umin) \
   TEST_ALU_BINARY(producer_stage, consumer_stage, int, 32, slot1, slot2, FLAT, FLAT, umul_high)

#define TEST_ALU_BINARY_OPS(producer_stage, consumer_stage, slot1, slot2) \
   TEST_ALU_BINARY_INT_OPS(producer_stage, consumer_stage, slot1, slot2) \
   TEST_ALU_BINARY_FP_OPS(producer_stage, consumer_stage, slot1, slot2, FLAT, FLAT)
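
/* Note: the integer opcodes above are only instantiated with FLAT/FLAT
 * because non-float values can't be interpolated (see the convergent-result
 * comment in TEST_ALU_BINARY). The FS wrapper below additionally covers the
 * perspective and convergent interpolation combinations for the FP opcodes. */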

#define TEST_ALU_BINARY_OPS_FS_INTERP(producer_stage, consumer_stage, slot1, slot2) \
   TEST_ALU_BINARY_OPS(producer_stage, consumer_stage, slot1, slot2) \
   TEST_ALU_BINARY_FP_OPS(producer_stage, consumer_stage, slot1, slot2, FLAT, PERSP_PIXEL) \
   TEST_ALU_BINARY_FP_OPS(producer_stage, consumer_stage, slot1, slot2, FLAT, CONVERGENT) \
   TEST_ALU_BINARY_FP_OPS(producer_stage, consumer_stage, slot1, slot2, CONVERGENT, FLAT) \
   TEST_ALU_BINARY_FP_OPS(producer_stage, consumer_stage, slot1, slot2, PERSP_PIXEL, PERSP_PIXEL) \
   TEST_ALU_BINARY_FP_OPS(producer_stage, consumer_stage, slot1, slot2, PERSP_PIXEL, PERSP_CENTROID) \
   TEST_ALU_BINARY_FP_OPS(producer_stage, consumer_stage, slot1, slot2, PERSP_PIXEL, CONVERGENT) \
   TEST_ALU_BINARY_FP_OPS(producer_stage, consumer_stage, slot1, slot2, CONVERGENT, PERSP_PIXEL) \
   TEST_ALU_BINARY_FP_OPS(producer_stage, consumer_stage, slot1, slot2, CONVERGENT, CONVERGENT)

TEST_ALU_BINARY_FP_OPS(VERTEX, TESS_EVAL, VAR0, VAR1, TES_TRIANGLE, TES_TRIANGLE)
TEST_ALU_BINARY_FP_OPS(TESS_CTRL, TESS_EVAL, VAR0, VAR1, TES_TRIANGLE, TES_TRIANGLE)
TEST_ALU_BINARY_FP_OPS(TESS_CTRL, TESS_EVAL, VAR0, PATCH0, TES_TRIANGLE, FLAT)
TEST_ALU_BINARY_OPS(TESS_CTRL, TESS_EVAL, PATCH0, PATCH1)

TEST_ALU_BINARY_OPS_FS_INTERP(VERTEX, FRAGMENT, VAR0, VAR1)
TEST_ALU_BINARY_OPS_FS_INTERP(TESS_EVAL, FRAGMENT, VAR0, VAR1)

// TODO: unary/ternary, uniform/UBO load/constant

}