1 /* 2 * Copyright © 2020 Valve Corporation 3 * 4 * SPDX-License-Identifier: MIT 5 */ 6 #include "helpers.h" 7 8 using namespace aco; 9 10 BEGIN_TEST(to_hw_instr.swap_subdword) 11 PhysReg v0_lo{256}; 12 PhysReg v0_hi{256}; 13 PhysReg v0_b1{256}; 14 PhysReg v0_b3{256}; 15 PhysReg v1_lo{257}; 16 PhysReg v1_hi{257}; 17 PhysReg v1_b1{257}; 18 PhysReg v1_b3{257}; 19 PhysReg v128_lo{256 + 128}; 20 PhysReg v128_hi{256 + 128}; 21 PhysReg v129_lo{256 + 129}; 22 PhysReg v129_hi{256 + 129}; 23 v0_hi.reg_b += 2; 24 v1_hi.reg_b += 2; 25 v0_b1.reg_b += 1; 26 v1_b1.reg_b += 1; 27 v0_b3.reg_b += 3; 28 v1_b3.reg_b += 3; 29 v128_hi.reg_b += 2; 30 v129_hi.reg_b += 2; 31 32 for (amd_gfx_level lvl : {GFX8, GFX9, GFX11}) { 33 if (!setup_cs(NULL, lvl)) 34 continue; 35 36 //~gfx(8|9|11)>> p_unit_test 0 37 //~gfx8! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 2 38 //~gfx(9|11)! v1: %0:v[0] = v_pack_b32_f16 hi(%0:v[0][16:32]), %0:v[0][0:16] 39 bld.pseudo(aco_opcode::p_unit_test, Operand::zero()); 40 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b), 41 Operand(v0_hi, v2b), Operand(v0_lo, v2b)); 42 43 //~gfx(8|9|11)! p_unit_test 1 44 //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0] 45 //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0] 46 //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0] 47 //~gfx(9|11)! v1: %0:v[0], v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0] 48 //~gfx[89]! v2b: %0:v[1][16:32] = v_mov_b32 %0:v[0][16:32] dst_sel:uword1 dst_preserve src0_sel:uword1 49 //~gfx11! v2b: %0:v[1][16:32] = v_mov_b16 hi(%0:v[0][16:32]) opsel_hi 50 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1u)); 51 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1), Definition(v1_lo, v2b), 52 Operand(v1_lo, v1), Operand(v0_lo, v2b)); 53 54 //~gfx(8|9|11)! p_unit_test 2 55 //~gfx[89]! v2b: %0:v[0][16:32] = v_mov_b32 %0:v[1][16:32] dst_sel:uword1 dst_preserve src0_sel:uword1 56 //~gfx[89]! v2b: %0:v[1][16:32] = v_mov_b32 %0:v[0][0:16] dst_sel:uword1 dst_preserve src0_sel:uword0 57 //~gfx[89]! v2b: %0:v[1][0:16] = v_xor_b32 %0:v[1][0:16], %0:v[0][0:16] dst_sel:uword0 dst_preserve src0_sel:uword0 src1_sel:uword0 58 //~gfx[89]! v2b: %0:v[0][0:16] = v_xor_b32 %0:v[1][0:16], %0:v[0][0:16] dst_sel:uword0 dst_preserve src0_sel:uword0 src1_sel:uword0 59 //~gfx[89]! v2b: %0:v[1][0:16] = v_xor_b32 %0:v[1][0:16], %0:v[0][0:16] dst_sel:uword0 dst_preserve src0_sel:uword0 src1_sel:uword0 60 //~gfx11! v2b: %0:v[0][16:32] = v_mov_b16 hi(%0:v[1][16:32]) opsel_hi 61 //~gfx11! v2b: %0:v[1][16:32] = v_mov_b16 %0:v[0][0:16] opsel_hi 62 //~gfx11! v2b: %0:v[0][0:16], v2b: %0:v[1][0:16] = v_swap_b16 %0:v[1][0:16], %0:v[0][0:16] 63 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u)); 64 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1), Definition(v1_lo, v2b), 65 Definition(v1_hi, v2b), Operand(v1_lo, v1), Operand(v0_lo, v2b), 66 Operand(v0_lo, v2b)); 67 68 //~gfx(8|9|11)! p_unit_test 3 69 //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0] 70 //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0] 71 //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0] 72 //~gfx(9|11)! v1: %0:v[0], v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0] 73 //~gfx[89]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[0][0:16] dst_sel:uword0 dst_preserve src0_sel:uword0 74 //~gfx[89]! v1b: %0:v[1][16:24] = v_mov_b32 %0:v[0][16:24] dst_sel:ubyte2 dst_preserve src0_sel:ubyte2 75 //~gfx11! v2b: %0:v[1][0:16] = v_mov_b16 %0:v[0][0:16] 76 //~gfx11! v1: %0:v[1] = v_perm_b32 %0:v[1], %0:v[0], 0x7020504 77 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3u)); 78 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1), Definition(v1_b3, v1b), 79 Operand(v1_lo, v1), Operand(v0_b3, v1b)); 80 81 //~gfx(8|9|11)! p_unit_test 4 82 //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0] 83 //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0] 84 //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0] 85 //~gfx(9|11)! v1: %0:v[0], v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0] 86 //~gfx[89]! v1b: %0:v[1][8:16] = v_mov_b32 %0:v[0][8:16] dst_sel:ubyte1 dst_preserve src0_sel:ubyte1 87 //~gfx[89]! v2b: %0:v[1][16:32] = v_mov_b32 %0:v[0][16:32] dst_sel:uword1 dst_preserve src0_sel:uword1 88 //~gfx11! v1: %0:v[1] = v_perm_b32 %0:v[1], %0:v[0], 0x7060104 89 //~gfx11! v2b: %0:v[1][16:32] = v_mov_b16 hi(%0:v[0][16:32]) opsel_hi 90 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4u)); 91 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1), Definition(v1_lo, v1b), 92 Operand(v1_lo, v1), Operand(v0_lo, v1b)); 93 94 //~gfx(8|9|11)! p_unit_test 5 95 //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[0], %0:v[1] 96 //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[0], %0:v[1] 97 //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[0], %0:v[1] 98 //~gfx(9|11)! v1: %0:v[1], v1: %0:v[0] = v_swap_b32 %0:v[0], %0:v[1] 99 //~gfx[89]! v1b: %0:v[0][8:16] = v_mov_b32 %0:v[1][8:16] dst_sel:ubyte1 dst_preserve src0_sel:ubyte1 100 //~gfx[89]! v1b: %0:v[0][24:32] = v_mov_b32 %0:v[1][24:32] dst_sel:ubyte3 dst_preserve src0_sel:ubyte3 101 //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0x7060104 102 //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0x3060504 103 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5u)); 104 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1b), Definition(v0_hi, v1b), 105 Definition(v1_lo, v1), Operand(v1_lo, v1b), Operand(v1_hi, v1b), 106 Operand(v0_lo, v1)); 107 108 //~gfx(8|9|11)! p_unit_test 6 109 //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0] 110 //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0] 111 //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0] 112 //~gfx(9|11)! v1: %0:v[0], v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0] 113 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(6u)); 114 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b), 115 Definition(v1_lo, v1), Operand(v1_lo, v2b), Operand(v1_hi, v2b), 116 Operand(v0_lo, v1)); 117 118 //~gfx(8|9|11)! p_unit_test 7 119 //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[0], %0:v[1] 120 //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[0], %0:v[1] 121 //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[0], %0:v[1] 122 //~gfx(9|11)! v1: %0:v[1], v1: %0:v[0] = v_swap_b32 %0:v[0], %0:v[1] 123 //~gfx(8|9|11)! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 2 124 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(7u)); 125 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b), 126 Definition(v1_lo, v1), Operand(v1_hi, v2b), Operand(v1_lo, v2b), 127 Operand(v0_lo, v1)); 128 129 //~gfx(8|9|11)! p_unit_test 8 130 //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0] 131 //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0] 132 //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0] 133 //~gfx(9|11)! v1: %0:v[0], v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0] 134 //~gfx[89]! v1b: %0:v[1][24:32] = v_xor_b32 %0:v[1][24:32], %0:v[0][24:32] dst_sel:ubyte3 dst_preserve src0_sel:ubyte3 src1_sel:ubyte3 135 //~gfx[89]! v1b: %0:v[0][24:32] = v_xor_b32 %0:v[1][24:32], %0:v[0][24:32] dst_sel:ubyte3 dst_preserve src0_sel:ubyte3 src1_sel:ubyte3 136 //~gfx[89]! v1b: %0:v[1][24:32] = v_xor_b32 %0:v[1][24:32], %0:v[0][24:32] dst_sel:ubyte3 dst_preserve src0_sel:ubyte3 src1_sel:ubyte3 137 //~gfx11! v2b: %0:v[0][0:16], v2b: %0:v[1][16:32] = v_swap_b16 hi(%0:v[1][16:32]), %0:v[0][0:16] 138 //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], 0, 0x5060704 139 //~gfx11! v2b: %0:v[0][0:16], v2b: %0:v[1][16:32] = v_swap_b16 hi(%0:v[1][16:32]), %0:v[0][0:16] 140 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(8u)); 141 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v3b), Definition(v1_lo, v3b), 142 Operand(v1_lo, v3b), Operand(v0_lo, v3b)); 143 144 //~gfx(8|9|11)! p_unit_test 9 145 //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0] 146 //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0] 147 //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0] 148 //~gfx(9|11)! v1: %0:v[0], v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0] 149 //~gfx[89]! v1b: %0:v[1][24:32] = v_mov_b32 %0:v[0][24:32] dst_sel:ubyte3 dst_preserve src0_sel:ubyte3 150 //~gfx11! v1: %0:v[1] = v_perm_b32 %0:v[1], %0:v[0], 0x3060504 151 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(9u)); 152 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v3b), Definition(v1_lo, v3b), 153 Definition(v0_b3, v1b), Operand(v1_lo, v3b), Operand(v0_lo, v3b), 154 Operand(v1_b3, v1b)); 155 156 //~gfx(8|9|11)! p_unit_test 10 157 //~gfx[89]! v1b: %0:v[1][8:16] = v_xor_b32 %0:v[1][8:16], %0:v[0][8:16] dst_sel:ubyte1 dst_preserve src0_sel:ubyte1 src1_sel:ubyte1 158 //~gfx[89]! v1b: %0:v[0][8:16] = v_xor_b32 %0:v[1][8:16], %0:v[0][8:16] dst_sel:ubyte1 dst_preserve src0_sel:ubyte1 src1_sel:ubyte1 159 //~gfx[89]! v1b: %0:v[1][8:16] = v_xor_b32 %0:v[1][8:16], %0:v[0][8:16] dst_sel:ubyte1 dst_preserve src0_sel:ubyte1 src1_sel:ubyte1 160 //~gfx11! v2b: %0:v[0][16:32], v2b: %0:v[1][0:16] = v_swap_b16 %0:v[1][0:16], %0:v[0][16:32] opsel_hi 161 //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], 0, 0x5060704 162 //~gfx11! v2b: %0:v[0][16:32], v2b: %0:v[1][0:16] = v_swap_b16 %0:v[1][0:16], %0:v[0][16:32] opsel_hi 163 //~gfx[89]! v1b: %0:v[1][16:24] = v_xor_b32 %0:v[1][16:24], %0:v[0][16:24] dst_sel:ubyte2 dst_preserve src0_sel:ubyte2 src1_sel:ubyte2 164 //~gfx[89]! v1b: %0:v[0][16:24] = v_xor_b32 %0:v[1][16:24], %0:v[0][16:24] dst_sel:ubyte2 dst_preserve src0_sel:ubyte2 src1_sel:ubyte2 165 //~gfx[89]! v1b: %0:v[1][16:24] = v_xor_b32 %0:v[1][16:24], %0:v[0][16:24] dst_sel:ubyte2 dst_preserve src0_sel:ubyte2 src1_sel:ubyte2 166 //~gfx11! v2b: %0:v[0][0:16], v2b: %0:v[1][16:32] = v_swap_b16 hi(%0:v[1][16:32]), %0:v[0][0:16] 167 //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], 0, 0x7040506 168 //~gfx11! v2b: %0:v[0][0:16], v2b: %0:v[1][16:32] = v_swap_b16 hi(%0:v[1][16:32]), %0:v[0][0:16] 169 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(10u)); 170 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_b1, v2b), Definition(v1_b1, v2b), 171 Operand(v1_b1, v2b), Operand(v0_b1, v2b)); 172 173 //~gfx(8|9|11)! p_unit_test 11 174 //~gfx[89]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[0][16:32] dst_sel:uword0 dst_preserve src0_sel:uword1 175 //~gfx11! v2b: %0:v[1][0:16] = v_mov_b16 hi(%0:v[0][16:32]) 176 //~gfx(8|9|11)! v1: %0:v[0] = v_mov_b32 42 177 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(11u)); 178 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1), Definition(v1_lo, v2b), 179 Operand::c32(42u), Operand(v0_hi, v2b)); 180 181 //~gfx(8|9|11)! p_unit_test 12 182 //~gfx[89]! v1b: %0:v[0][24:32] = v_xor_b32 %0:v[0][24:32], %0:v[0][8:16] dst_sel:ubyte3 dst_preserve src0_sel:ubyte3 src1_sel:ubyte1 183 //~gfx[89]! v1b: %0:v[0][8:16] = v_xor_b32 %0:v[0][24:32], %0:v[0][8:16] dst_sel:ubyte1 dst_preserve src0_sel:ubyte3 src1_sel:ubyte1 184 //~gfx[89]! v1b: %0:v[0][24:32] = v_xor_b32 %0:v[0][24:32], %0:v[0][8:16] dst_sel:ubyte3 dst_preserve src0_sel:ubyte3 src1_sel:ubyte1 185 //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], 0, 0x5060704 186 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(12u)); 187 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_b1, v1b), Definition(v0_b3, v1b), 188 Operand(v0_b3, v1b), Operand(v0_b1, v1b)); 189 190 //~gfx(8|9|11)! p_unit_test 13 191 //~gfx[89]! v2b: %0:v[129][16:32] = v_xor_b32 %0:v[129][16:32], %0:v[128][0:16] dst_sel:uword1 dst_preserve src0_sel:uword1 src1_sel:uword0 192 //~gfx[89]! v2b: %0:v[128][0:16] = v_xor_b32 %0:v[129][16:32], %0:v[128][0:16] dst_sel:uword0 dst_preserve src0_sel:uword1 src1_sel:uword0 193 //~gfx[89]! v2b: %0:v[129][16:32] = v_xor_b32 %0:v[129][16:32], %0:v[128][0:16] dst_sel:uword1 dst_preserve src0_sel:uword1 src1_sel:uword0 194 //~gfx11! v2b: %0:v[128][0:16] = v_xor_b16 hi(%0:v[129][16:32]), %0:v[128][0:16] 195 //~gfx11! v2b: %0:v[129][16:32] = v_xor_b16 hi(%0:v[129][16:32]), %0:v[128][0:16] opsel_hi 196 //~gfx11! v2b: %0:v[128][0:16] = v_xor_b16 hi(%0:v[129][16:32]), %0:v[128][0:16] 197 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(13u)); 198 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v128_lo, v2b), Definition(v129_hi, v2b), 199 Operand(v129_hi, v2b), Operand(v128_lo, v2b)); 200 201 //~gfx(8|9|11)! p_unit_test 14 202 //~gfx[89]! v2b: %0:v[129][0:16] = v_xor_b32 %0:v[129][0:16], %0:v[128][16:32] dst_sel:uword0 dst_preserve src0_sel:uword0 src1_sel:uword1 203 //~gfx[89]! v2b: %0:v[128][16:32] = v_xor_b32 %0:v[129][0:16], %0:v[128][16:32] dst_sel:uword1 dst_preserve src0_sel:uword0 src1_sel:uword1 204 //~gfx[89]! v2b: %0:v[129][0:16] = v_xor_b32 %0:v[129][0:16], %0:v[128][16:32] dst_sel:uword0 dst_preserve src0_sel:uword0 src1_sel:uword1 205 //~gfx11! v2b: %0:v[128][16:32] = v_xor_b16 %0:v[129][0:16], hi(%0:v[128][16:32]) opsel_hi 206 //~gfx11! v2b: %0:v[129][0:16] = v_xor_b16 %0:v[129][0:16], hi(%0:v[128][16:32]) 207 //~gfx11! v2b: %0:v[128][16:32] = v_xor_b16 %0:v[129][0:16], hi(%0:v[128][16:32]) opsel_hi 208 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(14u)); 209 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v128_hi, v2b), Definition(v129_lo, v2b), 210 Operand(v129_lo, v2b), Operand(v128_hi, v2b)); 211 212 //~gfx11! s_nop 213 //~gfx11! s_sendmsg sendmsg(dealloc_vgprs) 214 //~gfx(8|9|11)! s_endpgm 215 216 finish_to_hw_instr_test(); 217 } 218 END_TEST 219 220 BEGIN_TEST(to_hw_instr.subdword_constant) 221 PhysReg v0_lo{256}; 222 PhysReg v0_hi{256}; 223 PhysReg v0_b1{256}; 224 PhysReg v1_lo{257}; 225 PhysReg v1_hi{257}; 226 v0_hi.reg_b += 2; 227 v0_b1.reg_b += 1; 228 v1_hi.reg_b += 2; 229 230 for (amd_gfx_level lvl : {GFX9, GFX10, GFX11}) { 231 if (!setup_cs(NULL, lvl)) 232 continue; 233 234 /* 16-bit pack */ 235 //>> p_unit_test 0 236 //! v1: %_:v[0] = v_pack_b32_f16 0.5, hi(%_:v[1][16:32]) 237 bld.pseudo(aco_opcode::p_unit_test, Operand::zero()); 238 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b), 239 Operand::c16(0x3800), Operand(v1_hi, v2b)); 240 241 //! p_unit_test 1 242 //~gfx9! v2b: %0:v[0][16:32] = v_and_b32 0xffff0000, %0:v[1][16:32] 243 //~gfx9! v1: %0:v[0] = v_or_b32 0x4205, %0:v[0] 244 //~gfx(10|11)! v1: %_:v[0] = v_pack_b32_f16 0x4205, hi(%_:v[1][16:32]) 245 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1u)); 246 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b), 247 Operand::c16(0x4205), Operand(v1_hi, v2b)); 248 249 //! p_unit_test 2 250 //~gfx9! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16] 251 //~gfx9! v1: %_:v[0] = v_or_b32 0x4205, %_:v[0] 252 //~gfx(10|11)! v1: %0:v[0] = v_pack_b32_f16 0x4205, %0:v[0][0:16] 253 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u)); 254 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b), 255 Operand::c16(0x4205), Operand(v0_lo, v2b)); 256 257 //! p_unit_test 3 258 //! v1: %_:v[0] = v_mov_b32 0x3c003800 259 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3u)); 260 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b), 261 Operand::c16(0x3800), Operand::c16(0x3c00)); 262 263 //! p_unit_test 4 264 //! v1: %_:v[0] = v_mov_b32 0x43064205 265 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4u)); 266 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b), 267 Operand::c16(0x4205), Operand::c16(0x4306)); 268 269 //! p_unit_test 5 270 //! v1: %_:v[0] = v_mov_b32 0x38004205 271 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5u)); 272 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b), 273 Operand::c16(0x4205), Operand::c16(0x3800)); 274 275 /* 16-bit copy */ 276 //! p_unit_test 6 277 //~gfx(9|10)! v2b: %_:v[0][0:16] = v_add_f16 0.5, 0 dst_sel:uword0 dst_preserve src0_sel:uword0 src1_sel:dword 278 //~gfx11! v2b: %0:v[0][0:16] = v_add_f16 0.5, 0 279 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(6u)); 280 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Operand::c16(0x3800)); 281 282 //! p_unit_test 7 283 //~gfx9! v1: %_:v[0] = v_and_b32 0xffff0000, %_:v[0] 284 //~gfx9! v1: %_:v[0] = v_or_b32 0x4205, %_:v[0] 285 //~gfx10! v2b: %_:v[0][0:16] = v_add_u16_e64 0x4205, 0 286 //~gfx11! v2b: %0:v[0][0:16] = v_mov_b16 0x4205 287 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(7u)); 288 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Operand::c16(0x4205)); 289 290 //! p_unit_test 8 291 //~gfx9! v1: %_:v[0] = v_and_b32 0xffff, %_:v[0] 292 //~gfx9! v1: %_:v[0] = v_or_b32 0x42050000, %_:v[0] 293 //~gfx10! v2b: %_:v[0][16:32] = v_add_u16_e64 0x4205, 0 opsel_hi 294 //~gfx11! v2b: %0:v[0][16:32] = v_mov_b16 0x4205 opsel_hi 295 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(8u)); 296 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_hi, v2b), Operand::c16(0x4205)); 297 298 //! p_unit_test 9 299 //~gfx(9|10)! v1b: %_:v[0][8:16] = v_mov_b32 0 dst_sel:ubyte1 dst_preserve src0_sel:dword 300 //~gfx(9|10)! v1b: %_:v[0][16:24] = v_mov_b32 56 dst_sel:ubyte2 dst_preserve src0_sel:dword 301 //~gfx11! v1b: %_:v[0][8:16] = v_cvt_pk_u8_f32 0, 1, %_:v[0] 302 //~gfx11! v1b: %_:v[0][16:24] = v_cvt_pk_u8_f32 0x42600000, 2, %_:v[0] 303 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(9u)); 304 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_b1, v2b), Operand::c16(0x3800)); 305 306 //! p_unit_test 10 307 //~gfx(9|10)! v1b: %_:v[0][8:16] = v_mov_b32 5 dst_sel:ubyte1 dst_preserve src0_sel:dword 308 //~gfx(9|10)! v1b: %_:v[0][16:24] = v_mul_u32_u24 2, 33 dst_sel:ubyte2 dst_preserve src0_sel:dword src1_sel:dword 309 //~gfx11! v1b: %_:v[0][8:16] = v_cvt_pk_u8_f32 0x40a00000, 1, %_:v[0] 310 //~gfx11! v1b: %_:v[0][16:24] = v_cvt_pk_u8_f32 0x42840000, 2, %_:v[0] 311 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(10u)); 312 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_b1, v2b), Operand::c16(0x4205)); 313 314 /* 8-bit copy */ 315 //! p_unit_test 11 316 //~gfx(9|10)! v1b: %_:v[0][0:8] = v_mul_u32_u24 2, 33 dst_sel:ubyte0 dst_preserve src0_sel:dword src1_sel:dword 317 //~gfx11! v1b: %_:v[0][0:8] = v_cvt_pk_u8_f32 0x42840000, 0, %_:v[0] 318 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(11u)); 319 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1b), Operand::c8(0x42)); 320 321 /* 32-bit and 8-bit copy */ 322 //! p_unit_test 12 323 //! v1: %_:v[0] = v_mov_b32 0 324 //~gfx(9|10)! v1b: %_:v[1][0:8] = v_mov_b32 0 dst_sel:ubyte0 dst_preserve src0_sel:dword 325 //~gfx11! v1b: %_:v[1][0:8] = v_cvt_pk_u8_f32 0, 0, %_:v[1] 326 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(12u)); 327 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1), Definition(v1_lo, v1b), 328 Operand::zero(), Operand::zero(1)); 329 330 //! p_unit_test 13 331 //~gfx9! v1: %_:v[0] = v_and_b32 0xffff0000, %_:v[0] 332 //~gfx9! v1: %_:v[0] = v_or_b32 0xff, %_:v[0] 333 //~gfx10! v2b: %_:v[0][0:16] = v_add_u16_e64 0xff, 0 334 //~gfx11! v2b: %0:v[0][0:16] = v_mov_b16 0xff 335 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(13u)); 336 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Operand::c16(0x00ff)); 337 338 //! p_unit_test 14 339 //~gfx9! v1: %_:v[0] = v_and_b32 0xffff, %_:v[0] 340 //~gfx9! v1: %_:v[0] = v_or_b32 0xff000000, %_:v[0] 341 //~gfx10! v2b: %_:v[0][16:32] = v_add_u16_e64 0xff00, 0 opsel_hi 342 //~gfx11! v2b: %0:v[0][16:32] = v_mov_b16 0xffffff00 opsel_hi 343 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(14u)); 344 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_hi, v2b), Operand::c16(0xff00)); 345 346 //! p_unit_test 15 347 //~gfx(9|10)! v2b: %_:v[0][0:16] = v_mov_b32 0 dst_sel:uword0 dst_preserve src0_sel:dword 348 //~gfx11! v2b: %0:v[0][0:16] = v_mov_b16 0 349 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(15u)); 350 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Operand::zero(2)); 351 352 //! p_unit_test 16 353 //~gfx(9|10)! v1b: %_:v[0][0:8] = v_mov_b32 -1 dst_sel:ubyte0 dst_preserve src0_sel:dword 354 //~gfx11! v1b: %_:v[0][0:8] = v_cvt_pk_u8_f32 0x437f0000, 0, %_:v[0] 355 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(16u)); 356 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1b), Operand::c8(0xff)); 357 358 //! p_unit_test 17 359 //~gfx(9|10)! v1b: %_:v[0][0:8] = v_mov_b32 0 dst_sel:ubyte0 dst_preserve src0_sel:dword 360 //~gfx11! v1b: %_:v[0][0:8] = v_cvt_pk_u8_f32 0, 0, %_:v[0] 361 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(17u)); 362 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1b), Operand::zero(1)); 363 364 //~gfx11! s_nop 365 //~gfx11! s_sendmsg sendmsg(dealloc_vgprs) 366 //! s_endpgm 367 368 finish_to_hw_instr_test(); 369 } 370 END_TEST 371 372 BEGIN_TEST(to_hw_instr.self_intersecting_swap) 373 if (!setup_cs(NULL, GFX9)) 374 return; 375 376 PhysReg reg_v1{257}; 377 PhysReg reg_v2{258}; 378 PhysReg reg_v3{259}; 379 PhysReg reg_v7{263}; 380 381 //>> p_unit_test 0 382 //! v1: %0:v[1], v1: %0:v[2] = v_swap_b32 %0:v[2], %0:v[1] 383 //! v1: %0:v[2], v1: %0:v[3] = v_swap_b32 %0:v[3], %0:v[2] 384 //! v1: %0:v[3], v1: %0:v[7] = v_swap_b32 %0:v[7], %0:v[3] 385 //! s_endpgm 386 bld.pseudo(aco_opcode::p_unit_test, Operand::zero()); 387 // v[1:2] = v[2:3] 388 // v3 = v7 389 // v7 = v1 390 bld.pseudo(aco_opcode::p_parallelcopy, Definition(reg_v1, v2), Definition(reg_v3, v1), 391 Definition(reg_v7, v1), Operand(reg_v2, v2), Operand(reg_v7, v1), 392 Operand(reg_v1, v1)); 393 394 finish_to_hw_instr_test(); 395 END_TEST 396 397 BEGIN_TEST(to_hw_instr.extract) 398 PhysReg s0_lo{0}; 399 PhysReg s1_lo{1}; 400 PhysReg v0_lo{256}; 401 PhysReg v1_lo{257}; 402 403 for (amd_gfx_level lvl : {GFX7, GFX8, GFX9, GFX11}) { 404 for (unsigned is_signed = 0; is_signed <= 1; is_signed++) { 405 if (!setup_cs(NULL, lvl, CHIP_UNKNOWN, is_signed ? "_signed" : "_unsigned")) 406 continue; 407 408 #define EXT(idx, size) \ 409 bld.pseudo(aco_opcode::p_extract, Definition(v0_lo, v1), Operand(v1_lo, v1), Operand::c32(idx), \ 410 Operand::c32(size), Operand::c32(is_signed)); 411 412 //; funcs['v_bfe'] = lambda _: 'v_bfe_i32' if variant.endswith('_signed') else 'v_bfe_u32' 413 //; funcs['v_shr'] = lambda _: 'v_ashrrev_i32' if variant.endswith('_signed') else 'v_lshrrev_b32' 414 //; funcs['s_bfe'] = lambda _: 's_bfe_i32' if variant.endswith('_signed') else 's_bfe_u32' 415 //; funcs['s_shr'] = lambda _: 's_ashr_i32' if variant.endswith('_signed') else 's_lshr_b32' 416 //; funcs['byte'] = lambda n: '%cbyte%s' % ('s' if variant.endswith('_signed') else 'u', n) 417 418 //>> p_unit_test 0 419 bld.pseudo(aco_opcode::p_unit_test, Operand::zero()); 420 //! v1: %_:v[0] = @v_bfe %_:v[1], 0, 8 421 EXT(0, 8) 422 //! v1: %_:v[0] = @v_bfe %_:v[1], 8, 8 423 EXT(1, 8) 424 //! v1: %_:v[0] = @v_bfe %_:v[1], 16, 8 425 EXT(2, 8) 426 //! v1: %_:v[0] = @v_shr 24, %_:v[1] 427 EXT(3, 8) 428 //~gfx(7|8|9)_.*! v1: %_:v[0] = @v_bfe %_:v[1], 0, 16 429 //~gfx11_unsigned! v1: %_:v[0] = v_cvt_u32_u16 %_:v[1] 430 //~gfx11_signed! v1: %_:v[0] = v_cvt_i32_i16 %_:v[1] 431 EXT(0, 16) 432 //! v1: %_:v[0] = @v_shr 16, %_:v[1] 433 EXT(1, 16) 434 435 #undef EXT 436 437 #define EXT(idx, size) \ 438 bld.pseudo(aco_opcode::p_extract, Definition(s0_lo, s1), Definition(scc, s1), \ 439 Operand(s1_lo, s1), Operand::c32(idx), Operand::c32(size), Operand::c32(is_signed)); 440 441 //>> p_unit_test 2 442 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u)); 443 //~gfx.*_unsigned! s1: %_:s[0], s1: %_:scc = @s_bfe %_:s[1], 0x80000 444 //~gfx.*_signed! s1: %_:s[0] = s_sext_i32_i8 %_:s[1] 445 EXT(0, 8) 446 //! s1: %_:s[0], s1: %_:scc = @s_bfe %_:s[1], 0x80008 447 EXT(1, 8) 448 //! s1: %_:s[0], s1: %_:scc = @s_bfe %_:s[1], 0x80010 449 EXT(2, 8) 450 //! s1: %_:s[0], s1: %_:scc = @s_shr %_:s[1], 24 451 EXT(3, 8) 452 //~gfx(7|8)_unsigned! s1: %_:s[0], s1: %_:scc = @s_bfe %_:s[1], 0x100000 453 //~gfx(9|11)_unsigned! s1: %_:s[0] = s_pack_ll_b32_b16 %_:s[1], 0 454 //~gfx.*_signed! s1: %_:s[0] = s_sext_i32_i16 %_:s[1] 455 EXT(0, 16) 456 //~gfx(7,8)_unsigned! s1: %_:s[0], s1: %_:scc = @s_shr %_:s[1], 16 457 //~gfx(9|11)_unsigned! s1: %_:s[0] = s_pack_hh_b32_b16 %_:s[1], 0 458 //~gfx.*_signed! s1: %_:s[0], s1: %_:scc = @s_shr %_:s[1], 16 459 EXT(1, 16) 460 461 #undef EXT 462 463 #define EXT(idx, src_b) \ 464 bld.pseudo(aco_opcode::p_extract, Definition(v0_lo, v2b), Operand(v1_lo.advance(src_b), v2b), \ 465 Operand::c32(idx), Operand::c32(8u), Operand::c32(is_signed)); 466 467 //>> p_unit_test 4 468 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4u)); 469 //~gfx(8|9).*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1][0:16] dst_sel:uword0 dst_preserve src0_sel:@byte(0) 470 //~gfx11_unsigned! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060c00 471 //~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060000 472 //~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[0], 0, 0x7060a04 473 if (lvl != GFX7) 474 EXT(0, 0) 475 //~gfx(8|9).*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1][16:32] dst_sel:uword0 dst_preserve src0_sel:@byte(2) 476 //~gfx11_unsigned! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060c02 477 //~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060202 478 //~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[0], 0, 0x7060a04 479 if (lvl != GFX7) 480 EXT(0, 2) 481 //~gfx(8|9).*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1][0:16] dst_sel:uword0 dst_preserve src0_sel:@byte(1) 482 //~gfx11_unsigned! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060c01 483 //~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060801 484 if (lvl != GFX7) 485 EXT(1, 0) 486 //~gfx(8|9).*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1][16:32] dst_sel:uword0 dst_preserve src0_sel:@byte(3) 487 //~gfx11_unsigned! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060c03 488 //~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060903 489 if (lvl != GFX7) 490 EXT(1, 2) 491 492 #undef EXT 493 494 finish_to_hw_instr_test(); 495 496 //~gfx11_.*! s_nop 497 //~gfx11_.*! s_sendmsg sendmsg(dealloc_vgprs) 498 //! s_endpgm 499 } 500 } 501 END_TEST 502 503 BEGIN_TEST(to_hw_instr.insert) 504 PhysReg s0_lo{0}; 505 PhysReg s1_lo{1}; 506 PhysReg v0_lo{256}; 507 PhysReg v1_lo{257}; 508 509 for (amd_gfx_level lvl : {GFX7, GFX8, GFX9, GFX11}) { 510 if (!setup_cs(NULL, lvl)) 511 continue; 512 513 #define INS(idx, size) \ 514 bld.pseudo(aco_opcode::p_insert, Definition(v0_lo, v1), Operand(v1_lo, v1), Operand::c32(idx), \ 515 Operand::c32(size)); 516 517 //>> p_unit_test 0 518 bld.pseudo(aco_opcode::p_unit_test, Operand::zero()); 519 //! v1: %_:v[0] = v_bfe_u32 %_:v[1], 0, 8 520 INS(0, 8) 521 //~gfx7! v1: %0:v[0] = v_bfe_u32 %0:v[1], 0, 8 522 //~gfx7! v1: %0:v[0] = v_lshlrev_b32 8, %0:v[0] 523 //~gfx(8|9)! v1: %0:v[0] = v_mov_b32 %0:v[1] dst_sel:ubyte1 src0_sel:dword 524 //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0xc0c000c 525 INS(1, 8) 526 //~gfx7! v1: %0:v[0] = v_bfe_u32 %0:v[1], 0, 8 527 //~gfx7! v1: %0:v[0] = v_lshlrev_b32 16, %0:v[0] 528 //~gfx(8|9)! v1: %0:v[0] = v_mov_b32 %0:v[1] dst_sel:ubyte2 src0_sel:dword 529 //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0xc000c0c 530 INS(2, 8) 531 //! v1: %0:v[0] = v_lshlrev_b32 24, %0:v[1] 532 INS(3, 8) 533 //! v1: %0:v[0] = v_bfe_u32 %0:v[1], 0, 16 534 INS(0, 16) 535 //! v1: %0:v[0] = v_lshlrev_b32 16, %0:v[1] 536 INS(1, 16) 537 538 #undef INS 539 540 #define INS(idx, size) \ 541 bld.pseudo(aco_opcode::p_insert, Definition(s0_lo, s1), Definition(scc, s1), \ 542 Operand(s1_lo, s1), Operand::c32(idx), Operand::c32(size)); 543 544 //>> p_unit_test 1 545 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1u)); 546 //! s1: %_:s[0], s1: %_:scc = s_bfe_u32 %_:s[1], 0x80000 547 INS(0, 8) 548 //! s1: %_:s[0], s1: %_:scc = s_bfe_u32 %_:s[1], 0x80000 549 //! s1: %_:s[0], s1: %_:scc = s_lshl_b32 %_:s[0], 8 550 INS(1, 8) 551 //! s1: %_:s[0], s1: %_:scc = s_bfe_u32 %_:s[1], 0x80000 552 //! s1: %_:s[0], s1: %_:scc = s_lshl_b32 %_:s[0], 16 553 INS(2, 8) 554 //! s1: %_:s[0], s1: %_:scc = s_lshl_b32 %_:s[1], 24 555 INS(3, 8) 556 //~gfx(7|8)! s1: %_:s[0], s1: %_:scc = s_bfe_u32 %_:s[1], 0x100000 557 //~gfx(9|11)! s1: %_:s[0] = s_pack_ll_b32_b16 %_:s[1], 0 558 INS(0, 16) 559 //~gfx(7|8)! s1: %_:s[0], s1: %_:scc = s_lshl_b32 %_:s[1], 16 560 //~gfx(9|11)! s1: %_:s[0] = s_pack_ll_b32_b16 0, %_:s[1] 561 INS(1, 16) 562 563 #undef INS 564 565 #define INS(idx, def_b, op_b) \ 566 bld.pseudo(aco_opcode::p_insert, Definition(v0_lo.advance(def_b), v2b), \ 567 Operand(v1_lo.advance(op_b), v2b), Operand::c32(idx), Operand::c32(8u)); 568 569 //>> p_unit_test 2 570 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u)); 571 //~gfx(8|9)! v2b: %0:v[0][0:16] = v_mov_b32 %0:v[1][0:16] dst_sel:uword0 dst_preserve src0_sel:ubyte0 572 //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0x7060c00 573 if (lvl != GFX7) 574 INS(0, 0, 0) 575 //~gfx(8|9)! v2b: %0:v[0][16:32] = v_mov_b32 %0:v[1][0:16] dst_sel:uword1 dst_preserve src0_sel:ubyte0 576 //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0xc000504 577 if (lvl != GFX7) 578 INS(0, 2, 0) 579 //~gfx(8|9)! v2b: %0:v[0][0:16] = v_mov_b32 %0:v[1][16:32] dst_sel:uword0 dst_preserve src0_sel:ubyte2 580 //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0x7060c02 581 if (lvl != GFX7) 582 INS(0, 0, 2) 583 //~gfx(8|9)! v2b: %0:v[0][16:32] = v_mov_b32 %0:v[1][16:32] dst_sel:uword1 dst_preserve src0_sel:ubyte2 584 //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0xc020504 585 if (lvl != GFX7) 586 INS(0, 2, 2) 587 //~gfx8! v1b: %0:v[0][8:16] = v_mov_b32 %0:v[1][0:16] dst_sel:ubyte1 dst_preserve src0_sel:ubyte0 588 //~gfx8! v2b: %0:v[0][0:16] = v_and_b32 0xffffff00, %0:v[1] 589 //~gfx9! v2b: %0:v[0][0:16] = v_lshlrev_b32 8, %0:v[1][0:16] dst_sel:uword0 dst_preserve src0_sel:dword src1_sel:ubyte0 590 //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0x706000c 591 if (lvl != GFX7) 592 INS(1, 0, 0) 593 //~gfx8! v1b: %0:v[0][24:32] = v_mov_b32 %0:v[1][0:16] dst_sel:ubyte3 dst_preserve src0_sel:ubyte0 594 //~gfx8! v2b: %0:v[0][16:32] = v_and_b32 0xff00ffff, %0:v[1] 595 //~gfx9! v2b: %0:v[0][16:32] = v_lshlrev_b32 8, %0:v[1][0:16] dst_sel:uword1 dst_preserve src0_sel:dword src1_sel:ubyte0 596 //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0xc0504 597 if (lvl != GFX7) 598 INS(1, 2, 0) 599 //~gfx8! v1b: %0:v[0][8:16] = v_mov_b32 %0:v[1][16:32] dst_sel:ubyte1 dst_preserve src0_sel:ubyte2 600 //~gfx8! v2b: %0:v[0][0:16] = v_and_b32 0xffffff00, %0:v[1] 601 //~gfx9! v2b: %0:v[0][0:16] = v_lshlrev_b32 8, %0:v[1][16:32] dst_sel:uword0 dst_preserve src0_sel:dword src1_sel:ubyte2 602 //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0x706020c 603 if (lvl != GFX7) 604 INS(1, 0, 2) 605 //~gfx8! v1b: %0:v[0][24:32] = v_mov_b32 %0:v[1][16:32] dst_sel:ubyte3 dst_preserve src0_sel:ubyte2 606 //~gfx8! v2b: %0:v[0][16:32] = v_and_b32 0xff00ffff, %0:v[1] 607 //~gfx9! v2b: %0:v[0][16:32] = v_lshlrev_b32 8, %0:v[1][16:32] dst_sel:uword1 dst_preserve src0_sel:dword src1_sel:ubyte2 608 //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0x20c0504 609 if (lvl != GFX7) 610 INS(1, 2, 2) 611 #undef INS 612 613 finish_to_hw_instr_test(); 614 615 //~gfx11! s_nop 616 //~gfx11! s_sendmsg sendmsg(dealloc_vgprs) 617 //! s_endpgm 618 } 619 END_TEST 620 621 BEGIN_TEST(to_hw_instr.copy_linear_vgpr_scc) 622 if (!setup_cs(NULL, GFX10)) 623 return; 624 625 PhysReg v0_lo{256}; 626 PhysReg v1_lo{257}; 627 628 //>> p_unit_test 0 629 bld.pseudo(aco_opcode::p_unit_test, Operand::zero()); 630 631 /* It would be better if the scc=s0 copy was done later, but handle_operands() is complex 632 * enough 633 */ 634 635 //! v1: %0:v[0] = v_mov_b32 %0:v[1] 636 //! s1: %0:m0 = s_mov_b32 %0:scc 637 //! s2: %0:exec, s1: %0:scc = s_not_b64 %0:exec 638 //! v1: %0:v[0] = v_mov_b32 %0:v[1] 639 //! s2: %0:exec, s1: %0:scc = s_not_b64 %0:exec 640 //! s1: %0:scc = s_cmp_lg_i32 %0:m0, 0 641 Instruction* instr = bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1.as_linear()), 642 Operand(v1_lo, v1.as_linear())); 643 instr->pseudo().scratch_sgpr = m0; 644 instr->pseudo().tmp_in_scc = true; 645 646 finish_to_hw_instr_test(); 647 END_TEST 648 649 BEGIN_TEST(to_hw_instr.swap_linear_vgpr) 650 if (!setup_cs(NULL, GFX10)) 651 return; 652 653 PhysReg reg_v0{256}; 654 PhysReg reg_v1{257}; 655 RegClass v1_linear = v1.as_linear(); 656 657 //>> p_unit_test 0 658 bld.pseudo(aco_opcode::p_unit_test, Operand::zero()); 659 660 //! v1: %0:v[0], v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0] 661 //! s2: %0:exec, s1: %0:scc = s_not_b64 %0:exec 662 //! v1: %0:v[0], v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0] 663 //! s2: %0:exec, s1: %0:scc = s_not_b64 %0:exec 664 Instruction* instr = bld.pseudo(aco_opcode::p_parallelcopy, Definition(reg_v0, v1_linear), 665 Definition(reg_v1, v1_linear), Operand(reg_v1, v1_linear), 666 Operand(reg_v0, v1_linear)); 667 instr->pseudo().scratch_sgpr = m0; 668 669 finish_to_hw_instr_test(); 670 END_TEST 671 672 BEGIN_TEST(to_hw_instr.copy_linear_vgpr_v3) 673 if (!setup_cs(NULL, GFX10)) 674 return; 675 676 PhysReg reg_v0{256}; 677 PhysReg reg_v4{256 + 4}; 678 RegClass v3_linear = v3.as_linear(); 679 680 //>> p_unit_test 0 681 bld.pseudo(aco_opcode::p_unit_test, Operand::zero()); 682 683 //! v2: %0:v[0-1] = v_lshrrev_b64 0, %0:v[4-5] 684 //! v1: %0:v[2] = v_mov_b32 %0:v[6] 685 //! s2: %0:exec, s1: %0:scc = s_not_b64 %0:exec 686 //! v2: %0:v[0-1] = v_lshrrev_b64 0, %0:v[4-5] 687 //! v1: %0:v[2] = v_mov_b32 %0:v[6] 688 //! s2: %0:exec, s1: %0:scc = s_not_b64 %0:exec 689 Instruction* instr = bld.pseudo(aco_opcode::p_parallelcopy, Definition(reg_v0, v3_linear), 690 Operand(reg_v4, v3_linear)); 691 instr->pseudo().scratch_sgpr = m0; 692 693 finish_to_hw_instr_test(); 694 END_TEST 695 696 BEGIN_TEST(to_hw_instr.copy_linear_vgpr_coalesce) 697 if (!setup_cs(NULL, GFX10)) 698 return; 699 700 PhysReg reg_v0{256}; 701 PhysReg reg_v1{256 + 1}; 702 PhysReg reg_v4{256 + 4}; 703 PhysReg reg_v5{256 + 5}; 704 RegClass v1_linear = v1.as_linear(); 705 706 //>> p_unit_test 0 707 //! v2: %0:v[0-1] = v_lshrrev_b64 0, %0:v[4-5] 708 //! s2: %0:exec, s1: %0:scc = s_not_b64 %0:exec 709 //! v2: %0:v[0-1] = v_lshrrev_b64 0, %0:v[4-5] 710 //! s2: %0:exec, s1: %0:scc = s_not_b64 %0:exec 711 bld.pseudo(aco_opcode::p_unit_test, Operand::zero()); 712 713 Instruction* instr = bld.pseudo(aco_opcode::p_parallelcopy, Definition(reg_v0, v1_linear), 714 Definition(reg_v1, v1_linear), Operand(reg_v4, v1_linear), 715 Operand(reg_v5, v1_linear)); 716 instr->pseudo().scratch_sgpr = m0; 717 718 finish_to_hw_instr_test(); 719 END_TEST 720 721 BEGIN_TEST(to_hw_instr.pack2x16_constant) 722 PhysReg v0_lo{256}; 723 PhysReg v0_hi{256}; 724 PhysReg v1_lo{257}; 725 PhysReg v1_hi{257}; 726 v0_hi.reg_b += 2; 727 v1_hi.reg_b += 2; 728 729 for (amd_gfx_level lvl : {GFX10, GFX11}) { 730 if (!setup_cs(NULL, lvl)) 731 continue; 732 733 /* prevent usage of v_pack_b32_f16 */ 734 program->blocks[0].fp_mode.denorm16_64 = fp_denorm_flush; 735 736 //>> p_unit_test 0 737 //! v1: %_:v[0] = v_alignbyte_b32 0x3800, %_:v[1][16:32], 2 738 bld.pseudo(aco_opcode::p_unit_test, Operand::zero()); 739 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b), 740 Operand(v1_hi, v2b), Operand::c16(0x3800)); 741 742 //! p_unit_test 1 743 //! v2b: %_:v[0][0:16] = v_lshrrev_b32 16, %_:v[1][16:32] 744 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1)); 745 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b), 746 Operand(v1_hi, v2b), Operand::zero(2)); 747 748 //! p_unit_test 2 749 //~gfx10! v2b: %_:v[0][0:16] = v_and_b32 0xffff, %_:v[1][0:16] 750 //~gfx11! v1: %_:v[0] = v_cvt_u32_u16 %_:v[1][0:16] 751 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2)); 752 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b), 753 Operand(v1_lo, v2b), Operand::zero(2)); 754 755 //! p_unit_test 3 756 //! v2b: %_:v[0][16:32] = v_and_b32 0xffff0000, %_:v[1][16:32] 757 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3)); 758 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b), 759 Operand::zero(2), Operand(v1_hi, v2b)); 760 761 //! p_unit_test 4 762 //! v2b: %_:v[0][16:32] = v_lshlrev_b32 16, %_:v[1][0:16] 763 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4)); 764 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b), 765 Operand::zero(2), Operand(v1_lo, v2b)); 766 767 //~gfx11! s_nop 768 //~gfx11! s_sendmsg sendmsg(dealloc_vgprs) 769 //! s_endpgm 770 771 finish_to_hw_instr_test(); 772 } 773 END_TEST 774