xref: /aosp_15_r20/external/mesa3d/src/amd/compiler/tests/test_to_hw_instr.cpp (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2020 Valve Corporation
3  *
4  * SPDX-License-Identifier: MIT
5  */
6 #include "helpers.h"
7 
8 using namespace aco;
9 
/*
 * Builds p_parallelcopy pseudo-instructions whose definitions and operands are
 * dword (v1) or sub-dword (v1b/v2b/v3b) pieces of VGPRs and hands them to
 * finish_to_hw_instr_test(), once for each of GFX8, GFX9 and GFX11. Every case
 * is a p_unit_test marker carrying the case index followed by one
 * p_parallelcopy; the i-th Definition is paired with the i-th Operand.
 *
 * NOTE(review): the comment lines that start with "!", ">>" or "~<pattern>"
 * right after the slashes look like expected-output patterns read by the test
 * harness, not free-form comments -- confirm against the test framework before
 * editing or reordering them.
 */
10 BEGIN_TEST(to_hw_instr.swap_subdword)
   /* Register handles: PhysReg{256} is what the expectations print as v[0]. */
11    PhysReg v0_lo{256};
12    PhysReg v0_hi{256};
13    PhysReg v0_b1{256};
14    PhysReg v0_b3{256};
15    PhysReg v1_lo{257};
16    PhysReg v1_hi{257};
17    PhysReg v1_b1{257};
18    PhysReg v1_b3{257};
19    PhysReg v128_lo{256 + 128};
20    PhysReg v128_hi{256 + 128};
21    PhysReg v129_lo{256 + 129};
22    PhysReg v129_hi{256 + 129};
   /* Move the aliases to their byte position inside the register:
    * _b1 = byte 1, _hi = byte 2 (upper half), _b3 = byte 3. */
23    v0_hi.reg_b += 2;
24    v1_hi.reg_b += 2;
25    v0_b1.reg_b += 1;
26    v1_b1.reg_b += 1;
27    v0_b3.reg_b += 3;
28    v1_b3.reg_b += 3;
29    v128_hi.reg_b += 2;
30    v129_hi.reg_b += 2;
31 
32    for (amd_gfx_level lvl : {GFX8, GFX9, GFX11}) {
      /* Skip this gfx level when setup_cs() reports failure. */
33       if (!setup_cs(NULL, lvl))
34          continue;
35 
      /* Case 0: exchange the low and high 16-bit halves of v0. */
36       //~gfx(8|9|11)>> p_unit_test 0
37       //~gfx8! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 2
38       //~gfx(9|11)! v1: %0:v[0] = v_pack_b32_f16 hi(%0:v[0][16:32]), %0:v[0][0:16]
39       bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
40       bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b),
41                  Operand(v0_hi, v2b), Operand(v0_lo, v2b));
42 
      /* Case 1: v0 <- v1 (dword) together with v1_lo <- v0_lo (16 bit). */
43       //~gfx(8|9|11)! p_unit_test 1
44       //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
45       //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
46       //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
47       //~gfx(9|11)! v1: %0:v[0],  v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0]
48       //~gfx[89]! v2b: %0:v[1][16:32] = v_mov_b32 %0:v[0][16:32] dst_sel:uword1 dst_preserve src0_sel:uword1
49       //~gfx11! v2b: %0:v[1][16:32] = v_mov_b16 hi(%0:v[0][16:32]) opsel_hi
50       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1u));
51       bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1), Definition(v1_lo, v2b),
52                  Operand(v1_lo, v1), Operand(v0_lo, v2b));
53 
      /* Case 2: as case 1, with v0_lo additionally copied to v1_hi. */
54       //~gfx(8|9|11)! p_unit_test 2
55       //~gfx[89]! v2b: %0:v[0][16:32] = v_mov_b32 %0:v[1][16:32] dst_sel:uword1 dst_preserve src0_sel:uword1
56       //~gfx[89]! v2b: %0:v[1][16:32] = v_mov_b32 %0:v[0][0:16] dst_sel:uword1 dst_preserve src0_sel:uword0
57       //~gfx[89]! v2b: %0:v[1][0:16] = v_xor_b32 %0:v[1][0:16], %0:v[0][0:16] dst_sel:uword0 dst_preserve src0_sel:uword0 src1_sel:uword0
58       //~gfx[89]! v2b: %0:v[0][0:16] = v_xor_b32 %0:v[1][0:16], %0:v[0][0:16] dst_sel:uword0 dst_preserve src0_sel:uword0 src1_sel:uword0
59       //~gfx[89]! v2b: %0:v[1][0:16] = v_xor_b32 %0:v[1][0:16], %0:v[0][0:16] dst_sel:uword0 dst_preserve src0_sel:uword0 src1_sel:uword0
60       //~gfx11! v2b: %0:v[0][16:32] = v_mov_b16 hi(%0:v[1][16:32]) opsel_hi
61       //~gfx11! v2b: %0:v[1][16:32] = v_mov_b16 %0:v[0][0:16] opsel_hi
62       //~gfx11! v2b: %0:v[0][0:16],  v2b: %0:v[1][0:16] = v_swap_b16 %0:v[1][0:16], %0:v[0][0:16]
63       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u));
64       bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1), Definition(v1_lo, v2b),
65                  Definition(v1_hi, v2b), Operand(v1_lo, v1), Operand(v0_lo, v2b),
66                  Operand(v0_lo, v2b));
67 
      /* Case 3: v0 <- v1 (dword) together with byte 3 of v1 <- byte 3 of v0. */
68       //~gfx(8|9|11)! p_unit_test 3
69       //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
70       //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
71       //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
72       //~gfx(9|11)! v1: %0:v[0],  v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0]
73       //~gfx[89]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[0][0:16] dst_sel:uword0 dst_preserve src0_sel:uword0
74       //~gfx[89]! v1b: %0:v[1][16:24] = v_mov_b32 %0:v[0][16:24] dst_sel:ubyte2 dst_preserve src0_sel:ubyte2
75       //~gfx11! v2b: %0:v[1][0:16] = v_mov_b16 %0:v[0][0:16]
76       //~gfx11! v1: %0:v[1] = v_perm_b32 %0:v[1], %0:v[0], 0x7020504
77       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3u));
78       bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1), Definition(v1_b3, v1b),
79                  Operand(v1_lo, v1), Operand(v0_b3, v1b));
80 
      /* Case 4: v0 <- v1 (dword) together with byte 0 of v1 <- byte 0 of v0. */
81       //~gfx(8|9|11)! p_unit_test 4
82       //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
83       //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
84       //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
85       //~gfx(9|11)! v1: %0:v[0],  v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0]
86       //~gfx[89]! v1b: %0:v[1][8:16] = v_mov_b32 %0:v[0][8:16] dst_sel:ubyte1 dst_preserve src0_sel:ubyte1
87       //~gfx[89]! v2b: %0:v[1][16:32] = v_mov_b32 %0:v[0][16:32] dst_sel:uword1 dst_preserve src0_sel:uword1
88       //~gfx11! v1: %0:v[1] = v_perm_b32 %0:v[1], %0:v[0], 0x7060104
89       //~gfx11! v2b: %0:v[1][16:32] = v_mov_b16 hi(%0:v[0][16:32]) opsel_hi
90       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4u));
91       bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1), Definition(v1_lo, v1b),
92                  Operand(v1_lo, v1), Operand(v0_lo, v1b));
93 
      /* Case 5: bytes 0 and 2 of v0 <- bytes 0 and 2 of v1, together with v1 <- v0 (dword). */
94       //~gfx(8|9|11)! p_unit_test 5
95       //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[0], %0:v[1]
96       //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[0], %0:v[1]
97       //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[0], %0:v[1]
98       //~gfx(9|11)! v1: %0:v[1],  v1: %0:v[0] = v_swap_b32 %0:v[0], %0:v[1]
99       //~gfx[89]! v1b: %0:v[0][8:16] = v_mov_b32 %0:v[1][8:16] dst_sel:ubyte1 dst_preserve src0_sel:ubyte1
100       //~gfx[89]! v1b: %0:v[0][24:32] = v_mov_b32 %0:v[1][24:32] dst_sel:ubyte3 dst_preserve src0_sel:ubyte3
101       //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0x7060104
102       //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0x3060504
103       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5u));
104       bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1b), Definition(v0_hi, v1b),
105                  Definition(v1_lo, v1), Operand(v1_lo, v1b), Operand(v1_hi, v1b),
106                  Operand(v0_lo, v1));
107 
      /* Case 6: both halves of v0 <- the matching halves of v1, together with
       * v1 <- v0; the expectations contain only a single dword swap. */
108       //~gfx(8|9|11)! p_unit_test 6
109       //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
110       //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
111       //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
112       //~gfx(9|11)! v1: %0:v[0],  v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0]
113       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(6u));
114       bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b),
115                  Definition(v1_lo, v1), Operand(v1_lo, v2b), Operand(v1_hi, v2b),
116                  Operand(v0_lo, v1));
117 
      /* Case 7: as case 6 but with the halves crossed (v0_lo <- v1_hi, v0_hi <- v1_lo). */
118       //~gfx(8|9|11)! p_unit_test 7
119       //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[0], %0:v[1]
120       //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[0], %0:v[1]
121       //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[0], %0:v[1]
122       //~gfx(9|11)! v1: %0:v[1],  v1: %0:v[0] = v_swap_b32 %0:v[0], %0:v[1]
123       //~gfx(8|9|11)! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 2
124       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(7u));
125       bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b),
126                  Definition(v1_lo, v1), Operand(v1_hi, v2b), Operand(v1_lo, v2b),
127                  Operand(v0_lo, v1));
128 
      /* Case 8: exchange the low three bytes (v3b) of v0 and v1. */
129       //~gfx(8|9|11)! p_unit_test 8
130       //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
131       //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
132       //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
133       //~gfx(9|11)! v1: %0:v[0],  v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0]
134       //~gfx[89]! v1b: %0:v[1][24:32] = v_xor_b32 %0:v[1][24:32], %0:v[0][24:32] dst_sel:ubyte3 dst_preserve src0_sel:ubyte3 src1_sel:ubyte3
135       //~gfx[89]! v1b: %0:v[0][24:32] = v_xor_b32 %0:v[1][24:32], %0:v[0][24:32] dst_sel:ubyte3 dst_preserve src0_sel:ubyte3 src1_sel:ubyte3
136       //~gfx[89]! v1b: %0:v[1][24:32] = v_xor_b32 %0:v[1][24:32], %0:v[0][24:32] dst_sel:ubyte3 dst_preserve src0_sel:ubyte3 src1_sel:ubyte3
137       //~gfx11! v2b: %0:v[0][0:16],  v2b: %0:v[1][16:32] = v_swap_b16 hi(%0:v[1][16:32]), %0:v[0][0:16]
138       //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], 0, 0x5060704
139       //~gfx11! v2b: %0:v[0][0:16],  v2b: %0:v[1][16:32] = v_swap_b16 hi(%0:v[1][16:32]), %0:v[0][0:16]
140       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(8u));
141       bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v3b), Definition(v1_lo, v3b),
142                  Operand(v1_lo, v3b), Operand(v0_lo, v3b));
143 
      /* Case 9: the v3b exchange of case 8 plus byte 3 of v0 <- byte 3 of v1. */
144       //~gfx(8|9|11)! p_unit_test 9
145       //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
146       //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
147       //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
148       //~gfx(9|11)! v1: %0:v[0],  v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0]
149       //~gfx[89]! v1b: %0:v[1][24:32] = v_mov_b32 %0:v[0][24:32] dst_sel:ubyte3 dst_preserve src0_sel:ubyte3
150       //~gfx11! v1: %0:v[1] = v_perm_b32 %0:v[1], %0:v[0], 0x3060504
151       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(9u));
152       bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v3b), Definition(v1_lo, v3b),
153                  Definition(v0_b3, v1b), Operand(v1_lo, v3b), Operand(v0_lo, v3b),
154                  Operand(v1_b3, v1b));
155 
      /* Case 10: exchange the 16-bit values that start at byte 1 of v0 and v1. */
156       //~gfx(8|9|11)! p_unit_test 10
157       //~gfx[89]! v1b: %0:v[1][8:16] = v_xor_b32 %0:v[1][8:16], %0:v[0][8:16] dst_sel:ubyte1 dst_preserve src0_sel:ubyte1 src1_sel:ubyte1
158       //~gfx[89]! v1b: %0:v[0][8:16] = v_xor_b32 %0:v[1][8:16], %0:v[0][8:16] dst_sel:ubyte1 dst_preserve src0_sel:ubyte1 src1_sel:ubyte1
159       //~gfx[89]! v1b: %0:v[1][8:16] = v_xor_b32 %0:v[1][8:16], %0:v[0][8:16] dst_sel:ubyte1 dst_preserve src0_sel:ubyte1 src1_sel:ubyte1
160       //~gfx11! v2b: %0:v[0][16:32],  v2b: %0:v[1][0:16] = v_swap_b16 %0:v[1][0:16], %0:v[0][16:32] opsel_hi
161       //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], 0, 0x5060704
162       //~gfx11! v2b: %0:v[0][16:32],  v2b: %0:v[1][0:16] = v_swap_b16 %0:v[1][0:16], %0:v[0][16:32] opsel_hi
163       //~gfx[89]! v1b: %0:v[1][16:24] = v_xor_b32 %0:v[1][16:24], %0:v[0][16:24] dst_sel:ubyte2 dst_preserve src0_sel:ubyte2 src1_sel:ubyte2
164       //~gfx[89]! v1b: %0:v[0][16:24] = v_xor_b32 %0:v[1][16:24], %0:v[0][16:24] dst_sel:ubyte2 dst_preserve src0_sel:ubyte2 src1_sel:ubyte2
165       //~gfx[89]! v1b: %0:v[1][16:24] = v_xor_b32 %0:v[1][16:24], %0:v[0][16:24] dst_sel:ubyte2 dst_preserve src0_sel:ubyte2 src1_sel:ubyte2
166       //~gfx11! v2b: %0:v[0][0:16],  v2b: %0:v[1][16:32] = v_swap_b16 hi(%0:v[1][16:32]), %0:v[0][0:16]
167       //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], 0, 0x7040506
168       //~gfx11! v2b: %0:v[0][0:16],  v2b: %0:v[1][16:32] = v_swap_b16 hi(%0:v[1][16:32]), %0:v[0][0:16]
169       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(10u));
170       bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_b1, v2b), Definition(v1_b1, v2b),
171                  Operand(v1_b1, v2b), Operand(v0_b1, v2b));
172 
      /* Case 11: v0 <- constant 42 together with v1_lo <- v0_hi. */
173       //~gfx(8|9|11)! p_unit_test 11
174       //~gfx[89]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[0][16:32] dst_sel:uword0 dst_preserve src0_sel:uword1
175       //~gfx11! v2b: %0:v[1][0:16] = v_mov_b16 hi(%0:v[0][16:32])
176       //~gfx(8|9|11)! v1: %0:v[0] = v_mov_b32 42
177       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(11u));
178       bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1), Definition(v1_lo, v2b),
179                  Operand::c32(42u), Operand(v0_hi, v2b));
180 
      /* Case 12: exchange bytes 1 and 3 within v0. */
181       //~gfx(8|9|11)! p_unit_test 12
182       //~gfx[89]! v1b: %0:v[0][24:32] = v_xor_b32 %0:v[0][24:32], %0:v[0][8:16] dst_sel:ubyte3 dst_preserve src0_sel:ubyte3 src1_sel:ubyte1
183       //~gfx[89]! v1b: %0:v[0][8:16] = v_xor_b32 %0:v[0][24:32], %0:v[0][8:16] dst_sel:ubyte1 dst_preserve src0_sel:ubyte3 src1_sel:ubyte1
184       //~gfx[89]! v1b: %0:v[0][24:32] = v_xor_b32 %0:v[0][24:32], %0:v[0][8:16] dst_sel:ubyte3 dst_preserve src0_sel:ubyte3 src1_sel:ubyte1
185       //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], 0, 0x5060704
186       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(12u));
187       bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_b1, v1b), Definition(v0_b3, v1b),
188                  Operand(v0_b3, v1b), Operand(v0_b1, v1b));
189 
      /* Case 13: exchange v128_lo with v129_hi; the gfx11 expectation is three
       * v_xor_b16 rather than a v_swap_b16. */
190       //~gfx(8|9|11)! p_unit_test 13
191       //~gfx[89]! v2b: %0:v[129][16:32] = v_xor_b32 %0:v[129][16:32], %0:v[128][0:16] dst_sel:uword1 dst_preserve src0_sel:uword1 src1_sel:uword0
192       //~gfx[89]! v2b: %0:v[128][0:16] = v_xor_b32 %0:v[129][16:32], %0:v[128][0:16] dst_sel:uword0 dst_preserve src0_sel:uword1 src1_sel:uword0
193       //~gfx[89]! v2b: %0:v[129][16:32] = v_xor_b32 %0:v[129][16:32], %0:v[128][0:16] dst_sel:uword1 dst_preserve src0_sel:uword1 src1_sel:uword0
194       //~gfx11! v2b: %0:v[128][0:16] = v_xor_b16 hi(%0:v[129][16:32]), %0:v[128][0:16]
195       //~gfx11! v2b: %0:v[129][16:32] = v_xor_b16 hi(%0:v[129][16:32]), %0:v[128][0:16] opsel_hi
196       //~gfx11! v2b: %0:v[128][0:16] = v_xor_b16 hi(%0:v[129][16:32]), %0:v[128][0:16]
197       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(13u));
198       bld.pseudo(aco_opcode::p_parallelcopy, Definition(v128_lo, v2b), Definition(v129_hi, v2b),
199                  Operand(v129_hi, v2b), Operand(v128_lo, v2b));
200 
      /* Case 14: mirror of case 13 -- exchange v128_hi with v129_lo. */
201       //~gfx(8|9|11)! p_unit_test 14
202       //~gfx[89]! v2b: %0:v[129][0:16] = v_xor_b32 %0:v[129][0:16], %0:v[128][16:32] dst_sel:uword0 dst_preserve src0_sel:uword0 src1_sel:uword1
203       //~gfx[89]! v2b: %0:v[128][16:32] = v_xor_b32 %0:v[129][0:16], %0:v[128][16:32] dst_sel:uword1 dst_preserve src0_sel:uword0 src1_sel:uword1
204       //~gfx[89]! v2b: %0:v[129][0:16] = v_xor_b32 %0:v[129][0:16], %0:v[128][16:32] dst_sel:uword0 dst_preserve src0_sel:uword0 src1_sel:uword1
205       //~gfx11! v2b: %0:v[128][16:32] = v_xor_b16 %0:v[129][0:16], hi(%0:v[128][16:32]) opsel_hi
206       //~gfx11! v2b: %0:v[129][0:16] = v_xor_b16 %0:v[129][0:16], hi(%0:v[128][16:32])
207       //~gfx11! v2b: %0:v[128][16:32] = v_xor_b16 %0:v[129][0:16], hi(%0:v[128][16:32]) opsel_hi
208       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(14u));
209       bld.pseudo(aco_opcode::p_parallelcopy, Definition(v128_hi, v2b), Definition(v129_lo, v2b),
210                  Operand(v129_lo, v2b), Operand(v128_hi, v2b));
211 
212       //~gfx11! s_nop
213       //~gfx11! s_sendmsg sendmsg(dealloc_vgprs)
214       //~gfx(8|9|11)! s_endpgm
215 
216       finish_to_hw_instr_test();
217    }
218 END_TEST
219 
/*
 * Builds p_parallelcopy pseudo-instructions that write constants (Operand::c16,
 * Operand::c8, Operand::zero) into 16-bit and 8-bit pieces of v0/v1, once for
 * each of GFX9, GFX10 and GFX11, and finishes each run with
 * finish_to_hw_instr_test().
 *
 * NOTE(review): the comment lines that start with "!", ">>" or "~<pattern>"
 * right after the slashes look like expected-output patterns read by the test
 * harness, not free-form comments -- confirm against the test framework before
 * editing or reordering them.
 */
220 BEGIN_TEST(to_hw_instr.subdword_constant)
   /* Register handles; _hi is moved to byte 2 and _b1 to byte 1 of the register. */
221    PhysReg v0_lo{256};
222    PhysReg v0_hi{256};
223    PhysReg v0_b1{256};
224    PhysReg v1_lo{257};
225    PhysReg v1_hi{257};
226    v0_hi.reg_b += 2;
227    v0_b1.reg_b += 1;
228    v1_hi.reg_b += 2;
229 
230    for (amd_gfx_level lvl : {GFX9, GFX10, GFX11}) {
      /* Skip this gfx level when setup_cs() reports failure. */
231       if (!setup_cs(NULL, lvl))
232          continue;
233 
234       /* 16-bit pack */
      /* Cases 0-2 pair a 16-bit constant with a 16-bit register operand;
       * cases 3-5 pair two 16-bit constants, expected as one 32-bit v_mov_b32. */
235       //>> p_unit_test 0
236       //! v1: %_:v[0] = v_pack_b32_f16 0.5, hi(%_:v[1][16:32])
237       bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
238       bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b),
239                  Operand::c16(0x3800), Operand(v1_hi, v2b));
240 
241       //! p_unit_test 1
242       //~gfx9! v2b: %0:v[0][16:32] = v_and_b32 0xffff0000, %0:v[1][16:32]
243       //~gfx9! v1: %0:v[0] = v_or_b32 0x4205, %0:v[0]
244       //~gfx(10|11)! v1: %_:v[0] = v_pack_b32_f16 0x4205, hi(%_:v[1][16:32])
245       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1u));
246       bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b),
247                  Operand::c16(0x4205), Operand(v1_hi, v2b));
248 
249       //! p_unit_test 2
250       //~gfx9! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16]
251       //~gfx9! v1: %_:v[0] = v_or_b32 0x4205, %_:v[0]
252       //~gfx(10|11)! v1: %0:v[0] = v_pack_b32_f16 0x4205, %0:v[0][0:16]
253       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u));
254       bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b),
255                  Operand::c16(0x4205), Operand(v0_lo, v2b));
256 
257       //! p_unit_test 3
258       //! v1: %_:v[0] = v_mov_b32 0x3c003800
259       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3u));
260       bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b),
261                  Operand::c16(0x3800), Operand::c16(0x3c00));
262 
263       //! p_unit_test 4
264       //! v1: %_:v[0] = v_mov_b32 0x43064205
265       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4u));
266       bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b),
267                  Operand::c16(0x4205), Operand::c16(0x4306));
268 
269       //! p_unit_test 5
270       //! v1: %_:v[0] = v_mov_b32 0x38004205
271       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5u));
272       bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b),
273                  Operand::c16(0x4205), Operand::c16(0x3800));
274 
275       /* 16-bit copy */
276       //! p_unit_test 6
277       //~gfx(9|10)! v2b: %_:v[0][0:16] = v_add_f16 0.5, 0 dst_sel:uword0 dst_preserve src0_sel:uword0 src1_sel:dword
278       //~gfx11! v2b: %0:v[0][0:16] = v_add_f16 0.5, 0
279       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(6u));
280       bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Operand::c16(0x3800));
281 
282       //! p_unit_test 7
283       //~gfx9! v1: %_:v[0] = v_and_b32 0xffff0000, %_:v[0]
284       //~gfx9! v1: %_:v[0] = v_or_b32 0x4205, %_:v[0]
285       //~gfx10! v2b: %_:v[0][0:16] = v_add_u16_e64 0x4205, 0
286       //~gfx11! v2b: %0:v[0][0:16] = v_mov_b16 0x4205
287       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(7u));
288       bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Operand::c16(0x4205));
289 
290       //! p_unit_test 8
291       //~gfx9! v1: %_:v[0] = v_and_b32 0xffff, %_:v[0]
292       //~gfx9! v1: %_:v[0] = v_or_b32 0x42050000, %_:v[0]
293       //~gfx10! v2b: %_:v[0][16:32] = v_add_u16_e64 0x4205, 0 opsel_hi
294       //~gfx11! v2b: %0:v[0][16:32] = v_mov_b16 0x4205 opsel_hi
295       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(8u));
296       bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_hi, v2b), Operand::c16(0x4205));
297 
      /* Cases 9 and 10: the 16-bit destination starts at byte 1, and the
       * expectations write the constant one byte at a time ([8:16], [16:24]). */
298       //! p_unit_test 9
299       //~gfx(9|10)! v1b: %_:v[0][8:16] = v_mov_b32 0 dst_sel:ubyte1 dst_preserve src0_sel:dword
300       //~gfx(9|10)! v1b: %_:v[0][16:24] = v_mov_b32 56 dst_sel:ubyte2 dst_preserve src0_sel:dword
301       //~gfx11! v1b: %_:v[0][8:16] = v_cvt_pk_u8_f32 0, 1, %_:v[0]
302       //~gfx11! v1b: %_:v[0][16:24] = v_cvt_pk_u8_f32 0x42600000, 2, %_:v[0]
303       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(9u));
304       bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_b1, v2b), Operand::c16(0x3800));
305 
306       //! p_unit_test 10
307       //~gfx(9|10)! v1b: %_:v[0][8:16] = v_mov_b32 5 dst_sel:ubyte1 dst_preserve src0_sel:dword
308       //~gfx(9|10)! v1b: %_:v[0][16:24] = v_mul_u32_u24 2, 33 dst_sel:ubyte2 dst_preserve src0_sel:dword src1_sel:dword
309       //~gfx11! v1b: %_:v[0][8:16] = v_cvt_pk_u8_f32 0x40a00000, 1, %_:v[0]
310       //~gfx11! v1b: %_:v[0][16:24] = v_cvt_pk_u8_f32 0x42840000, 2, %_:v[0]
311       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(10u));
312       bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_b1, v2b), Operand::c16(0x4205));
313 
314       /* 8-bit copy */
315       //! p_unit_test 11
316       //~gfx(9|10)! v1b: %_:v[0][0:8] = v_mul_u32_u24 2, 33 dst_sel:ubyte0 dst_preserve src0_sel:dword src1_sel:dword
317       //~gfx11! v1b: %_:v[0][0:8] = v_cvt_pk_u8_f32 0x42840000, 0, %_:v[0]
318       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(11u));
319       bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1b), Operand::c8(0x42));
320 
321       /* 32-bit and 8-bit copy */
322       //! p_unit_test 12
323       //! v1: %_:v[0] = v_mov_b32 0
324       //~gfx(9|10)! v1b: %_:v[1][0:8] = v_mov_b32 0 dst_sel:ubyte0 dst_preserve src0_sel:dword
325       //~gfx11! v1b: %_:v[1][0:8] = v_cvt_pk_u8_f32 0, 0, %_:v[1]
326       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(12u));
327       bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1), Definition(v1_lo, v1b),
328                  Operand::zero(), Operand::zero(1));
329 
      /* Cases 13-17: single 16-bit or 8-bit constant copies using 0x00ff,
       * 0xff00, zero and 0xff. */
330       //! p_unit_test 13
331       //~gfx9! v1: %_:v[0] = v_and_b32 0xffff0000, %_:v[0]
332       //~gfx9! v1: %_:v[0] = v_or_b32 0xff, %_:v[0]
333       //~gfx10! v2b: %_:v[0][0:16] = v_add_u16_e64 0xff, 0
334       //~gfx11! v2b: %0:v[0][0:16] = v_mov_b16 0xff
335       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(13u));
336       bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Operand::c16(0x00ff));
337 
338       //! p_unit_test 14
339       //~gfx9! v1: %_:v[0] = v_and_b32 0xffff, %_:v[0]
340       //~gfx9! v1: %_:v[0] = v_or_b32 0xff000000, %_:v[0]
341       //~gfx10! v2b: %_:v[0][16:32] = v_add_u16_e64 0xff00, 0 opsel_hi
342       //~gfx11! v2b: %0:v[0][16:32] = v_mov_b16 0xffffff00 opsel_hi
343       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(14u));
344       bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_hi, v2b), Operand::c16(0xff00));
345 
346       //! p_unit_test 15
347       //~gfx(9|10)! v2b: %_:v[0][0:16] = v_mov_b32 0 dst_sel:uword0 dst_preserve src0_sel:dword
348       //~gfx11! v2b: %0:v[0][0:16] = v_mov_b16 0
349       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(15u));
350       bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Operand::zero(2));
351 
352       //! p_unit_test 16
353       //~gfx(9|10)! v1b: %_:v[0][0:8] = v_mov_b32 -1 dst_sel:ubyte0 dst_preserve src0_sel:dword
354       //~gfx11! v1b: %_:v[0][0:8] = v_cvt_pk_u8_f32 0x437f0000, 0, %_:v[0]
355       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(16u));
356       bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1b), Operand::c8(0xff));
357 
358       //! p_unit_test 17
359       //~gfx(9|10)! v1b: %_:v[0][0:8] = v_mov_b32 0 dst_sel:ubyte0 dst_preserve src0_sel:dword
360       //~gfx11! v1b: %_:v[0][0:8] = v_cvt_pk_u8_f32 0, 0, %_:v[0]
361       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(17u));
362       bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1b), Operand::zero(1));
363 
364       //~gfx11! s_nop
365       //~gfx11! s_sendmsg sendmsg(dealloc_vgprs)
366       //! s_endpgm
367 
368       finish_to_hw_instr_test();
369    }
370 END_TEST
371 
/*
 * GFX9-only case: a single p_parallelcopy in which the two-dword copy
 * v[1:2] <- v[2:3] overlaps its own source and is combined with v3 <- v7 and
 * v7 <- v1. The expectations below list three v_swap_b32 instructions followed
 * by s_endpgm.
 *
 * NOTE(review): the comment lines that start with "!" or ">>" right after the
 * slashes look like expected-output patterns read by the test harness --
 * confirm before editing them.
 */
372 BEGIN_TEST(to_hw_instr.self_intersecting_swap)
   /* Nothing to do when setup_cs() reports failure for GFX9. */
373    if (!setup_cs(NULL, GFX9))
374       return;
375 
   /* PhysReg{257} is v1, as in the expectations below; likewise v2, v3, v7. */
376    PhysReg reg_v1{257};
377    PhysReg reg_v2{258};
378    PhysReg reg_v3{259};
379    PhysReg reg_v7{263};
380 
381    //>> p_unit_test 0
382    //! v1: %0:v[1],  v1: %0:v[2] = v_swap_b32 %0:v[2], %0:v[1]
383    //! v1: %0:v[2],  v1: %0:v[3] = v_swap_b32 %0:v[3], %0:v[2]
384    //! v1: %0:v[3],  v1: %0:v[7] = v_swap_b32 %0:v[7], %0:v[3]
385    //! s_endpgm
386    bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
   /* The three copies requested at once (definitions first, then operands): */
387    // v[1:2] = v[2:3]
388    // v3 = v7
389    // v7 = v1
390    bld.pseudo(aco_opcode::p_parallelcopy, Definition(reg_v1, v2), Definition(reg_v3, v1),
391               Definition(reg_v7, v1), Operand(reg_v2, v2), Operand(reg_v7, v1),
392               Operand(reg_v1, v1));
393 
394    finish_to_hw_instr_test();
395 END_TEST
396 
397 BEGIN_TEST(to_hw_instr.extract)
398    PhysReg s0_lo{0};
399    PhysReg s1_lo{1};
400    PhysReg v0_lo{256};
401    PhysReg v1_lo{257};
402 
403    for (amd_gfx_level lvl : {GFX7, GFX8, GFX9, GFX11}) {
404       for (unsigned is_signed = 0; is_signed <= 1; is_signed++) {
405          if (!setup_cs(NULL, lvl, CHIP_UNKNOWN, is_signed ? "_signed" : "_unsigned"))
406             continue;
407 
408 #define EXT(idx, size)                                                                             \
409    bld.pseudo(aco_opcode::p_extract, Definition(v0_lo, v1), Operand(v1_lo, v1), Operand::c32(idx), \
410               Operand::c32(size), Operand::c32(is_signed));
411 
412          //; funcs['v_bfe'] = lambda _: 'v_bfe_i32' if variant.endswith('_signed') else 'v_bfe_u32'
413          //; funcs['v_shr'] = lambda _: 'v_ashrrev_i32' if variant.endswith('_signed') else 'v_lshrrev_b32'
414          //; funcs['s_bfe'] = lambda _: 's_bfe_i32' if variant.endswith('_signed') else 's_bfe_u32'
415          //; funcs['s_shr'] = lambda _: 's_ashr_i32' if variant.endswith('_signed') else 's_lshr_b32'
416          //; funcs['byte'] = lambda n: '%cbyte%s' % ('s' if variant.endswith('_signed') else 'u', n)
417 
418          //>> p_unit_test 0
419          bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
420          //! v1: %_:v[0] = @v_bfe %_:v[1], 0, 8
421          EXT(0, 8)
422          //! v1: %_:v[0] = @v_bfe %_:v[1], 8, 8
423          EXT(1, 8)
424          //! v1: %_:v[0] = @v_bfe %_:v[1], 16, 8
425          EXT(2, 8)
426          //! v1: %_:v[0] = @v_shr 24, %_:v[1]
427          EXT(3, 8)
428          //~gfx(7|8|9)_.*! v1: %_:v[0] = @v_bfe %_:v[1], 0, 16
429          //~gfx11_unsigned! v1: %_:v[0] = v_cvt_u32_u16 %_:v[1]
430          //~gfx11_signed! v1: %_:v[0] = v_cvt_i32_i16 %_:v[1]
431          EXT(0, 16)
432          //! v1: %_:v[0] = @v_shr 16, %_:v[1]
433          EXT(1, 16)
434 
435 #undef EXT
436 
437 #define EXT(idx, size)                                                                             \
438    bld.pseudo(aco_opcode::p_extract, Definition(s0_lo, s1), Definition(scc, s1),                   \
439               Operand(s1_lo, s1), Operand::c32(idx), Operand::c32(size), Operand::c32(is_signed));
440 
441          //>> p_unit_test 2
442          bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u));
443          //~gfx.*_unsigned! s1: %_:s[0],  s1: %_:scc = @s_bfe %_:s[1], 0x80000
444          //~gfx.*_signed! s1: %_:s[0] = s_sext_i32_i8 %_:s[1]
445          EXT(0, 8)
446          //! s1: %_:s[0],  s1: %_:scc = @s_bfe %_:s[1], 0x80008
447          EXT(1, 8)
448          //! s1: %_:s[0],  s1: %_:scc = @s_bfe %_:s[1], 0x80010
449          EXT(2, 8)
450          //! s1: %_:s[0],  s1: %_:scc = @s_shr %_:s[1], 24
451          EXT(3, 8)
452          //~gfx(7|8)_unsigned! s1: %_:s[0],  s1: %_:scc = @s_bfe %_:s[1], 0x100000
453          //~gfx(9|11)_unsigned! s1: %_:s[0] = s_pack_ll_b32_b16 %_:s[1], 0
454          //~gfx.*_signed! s1: %_:s[0] = s_sext_i32_i16 %_:s[1]
455          EXT(0, 16)
456          //~gfx(7,8)_unsigned! s1: %_:s[0],  s1: %_:scc = @s_shr %_:s[1], 16
457          //~gfx(9|11)_unsigned! s1: %_:s[0] = s_pack_hh_b32_b16 %_:s[1], 0
458          //~gfx.*_signed! s1: %_:s[0],  s1: %_:scc = @s_shr %_:s[1], 16
459          EXT(1, 16)
460 
461 #undef EXT
462 
463 #define EXT(idx, src_b)                                                                            \
464    bld.pseudo(aco_opcode::p_extract, Definition(v0_lo, v2b), Operand(v1_lo.advance(src_b), v2b),   \
465               Operand::c32(idx), Operand::c32(8u), Operand::c32(is_signed));
466 
467          //>> p_unit_test 4
468          bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4u));
469          //~gfx(8|9).*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1][0:16] dst_sel:uword0 dst_preserve src0_sel:@byte(0)
470          //~gfx11_unsigned! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060c00
471          //~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060000
472          //~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[0], 0, 0x7060a04
473          if (lvl != GFX7)
474             EXT(0, 0)
475          //~gfx(8|9).*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1][16:32] dst_sel:uword0 dst_preserve src0_sel:@byte(2)
476          //~gfx11_unsigned! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060c02
477          //~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060202
478          //~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[0], 0, 0x7060a04
479          if (lvl != GFX7)
480             EXT(0, 2)
481          //~gfx(8|9).*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1][0:16] dst_sel:uword0 dst_preserve src0_sel:@byte(1)
482          //~gfx11_unsigned! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060c01
483          //~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060801
484          if (lvl != GFX7)
485             EXT(1, 0)
486          //~gfx(8|9).*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1][16:32] dst_sel:uword0 dst_preserve src0_sel:@byte(3)
487          //~gfx11_unsigned! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060c03
488          //~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060903
489          if (lvl != GFX7)
490             EXT(1, 2)
491 
492 #undef EXT
493 
494          finish_to_hw_instr_test();
495 
496          //~gfx11_.*! s_nop
497          //~gfx11_.*! s_sendmsg sendmsg(dealloc_vgprs)
498          //! s_endpgm
499       }
500    }
501 END_TEST
502 
503 BEGIN_TEST(to_hw_instr.insert)
504    PhysReg s0_lo{0};
505    PhysReg s1_lo{1};
506    PhysReg v0_lo{256};
507    PhysReg v1_lo{257};
508 
509    for (amd_gfx_level lvl : {GFX7, GFX8, GFX9, GFX11}) {
510       if (!setup_cs(NULL, lvl))
511          continue;
512 
513 #define INS(idx, size)                                                                             \
514    bld.pseudo(aco_opcode::p_insert, Definition(v0_lo, v1), Operand(v1_lo, v1), Operand::c32(idx),  \
515               Operand::c32(size));
516 
517       //>> p_unit_test 0
518       bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
519       //! v1: %_:v[0] = v_bfe_u32 %_:v[1], 0, 8
520       INS(0, 8)
521       //~gfx7! v1: %0:v[0] = v_bfe_u32 %0:v[1], 0, 8
522       //~gfx7! v1: %0:v[0] = v_lshlrev_b32 8, %0:v[0]
523       //~gfx(8|9)! v1: %0:v[0] = v_mov_b32 %0:v[1] dst_sel:ubyte1 src0_sel:dword
524       //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0xc0c000c
525       INS(1, 8)
526       //~gfx7! v1: %0:v[0] = v_bfe_u32 %0:v[1], 0, 8
527       //~gfx7! v1: %0:v[0] = v_lshlrev_b32 16, %0:v[0]
528       //~gfx(8|9)! v1: %0:v[0] = v_mov_b32 %0:v[1] dst_sel:ubyte2 src0_sel:dword
529       //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0xc000c0c
530       INS(2, 8)
531       //! v1: %0:v[0] = v_lshlrev_b32 24, %0:v[1]
532       INS(3, 8)
533       //! v1: %0:v[0] = v_bfe_u32 %0:v[1], 0, 16
534       INS(0, 16)
535       //! v1: %0:v[0] = v_lshlrev_b32 16, %0:v[1]
536       INS(1, 16)
537 
538 #undef INS
539 
540 #define INS(idx, size)                                                                             \
541    bld.pseudo(aco_opcode::p_insert, Definition(s0_lo, s1), Definition(scc, s1),                    \
542               Operand(s1_lo, s1), Operand::c32(idx), Operand::c32(size));
543 
544       //>> p_unit_test 1
545       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1u));
546       //! s1: %_:s[0],  s1: %_:scc = s_bfe_u32 %_:s[1], 0x80000
547       INS(0, 8)
548       //! s1: %_:s[0],  s1: %_:scc = s_bfe_u32 %_:s[1], 0x80000
549       //! s1: %_:s[0],  s1: %_:scc = s_lshl_b32 %_:s[0], 8
550       INS(1, 8)
551       //! s1: %_:s[0],  s1: %_:scc = s_bfe_u32 %_:s[1], 0x80000
552       //! s1: %_:s[0],  s1: %_:scc = s_lshl_b32 %_:s[0], 16
553       INS(2, 8)
554       //! s1: %_:s[0],  s1: %_:scc = s_lshl_b32 %_:s[1], 24
555       INS(3, 8)
556       //~gfx(7|8)! s1: %_:s[0],  s1: %_:scc = s_bfe_u32 %_:s[1], 0x100000
557       //~gfx(9|11)! s1: %_:s[0] = s_pack_ll_b32_b16 %_:s[1], 0
558       INS(0, 16)
559       //~gfx(7|8)! s1: %_:s[0],  s1: %_:scc = s_lshl_b32 %_:s[1], 16
560       //~gfx(9|11)! s1: %_:s[0] = s_pack_ll_b32_b16 0, %_:s[1]
561       INS(1, 16)
562 
563 #undef INS
564 
565 #define INS(idx, def_b, op_b)                                                                      \
566    bld.pseudo(aco_opcode::p_insert, Definition(v0_lo.advance(def_b), v2b),                         \
567               Operand(v1_lo.advance(op_b), v2b), Operand::c32(idx), Operand::c32(8u));
568 
569       //>> p_unit_test 2
570       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u));
571       //~gfx(8|9)! v2b: %0:v[0][0:16] = v_mov_b32 %0:v[1][0:16] dst_sel:uword0 dst_preserve src0_sel:ubyte0
572       //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0x7060c00
573       if (lvl != GFX7)
574          INS(0, 0, 0)
575       //~gfx(8|9)! v2b: %0:v[0][16:32] = v_mov_b32 %0:v[1][0:16] dst_sel:uword1 dst_preserve src0_sel:ubyte0
576       //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0xc000504
577       if (lvl != GFX7)
578          INS(0, 2, 0)
579       //~gfx(8|9)! v2b: %0:v[0][0:16] = v_mov_b32 %0:v[1][16:32] dst_sel:uword0 dst_preserve src0_sel:ubyte2
580       //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0x7060c02
581       if (lvl != GFX7)
582          INS(0, 0, 2)
583       //~gfx(8|9)! v2b: %0:v[0][16:32] = v_mov_b32 %0:v[1][16:32] dst_sel:uword1 dst_preserve src0_sel:ubyte2
584       //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0xc020504
585       if (lvl != GFX7)
586          INS(0, 2, 2)
587       //~gfx8! v1b: %0:v[0][8:16] = v_mov_b32 %0:v[1][0:16] dst_sel:ubyte1 dst_preserve src0_sel:ubyte0
588       //~gfx8! v2b: %0:v[0][0:16] = v_and_b32 0xffffff00, %0:v[1]
589       //~gfx9! v2b: %0:v[0][0:16] = v_lshlrev_b32 8, %0:v[1][0:16] dst_sel:uword0 dst_preserve src0_sel:dword src1_sel:ubyte0
590       //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0x706000c
591       if (lvl != GFX7)
592          INS(1, 0, 0)
593       //~gfx8! v1b: %0:v[0][24:32] = v_mov_b32 %0:v[1][0:16] dst_sel:ubyte3 dst_preserve src0_sel:ubyte0
594       //~gfx8! v2b: %0:v[0][16:32] = v_and_b32 0xff00ffff, %0:v[1]
595       //~gfx9! v2b: %0:v[0][16:32] = v_lshlrev_b32 8, %0:v[1][0:16] dst_sel:uword1 dst_preserve src0_sel:dword src1_sel:ubyte0
596       //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0xc0504
597       if (lvl != GFX7)
598          INS(1, 2, 0)
599       //~gfx8! v1b: %0:v[0][8:16] = v_mov_b32 %0:v[1][16:32] dst_sel:ubyte1 dst_preserve src0_sel:ubyte2
600       //~gfx8! v2b: %0:v[0][0:16] = v_and_b32 0xffffff00, %0:v[1]
601       //~gfx9! v2b: %0:v[0][0:16] = v_lshlrev_b32 8, %0:v[1][16:32] dst_sel:uword0 dst_preserve src0_sel:dword src1_sel:ubyte2
602       //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0x706020c
603       if (lvl != GFX7)
604          INS(1, 0, 2)
605       //~gfx8! v1b: %0:v[0][24:32] = v_mov_b32 %0:v[1][16:32] dst_sel:ubyte3 dst_preserve src0_sel:ubyte2
606       //~gfx8! v2b: %0:v[0][16:32] = v_and_b32 0xff00ffff, %0:v[1]
607       //~gfx9! v2b: %0:v[0][16:32] = v_lshlrev_b32 8, %0:v[1][16:32] dst_sel:uword1 dst_preserve src0_sel:dword src1_sel:ubyte2
608       //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0x20c0504
609       if (lvl != GFX7)
610          INS(1, 2, 2)
611 #undef INS
612 
613       finish_to_hw_instr_test();
614 
615       //~gfx11! s_nop
616       //~gfx11! s_sendmsg sendmsg(dealloc_vgprs)
617       //! s_endpgm
618    }
619 END_TEST
620 
621 BEGIN_TEST(to_hw_instr.copy_linear_vgpr_scc)
622    if (!setup_cs(NULL, GFX10))
623       return;
624 
625    PhysReg v0_lo{256};
626    PhysReg v1_lo{257};
627 
628    //>> p_unit_test 0
629    bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
630 
631    /* It would be better if the scc=s0 copy was done later, but handle_operands() is complex
632     * enough
633     */
634 
635    //! v1: %0:v[0] = v_mov_b32 %0:v[1]
636    //! s1: %0:m0 = s_mov_b32 %0:scc
637    //! s2: %0:exec,  s1: %0:scc = s_not_b64 %0:exec
638    //! v1: %0:v[0] = v_mov_b32 %0:v[1]
639    //! s2: %0:exec,  s1: %0:scc = s_not_b64 %0:exec
640    //! s1: %0:scc = s_cmp_lg_i32 %0:m0, 0
641    Instruction* instr = bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1.as_linear()),
642                                    Operand(v1_lo, v1.as_linear()));
643    instr->pseudo().scratch_sgpr = m0;
644    instr->pseudo().tmp_in_scc = true;
645 
646    finish_to_hw_instr_test();
647 END_TEST
648 
649 BEGIN_TEST(to_hw_instr.swap_linear_vgpr)
650    if (!setup_cs(NULL, GFX10))
651       return;
652 
653    PhysReg reg_v0{256};
654    PhysReg reg_v1{257};
655    RegClass v1_linear = v1.as_linear();
656 
657    //>> p_unit_test 0
658    bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
659 
660    //! v1: %0:v[0],  v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0]
661    //! s2: %0:exec,  s1: %0:scc = s_not_b64 %0:exec
662    //! v1: %0:v[0],  v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0]
663    //! s2: %0:exec,  s1: %0:scc = s_not_b64 %0:exec
664    Instruction* instr = bld.pseudo(aco_opcode::p_parallelcopy, Definition(reg_v0, v1_linear),
665                                    Definition(reg_v1, v1_linear), Operand(reg_v1, v1_linear),
666                                    Operand(reg_v0, v1_linear));
667    instr->pseudo().scratch_sgpr = m0;
668 
669    finish_to_hw_instr_test();
670 END_TEST
671 
672 BEGIN_TEST(to_hw_instr.copy_linear_vgpr_v3)
673    if (!setup_cs(NULL, GFX10))
674       return;
675 
676    PhysReg reg_v0{256};
677    PhysReg reg_v4{256 + 4};
678    RegClass v3_linear = v3.as_linear();
679 
680    //>> p_unit_test 0
681    bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
682 
683    //! v2: %0:v[0-1] = v_lshrrev_b64 0, %0:v[4-5]
684    //! v1: %0:v[2] = v_mov_b32 %0:v[6]
685    //! s2: %0:exec,  s1: %0:scc = s_not_b64 %0:exec
686    //! v2: %0:v[0-1] = v_lshrrev_b64 0, %0:v[4-5]
687    //! v1: %0:v[2] = v_mov_b32 %0:v[6]
688    //! s2: %0:exec,  s1: %0:scc = s_not_b64 %0:exec
689    Instruction* instr = bld.pseudo(aco_opcode::p_parallelcopy, Definition(reg_v0, v3_linear),
690                                    Operand(reg_v4, v3_linear));
691    instr->pseudo().scratch_sgpr = m0;
692 
693    finish_to_hw_instr_test();
694 END_TEST
695 
696 BEGIN_TEST(to_hw_instr.copy_linear_vgpr_coalesce)
697    if (!setup_cs(NULL, GFX10))
698       return;
699 
700    PhysReg reg_v0{256};
701    PhysReg reg_v1{256 + 1};
702    PhysReg reg_v4{256 + 4};
703    PhysReg reg_v5{256 + 5};
704    RegClass v1_linear = v1.as_linear();
705 
706    //>> p_unit_test 0
707    //! v2: %0:v[0-1] = v_lshrrev_b64 0, %0:v[4-5]
708    //! s2: %0:exec,  s1: %0:scc = s_not_b64 %0:exec
709    //! v2: %0:v[0-1] = v_lshrrev_b64 0, %0:v[4-5]
710    //! s2: %0:exec,  s1: %0:scc = s_not_b64 %0:exec
711    bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
712 
713    Instruction* instr = bld.pseudo(aco_opcode::p_parallelcopy, Definition(reg_v0, v1_linear),
714                                    Definition(reg_v1, v1_linear), Operand(reg_v4, v1_linear),
715                                    Operand(reg_v5, v1_linear));
716    instr->pseudo().scratch_sgpr = m0;
717 
718    finish_to_hw_instr_test();
719 END_TEST
720 
721 BEGIN_TEST(to_hw_instr.pack2x16_constant)
722    PhysReg v0_lo{256};
723    PhysReg v0_hi{256};
724    PhysReg v1_lo{257};
725    PhysReg v1_hi{257};
726    v0_hi.reg_b += 2;
727    v1_hi.reg_b += 2;
728 
729    for (amd_gfx_level lvl : {GFX10, GFX11}) {
730       if (!setup_cs(NULL, lvl))
731          continue;
732 
733       /* prevent usage of v_pack_b32_f16 */
734       program->blocks[0].fp_mode.denorm16_64 = fp_denorm_flush;
735 
736       //>> p_unit_test 0
737       //! v1: %_:v[0] = v_alignbyte_b32 0x3800, %_:v[1][16:32], 2
738       bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
739       bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b),
740                  Operand(v1_hi, v2b), Operand::c16(0x3800));
741 
742       //! p_unit_test 1
743       //! v2b: %_:v[0][0:16] = v_lshrrev_b32 16, %_:v[1][16:32]
744       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1));
745       bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b),
746                  Operand(v1_hi, v2b), Operand::zero(2));
747 
748       //! p_unit_test 2
749       //~gfx10! v2b: %_:v[0][0:16] = v_and_b32 0xffff, %_:v[1][0:16]
750       //~gfx11! v1: %_:v[0] = v_cvt_u32_u16 %_:v[1][0:16]
751       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2));
752       bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b),
753                  Operand(v1_lo, v2b), Operand::zero(2));
754 
755       //! p_unit_test 3
756       //! v2b: %_:v[0][16:32] = v_and_b32 0xffff0000, %_:v[1][16:32]
757       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3));
758       bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b),
759                  Operand::zero(2), Operand(v1_hi, v2b));
760 
761       //! p_unit_test 4
762       //! v2b: %_:v[0][16:32] = v_lshlrev_b32 16, %_:v[1][0:16]
763       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4));
764       bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b),
765                  Operand::zero(2), Operand(v1_lo, v2b));
766 
767       //~gfx11! s_nop
768       //~gfx11! s_sendmsg sendmsg(dealloc_vgprs)
769       //! s_endpgm
770 
771       finish_to_hw_instr_test();
772    }
773 END_TEST
774