/*
 * Copyright © 2020 Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 */
#include "helpers.h"

using namespace aco;

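/* These tests drive ACO's register allocator directly and validate its output
 * with the framework's pattern checker. As used below (a brief orientation,
 * inferred from the directives in this file): "//>>" searches forward for a
 * matching line, "//!" must match the next instruction, "//; expr" evaluates
 * an expression (setting "success" to false fails the test), "//~variant"
 * restricts a check to one test variant, "%name" binds a temporary, and
 * "#name" captures a number for later comparisons.
 */
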
BEGIN_TEST(regalloc.subdword_alloc.reuse_16bit_operands)
   /* Registers of operands should be "recycled" for the output, but if the
    * input is smaller than the output, that is not generally possible. The
    * first v_cvt_f32_f16 instruction below uses the upper 16 bits of v0
    * while the lower 16 bits are still live, so the output must be stored in
    * a register other than v0. For the second v_cvt_f32_f16, the original
    * value stored in v0 is no longer used, so storing the result in v0 is
    * safe, though the allocator is not required to do so.
    */

   /* TODO: is this possible to do on GFX11? */
   for (amd_gfx_level cc = GFX8; cc <= GFX10_3; cc = (amd_gfx_level)((unsigned)cc + 1)) {
      for (bool pessimistic : {false, true}) {
         const char* subvariant = pessimistic ? "/pessimistic" : "/optimistic";

         //>> v1: %_:v[#a] = p_startpgm
         if (!setup_cs("v1", (amd_gfx_level)cc, CHIP_UNKNOWN, subvariant))
            return;

         //! v2b: %_:v[#a][0:16], v2b: %res1:v[#a][16:32] = p_split_vector %_:v[#a]
         Builder::Result tmp =
            bld.pseudo(aco_opcode::p_split_vector, bld.def(v2b), bld.def(v2b), inputs[0]);

         //! v1: %_:v[#b] = v_cvt_f32_f16 %_:v[#a][16:32] dst_sel:dword src0_sel:uword1
         //! v1: %_:v[#_] = v_cvt_f32_f16 %_:v[#a][0:16]
         //; success = (b != a)
         auto result1 = bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), tmp.def(1).getTemp());
         auto result2 = bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), tmp.def(0).getTemp());
         writeout(0, result1);
         writeout(1, result2);

         finish_ra_test(ra_test_policy{pessimistic});
      }
   }
END_TEST

BEGIN_TEST(regalloc._32bit_partial_write)
   //>> v1: %_:v[0] = p_startpgm
   if (!setup_cs("v1", GFX10))
      return;

   /* Ensure the high 16 bits of v0 are occupied. */
   //! v2b: %_:v[0][0:16], v2b: %_:v[0][16:32] = p_split_vector %_:v[0]
   Temp hi =
      bld.pseudo(aco_opcode::p_split_vector, bld.def(v2b), bld.def(v2b), inputs[0]).def(1).getTemp();

   /* Check that this instruction is written using SDWA with dst_preserve, so
    * that the live high 16 bits of v0 are not clobbered. */
   //! v2b: %_:v[0][0:16] = v_not_b32 0 dst_sel:uword0 dst_preserve src0_sel:dword
   Temp lo = bld.vop1(aco_opcode::v_not_b32, bld.def(v2b), Operand::zero());

   //! v1: %_:v[0] = p_create_vector %_:v[0][0:16], %_:v[0][16:32]
   bld.pseudo(aco_opcode::p_create_vector, bld.def(v1), lo, hi);

   finish_ra_test(ra_test_policy());
END_TEST

BEGIN_TEST(regalloc.precolor.swap)
   //>> s2: %op0:s[0-1] = p_startpgm
   if (!setup_cs("s2", GFX10))
      return;

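   /* Limit the test to four SGPRs so that the precolored operand below cannot
    * be satisfied by copying to a free register; the allocator has to swap
    * %op0 and %op1. */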
   program->dev.sgpr_limit = 4;

   //! s2: %op1:s[2-3] = p_unit_test
   Temp op1 = bld.pseudo(aco_opcode::p_unit_test, bld.def(s2));

   //! s2: %op0_2:s[2-3], s2: %op1_2:s[0-1] = p_parallelcopy %op0:s[0-1], %op1:s[2-3]
   //! p_unit_test %op0_2:s[2-3], %op1_2:s[0-1]
   Operand op(inputs[0]);
   op.setFixed(PhysReg(2));
   bld.pseudo(aco_opcode::p_unit_test, op, op1);

   finish_ra_test(ra_test_policy());
END_TEST

BEGIN_TEST(regalloc.precolor.blocking_vector)
   //>> s2: %tmp0:s[0-1], s1: %tmp1:s[2] = p_startpgm
   if (!setup_cs("s2 s1", GFX10))
      return;

   //! s1: %tmp1_2:s[1], s2: %tmp0_2:s[2-3] = p_parallelcopy %tmp1:s[2], %tmp0:s[0-1]
   //! p_unit_test %tmp1_2:s[1]
   Operand op(inputs[1]);
   op.setFixed(PhysReg(1));
   bld.pseudo(aco_opcode::p_unit_test, op);

   //! p_unit_test %tmp0_2:s[2-3]
   bld.pseudo(aco_opcode::p_unit_test, inputs[0]);

   finish_ra_test(ra_test_policy());
END_TEST

BEGIN_TEST(regalloc.precolor.vector.test)
   //>> s2: %tmp0:s[0-1], s1: %tmp1:s[2], s1: %tmp2:s[3] = p_startpgm
   if (!setup_cs("s2 s1 s1", GFX10))
      return;

   //! s2: %tmp0_2:s[2-3], s1: %tmp2_2:s[#t2] = p_parallelcopy %tmp0:s[0-1], %tmp2:s[3]
   //! p_unit_test %tmp0_2:s[2-3]
   Operand op(inputs[0]);
   op.setFixed(PhysReg(2));
   bld.pseudo(aco_opcode::p_unit_test, op);

   //! p_unit_test %tmp2_2:s[#t2]
   bld.pseudo(aco_opcode::p_unit_test, inputs[2]);

   finish_ra_test(ra_test_policy());
END_TEST

BEGIN_TEST(regalloc.precolor.vector.collect)
   //>> s2: %tmp0:s[0-1], s1: %tmp1:s[2], s1: %tmp2:s[3] = p_startpgm
   if (!setup_cs("s2 s1 s1", GFX10))
      return;

   //! s2: %tmp0_2:s[2-3], s1: %tmp1_2:s[#t1], s1: %tmp2_2:s[#t2] = p_parallelcopy %tmp0:s[0-1], %tmp1:s[2], %tmp2:s[3]
   //! p_unit_test %tmp0_2:s[2-3]
   Operand op(inputs[0]);
   op.setFixed(PhysReg(2));
   bld.pseudo(aco_opcode::p_unit_test, op);

   //! p_unit_test %tmp1_2:s[#t1], %tmp2_2:s[#t2]
   bld.pseudo(aco_opcode::p_unit_test, inputs[1], inputs[2]);

   finish_ra_test(ra_test_policy());
END_TEST

BEGIN_TEST(regalloc.precolor.vgpr_move)
   //>> v1: %tmp0:v[0], v1: %tmp1:v[1] = p_startpgm
   if (!setup_cs("v1 v1", GFX10))
      return;

   //! v1: %tmp1_2:v[0], v1: %tmp0_2:v[#t0] = p_parallelcopy %tmp1:v[1], %tmp0:v[0]
   //! p_unit_test %tmp0_2:v[#t0], %tmp1_2:v[0]
   bld.pseudo(aco_opcode::p_unit_test, inputs[0], Operand(inputs[1], PhysReg(256)));

   finish_ra_test(ra_test_policy());
END_TEST

BEGIN_TEST(regalloc.precolor.multiple_operands)
   //>> v1: %tmp0:v[0], v1: %tmp1:v[1], v1: %tmp2:v[2], v1: %tmp3:v[3] = p_startpgm
   if (!setup_cs("v1 v1 v1 v1", GFX10))
      return;

   //! v1: %tmp3_2:v[0], v1: %tmp0_2:v[1], v1: %tmp1_2:v[2], v1: %tmp2_2:v[3] = p_parallelcopy %tmp3:v[3], %tmp0:v[0], %tmp1:v[1], %tmp2:v[2]
   //! p_unit_test %tmp3_2:v[0], %tmp0_2:v[1], %tmp1_2:v[2], %tmp2_2:v[3]
   bld.pseudo(aco_opcode::p_unit_test, Operand(inputs[3], PhysReg(256 + 0)),
              Operand(inputs[0], PhysReg(256 + 1)), Operand(inputs[1], PhysReg(256 + 2)),
              Operand(inputs[2], PhysReg(256 + 3)));

   finish_ra_test(ra_test_policy());
END_TEST

BEGIN_TEST(regalloc.precolor.different_regs)
   //>> v1: %tmp0:v[0] = p_startpgm
   if (!setup_cs("v1", GFX10))
      return;

   //! v1: %tmp1:v[1], v1: %tmp2:v[2] = p_parallelcopy %tmp0:v[0], %tmp0:v[0]
   //! p_unit_test %tmp0:v[0], %tmp1:v[1], %tmp2:v[2]
   bld.pseudo(aco_opcode::p_unit_test, Operand(inputs[0], PhysReg(256 + 0)),
              Operand(inputs[0], PhysReg(256 + 1)), Operand(inputs[0], PhysReg(256 + 2)));

   finish_ra_test(ra_test_policy());
END_TEST

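/* ACO branch instructions carry an s2 definition (a scratch SGPR pair,
 * presumably reserved for lowering to long jumps). The next two tests check
 * that assigning this definition does not clash with temporaries that only
 * become live in a successor block.
 */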
BEGIN_TEST(regalloc.branch_def_phis_at_merge_block)
   //>> p_startpgm
   if (!setup_cs("", GFX10))
      return;

   program->blocks[0].kind &= ~block_kind_top_level;

   //! s2: %_:s[2-3] = p_branch
   bld.branch(aco_opcode::p_branch, bld.def(s2));

   //! BB1
   //! /* logical preds: / linear preds: BB0, / kind: uniform, */
   bld.reset(program->create_and_insert_block());
   program->blocks[1].linear_preds.push_back(0);

   //! s2: %tmp:s[0-1] = p_linear_phi 0
   Temp tmp = bld.pseudo(aco_opcode::p_linear_phi, bld.def(s2), Operand::c64(0u));

   //! p_unit_test %tmp:s[0-1]
   bld.pseudo(aco_opcode::p_unit_test, tmp);

   finish_ra_test(ra_test_policy());
END_TEST

BEGIN_TEST(regalloc.branch_def_phis_at_branch_block)
   //>> p_startpgm
   if (!setup_cs("", GFX10))
      return;

   //! s2: %tmp:s[0-1] = p_unit_test
   Temp tmp = bld.pseudo(aco_opcode::p_unit_test, bld.def(s2));

   //! s2: %_:s[2-3] = p_cbranch_z %0:scc
   bld.branch(aco_opcode::p_cbranch_z, bld.def(s2), Operand(scc, s1));

   //! BB1
   //! /* logical preds: / linear preds: BB0, / kind: */
   bld.reset(program->create_and_insert_block());
   program->blocks[1].linear_preds.push_back(0);

   //! p_unit_test %tmp:s[0-1]
   bld.pseudo(aco_opcode::p_unit_test, tmp);
   bld.branch(aco_opcode::p_branch, bld.def(s2));

   bld.reset(program->create_and_insert_block());
   program->blocks[2].linear_preds.push_back(0);

   bld.branch(aco_opcode::p_branch, bld.def(s2));

   bld.reset(program->create_and_insert_block());
   program->blocks[3].linear_preds.push_back(1);
   program->blocks[3].linear_preds.push_back(2);
   program->blocks[3].kind |= block_kind_top_level;

   finish_ra_test(ra_test_policy());
END_TEST

BEGIN_TEST(regalloc.vintrp_fp16)
   //>> v1: %in0:v[0], s1: %in1:s[0], v1: %in2:v[1] = p_startpgm
   if (!setup_cs("v1 s1 v1", GFX10))
      return;

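   /* The interpolation instructions take %in1 as an m0 operand, so the
    * allocator has to insert a copy into m0 first. */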
   //! s1: %npm:m0 = p_parallelcopy %in1:s[0]
   //! v2b: %lo:v[2][0:16] = v_interp_p2_f16 %in0:v[0], %npm:m0, %in2:v[1] attr0.x
   Temp lo = bld.vintrp(aco_opcode::v_interp_p2_f16, bld.def(v2b), inputs[0], bld.m0(inputs[1]),
                        inputs[2], 0, 0, false);
   //! v2b: %hi:v[2][16:32] = v_interp_p2_hi_f16 %in0:v[0], %npm:m0, %in2:v[1] attr0.x high
   Temp hi = bld.vintrp(aco_opcode::v_interp_p2_f16, bld.def(v2b), inputs[0], bld.m0(inputs[1]),
                        inputs[2], 0, 0, true);
   //! v1: %res:v[2] = p_create_vector %lo:v[2][0:16], %hi:v[2][16:32]
   Temp res = bld.pseudo(aco_opcode::p_create_vector, bld.def(v1), lo, hi);
   //! p_unit_test %res:v[2]
   bld.pseudo(aco_opcode::p_unit_test, res);

   finish_ra_test(ra_test_policy());
END_TEST

BEGIN_TEST(regalloc.vinterp_fp16)
   //>> v1: %in0:v[0], v1: %in1:v[1], v1: %in2:v[2] = p_startpgm
   if (!setup_cs("v1 v1 v1", GFX11))
      return;

   //! v2b: %lo:v[3][0:16], v2b: %hi:v[3][16:32] = p_split_vector %in0:v[0]
   Temp lo = bld.tmp(v2b);
   Temp hi = bld.tmp(v2b);
   bld.pseudo(aco_opcode::p_split_vector, Definition(lo), Definition(hi), inputs[0]);

   //! v1: %tmp0:v[1] = v_interp_p10_f16_f32_inreg %lo:v[3][0:16], %in1:v[1], hi(%hi:v[3][16:32])
   //! p_unit_test %tmp0:v[1]
   Temp tmp0 =
      bld.vinterp_inreg(aco_opcode::v_interp_p10_f16_f32_inreg, bld.def(v1), lo, inputs[1], hi);
   bld.pseudo(aco_opcode::p_unit_test, tmp0);

   //! v2b: %tmp1:v[#r][16:32] = v_interp_p2_f16_f32_inreg %in0:v[0], %in2:v[2], %tmp0:v[1] opsel_hi
   //! v1: %tmp2:v[#r] = p_create_vector 0, %tmp1:v[#r][16:32]
   //! p_unit_test %tmp2:v[#r]
   Temp tmp1 = bld.vinterp_inreg(aco_opcode::v_interp_p2_f16_f32_inreg, bld.def(v2b), inputs[0],
                                 inputs[2], tmp0);
   Temp tmp2 = bld.pseudo(aco_opcode::p_create_vector, bld.def(v1), Operand::zero(2), tmp1);
   bld.pseudo(aco_opcode::p_unit_test, tmp2);

   finish_ra_test(ra_test_policy());
END_TEST

BEGIN_TEST(regalloc.writelane)
   //>> v1: %in0:v[0], s1: %in1:s[0], s1: %in2:s[1], s1: %in3:s[2] = p_startpgm
   if (!setup_cs("v1 s1 s1 s1", GFX8))
      return;

   //! s1: %tmp:m0 = p_parallelcopy %in3:s[2]
   Temp tmp = bld.copy(bld.def(s1, m0), inputs[3]);

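   /* On GFX8, only one of v_writelane's scalar sources may be a non-m0 SGPR
    * (an assumption based on the expected copies below), so %in1 is moved
    * into m0, which in turn evicts %tmp to a free SGPR. */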
   //! s1: %in1_2:m0, s1: %tmp_2:s[#t2] = p_parallelcopy %in1:s[0], %tmp:m0
   //! v1: %tmp2:v[0] = v_writelane_b32_e64 %in1_2:m0, %in2:s[1], %in0:v[0]
   Temp tmp2 = bld.writelane(bld.def(v1), inputs[1], inputs[2], inputs[0]);

   //! p_unit_test %tmp_2:s[#t2], %tmp2:v[0]
   bld.pseudo(aco_opcode::p_unit_test, tmp, tmp2);

   finish_ra_test(ra_test_policy());
END_TEST

static void
end_linear_vgpr(Temp tmp)
{
   bld.pseudo(aco_opcode::p_end_linear_vgpr, tmp);
}
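
/* Helper note: linear VGPRs stay live across linear control flow, and (as the
 * expectations below show) the allocator packs them at the top of the VGPR
 * file, growing the area downwards while normal VGPRs are allocated from
 * v[0] upwards. */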

BEGIN_TEST(regalloc.linear_vgpr.alloc.basic)
   if (!setup_cs("", GFX8))
      return;

   //>> lv1: %ltmp0:v[31] = p_start_linear_vgpr
   //! lv1: %ltmp1:v[30] = p_start_linear_vgpr
   //! p_end_linear_vgpr %ltmp0:v[31]
   //! lv1: %ltmp2:v[31] = p_start_linear_vgpr
   //! p_end_linear_vgpr %ltmp1:v[30]
   //! p_end_linear_vgpr %ltmp2:v[31]
   Temp ltmp0 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v1.as_linear()));
   Temp ltmp1 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v1.as_linear()));
   end_linear_vgpr(ltmp0);
   Temp ltmp2 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v1.as_linear()));
   end_linear_vgpr(ltmp1);
   end_linear_vgpr(ltmp2);

   finish_ra_test(ra_test_policy());
END_TEST

BEGIN_TEST(regalloc.linear_vgpr.alloc.compact_grow)
   for (bool pessimistic : {false, true}) {
      const char* subvariant = pessimistic ? "_pessimistic" : "_optimistic";
      //>> v1: %in0:v[0] = p_startpgm
      if (!setup_cs("v1", GFX8, CHIP_UNKNOWN, subvariant))
         continue;

      //! lv1: %ltmp0:v[31] = p_start_linear_vgpr
      //! lv1: %ltmp1:v[30] = p_start_linear_vgpr
      //! p_end_linear_vgpr %ltmp0:v[31]
      Temp ltmp0 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v1.as_linear()));
      Temp ltmp1 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v1.as_linear()));
      end_linear_vgpr(ltmp0);

      //! v1: %tmp:v[29] = p_parallelcopy %in0:v[0]
      Temp tmp = bld.pseudo(aco_opcode::p_parallelcopy, bld.def(v1, PhysReg(256 + 29)), inputs[0]);

      /* When there isn't enough space in the linear VGPR area for a new one,
       * the area is compacted and the new linear VGPR is placed at its
       * beginning. Any variables that are in the way are moved.
       */
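      /* Concretely (based on the expectations here): v[31] is free but v[30]
       * still holds %ltmp1, so %ltmp1 is moved up to v[31], the lv2 %ltmp2
       * takes v[29-30], and the normal VGPR %tmp at v[29] is moved out of
       * the way. */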
      //! lv1: %ltmp1_2:v[31] = p_parallelcopy %ltmp1:v[30]
      //! v1: %tmp_2:v[#_] = p_parallelcopy %tmp:v[29]
      //! lv2: %ltmp2:v[29-30] = p_start_linear_vgpr
      Temp ltmp2 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v2.as_linear()));

      //! p_end_linear_vgpr %ltmp1_2:v[31]
      //! p_end_linear_vgpr %ltmp2:v[29-30]
      end_linear_vgpr(ltmp1);
      end_linear_vgpr(ltmp2);

      //! p_unit_test %tmp_2:v[#_]
      bld.pseudo(aco_opcode::p_unit_test, tmp);

      finish_ra_test(ra_test_policy{pessimistic});
   }
END_TEST

BEGIN_TEST(regalloc.linear_vgpr.alloc.compact_shrink)
   for (bool pessimistic : {false, true}) {
      const char* subvariant = pessimistic ? "_pessimistic" : "_optimistic";
      //>> v1: %in0:v[0] = p_startpgm
      if (!setup_cs("v1", GFX8, CHIP_UNKNOWN, subvariant))
         continue;

      //! lv1: %ltmp0:v[31] = p_start_linear_vgpr
      //! lv1: %ltmp1:v[30] = p_start_linear_vgpr
      //! lv1: %ltmp2:v[29] = p_start_linear_vgpr
      //! lv1: %ltmp3:v[28] = p_start_linear_vgpr
      //! lv1: %ltmp4:v[27] = p_start_linear_vgpr
      //! p_end_linear_vgpr %ltmp0:v[31]
      //! p_end_linear_vgpr %ltmp2:v[29]
      //! p_end_linear_vgpr %ltmp4:v[27]
      Temp ltmp0 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v1.as_linear()));
      Temp ltmp1 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v1.as_linear()));
      Temp ltmp2 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v1.as_linear()));
      Temp ltmp3 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v1.as_linear()));
      Temp ltmp4 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v1.as_linear()));
      end_linear_vgpr(ltmp0);
      end_linear_vgpr(ltmp2);
      end_linear_vgpr(ltmp4);

      /* Unlike regalloc.linear_vgpr.alloc.compact_grow, this shrinks the linear VGPR area. */
      //! lv1: %ltmp3_2:v[30], lv1: %ltmp1_2:v[31] = p_parallelcopy %ltmp3:v[28], %ltmp1:v[30]
      //! lv2: %ltmp5:v[28-29] = p_start_linear_vgpr
      Temp ltmp5 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v2.as_linear()));

      /* There should be enough space for 28 normal VGPRs. */
      //! v28: %_:v[0-27] = p_unit_test
      bld.pseudo(aco_opcode::p_unit_test, bld.def(RegClass::get(RegType::vgpr, 28 * 4)));

      //! p_end_linear_vgpr %ltmp1_2:v[31]
      //! p_end_linear_vgpr %ltmp3_2:v[30]
      //! p_end_linear_vgpr %ltmp5:v[28-29]
      end_linear_vgpr(ltmp1);
      end_linear_vgpr(ltmp3);
      end_linear_vgpr(ltmp5);

      finish_ra_test(ra_test_policy{pessimistic});
   }
END_TEST

BEGIN_TEST(regalloc.linear_vgpr.alloc.compact_for_normal)
   for (bool pessimistic : {false, true}) {
      const char* subvariant = pessimistic ? "_pessimistic" : "_optimistic";
      //>> v1: %in0:v[0] = p_startpgm
      if (!setup_cs("v1", GFX8, CHIP_UNKNOWN, subvariant))
         continue;

      //! lv1: %ltmp0:v[31] = p_start_linear_vgpr
      //! lv1: %ltmp1:v[30] = p_start_linear_vgpr
      //! p_end_linear_vgpr %ltmp0:v[31]
      Temp ltmp0 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v1.as_linear()));
      Temp ltmp1 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v1.as_linear()));
      end_linear_vgpr(ltmp0);

      //! lv1: %ltmp1_2:v[31] = p_parallelcopy %ltmp1:v[30]
      //! v31: %_:v[0-30] = p_unit_test
      bld.pseudo(aco_opcode::p_unit_test, bld.def(RegClass::get(RegType::vgpr, 31 * 4)));

      //! p_end_linear_vgpr %ltmp1_2:v[31]
      end_linear_vgpr(ltmp1);

      finish_ra_test(ra_test_policy{pessimistic});
   }
END_TEST

BEGIN_TEST(regalloc.linear_vgpr.alloc.compact_for_vec)
   for (bool pessimistic : {false, true}) {
      const char* subvariant = pessimistic ? "_pessimistic" : "_optimistic";
      //>> v1: %in0:v[0] = p_startpgm
      if (!setup_cs("v1", GFX8, CHIP_UNKNOWN, subvariant))
         continue;

      //! lv1: %ltmp0:v[31] = p_start_linear_vgpr
      //! lv1: %ltmp1:v[30] = p_start_linear_vgpr
      //! p_end_linear_vgpr %ltmp0:v[31]
      Temp ltmp0 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v1.as_linear()));
      Temp ltmp1 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v1.as_linear()));
      end_linear_vgpr(ltmp0);

      //! lv1: %ltmp1_2:v[31] = p_parallelcopy %ltmp1:v[30]
      //! v31: %_:v[0-30] = p_create_vector v31: undef
      RegClass v31 = RegClass::get(RegType::vgpr, 31 * 4);
      bld.pseudo(aco_opcode::p_create_vector, bld.def(v31), Operand(v31));

      //! p_end_linear_vgpr %ltmp1_2:v[31]
      end_linear_vgpr(ltmp1);

      finish_ra_test(ra_test_policy{pessimistic});
   }
END_TEST

BEGIN_TEST(regalloc.linear_vgpr.alloc.killed_op)
   for (bool pessimistic : {false, true}) {
      const char* subvariant = pessimistic ? "_pessimistic" : "_optimistic";
      if (!setup_cs("", GFX8, CHIP_UNKNOWN, subvariant))
         continue;

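      /* The killed operand already sits in the only free VGPR at the top of
       * the register file, so the new linear VGPR can simply take over its
       * register without any copies. */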
      //>> v31: %tmp0:v[0-30] = p_unit_test
      //! v1: %tmp1:v[31] = p_unit_test
      Temp tmp0 =
         bld.pseudo(aco_opcode::p_unit_test, bld.def(RegClass::get(RegType::vgpr, 31 * 4)));
      Temp tmp1 = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1));

      //! lv1: %ltmp0:v[31] = p_start_linear_vgpr %tmp1:v[31]
      //! p_end_linear_vgpr %ltmp0:v[31]
      Temp ltmp0 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v1.as_linear()), tmp1);
      end_linear_vgpr(ltmp0);

      bld.pseudo(aco_opcode::p_unit_test, tmp0);

      finish_ra_test(ra_test_policy{pessimistic});
   }
END_TEST

BEGIN_TEST(regalloc.linear_vgpr.alloc.move_killed_op)
   for (bool pessimistic : {false, true}) {
      const char* subvariant = pessimistic ? "_pessimistic" : "_optimistic";
      if (!setup_cs("", GFX8, CHIP_UNKNOWN, subvariant))
         continue;

      //>> v30: %tmp0:v[0-29] = p_unit_test
      //! v1: %tmp1:v[30] = p_unit_test
      //! v1: %tmp2:v[31] = p_unit_test
      Temp tmp0 =
         bld.pseudo(aco_opcode::p_unit_test, bld.def(RegClass::get(RegType::vgpr, 30 * 4)));
      Temp tmp1 = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1));
      Temp tmp2 = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1));

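      /* Here the killed operand %tmp1 is not at the top of the register file,
       * so it has to swap places with %tmp2 before the linear VGPR can claim
       * v[31]; the optimistic and pessimistic variants only differ in the
       * operand order of that swap. */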
      //~gfx8_optimistic! v1: %tmp1_2:v[31], v1: %tmp2_2:v[30] = p_parallelcopy %tmp1:v[30], %tmp2:v[31]
      //~gfx8_pessimistic! v1: %tmp2_2:v[30], v1: %tmp1_2:v[31] = p_parallelcopy %tmp2:v[31], %tmp1:v[30]
      //! lv1: %ltmp0:v[31] = p_start_linear_vgpr %tmp1_2:v[31]
      //! p_end_linear_vgpr %ltmp0:v[31]
      Temp ltmp0 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v1.as_linear()), tmp1);
      end_linear_vgpr(ltmp0);

      //! p_unit_test %tmp0:v[0-29], %tmp2_2:v[30]
      bld.pseudo(aco_opcode::p_unit_test, tmp0, tmp2);

      finish_ra_test(ra_test_policy{pessimistic});
   }
END_TEST

BEGIN_TEST(regalloc.linear_vgpr.compact_for_future_def)
   for (bool cbr : {false, true}) {
      const char* subvariant = cbr ? "_cbranch" : "_branch";
      if (!setup_cs("", GFX8, CHIP_UNKNOWN, subvariant))
         continue;

      //>> lv2: %ltmp0:v[30-31] = p_start_linear_vgpr
      //! lv1: %ltmp1:v[29] = p_start_linear_vgpr
      //! lv1: %ltmp2:v[28] = p_start_linear_vgpr
      //! p_end_linear_vgpr %ltmp1:v[29]
      Temp ltmp0 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v2.as_linear()));
      Temp ltmp1 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v1.as_linear()));
      Temp ltmp2 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v1.as_linear()));
      end_linear_vgpr(ltmp1);

      //! s1: %scc_tmp:scc = p_unit_test
      Temp scc_tmp = bld.pseudo(aco_opcode::p_unit_test, bld.def(s1, scc));

      //! lv1: %ltmp2_2:v[29] = p_parallelcopy %ltmp2:v[28]
      //~gfx8_cbranch! s2: %_:s[0-1] = p_cbranch_z %scc_tmp:scc
      //~gfx8_branch! s2: %_:s[0-1] = p_branch
      if (cbr)
         bld.branch(aco_opcode::p_cbranch_z, bld.def(s2), bld.scc(scc_tmp));
      else
         bld.branch(aco_opcode::p_branch, bld.def(s2));

      //! BB1
      //! /* logical preds: BB0, / linear preds: BB0, / kind: */
      bld.reset(program->create_and_insert_block());
      program->blocks[1].linear_preds.push_back(0);
      program->blocks[1].logical_preds.push_back(0);

      //! v29: %_:v[0-28] = p_unit_test
      //! s2: %_:s[0-1] = p_branch
      bld.pseudo(aco_opcode::p_unit_test, bld.def(RegClass::get(RegType::vgpr, 29 * 4)));
      bld.branch(aco_opcode::p_branch, bld.def(s2));

      //! BB2
      //! /* logical preds: BB1, / linear preds: BB1, / kind: uniform, top-level, */
      bld.reset(program->create_and_insert_block());
      program->blocks[2].linear_preds.push_back(1);
      program->blocks[2].logical_preds.push_back(1);
      program->blocks[2].kind |= block_kind_top_level;

      //! p_end_linear_vgpr %ltmp0_2:v[30-31]
      //! p_end_linear_vgpr %ltmp2_2:v[29]
      end_linear_vgpr(ltmp0);
      end_linear_vgpr(ltmp2);

      finish_ra_test(ra_test_policy());

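      /* Inspect the parallelcopy that compacted the linear VGPR area: when it
       * has to be inserted before a branch that still reads SCC, it must be
       * marked as having a live SCC and be given a scratch SGPR, as the
       * checks below expect. */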
      //~gfx8_cbranch>> lv1: %ltmp2_2:v[29] = p_parallelcopy %ltmp2:v[28] scc:1 scratch:s1
      //~gfx8_branch>> lv1: %ltmp2_2:v[29] = p_parallelcopy %ltmp2:v[28] scc:0 scratch:s0
      aco_ptr<Instruction>& parallelcopy = program->blocks[0].instructions[6];
      aco_print_instr(program->gfx_level, parallelcopy.get(), output);
      if (parallelcopy->isPseudo()) {
         fprintf(output, " scc:%u scratch:s%u\n", parallelcopy->pseudo().tmp_in_scc,
                 parallelcopy->pseudo().scratch_sgpr.reg());
      } else {
         fprintf(output, "\n");
      }
   }
END_TEST

BEGIN_TEST(regalloc.linear_vgpr.compact_for_future_phis)
   for (bool cbr : {false, true}) {
      const char* subvariant = cbr ? "_cbranch" : "_branch";
      if (!setup_cs("", GFX8, CHIP_UNKNOWN, subvariant))
         continue;

      //>> lv1: %ltmp0:v[31] = p_start_linear_vgpr
      //! lv1: %ltmp1:v[30] = p_start_linear_vgpr
      //! lv1: %ltmp2:v[29] = p_start_linear_vgpr
      //! p_end_linear_vgpr %ltmp1:v[30]
      Temp ltmp0 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v1.as_linear()));
      Temp ltmp1 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v1.as_linear()));
      Temp ltmp2 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v1.as_linear()));
      end_linear_vgpr(ltmp1);

      //! lv1: %ltmp2_2:v[30] = p_parallelcopy %ltmp2:v[29]
      //~gfx8_cbranch! s2: %_:s[0-1] = p_cbranch_z %_:scc
      //~gfx8_branch! s2: %_:s[0-1] = p_branch
      if (cbr)
         bld.branch(aco_opcode::p_cbranch_z, bld.def(s2), Operand(scc, s1));
      else
         bld.branch(aco_opcode::p_branch, bld.def(s2));

      //! BB1
      //! /* logical preds: BB0, / linear preds: BB0, / kind: */
      bld.reset(program->create_and_insert_block());
      program->blocks[1].linear_preds.push_back(0);
      program->blocks[1].logical_preds.push_back(0);

      //! s2: %_:s[0-1] = p_branch
      bld.branch(aco_opcode::p_branch, bld.def(s2));

      //! BB2
      //! /* logical preds: BB1, / linear preds: BB1, / kind: uniform, top-level, */
      bld.reset(program->create_and_insert_block());
      program->blocks[2].linear_preds.push_back(1);
      program->blocks[2].logical_preds.push_back(1);
      program->blocks[2].kind |= block_kind_top_level;

      RegClass v30 = RegClass::get(RegType::vgpr, 30 * 4);
      //! v30: %tmp:v[0-29] = p_phi v30: undef
      //! p_unit_test %tmp:v[0-29]
      Temp tmp = bld.pseudo(aco_opcode::p_phi, bld.def(v30), Operand(v30));
      bld.pseudo(aco_opcode::p_unit_test, tmp);

      //! p_end_linear_vgpr %ltmp0_2:v[31]
      //! p_end_linear_vgpr %ltmp2_2:v[30]
      end_linear_vgpr(ltmp0);
      end_linear_vgpr(ltmp2);

      finish_ra_test(ra_test_policy());
   }
END_TEST
623