/*
 * Copyright © 2020 Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 */
#include "helpers.h"

using namespace aco;

BEGIN_TEST(regalloc.subdword_alloc.reuse_16bit_operands)
   /* Registers of operands should be "recycled" for the output. But if the
    * input is smaller than the output, that's not generally possible. The
    * first v_cvt_f32_f16 instruction below uses the upper 16 bits of v0
    * while the lower 16 bits are still live, so the output must be stored in
    * a register other than v0. For the second v_cvt_f32_f16, the original
    * value in v0 is no longer used, so the result may be stored in v0,
    * though RA is not required to do so.
    */

   /* TODO: is this possible to do on GFX11? */
   for (amd_gfx_level cc = GFX8; cc <= GFX10_3; cc = (amd_gfx_level)((unsigned)cc + 1)) {
      for (bool pessimistic : {false, true}) {
         const char* subvariant = pessimistic ? "/pessimistic" : "/optimistic";

         //>> v1: %_:v[#a] = p_startpgm
         if (!setup_cs("v1", cc, CHIP_UNKNOWN, subvariant))
            return;

         //! v2b: %_:v[#a][0:16], v2b: %res1:v[#a][16:32] = p_split_vector %_:v[#a]
         Builder::Result tmp =
            bld.pseudo(aco_opcode::p_split_vector, bld.def(v2b), bld.def(v2b), inputs[0]);

         //! v1: %_:v[#b] = v_cvt_f32_f16 %_:v[#a][16:32] dst_sel:dword src0_sel:uword1
         //! v1: %_:v[#_] = v_cvt_f32_f16 %_:v[#a][0:16]
         //; success = (b != a)
         auto result1 = bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), tmp.def(1).getTemp());
         auto result2 = bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), tmp.def(0).getTemp());
         writeout(0, result1);
         writeout(1, result2);

         finish_ra_test(ra_test_policy{pessimistic});
      }
   }
END_TEST

BEGIN_TEST(regalloc._32bit_partial_write)
   //>> v1: %_:v[0] = p_startpgm
   if (!setup_cs("v1", GFX10))
      return;

   /* Ensure that the high 16 bits of v0 are occupied. */
   //! v2b: %_:v[0][0:16], v2b: %_:v[0][16:32] = p_split_vector %_:v[0]
   Temp hi =
      bld.pseudo(aco_opcode::p_split_vector, bld.def(v2b), bld.def(v2b), inputs[0]).def(1).getTemp();

   /* This test checks that this instruction uses SDWA with dst_preserve, so
    * the high 16 bits are kept.
    */
   //! v2b: %_:v[0][0:16] = v_not_b32 0 dst_sel:uword0 dst_preserve src0_sel:dword
   Temp lo = bld.vop1(aco_opcode::v_not_b32, bld.def(v2b), Operand::zero());

   //! v1: %_:v[0] = p_create_vector %_:v[0][0:16], %_:v[0][16:32]
   bld.pseudo(aco_opcode::p_create_vector, bld.def(v1), lo, hi);

   finish_ra_test(ra_test_policy());
END_TEST

BEGIN_TEST(regalloc.precolor.swap)
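   /* %op0 is precolored to s[2-3], where %op1 already lives, and the SGPR
    * limit of 4 leaves no spare registers, so RA has to swap the two values.
    */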
   //>> s2: %op0:s[0-1] = p_startpgm
   if (!setup_cs("s2", GFX10))
      return;

   program->dev.sgpr_limit = 4;

   //! s2: %op1:s[2-3] = p_unit_test
   Temp op1 = bld.pseudo(aco_opcode::p_unit_test, bld.def(s2));

   //! s2: %op0_2:s[2-3], s2: %op1_2:s[0-1] = p_parallelcopy %op0:s[0-1], %op1:s[2-3]
   //! p_unit_test %op0_2:s[2-3], %op1_2:s[0-1]
   Operand op(inputs[0]);
   op.setFixed(PhysReg(2));
   bld.pseudo(aco_opcode::p_unit_test, op, op1);

   finish_ra_test(ra_test_policy());
END_TEST

BEGIN_TEST(regalloc.precolor.blocking_vector)
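   /* %tmp1 is precolored to s[1], which lies inside the still-live vector
    * %tmp0:s[0-1], so %tmp0 has to be moved out of the way first.
    */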
   //>> s2: %tmp0:s[0-1], s1: %tmp1:s[2] = p_startpgm
   if (!setup_cs("s2 s1", GFX10))
      return;

   //! s1: %tmp1_2:s[1], s2: %tmp0_2:s[2-3] = p_parallelcopy %tmp1:s[2], %tmp0:s[0-1]
   //! p_unit_test %tmp1_2:s[1]
   Operand op(inputs[1]);
   op.setFixed(PhysReg(1));
   bld.pseudo(aco_opcode::p_unit_test, op);

   //! p_unit_test %tmp0_2:s[2-3]
   bld.pseudo(aco_opcode::p_unit_test, inputs[0]);

   finish_ra_test(ra_test_policy());
END_TEST

BEGIN_TEST(regalloc.precolor.vector.test)
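   /* Precoloring the vector %tmp0 to s[2-3] overwrites the dead %tmp1 but
    * displaces the still-live %tmp2, which has to be copied elsewhere.
    */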
   //>> s2: %tmp0:s[0-1], s1: %tmp1:s[2], s1: %tmp2:s[3] = p_startpgm
   if (!setup_cs("s2 s1 s1", GFX10))
      return;

   //! s2: %tmp0_2:s[2-3], s1: %tmp2_2:s[#t2] = p_parallelcopy %tmp0:s[0-1], %tmp2:s[3]
   //! p_unit_test %tmp0_2:s[2-3]
   Operand op(inputs[0]);
   op.setFixed(PhysReg(2));
   bld.pseudo(aco_opcode::p_unit_test, op);

   //! p_unit_test %tmp2_2:s[#t2]
   bld.pseudo(aco_opcode::p_unit_test, inputs[2]);

   finish_ra_test(ra_test_policy());
END_TEST

BEGIN_TEST(regalloc.precolor.vector.collect)
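   /* Like regalloc.precolor.vector.test, but here %tmp1 and %tmp2 are both
    * still live, so all required moves are collected into one parallelcopy.
    */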
   //>> s2: %tmp0:s[0-1], s1: %tmp1:s[2], s1: %tmp2:s[3] = p_startpgm
   if (!setup_cs("s2 s1 s1", GFX10))
      return;

   //! s2: %tmp0_2:s[2-3], s1: %tmp1_2:s[#t1], s1: %tmp2_2:s[#t2] = p_parallelcopy %tmp0:s[0-1], %tmp1:s[2], %tmp2:s[3]
   //! p_unit_test %tmp0_2:s[2-3]
   Operand op(inputs[0]);
   op.setFixed(PhysReg(2));
   bld.pseudo(aco_opcode::p_unit_test, op);

   //! p_unit_test %tmp1_2:s[#t1], %tmp2_2:s[#t2]
   bld.pseudo(aco_opcode::p_unit_test, inputs[1], inputs[2]);

   finish_ra_test(ra_test_policy());
END_TEST

BEGIN_TEST(regalloc.precolor.vgpr_move)
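   /* PhysReg(256) is v[0]: precoloring %tmp1 there evicts %tmp0, which is
    * still needed as the first operand.
    */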
   //>> v1: %tmp0:v[0], v1: %tmp1:v[1] = p_startpgm
   if (!setup_cs("v1 v1", GFX10))
      return;

   //! v1: %tmp1_2:v[0], v1: %tmp0_2:v[#t0] = p_parallelcopy %tmp1:v[1], %tmp0:v[0]
   //! p_unit_test %tmp0_2:v[#t0], %tmp1_2:v[0]
   bld.pseudo(aco_opcode::p_unit_test, inputs[0], Operand(inputs[1], PhysReg(256)));

   finish_ra_test(ra_test_policy());
END_TEST

BEGIN_TEST(regalloc.precolor.multiple_operands)
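   /* All four operands are precolored to a cyclic rotation of their current
    * registers, which RA has to resolve within a single parallelcopy.
    */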
   //>> v1: %tmp0:v[0], v1: %tmp1:v[1], v1: %tmp2:v[2], v1: %tmp3:v[3] = p_startpgm
   if (!setup_cs("v1 v1 v1 v1", GFX10))
      return;

   //! v1: %tmp3_2:v[0], v1: %tmp0_2:v[1], v1: %tmp1_2:v[2], v1: %tmp2_2:v[3] = p_parallelcopy %tmp3:v[3], %tmp0:v[0], %tmp1:v[1], %tmp2:v[2]
   //! p_unit_test %tmp3_2:v[0], %tmp0_2:v[1], %tmp1_2:v[2], %tmp2_2:v[3]
   bld.pseudo(aco_opcode::p_unit_test, Operand(inputs[3], PhysReg(256 + 0)),
              Operand(inputs[0], PhysReg(256 + 1)), Operand(inputs[1], PhysReg(256 + 2)),
              Operand(inputs[2], PhysReg(256 + 3)));

   finish_ra_test(ra_test_policy());
END_TEST

BEGIN_TEST(regalloc.precolor.different_regs)
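   /* The same temporary is used three times with three different fixed
    * registers, so two additional copies of it have to be created.
    */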
   //>> v1: %tmp0:v[0] = p_startpgm
   if (!setup_cs("v1", GFX10))
      return;

   //! v1: %tmp1:v[1], v1: %tmp2:v[2] = p_parallelcopy %tmp0:v[0], %tmp0:v[0]
   //! p_unit_test %tmp0:v[0], %tmp1:v[1], %tmp2:v[2]
   bld.pseudo(aco_opcode::p_unit_test, Operand(inputs[0], PhysReg(256 + 0)),
              Operand(inputs[0], PhysReg(256 + 1)), Operand(inputs[0], PhysReg(256 + 2)));

   finish_ra_test(ra_test_policy());
END_TEST

BEGIN_TEST(regalloc.branch_def_phis_at_merge_block)
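   /* The branch's s2 definition must not be assigned registers that overlap
    * the phi definition in the merge block.
    */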
   //>> p_startpgm
   if (!setup_cs("", GFX10))
      return;

   program->blocks[0].kind &= ~block_kind_top_level;

   //! s2: %_:s[2-3] = p_branch
   bld.branch(aco_opcode::p_branch, bld.def(s2));

   //! BB1
   //! /* logical preds: / linear preds: BB0, / kind: uniform, */
   bld.reset(program->create_and_insert_block());
   program->blocks[1].linear_preds.push_back(0);

   //! s2: %tmp:s[0-1] = p_linear_phi 0
   Temp tmp = bld.pseudo(aco_opcode::p_linear_phi, bld.def(s2), Operand::c64(0u));

   //! p_unit_test %tmp:s[0-1]
   bld.pseudo(aco_opcode::p_unit_test, tmp);

   finish_ra_test(ra_test_policy());
END_TEST

BEGIN_TEST(regalloc.branch_def_phis_at_branch_block)
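   /* The conditional branch's s2 definition must not be assigned s[0-1],
    * where %tmp is still live across the branch.
    */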
   //>> p_startpgm
   if (!setup_cs("", GFX10))
      return;

   //! s2: %tmp:s[0-1] = p_unit_test
   Temp tmp = bld.pseudo(aco_opcode::p_unit_test, bld.def(s2));

   //! s2: %_:s[2-3] = p_cbranch_z %0:scc
   bld.branch(aco_opcode::p_cbranch_z, bld.def(s2), Operand(scc, s1));

   //! BB1
   //! /* logical preds: / linear preds: BB0, / kind: */
   bld.reset(program->create_and_insert_block());
   program->blocks[1].linear_preds.push_back(0);

   //! p_unit_test %tmp:s[0-1]
   bld.pseudo(aco_opcode::p_unit_test, tmp);
   bld.branch(aco_opcode::p_branch, bld.def(s2));

   bld.reset(program->create_and_insert_block());
   program->blocks[2].linear_preds.push_back(0);

   bld.branch(aco_opcode::p_branch, bld.def(s2));

   bld.reset(program->create_and_insert_block());
   program->blocks[3].linear_preds.push_back(1);
   program->blocks[3].linear_preds.push_back(2);
   program->blocks[3].kind |= block_kind_top_level;

   finish_ra_test(ra_test_policy());
END_TEST

BEGIN_TEST(regalloc.vintrp_fp16)
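   /* The VINTRP instructions take their SGPR operand through m0, so %in1 is
    * copied there first. Both halves of the result are written into the same
    * VGPR, so p_create_vector needs no further copies.
    */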
   //>> v1: %in0:v[0], s1: %in1:s[0], v1: %in2:v[1] = p_startpgm
   if (!setup_cs("v1 s1 v1", GFX10))
      return;

   //! s1: %npm:m0 = p_parallelcopy %in1:s[0]
   //! v2b: %lo:v[2][0:16] = v_interp_p2_f16 %in0:v[0], %npm:m0, %in2:v[1] attr0.x
   Temp lo = bld.vintrp(aco_opcode::v_interp_p2_f16, bld.def(v2b), inputs[0], bld.m0(inputs[1]),
                        inputs[2], 0, 0, false);
   //! v2b: %hi:v[2][16:32] = v_interp_p2_hi_f16 %in0:v[0], %npm:m0, %in2:v[1] attr0.x high
   Temp hi = bld.vintrp(aco_opcode::v_interp_p2_f16, bld.def(v2b), inputs[0], bld.m0(inputs[1]),
                        inputs[2], 0, 0, true);
   //! v1: %res:v[2] = p_create_vector %lo:v[2][0:16], %hi:v[2][16:32]
   Temp res = bld.pseudo(aco_opcode::p_create_vector, bld.def(v1), lo, hi);
   //! p_unit_test %res:v[2]
   bld.pseudo(aco_opcode::p_unit_test, res);

   finish_ra_test(ra_test_policy());
END_TEST

BEGIN_TEST(regalloc.vinterp_fp16)
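   /* Checks RA of GFX11 VINTERP instructions with 16-bit halves: a high-half
    * operand is printed as hi(...) and a high-half definition as opsel_hi.
    */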
   //>> v1: %in0:v[0], v1: %in1:v[1], v1: %in2:v[2] = p_startpgm
   if (!setup_cs("v1 v1 v1", GFX11))
      return;

   //! v2b: %lo:v[3][0:16], v2b: %hi:v[3][16:32] = p_split_vector %in0:v[0]
   Temp lo = bld.tmp(v2b);
   Temp hi = bld.tmp(v2b);
   bld.pseudo(aco_opcode::p_split_vector, Definition(lo), Definition(hi), inputs[0]);

   //! v1: %tmp0:v[1] = v_interp_p10_f16_f32_inreg %lo:v[3][0:16], %in1:v[1], hi(%hi:v[3][16:32])
   //! p_unit_test %tmp0:v[1]
   Temp tmp0 =
      bld.vinterp_inreg(aco_opcode::v_interp_p10_f16_f32_inreg, bld.def(v1), lo, inputs[1], hi);
   bld.pseudo(aco_opcode::p_unit_test, tmp0);

   //! v2b: %tmp1:v[#r][16:32] = v_interp_p2_f16_f32_inreg %in0:v[0], %in2:v[2], %tmp0:v[1] opsel_hi
   //! v1: %tmp2:v[#r] = p_create_vector 0, %tmp1:v[#r][16:32]
   //! p_unit_test %tmp2:v[#r]
   Temp tmp1 = bld.vinterp_inreg(aco_opcode::v_interp_p2_f16_f32_inreg, bld.def(v2b), inputs[0],
                                 inputs[2], tmp0);
   Temp tmp2 = bld.pseudo(aco_opcode::p_create_vector, bld.def(v1), Operand::zero(2), tmp1);
   bld.pseudo(aco_opcode::p_unit_test, tmp2);

   finish_ra_test(ra_test_policy());
END_TEST

BEGIN_TEST(regalloc.writelane)
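   /* On GFX8, v_writelane_b32 can read two SGPRs only if one of them is m0,
    * so %in1 is moved into m0 and the value previously there is evacuated.
    */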
   //>> v1: %in0:v[0], s1: %in1:s[0], s1: %in2:s[1], s1: %in3:s[2] = p_startpgm
   if (!setup_cs("v1 s1 s1 s1", GFX8))
      return;

   //! s1: %tmp:m0 = p_parallelcopy %in3:s[2]
   Temp tmp = bld.copy(bld.def(s1, m0), inputs[3]);

   //! s1: %in1_2:m0, s1: %tmp_2:s[#t2] = p_parallelcopy %in1:s[0], %tmp:m0
   //! v1: %tmp2:v[0] = v_writelane_b32_e64 %in1_2:m0, %in2:s[1], %in0:v[0]
   Temp tmp2 = bld.writelane(bld.def(v1), inputs[1], inputs[2], inputs[0]);

   //! p_unit_test %tmp_2:s[#t2], %tmp2:v[0]
   bld.pseudo(aco_opcode::p_unit_test, tmp, tmp2);

   finish_ra_test(ra_test_policy());
END_TEST

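/* Ends the live range of a linear VGPR. */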
static void
end_linear_vgpr(Temp tmp)
{
   bld.pseudo(aco_opcode::p_end_linear_vgpr, tmp);
}

BEGIN_TEST(regalloc.linear_vgpr.alloc.basic)
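   /* Linear VGPRs are allocated downwards from the top of the VGPR space,
    * and a slot freed by p_end_linear_vgpr is reused for the next one.
    */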
   if (!setup_cs("", GFX8))
      return;

   //>> lv1: %ltmp0:v[31] = p_start_linear_vgpr
   //! lv1: %ltmp1:v[30] = p_start_linear_vgpr
   //! p_end_linear_vgpr %ltmp0:v[31]
   //! lv1: %ltmp2:v[31] = p_start_linear_vgpr
   //! p_end_linear_vgpr %ltmp1:v[30]
   //! p_end_linear_vgpr %ltmp2:v[31]
   Temp ltmp0 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v1.as_linear()));
   Temp ltmp1 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v1.as_linear()));
   end_linear_vgpr(ltmp0);
   Temp ltmp2 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v1.as_linear()));
   end_linear_vgpr(ltmp1);
   end_linear_vgpr(ltmp2);

   finish_ra_test(ra_test_policy());
END_TEST

BEGIN_TEST(regalloc.linear_vgpr.alloc.compact_grow)
   for (bool pessimistic : {false, true}) {
      const char* subvariant = pessimistic ? "_pessimistic" : "_optimistic";
      //>> v1: %in0:v[0] = p_startpgm
      if (!setup_cs("v1", GFX8, CHIP_UNKNOWN, subvariant))
         continue;

      //! lv1: %ltmp0:v[31] = p_start_linear_vgpr
      //! lv1: %ltmp1:v[30] = p_start_linear_vgpr
      //! p_end_linear_vgpr %ltmp0:v[31]
      Temp ltmp0 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v1.as_linear()));
      Temp ltmp1 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v1.as_linear()));
      end_linear_vgpr(ltmp0);

      //! v1: %tmp:v[29] = p_parallelcopy %in0:v[0]
      Temp tmp = bld.pseudo(aco_opcode::p_parallelcopy, bld.def(v1, PhysReg(256 + 29)), inputs[0]);

      /* When there isn't enough contiguous space in the linear VGPR area for
       * a new linear VGPR, the area is compacted and grown downwards. Any
       * variables that are in the way are moved.
       */
      //! lv1: %ltmp1_2:v[31] = p_parallelcopy %ltmp1:v[30]
      //! v1: %tmp_2:v[#_] = p_parallelcopy %tmp:v[29]
      //! lv2: %ltmp2:v[29-30] = p_start_linear_vgpr
      Temp ltmp2 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v2.as_linear()));

      //! p_end_linear_vgpr %ltmp1_2:v[31]
      //! p_end_linear_vgpr %ltmp2:v[29-30]
      end_linear_vgpr(ltmp1);
      end_linear_vgpr(ltmp2);

      //! p_unit_test %tmp_2:v[#_]
      bld.pseudo(aco_opcode::p_unit_test, tmp);

      finish_ra_test(ra_test_policy{pessimistic});
   }
END_TEST

BEGIN_TEST(regalloc.linear_vgpr.alloc.compact_shrink)
   for (bool pessimistic : {false, true}) {
      const char* subvariant = pessimistic ? "_pessimistic" : "_optimistic";
      //>> v1: %in0:v[0] = p_startpgm
      if (!setup_cs("v1", GFX8, CHIP_UNKNOWN, subvariant))
         continue;

      //! lv1: %ltmp0:v[31] = p_start_linear_vgpr
      //! lv1: %ltmp1:v[30] = p_start_linear_vgpr
      //! lv1: %ltmp2:v[29] = p_start_linear_vgpr
      //! lv1: %ltmp3:v[28] = p_start_linear_vgpr
      //! lv1: %ltmp4:v[27] = p_start_linear_vgpr
      //! p_end_linear_vgpr %ltmp0:v[31]
      //! p_end_linear_vgpr %ltmp2:v[29]
      //! p_end_linear_vgpr %ltmp4:v[27]
      Temp ltmp0 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v1.as_linear()));
      Temp ltmp1 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v1.as_linear()));
      Temp ltmp2 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v1.as_linear()));
      Temp ltmp3 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v1.as_linear()));
      Temp ltmp4 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v1.as_linear()));
      end_linear_vgpr(ltmp0);
      end_linear_vgpr(ltmp2);
      end_linear_vgpr(ltmp4);

      /* Unlike regalloc.linear_vgpr.alloc.compact_grow, this shrinks the linear VGPR area. */
      //! lv1: %ltmp3_2:v[30], lv1: %ltmp1_2:v[31] = p_parallelcopy %ltmp3:v[28], %ltmp1:v[30]
      //! lv2: %ltmp5:v[28-29] = p_start_linear_vgpr
      Temp ltmp5 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v2.as_linear()));

      /* There should be enough space for 28 normal VGPRs. */
      //! v28: %_:v[0-27] = p_unit_test
      bld.pseudo(aco_opcode::p_unit_test, bld.def(RegClass::get(RegType::vgpr, 28 * 4)));

      //! p_end_linear_vgpr %ltmp1_2:v[31]
      //! p_end_linear_vgpr %ltmp3_2:v[30]
      //! p_end_linear_vgpr %ltmp5:v[28-29]
      end_linear_vgpr(ltmp1);
      end_linear_vgpr(ltmp3);
      end_linear_vgpr(ltmp5);

      finish_ra_test(ra_test_policy{pessimistic});
   }
END_TEST

BEGIN_TEST(regalloc.linear_vgpr.alloc.compact_for_normal)
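   /* A definition of 31 normal VGPRs only fits if the linear VGPR area is
    * compacted so that v[0-30] becomes free.
    */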
   for (bool pessimistic : {false, true}) {
      const char* subvariant = pessimistic ? "_pessimistic" : "_optimistic";
      //>> v1: %in0:v[0] = p_startpgm
      if (!setup_cs("v1", GFX8, CHIP_UNKNOWN, subvariant))
         continue;

      //! lv1: %ltmp0:v[31] = p_start_linear_vgpr
      //! lv1: %ltmp1:v[30] = p_start_linear_vgpr
      //! p_end_linear_vgpr %ltmp0:v[31]
      Temp ltmp0 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v1.as_linear()));
      Temp ltmp1 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v1.as_linear()));
      end_linear_vgpr(ltmp0);

      //! lv1: %ltmp1_2:v[31] = p_parallelcopy %ltmp1:v[30]
      //! v31: %_:v[0-30] = p_unit_test
      bld.pseudo(aco_opcode::p_unit_test, bld.def(RegClass::get(RegType::vgpr, 31 * 4)));

      //! p_end_linear_vgpr %ltmp1_2:v[31]
      end_linear_vgpr(ltmp1);

      finish_ra_test(ra_test_policy{pessimistic});
   }
END_TEST

BEGIN_TEST(regalloc.linear_vgpr.alloc.compact_for_vec)
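   /* Same as regalloc.linear_vgpr.alloc.compact_for_normal, but the large
    * definition comes from a p_create_vector instead.
    */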
   for (bool pessimistic : {false, true}) {
      const char* subvariant = pessimistic ? "_pessimistic" : "_optimistic";
      //>> v1: %in0:v[0] = p_startpgm
      if (!setup_cs("v1", GFX8, CHIP_UNKNOWN, subvariant))
         continue;

      //! lv1: %ltmp0:v[31] = p_start_linear_vgpr
      //! lv1: %ltmp1:v[30] = p_start_linear_vgpr
      //! p_end_linear_vgpr %ltmp0:v[31]
      Temp ltmp0 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v1.as_linear()));
      Temp ltmp1 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v1.as_linear()));
      end_linear_vgpr(ltmp0);

      //! lv1: %ltmp1_2:v[31] = p_parallelcopy %ltmp1:v[30]
      //! v31: %_:v[0-30] = p_create_vector v31: undef
      RegClass v31 = RegClass::get(RegType::vgpr, 31 * 4);
      bld.pseudo(aco_opcode::p_create_vector, bld.def(v31), Operand(v31));

      //! p_end_linear_vgpr %ltmp1_2:v[31]
      end_linear_vgpr(ltmp1);

      finish_ra_test(ra_test_policy{pessimistic});
   }
END_TEST

BEGIN_TEST(regalloc.linear_vgpr.alloc.killed_op)
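   /* A linear VGPR created from a killed operand that already sits at the
    * top of the register file can simply take over the operand's register.
    */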
   for (bool pessimistic : {false, true}) {
      const char* subvariant = pessimistic ? "_pessimistic" : "_optimistic";
      if (!setup_cs("", GFX8, CHIP_UNKNOWN, subvariant))
         continue;

      //>> v31: %tmp0:v[0-30] = p_unit_test
      //! v1: %tmp1:v[31] = p_unit_test
      Temp tmp0 =
         bld.pseudo(aco_opcode::p_unit_test, bld.def(RegClass::get(RegType::vgpr, 31 * 4)));
      Temp tmp1 = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1));

      //! lv1: %ltmp0:v[31] = p_start_linear_vgpr %tmp1:v[31]
      //! p_end_linear_vgpr %ltmp0:v[31]
      Temp ltmp0 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v1.as_linear()), tmp1);
      end_linear_vgpr(ltmp0);

      bld.pseudo(aco_opcode::p_unit_test, tmp0);

      finish_ra_test(ra_test_policy{pessimistic});
   }
END_TEST

BEGIN_TEST(regalloc.linear_vgpr.alloc.move_killed_op)
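   /* Like regalloc.linear_vgpr.alloc.killed_op, but the killed operand is
    * not at the top of the register file, so it is first swapped with %tmp2.
    */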
   for (bool pessimistic : {false, true}) {
      const char* subvariant = pessimistic ? "_pessimistic" : "_optimistic";
      if (!setup_cs("", GFX8, CHIP_UNKNOWN, subvariant))
         continue;

      //>> v30: %tmp0:v[0-29] = p_unit_test
      //! v1: %tmp1:v[30] = p_unit_test
      //! v1: %tmp2:v[31] = p_unit_test
      Temp tmp0 =
         bld.pseudo(aco_opcode::p_unit_test, bld.def(RegClass::get(RegType::vgpr, 30 * 4)));
      Temp tmp1 = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1));
      Temp tmp2 = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1));

      //~gfx8_optimistic! v1: %tmp1_2:v[31], v1: %tmp2_2:v[30] = p_parallelcopy %tmp1:v[30], %tmp2:v[31]
      //~gfx8_pessimistic! v1: %tmp2_2:v[30], v1: %tmp1_2:v[31] = p_parallelcopy %tmp2:v[31], %tmp1:v[30]
      //! lv1: %ltmp0:v[31] = p_start_linear_vgpr %tmp1_2:v[31]
      //! p_end_linear_vgpr %ltmp0:v[31]
      Temp ltmp0 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v1.as_linear()), tmp1);
      end_linear_vgpr(ltmp0);

      //! p_unit_test %tmp0:v[0-29], %tmp2_2:v[30]
      bld.pseudo(aco_opcode::p_unit_test, tmp0, tmp2);

      finish_ra_test(ra_test_policy{pessimistic});
   }
END_TEST

BEGIN_TEST(regalloc.linear_vgpr.compact_for_future_def)
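   /* The linear VGPR area is compacted already in BB0, so that the large
    * definition in BB1 has enough room.
    */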
   for (bool cbr : {false, true}) {
      const char* subvariant = cbr ? "_cbranch" : "_branch";
      if (!setup_cs("", GFX8, CHIP_UNKNOWN, subvariant))
         continue;

      //>> lv2: %ltmp0:v[30-31] = p_start_linear_vgpr
      //! lv1: %ltmp1:v[29] = p_start_linear_vgpr
      //! lv1: %ltmp2:v[28] = p_start_linear_vgpr
      //! p_end_linear_vgpr %ltmp1:v[29]
      Temp ltmp0 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v2.as_linear()));
      Temp ltmp1 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v1.as_linear()));
      Temp ltmp2 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v1.as_linear()));
      end_linear_vgpr(ltmp1);

      //! s1: %scc_tmp:scc = p_unit_test
      Temp scc_tmp = bld.pseudo(aco_opcode::p_unit_test, bld.def(s1, scc));

      //! lv1: %ltmp2_2:v[29] = p_parallelcopy %ltmp2:v[28]
      //~gfx8_cbranch! s2: %_:s[0-1] = p_cbranch_z %scc_tmp:scc
      //~gfx8_branch! s2: %_:s[0-1] = p_branch
      if (cbr)
         bld.branch(aco_opcode::p_cbranch_z, bld.def(s2), bld.scc(scc_tmp));
      else
         bld.branch(aco_opcode::p_branch, bld.def(s2));

      //! BB1
      //! /* logical preds: BB0, / linear preds: BB0, / kind: */
      bld.reset(program->create_and_insert_block());
      program->blocks[1].linear_preds.push_back(0);
      program->blocks[1].logical_preds.push_back(0);

      //! v29: %_:v[0-28] = p_unit_test
      //! s2: %_:s[0-1] = p_branch
      bld.pseudo(aco_opcode::p_unit_test, bld.def(RegClass::get(RegType::vgpr, 29 * 4)));
      bld.branch(aco_opcode::p_branch, bld.def(s2));

      //! BB2
      //! /* logical preds: BB1, / linear preds: BB1, / kind: uniform, top-level, */
      bld.reset(program->create_and_insert_block());
      program->blocks[2].linear_preds.push_back(1);
      program->blocks[2].logical_preds.push_back(1);
      program->blocks[2].kind |= block_kind_top_level;

      //! p_end_linear_vgpr %ltmp0_2:v[30-31]
      //! p_end_linear_vgpr %ltmp2_2:v[29]
      end_linear_vgpr(ltmp0);
      end_linear_vgpr(ltmp2);

      finish_ra_test(ra_test_policy());

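      /* Check that RA recorded on the parallelcopy whether scc held a live
       * value, so that lowering knows whether a scratch SGPR is needed to
       * preserve it.
       */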
      //~gfx8_cbranch>> lv1: %ltmp2_2:v[29] = p_parallelcopy %ltmp2:v[28] scc:1 scratch:s1
      //~gfx8_branch>> lv1: %ltmp2_2:v[29] = p_parallelcopy %ltmp2:v[28] scc:0 scratch:s0
      aco_ptr<Instruction>& parallelcopy = program->blocks[0].instructions[6];
      aco_print_instr(program->gfx_level, parallelcopy.get(), output);
      if (parallelcopy->isPseudo()) {
         fprintf(output, " scc:%u scratch:s%u\n", parallelcopy->pseudo().tmp_in_scc,
                 parallelcopy->pseudo().scratch_sgpr.reg());
      } else {
         fprintf(output, "\n");
      }
   }
END_TEST

BEGIN_TEST(regalloc.linear_vgpr.compact_for_future_phis)
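   /* Like regalloc.linear_vgpr.compact_for_future_def, but here the room is
    * needed for a phi definition in the merge block.
    */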
   for (bool cbr : {false, true}) {
      const char* subvariant = cbr ? "_cbranch" : "_branch";
      if (!setup_cs("", GFX8, CHIP_UNKNOWN, subvariant))
         continue;

      //>> lv1: %ltmp0:v[31] = p_start_linear_vgpr
      //! lv1: %ltmp1:v[30] = p_start_linear_vgpr
      //! lv1: %ltmp2:v[29] = p_start_linear_vgpr
      //! p_end_linear_vgpr %ltmp1:v[30]
      Temp ltmp0 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v1.as_linear()));
      Temp ltmp1 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v1.as_linear()));
      Temp ltmp2 = bld.pseudo(aco_opcode::p_start_linear_vgpr, bld.def(v1.as_linear()));
      end_linear_vgpr(ltmp1);

      //! lv1: %ltmp2_2:v[30] = p_parallelcopy %ltmp2:v[29]
      //~gfx8_cbranch! s2: %_:s[0-1] = p_cbranch_z %_:scc
      //~gfx8_branch! s2: %_:s[0-1] = p_branch
      if (cbr)
         bld.branch(aco_opcode::p_cbranch_z, bld.def(s2), Operand(scc, s1));
      else
         bld.branch(aco_opcode::p_branch, bld.def(s2));

      //! BB1
      //! /* logical preds: BB0, / linear preds: BB0, / kind: */
      bld.reset(program->create_and_insert_block());
      program->blocks[1].linear_preds.push_back(0);
      program->blocks[1].logical_preds.push_back(0);

      //! s2: %_:s[0-1] = p_branch
      bld.branch(aco_opcode::p_branch, bld.def(s2));

      //! BB2
      //! /* logical preds: BB1, / linear preds: BB1, / kind: uniform, top-level, */
      bld.reset(program->create_and_insert_block());
      program->blocks[2].linear_preds.push_back(1);
      program->blocks[2].logical_preds.push_back(1);
      program->blocks[2].kind |= block_kind_top_level;

      RegClass v30 = RegClass::get(RegType::vgpr, 30 * 4);
      //! v30: %tmp:v[0-29] = p_phi v30: undef
      //! p_unit_test %tmp:v[0-29]
      Temp tmp = bld.pseudo(aco_opcode::p_phi, bld.def(v30), Operand(v30));
      bld.pseudo(aco_opcode::p_unit_test, tmp);

      //! p_end_linear_vgpr %ltmp0_2:v[31]
      //! p_end_linear_vgpr %ltmp2_2:v[30]
      end_linear_vgpr(ltmp0);
      end_linear_vgpr(ltmp2);

      finish_ra_test(ra_test_policy());
   }
END_TEST