/* xref: /aosp_15_r20/external/mesa3d/src/asahi/compiler/test/test-optimizer.cpp
 * (revision 6104692788411f58d303aa86923a9ff6ecaded22) */
/*
 * Copyright 2021 Collabora, Ltd.
 * SPDX-License-Identifier: MIT
 */

#include "agx_builder.h"
#include "agx_compiler.h"
#include "agx_test.h"

#include <gtest/gtest.h>

/*
 * Pass pipeline under test: run the optimizer, then dead-code elimination so
 * instructions made unused by the optimizer disappear before comparison.
 */
static void
agx_optimize_and_dce(agx_context *ctx)
{
   agx_optimizer(ctx);
   agx_dce(ctx, true);
}

/*
 * Build two shaders — one from `instr`, one from `expected` — run
 * agx_optimize_and_dce on the first, and assert the results match.
 * `size` selects the bit size of the `out` temporary, and `returns`
 * controls whether `out` is fed to agx_unit_test so that DCE does not
 * delete the instruction under test.
 */
#define CASE(instr, expected, size, returns)                                   \
   INSTRUCTION_CASE(                                                           \
      {                                                                        \
         UNUSED agx_index out = agx_temp(b->shader, AGX_SIZE_##size);          \
         instr;                                                                \
         if (returns)                                                          \
            agx_unit_test(b, out);                                             \
      },                                                                       \
      {                                                                        \
         UNUSED agx_index out = agx_temp(b->shader, AGX_SIZE_##size);          \
         expected;                                                             \
         if (returns)                                                          \
            agx_unit_test(b, out);                                             \
      },                                                                       \
      agx_optimize_and_dce)

/* Negative case: the optimizer must leave `instr` unchanged. */
#define NEGCASE(instr, size) CASE(instr, instr, size, true)

#define CASE16(instr, expected) CASE(instr, expected, 16, true)
#define CASE32(instr, expected) CASE(instr, expected, 32, true)

/* For control-flow instructions that produce no testable value. */
#define CASE_NO_RETURN(instr, expected)                                        \
   CASE(instr, expected, 32 /* irrelevant */, false)

#define NEGCASE16(instr) NEGCASE(instr, 16)
#define NEGCASE32(instr) NEGCASE(instr, 32)

46 static inline agx_index
agx_fmov(agx_builder * b,agx_index s0)47 agx_fmov(agx_builder *b, agx_index s0)
48 {
49    agx_index tmp = agx_temp(b->shader, s0.size);
50    agx_fmov_to(b, tmp, s0);
51    return tmp;
52 }
53 
/*
 * Shared fixture: a ralloc context plus canned register operands used as
 * instruction sources throughout the cases — wx/wy/wz are 32-bit, hx/hy/hz
 * are 16-bit. Note the 32-bit registers step by 2 (0/2/4) while the 16-bit
 * ones step by 1 (0/1/2); registers appear to be numbered in 16-bit units.
 */
class Optimizer : public testing::Test {
 protected:
   Optimizer()
   {
      mem_ctx = ralloc_context(NULL);

      wx = agx_register(0, AGX_SIZE_32);
      wy = agx_register(2, AGX_SIZE_32);
      wz = agx_register(4, AGX_SIZE_32);

      hx = agx_register(0, AGX_SIZE_16);
      hy = agx_register(1, AGX_SIZE_16);
      hz = agx_register(2, AGX_SIZE_16);
   }

   ~Optimizer()
   {
      ralloc_free(mem_ctx);
   }

   /* Owns all allocations made during a test. */
   void *mem_ctx;

   agx_index wx, wy, wz, hx, hy, hz;
};

/* A same-size fmov is a float copy: it must be propagated into the fadd
 * even when the use site carries an abs or neg source modifier. */
TEST_F(Optimizer, FloatCopyprop)
{
   CASE32(agx_fadd_to(b, out, agx_abs(agx_fmov(b, wx)), wy),
          agx_fadd_to(b, out, agx_abs(wx), wy));

   CASE32(agx_fadd_to(b, out, agx_neg(agx_fmov(b, wx)), wy),
          agx_fadd_to(b, out, agx_neg(wx), wy));
}

/* A cross-size fmov is a float conversion; the optimizer folds it into the
 * consumer (16->32 on a source) or into the producer (32->16 on a dest). */
TEST_F(Optimizer, FloatConversion)
{
   CASE32(
      {
         agx_index cvt = agx_temp(b->shader, AGX_SIZE_32);
         agx_fmov_to(b, cvt, hx);
         agx_fadd_to(b, out, cvt, wy);
      },
      { agx_fadd_to(b, out, hx, wy); });

   CASE16(
      {
         agx_index sum = agx_temp(b->shader, AGX_SIZE_32);
         agx_fadd_to(b, sum, wx, wy);
         agx_fmov_to(b, out, sum);
      },
      { agx_fadd_to(b, out, wx, wy); });
}

/* An fmov of an already-modified source (abs, neg(abs)) folds away with the
 * modifier carried onto the consuming instruction. */
TEST_F(Optimizer, FusedFABSNEG)
{
   CASE32(agx_fadd_to(b, out, agx_fmov(b, agx_abs(wx)), wy),
          agx_fadd_to(b, out, agx_abs(wx), wy));

   CASE32(agx_fmul_to(b, out, wx, agx_fmov(b, agx_neg(agx_abs(wx)))),
          agx_fmul_to(b, out, wx, agx_neg(agx_abs(wx))));
}

/* abs applied on top of an fmov-of-abs collapses to a single abs. */
TEST_F(Optimizer, FusedFabsAbsorb)
{
   CASE32(agx_fadd_to(b, out, agx_abs(agx_fmov(b, agx_abs(wx))), wy),
          agx_fadd_to(b, out, agx_abs(wx), wy));
}

/* neg of an fmov-of-neg cancels; an inner abs survives the cancellation. */
TEST_F(Optimizer, FusedFnegCancel)
{
   CASE32(agx_fmul_to(b, out, wx, agx_neg(agx_fmov(b, agx_neg(wx)))),
          agx_fmul_to(b, out, wx, wx));

   CASE32(agx_fmul_to(b, out, wx, agx_neg(agx_fmov(b, agx_neg(agx_abs(wx))))),
          agx_fmul_to(b, out, wx, agx_abs(wx)));
}

/*
 * Bitwise complement fusion: not() on the result of and/or/xor becomes the
 * complemented opcode (nand/nor/xnor); not() on sources folds into the
 * negated-operand variants (andn1/andn2/orn2/xnor), and double negation
 * cancels. Also checked against immediates and uniform sources.
 */
TEST_F(Optimizer, FusedNot)
{
   /* not(op(a, b)) -> complemented opcode */
   CASE32(agx_not_to(b, out, agx_and(b, wx, wx)), agx_nand_to(b, out, wx, wx));

   CASE32(agx_not_to(b, out, agx_or(b, wx, wx)), agx_nor_to(b, out, wx, wx));

   CASE32(agx_not_to(b, out, agx_xor(b, wx, wx)), agx_xnor_to(b, out, wx, wx));

   /* two source inversions on xor cancel out */
   CASE32(agx_xor_to(b, out, agx_not(b, wx), agx_not(b, wx)),
          agx_xor_to(b, out, wx, wx));

   CASE32(agx_xor_to(b, out, agx_not(b, wx), wx), agx_xnor_to(b, out, wx, wx));

   CASE32(agx_xor_to(b, out, wx, agx_not(b, wx)), agx_xnor_to(b, out, wx, wx));

   CASE32(agx_nand_to(b, out, agx_not(b, wx), agx_not(b, wx)),
          agx_or_to(b, out, wx, wx));

   /* not() folding into instructions that already negate an operand */
   CASE32(agx_andn1_to(b, out, agx_not(b, wx), wx), agx_and_to(b, out, wx, wx));

   CASE32(agx_andn1_to(b, out, wx, agx_not(b, wx)), agx_nor_to(b, out, wx, wx));

   CASE32(agx_andn2_to(b, out, agx_not(b, wx), wx), agx_nor_to(b, out, wx, wx));

   CASE32(agx_andn2_to(b, out, wx, agx_not(b, wx)), agx_and_to(b, out, wx, wx));

   /* fusion still applies with uniform and immediate other operands */
   CASE32(agx_xor_to(b, out, agx_not(b, wx), agx_uniform(8, AGX_SIZE_32)),
          agx_xnor_to(b, out, wx, agx_uniform(8, AGX_SIZE_32)));

   CASE32(agx_or_to(b, out, agx_immediate(123), agx_not(b, wx)),
          agx_orn2_to(b, out, agx_immediate(123), wx));

   CASE32(agx_xor_to(b, out, wx, agx_not(b, wy)), agx_xnor_to(b, out, wx, wy));

   CASE32(agx_xor_to(b, out, wy, agx_not(b, wx)), agx_xnor_to(b, out, wy, wx));

   CASE32(agx_and_to(b, out, agx_not(b, wx), wy), agx_andn1_to(b, out, wx, wy));

   CASE32(agx_or_to(b, out, wx, agx_not(b, wy)), agx_orn2_to(b, out, wx, wy));
}

/* A saturating fmov of an fmul followed by a 32->16 conversion collapses to
 * a single saturating fmul writing the 16-bit destination directly. */
TEST_F(Optimizer, FmulFsatF2F16)
{
   CASE16(
      {
         agx_index tmp = agx_temp(b->shader, AGX_SIZE_32);
         agx_fmov_to(b, tmp, agx_fmul(b, wx, wy))->saturate = true;
         agx_fmov_to(b, out, tmp);
      },
      { agx_fmul_to(b, out, wx, wy)->saturate = true; });
}

/* Plain (integer) mov sources are copy-propagated into consumers. */
TEST_F(Optimizer, Copyprop)
{
   CASE32(agx_fmul_to(b, out, wx, agx_mov(b, wy)), agx_fmul_to(b, out, wx, wy));
   CASE32(agx_fmul_to(b, out, agx_mov(b, wx), agx_mov(b, wy)),
          agx_fmul_to(b, out, wx, wy));
}

/* A moved immediate must NOT be inlined into a collect source — per the test
 * name this is an encoding/inlining hazard, so the shader stays unchanged. */
TEST_F(Optimizer, InlineHazards)
{
   NEGCASE32({
      agx_index zero = agx_mov_imm(b, AGX_SIZE_32, 0);
      agx_instr *I = agx_collect_to(b, out, 4);

      I->src[0] = zero;
      I->src[1] = wy;
      I->src[2] = wz;
      I->src[3] = wz;
   });
}

/* Copy-propagating an integer mov into a float use must preserve the abs,
 * neg, and neg(abs) source modifiers on the use. */
TEST_F(Optimizer, CopypropRespectsAbsNeg)
{
   CASE32(agx_fadd_to(b, out, agx_abs(agx_mov(b, wx)), wy),
          agx_fadd_to(b, out, agx_abs(wx), wy));

   CASE32(agx_fadd_to(b, out, agx_neg(agx_mov(b, wx)), wy),
          agx_fadd_to(b, out, agx_neg(wx), wy));

   CASE32(agx_fadd_to(b, out, agx_neg(agx_abs(agx_mov(b, wx))), wy),
          agx_fadd_to(b, out, agx_neg(agx_abs(wx)), wy));
}

/* Same-size mov sources propagate into integer ALU instructions too. */
TEST_F(Optimizer, IntCopyprop)
{
   CASE32(agx_xor_to(b, out, agx_mov(b, wx), wy), agx_xor_to(b, out, wx, wy));
}

/* Splitting a moved 64-bit uniform folds to the two 32-bit uniform halves at
 * consecutive slots (40 and 42), eliminating both the mov and the split. */
TEST_F(Optimizer, CopypropSplitMovedUniform64)
{
   CASE32(
      {
         /* emit_load_preamble puts in the move, so we do too */
         agx_index mov = agx_mov(b, agx_uniform(40, AGX_SIZE_64));
         agx_instr *spl = agx_split(b, 2, mov);
         spl->dest[0] = agx_temp(b->shader, AGX_SIZE_32);
         spl->dest[1] = agx_temp(b->shader, AGX_SIZE_32);
         agx_xor_to(b, out, spl->dest[0], spl->dest[1]);
      },
      {
         agx_xor_to(b, out, agx_uniform(40, AGX_SIZE_32),
                    agx_uniform(42, AGX_SIZE_32));
      });
}

/* A cross-size mov (16->32) is a conversion, not a copy; integer copyprop
 * must leave it alone. */
TEST_F(Optimizer, IntCopypropDoesntConvert)
{
   NEGCASE32({
      agx_index cvt = agx_temp(b->shader, AGX_SIZE_32);
      agx_mov_to(b, cvt, hx);
      agx_xor_to(b, out, cvt, wy);
   });
}

/* Preloads must not be copy-propagated — presumably they pin a hardware
 * register value at shader entry, so the shader stays unchanged. */
TEST_F(Optimizer, SkipPreloads)
{
   NEGCASE32({
      agx_index preload = agx_preload(b, agx_register(0, AGX_SIZE_32));
      agx_xor_to(b, out, preload, wy);
   });
}

/* 16-bit ALU instructions must not absorb 32<->16 float conversions, in
 * either direction (converted source or converted result). */
TEST_F(Optimizer, NoConversionsOn16BitALU)
{
   NEGCASE16({
      agx_index cvt = agx_temp(b->shader, AGX_SIZE_16);
      agx_fmov_to(b, cvt, wx);
      agx_fadd_to(b, out, cvt, hy);
   });

   NEGCASE32(agx_fmov_to(b, out, agx_fadd(b, hx, hy)));
}

/* A compare feeding a (quad_)ballot fuses into the combined
 * icmp/fcmp_(quad_)ballot instruction, keeping condition and invert flag. */
TEST_F(Optimizer, BallotCondition)
{
   CASE32(agx_ballot_to(b, out, agx_icmp(b, wx, wy, AGX_ICOND_UEQ, true)),
          agx_icmp_ballot_to(b, out, wx, wy, AGX_ICOND_UEQ, true));

   CASE32(agx_ballot_to(b, out, agx_fcmp(b, wx, wy, AGX_FCOND_GE, false)),
          agx_fcmp_ballot_to(b, out, wx, wy, AGX_FCOND_GE, false));

   CASE32(agx_quad_ballot_to(b, out, agx_icmp(b, wx, wy, AGX_ICOND_UEQ, true)),
          agx_icmp_quad_ballot_to(b, out, wx, wy, AGX_ICOND_UEQ, true));

   CASE32(agx_quad_ballot_to(b, out, agx_fcmp(b, wx, wy, AGX_FCOND_GT, false)),
          agx_fcmp_quad_ballot_to(b, out, wx, wy, AGX_FCOND_GT, false));
}

/* When the compare has another use, the ballot is still rewritten to the
 * fused form but the original compare must survive for that other use. */
TEST_F(Optimizer, BallotMultipleUses)
{
   CASE32(
      {
         agx_index cmp = agx_fcmp(b, wx, wy, AGX_FCOND_GT, false);
         agx_index ballot = agx_quad_ballot(b, cmp);
         agx_fadd_to(b, out, cmp, ballot);
      },
      {
         agx_index cmp = agx_fcmp(b, wx, wy, AGX_FCOND_GT, false);
         agx_index ballot =
            agx_fcmp_quad_ballot(b, wx, wy, AGX_FCOND_GT, false);
         agx_fadd_to(b, out, cmp, ballot);
      });
}

/*
 * We had a bug where the ballot optimization didn't check the agx_index's type
 * so would fuse constants with overlapping values. An unrelated common code
 * change surfaced this in CTS case:
 *
 *    dEQP-VK.subgroups.vote.frag_helper.subgroupallequal_bool_fragment
 *
 * We passed Vulkan CTS without hitting it though, hence the targeted test.
 */
TEST_F(Optimizer, BallotConstant)
{
   CASE32(
      {
         agx_index cmp = agx_fcmp(b, wx, wy, AGX_FCOND_GT, false);
         /* immediate with the same .value as cmp — must NOT be fused */
         agx_index ballot = agx_quad_ballot(b, agx_immediate(cmp.value));
         agx_index ballot2 = agx_quad_ballot(b, cmp);
         agx_fadd_to(b, out, ballot, agx_fadd(b, ballot2, cmp));
      },
      {
         agx_index cmp = agx_fcmp(b, wx, wy, AGX_FCOND_GT, false);
         agx_index ballot = agx_quad_ballot(b, agx_immediate(cmp.value));
         agx_index ballot2 =
            agx_fcmp_quad_ballot(b, wx, wy, AGX_FCOND_GT, false);
         agx_fadd_to(b, out, ballot, agx_fadd(b, ballot2, cmp));
      });
}

/* A compare tested against zero by if_icmp fuses into an if_icmp/if_fcmp on
 * the original operands with the original condition, for 32- and 16-bit. */
TEST_F(Optimizer, IfCondition)
{
   CASE_NO_RETURN(agx_if_icmp(b, agx_icmp(b, wx, wy, AGX_ICOND_UEQ, true),
                              agx_zero(), 1, AGX_ICOND_UEQ, true, NULL),
                  agx_if_icmp(b, wx, wy, 1, AGX_ICOND_UEQ, true, NULL));

   CASE_NO_RETURN(agx_if_icmp(b, agx_fcmp(b, wx, wy, AGX_FCOND_EQ, true),
                              agx_zero(), 1, AGX_ICOND_UEQ, true, NULL),
                  agx_if_fcmp(b, wx, wy, 1, AGX_FCOND_EQ, true, NULL));

   CASE_NO_RETURN(agx_if_icmp(b, agx_fcmp(b, hx, hy, AGX_FCOND_LT, false),
                              agx_zero(), 1, AGX_ICOND_UEQ, true, NULL),
                  agx_if_fcmp(b, hx, hy, 1, AGX_FCOND_LT, false, NULL));
}

/* A compare feeding an icmpsel-against-zero fuses into (i|f)cmpsel on the
 * compare's operands; an inverted compare swaps the select arms instead. */
TEST_F(Optimizer, SelectCondition)
{
   CASE32(agx_icmpsel_to(b, out, agx_icmp(b, wx, wy, AGX_ICOND_UEQ, false),
                         agx_zero(), wz, wx, AGX_ICOND_UEQ),
          agx_icmpsel_to(b, out, wx, wy, wx, wz, AGX_ICOND_UEQ));

   CASE32(agx_icmpsel_to(b, out, agx_icmp(b, wx, wy, AGX_ICOND_UEQ, true),
                         agx_zero(), wz, wx, AGX_ICOND_UEQ),
          agx_icmpsel_to(b, out, wx, wy, wz, wx, AGX_ICOND_UEQ));

   CASE32(agx_icmpsel_to(b, out, agx_fcmp(b, wx, wy, AGX_FCOND_EQ, false),
                         agx_zero(), wz, wx, AGX_ICOND_UEQ),
          agx_fcmpsel_to(b, out, wx, wy, wx, wz, AGX_FCOND_EQ));

   CASE32(agx_icmpsel_to(b, out, agx_fcmp(b, wx, wy, AGX_FCOND_LT, true),
                         agx_zero(), wz, wx, AGX_ICOND_UEQ),
          agx_fcmpsel_to(b, out, wx, wy, wz, wx, AGX_FCOND_LT));
}

/* xor with 1 inverts a boolean; folding it into the if flips the invert_cond
 * flag instead of emitting the xor. */
TEST_F(Optimizer, IfInverted)
{
   CASE_NO_RETURN(
      agx_if_icmp(b, agx_xor(b, hx, agx_immediate(1)), agx_zero(), 1,
                  AGX_ICOND_UEQ, true, NULL),
      agx_if_icmp(b, hx, agx_zero(), 1, AGX_ICOND_UEQ, false, NULL));

   CASE_NO_RETURN(agx_if_icmp(b, agx_xor(b, hx, agx_immediate(1)), agx_zero(),
                              1, AGX_ICOND_UEQ, false, NULL),
                  agx_if_icmp(b, hx, agx_zero(), 1, AGX_ICOND_UEQ, true, NULL));
}

/* Combination of the two previous rewrites: xor-1 on a compare result folds
 * into the if with the compare fused AND its invert flag flipped. */
TEST_F(Optimizer, IfInvertedCondition)
{
   CASE_NO_RETURN(
      agx_if_icmp(
         b,
         agx_xor(b, agx_icmp(b, wx, wy, AGX_ICOND_UEQ, true), agx_immediate(1)),
         agx_zero(), 1, AGX_ICOND_UEQ, true, NULL),
      agx_if_icmp(b, wx, wy, 1, AGX_ICOND_UEQ, false, NULL));

   CASE_NO_RETURN(
      agx_if_icmp(
         b,
         agx_xor(b, agx_fcmp(b, wx, wy, AGX_FCOND_EQ, true), agx_immediate(1)),
         agx_zero(), 1, AGX_ICOND_UEQ, true, NULL),
      agx_if_fcmp(b, wx, wy, 1, AGX_FCOND_EQ, false, NULL));

   CASE_NO_RETURN(
      agx_if_icmp(
         b,
         agx_xor(b, agx_fcmp(b, hx, hy, AGX_FCOND_LT, false), agx_immediate(1)),
         agx_zero(), 1, AGX_ICOND_UEQ, true, NULL),
      agx_if_fcmp(b, hx, hy, 1, AGX_FCOND_LT, true, NULL));
}
392