1 /*
2 * Copyright 2021 Collabora, Ltd.
3 * SPDX-License-Identifier: MIT
4 */
5
6 #include "agx_builder.h"
7 #include "agx_compiler.h"
8 #include "agx_test.h"
9
10 #include <gtest/gtest.h>
11
/*
 * Pass pipeline under test: run the AGX optimizer, then dead-code
 * elimination. NOTE(review): the boolean passed to agx_dce presumably
 * selects a more aggressive elimination mode -- confirm in agx_compiler.h.
 */
static void
agx_optimize_and_dce(agx_context *ctx)
{
   agx_optimizer(ctx);
   agx_dce(ctx, true);
}
18
/*
 * Build two shaders -- one from `instr`, one from `expected` -- run
 * agx_optimize_and_dce over the first, and require the results to agree
 * (the comparison itself lives in INSTRUCTION_CASE, from agx_test.h).
 * `out` is a fresh temporary of AGX_SIZE_##size available to both bodies;
 * when `returns` is true it is fed to agx_unit_test so the value is
 * live-out and survives DCE.
 */
#define CASE(instr, expected, size, returns)                                   \
   INSTRUCTION_CASE(                                                           \
      {                                                                        \
         UNUSED agx_index out = agx_temp(b->shader, AGX_SIZE_##size);          \
         instr;                                                                \
         if (returns)                                                          \
            agx_unit_test(b, out);                                             \
      },                                                                       \
      {                                                                        \
         UNUSED agx_index out = agx_temp(b->shader, AGX_SIZE_##size);          \
         expected;                                                             \
         if (returns)                                                          \
            agx_unit_test(b, out);                                             \
      },                                                                       \
      agx_optimize_and_dce)

/* Negative case: the optimizer must leave `instr` unchanged. */
#define NEGCASE(instr, size) CASE(instr, instr, size, true)

/* Positive cases with a 16-bit or 32-bit live-out result. */
#define CASE16(instr, expected) CASE(instr, expected, 16, true)
#define CASE32(instr, expected) CASE(instr, expected, 32, true)

/* For sequences (e.g. control flow) whose effect is not a returned value. */
#define CASE_NO_RETURN(instr, expected)                                        \
   CASE(instr, expected, 32 /* irrelevant */, false)

#define NEGCASE16(instr) NEGCASE(instr, 16)
#define NEGCASE32(instr) NEGCASE(instr, 32)
45
/*
 * Emit an fmov of s0 into a fresh temporary of the same size and return
 * that temporary. Convenience wrapper used to construct the fmov-folding
 * cases below.
 */
static inline agx_index
agx_fmov(agx_builder *b, agx_index s0)
{
   agx_index tmp = agx_temp(b->shader, s0.size);
   agx_fmov_to(b, tmp, s0);
   return tmp;
}
53
54 class Optimizer : public testing::Test {
55 protected:
Optimizer()56 Optimizer()
57 {
58 mem_ctx = ralloc_context(NULL);
59
60 wx = agx_register(0, AGX_SIZE_32);
61 wy = agx_register(2, AGX_SIZE_32);
62 wz = agx_register(4, AGX_SIZE_32);
63
64 hx = agx_register(0, AGX_SIZE_16);
65 hy = agx_register(1, AGX_SIZE_16);
66 hz = agx_register(2, AGX_SIZE_16);
67 }
68
~Optimizer()69 ~Optimizer()
70 {
71 ralloc_free(mem_ctx);
72 }
73
74 void *mem_ctx;
75
76 agx_index wx, wy, wz, hx, hy, hz;
77 };
78
/*
 * An fmov source should be copy-propagated into a float ALU consumer,
 * preserving the abs/neg modifier applied on top of the moved value.
 */
TEST_F(Optimizer, FloatCopyprop)
{
   CASE32(agx_fadd_to(b, out, agx_abs(agx_fmov(b, wx)), wy),
          agx_fadd_to(b, out, agx_abs(wx), wy));

   CASE32(agx_fadd_to(b, out, agx_neg(agx_fmov(b, wx)), wy),
          agx_fadd_to(b, out, agx_neg(wx), wy));
}
87
/*
 * Size-converting fmovs (f16<->f32) should fold into the float op they
 * feed or consume, both on the source side and on the destination side.
 */
TEST_F(Optimizer, FloatConversion)
{
   /* f16->f32 convert of a source folds into the fadd. */
   CASE32(
      {
         agx_index cvt = agx_temp(b->shader, AGX_SIZE_32);
         agx_fmov_to(b, cvt, hx);
         agx_fadd_to(b, out, cvt, wy);
      },
      { agx_fadd_to(b, out, hx, wy); });

   /* f32->f16 convert of the result folds into the fadd's destination. */
   CASE16(
      {
         agx_index sum = agx_temp(b->shader, AGX_SIZE_32);
         agx_fadd_to(b, sum, wx, wy);
         agx_fmov_to(b, out, sum);
      },
      { agx_fadd_to(b, out, wx, wy); });
}
106
/*
 * An fmov whose source already carries abs/neg modifiers should disappear,
 * with the modifiers forwarded to the consumer.
 */
TEST_F(Optimizer, FusedFABSNEG)
{
   CASE32(agx_fadd_to(b, out, agx_fmov(b, agx_abs(wx)), wy),
          agx_fadd_to(b, out, agx_abs(wx), wy));

   CASE32(agx_fmul_to(b, out, wx, agx_fmov(b, agx_neg(agx_abs(wx)))),
          agx_fmul_to(b, out, wx, agx_neg(agx_abs(wx))));
}
115
/* abs(abs(x)) collapses to a single abs when the fmov is propagated. */
TEST_F(Optimizer, FusedFabsAbsorb)
{
   CASE32(agx_fadd_to(b, out, agx_abs(agx_fmov(b, agx_abs(wx))), wy),
          agx_fadd_to(b, out, agx_abs(wx), wy));
}
121
/* neg(neg(x)) cancels when the fmov is propagated; abs is kept. */
TEST_F(Optimizer, FusedFnegCancel)
{
   CASE32(agx_fmul_to(b, out, wx, agx_neg(agx_fmov(b, agx_neg(wx)))),
          agx_fmul_to(b, out, wx, wx));

   CASE32(agx_fmul_to(b, out, wx, agx_neg(agx_fmov(b, agx_neg(agx_abs(wx))))),
          agx_fmul_to(b, out, wx, agx_abs(wx)));
}
130
/*
 * Bitwise NOT fuses with the logic op it wraps (or that feeds it):
 * not(and) -> nand, not(or) -> nor, not(xor) -> xnor, and a NOT on a
 * source flips the op (xor -> xnor, and -> andn1, or -> orn2, ...).
 * Double NOTs cancel.
 */
TEST_F(Optimizer, FusedNot)
{
   /* NOT of a whole logic op becomes the negated op. */
   CASE32(agx_not_to(b, out, agx_and(b, wx, wx)), agx_nand_to(b, out, wx, wx));

   CASE32(agx_not_to(b, out, agx_or(b, wx, wx)), agx_nor_to(b, out, wx, wx));

   CASE32(agx_not_to(b, out, agx_xor(b, wx, wx)), agx_xnor_to(b, out, wx, wx));

   /* NOT on both xor sources cancels out entirely. */
   CASE32(agx_xor_to(b, out, agx_not(b, wx), agx_not(b, wx)),
          agx_xor_to(b, out, wx, wx));

   /* NOT on a single xor source flips xor to xnor, either side. */
   CASE32(agx_xor_to(b, out, agx_not(b, wx), wx), agx_xnor_to(b, out, wx, wx));

   CASE32(agx_xor_to(b, out, wx, agx_not(b, wx)), agx_xnor_to(b, out, wx, wx));

   /* De Morgan: nand of two NOTs is or. */
   CASE32(agx_nand_to(b, out, agx_not(b, wx), agx_not(b, wx)),
          agx_or_to(b, out, wx, wx));

   /* NOT feeding the already-negated operand of andn1/andn2 cancels;
    * feeding the other operand negates it instead. */
   CASE32(agx_andn1_to(b, out, agx_not(b, wx), wx), agx_and_to(b, out, wx, wx));

   CASE32(agx_andn1_to(b, out, wx, agx_not(b, wx)), agx_nor_to(b, out, wx, wx));

   CASE32(agx_andn2_to(b, out, agx_not(b, wx), wx), agx_nor_to(b, out, wx, wx));

   CASE32(agx_andn2_to(b, out, wx, agx_not(b, wx)), agx_and_to(b, out, wx, wx));

   /* Fusion also applies with uniform and immediate co-sources. */
   CASE32(agx_xor_to(b, out, agx_not(b, wx), agx_uniform(8, AGX_SIZE_32)),
          agx_xnor_to(b, out, wx, agx_uniform(8, AGX_SIZE_32)));

   CASE32(agx_or_to(b, out, agx_immediate(123), agx_not(b, wx)),
          agx_orn2_to(b, out, agx_immediate(123), wx));

   /* Distinct-source variants of the same rewrites. */
   CASE32(agx_xor_to(b, out, wx, agx_not(b, wy)), agx_xnor_to(b, out, wx, wy));

   CASE32(agx_xor_to(b, out, wy, agx_not(b, wx)), agx_xnor_to(b, out, wy, wx));

   CASE32(agx_and_to(b, out, agx_not(b, wx), wy), agx_andn1_to(b, out, wx, wy));

   CASE32(agx_or_to(b, out, wx, agx_not(b, wy)), agx_orn2_to(b, out, wx, wy));
}
171
/*
 * A saturating fmov of an fmul result, followed by an f32->f16 convert,
 * collapses into a single saturating fmul writing the 16-bit destination.
 */
TEST_F(Optimizer, FmulFsatF2F16)
{
   CASE16(
      {
         agx_index tmp = agx_temp(b->shader, AGX_SIZE_32);
         agx_fmov_to(b, tmp, agx_fmul(b, wx, wy))->saturate = true;
         agx_fmov_to(b, out, tmp);
      },
      { agx_fmul_to(b, out, wx, wy)->saturate = true; });
}
182
/* Plain integer movs copy-propagate into consumers, on one or both sources. */
TEST_F(Optimizer, Copyprop)
{
   CASE32(agx_fmul_to(b, out, wx, agx_mov(b, wy)), agx_fmul_to(b, out, wx, wy));
   CASE32(agx_fmul_to(b, out, agx_mov(b, wx), agx_mov(b, wy)),
          agx_fmul_to(b, out, wx, wy));
}
189
/*
 * NOTE(review): collect apparently cannot take inline immediate sources, so
 * propagating the mov'd zero into the collect must be skipped -- the
 * sequence is expected to survive optimization untouched.
 */
TEST_F(Optimizer, InlineHazards)
{
   NEGCASE32({
      agx_index zero = agx_mov_imm(b, AGX_SIZE_32, 0);
      agx_instr *I = agx_collect_to(b, out, 4);

      I->src[0] = zero;
      I->src[1] = wy;
      I->src[2] = wz;
      I->src[3] = wz;
   });
}
202
/*
 * Copy-propagating a plain mov must keep any abs/neg modifier applied to
 * the mov's destination intact on the propagated source.
 */
TEST_F(Optimizer, CopypropRespectsAbsNeg)
{
   CASE32(agx_fadd_to(b, out, agx_abs(agx_mov(b, wx)), wy),
          agx_fadd_to(b, out, agx_abs(wx), wy));

   CASE32(agx_fadd_to(b, out, agx_neg(agx_mov(b, wx)), wy),
          agx_fadd_to(b, out, agx_neg(wx), wy));

   CASE32(agx_fadd_to(b, out, agx_neg(agx_abs(agx_mov(b, wx))), wy),
          agx_fadd_to(b, out, agx_neg(agx_abs(wx)), wy));
}
214
/* Copyprop also applies to sources of integer ALU ops. */
TEST_F(Optimizer, IntCopyprop)
{
   CASE32(agx_xor_to(b, out, agx_mov(b, wx), wy), agx_xor_to(b, out, wx, wy));
}
219
/*
 * Splitting a mov'd 64-bit uniform should rewrite the split destinations
 * as the two 32-bit uniform halves (uniform 40 -> uniforms 40 and 42),
 * eliminating both the mov and the split.
 */
TEST_F(Optimizer, CopypropSplitMovedUniform64)
{
   CASE32(
      {
         /* emit_load_preamble puts in the move, so we do too */
         agx_index mov = agx_mov(b, agx_uniform(40, AGX_SIZE_64));
         agx_instr *spl = agx_split(b, 2, mov);
         spl->dest[0] = agx_temp(b->shader, AGX_SIZE_32);
         spl->dest[1] = agx_temp(b->shader, AGX_SIZE_32);
         agx_xor_to(b, out, spl->dest[0], spl->dest[1]);
      },
      {
         agx_xor_to(b, out, agx_uniform(40, AGX_SIZE_32),
                    agx_uniform(42, AGX_SIZE_32));
      });
}
236
/*
 * A mov that changes size (16-bit source, 32-bit destination) is a
 * conversion, not a copy -- it must not be propagated into an integer op.
 */
TEST_F(Optimizer, IntCopypropDoesntConvert)
{
   NEGCASE32({
      agx_index cvt = agx_temp(b->shader, AGX_SIZE_32);
      agx_mov_to(b, cvt, hx);
      agx_xor_to(b, out, cvt, wy);
   });
}
245
/* Preloaded registers must not be copy-propagated through. */
TEST_F(Optimizer, SkipPreloads)
{
   NEGCASE32({
      agx_index preload = agx_preload(b, agx_register(0, AGX_SIZE_32));
      agx_xor_to(b, out, preload, wy);
   });
}
253
/*
 * Size-converting fmovs do not fold into 16-bit ALU ops: neither a
 * 32->16 convert feeding a 16-bit fadd, nor a 16-bit fadd feeding a
 * 16->32 convert.
 */
TEST_F(Optimizer, NoConversionsOn16BitALU)
{
   NEGCASE16({
      agx_index cvt = agx_temp(b->shader, AGX_SIZE_16);
      agx_fmov_to(b, cvt, wx);
      agx_fadd_to(b, out, cvt, hy);
   });

   NEGCASE32(agx_fmov_to(b, out, agx_fadd(b, hx, hy)));
}
264
/*
 * A (quad_)ballot of a comparison result fuses into a single
 * {i,f}cmp_(quad_)ballot instruction, keeping condition and invert flag.
 */
TEST_F(Optimizer, BallotCondition)
{
   CASE32(agx_ballot_to(b, out, agx_icmp(b, wx, wy, AGX_ICOND_UEQ, true)),
          agx_icmp_ballot_to(b, out, wx, wy, AGX_ICOND_UEQ, true));

   CASE32(agx_ballot_to(b, out, agx_fcmp(b, wx, wy, AGX_FCOND_GE, false)),
          agx_fcmp_ballot_to(b, out, wx, wy, AGX_FCOND_GE, false));

   CASE32(agx_quad_ballot_to(b, out, agx_icmp(b, wx, wy, AGX_ICOND_UEQ, true)),
          agx_icmp_quad_ballot_to(b, out, wx, wy, AGX_ICOND_UEQ, true));

   CASE32(agx_quad_ballot_to(b, out, agx_fcmp(b, wx, wy, AGX_FCOND_GT, false)),
          agx_fcmp_quad_ballot_to(b, out, wx, wy, AGX_FCOND_GT, false));
}
279
/*
 * When the comparison has uses besides the ballot, the ballot still fuses
 * into fcmp_quad_ballot while the original compare is kept alive for its
 * other use.
 */
TEST_F(Optimizer, BallotMultipleUses)
{
   CASE32(
      {
         agx_index cmp = agx_fcmp(b, wx, wy, AGX_FCOND_GT, false);
         agx_index ballot = agx_quad_ballot(b, cmp);
         agx_fadd_to(b, out, cmp, ballot);
      },
      {
         agx_index cmp = agx_fcmp(b, wx, wy, AGX_FCOND_GT, false);
         agx_index ballot =
            agx_fcmp_quad_ballot(b, wx, wy, AGX_FCOND_GT, false);
         agx_fadd_to(b, out, cmp, ballot);
      });
}
295
/*
 * We had a bug where the ballot optimization didn't check the agx_index's
 * type, and so would fuse constants whose raw values happened to collide
 * with an SSA index. An unrelated common-code change surfaced this in the
 * CTS case:
 *
 *    dEQP-VK.subgroups.vote.frag_helper.subgroupallequal_bool_fragment
 *
 * We passed Vulkan CTS without hitting it though, hence the targeted test.
 */
/*
 * A ballot of an immediate that merely shares its raw value with the
 * compare's SSA index must be left alone; only the ballot whose source is
 * actually the compare result fuses.
 */
TEST_F(Optimizer, BallotConstant)
{
   CASE32(
      {
         agx_index cmp = agx_fcmp(b, wx, wy, AGX_FCOND_GT, false);
         agx_index ballot = agx_quad_ballot(b, agx_immediate(cmp.value));
         agx_index ballot2 = agx_quad_ballot(b, cmp);
         agx_fadd_to(b, out, ballot, agx_fadd(b, ballot2, cmp));
      },
      {
         agx_index cmp = agx_fcmp(b, wx, wy, AGX_FCOND_GT, false);
         agx_index ballot = agx_quad_ballot(b, agx_immediate(cmp.value));
         agx_index ballot2 =
            agx_fcmp_quad_ballot(b, wx, wy, AGX_FCOND_GT, false);
         agx_fadd_to(b, out, ballot, agx_fadd(b, ballot2, cmp));
      });
}
322
/*
 * An if_icmp testing the boolean result of a compare against zero fuses
 * into an if_icmp/if_fcmp directly on the compare's operands, carrying the
 * original condition and invert flag.
 */
TEST_F(Optimizer, IfCondition)
{
   CASE_NO_RETURN(agx_if_icmp(b, agx_icmp(b, wx, wy, AGX_ICOND_UEQ, true),
                              agx_zero(), 1, AGX_ICOND_UEQ, true, NULL),
                  agx_if_icmp(b, wx, wy, 1, AGX_ICOND_UEQ, true, NULL));

   CASE_NO_RETURN(agx_if_icmp(b, agx_fcmp(b, wx, wy, AGX_FCOND_EQ, true),
                              agx_zero(), 1, AGX_ICOND_UEQ, true, NULL),
                  agx_if_fcmp(b, wx, wy, 1, AGX_FCOND_EQ, true, NULL));

   CASE_NO_RETURN(agx_if_icmp(b, agx_fcmp(b, hx, hy, AGX_FCOND_LT, false),
                              agx_zero(), 1, AGX_ICOND_UEQ, true, NULL),
                  agx_if_fcmp(b, hx, hy, 1, AGX_FCOND_LT, false, NULL));
}
337
/*
 * An icmpsel selecting on a compare-against-zero fuses into a single
 * {i,f}cmpsel on the compare's operands. Note the expected select arms:
 * when the compare's invert flag is false the arms swap (wz, wx ->
 * wx, wz), and when it is true the original arm order is kept.
 */
TEST_F(Optimizer, SelectCondition)
{
   CASE32(agx_icmpsel_to(b, out, agx_icmp(b, wx, wy, AGX_ICOND_UEQ, false),
                         agx_zero(), wz, wx, AGX_ICOND_UEQ),
          agx_icmpsel_to(b, out, wx, wy, wx, wz, AGX_ICOND_UEQ));

   CASE32(agx_icmpsel_to(b, out, agx_icmp(b, wx, wy, AGX_ICOND_UEQ, true),
                         agx_zero(), wz, wx, AGX_ICOND_UEQ),
          agx_icmpsel_to(b, out, wx, wy, wz, wx, AGX_ICOND_UEQ));

   CASE32(agx_icmpsel_to(b, out, agx_fcmp(b, wx, wy, AGX_FCOND_EQ, false),
                         agx_zero(), wz, wx, AGX_ICOND_UEQ),
          agx_fcmpsel_to(b, out, wx, wy, wx, wz, AGX_FCOND_EQ));

   CASE32(agx_icmpsel_to(b, out, agx_fcmp(b, wx, wy, AGX_FCOND_LT, true),
                         agx_zero(), wz, wx, AGX_ICOND_UEQ),
          agx_fcmpsel_to(b, out, wx, wy, wz, wx, AGX_FCOND_LT));
}
356
/*
 * xor-with-1 (boolean NOT) feeding an if condition is absorbed by
 * flipping the if's invert flag, in both directions.
 */
TEST_F(Optimizer, IfInverted)
{
   CASE_NO_RETURN(
      agx_if_icmp(b, agx_xor(b, hx, agx_immediate(1)), agx_zero(), 1,
                  AGX_ICOND_UEQ, true, NULL),
      agx_if_icmp(b, hx, agx_zero(), 1, AGX_ICOND_UEQ, false, NULL));

   CASE_NO_RETURN(agx_if_icmp(b, agx_xor(b, hx, agx_immediate(1)), agx_zero(),
                              1, AGX_ICOND_UEQ, false, NULL),
                  agx_if_icmp(b, hx, agx_zero(), 1, AGX_ICOND_UEQ, true, NULL));
}
368
/*
 * Combination of the two previous rewrites: a compare, inverted via
 * xor-with-1, feeding an if fuses into if_{i,f}cmp on the compare's
 * operands with the invert flag flipped.
 */
TEST_F(Optimizer, IfInvertedCondition)
{
   CASE_NO_RETURN(
      agx_if_icmp(
         b,
         agx_xor(b, agx_icmp(b, wx, wy, AGX_ICOND_UEQ, true), agx_immediate(1)),
         agx_zero(), 1, AGX_ICOND_UEQ, true, NULL),
      agx_if_icmp(b, wx, wy, 1, AGX_ICOND_UEQ, false, NULL));

   CASE_NO_RETURN(
      agx_if_icmp(
         b,
         agx_xor(b, agx_fcmp(b, wx, wy, AGX_FCOND_EQ, true), agx_immediate(1)),
         agx_zero(), 1, AGX_ICOND_UEQ, true, NULL),
      agx_if_fcmp(b, wx, wy, 1, AGX_FCOND_EQ, false, NULL));

   CASE_NO_RETURN(
      agx_if_icmp(
         b,
         agx_xor(b, agx_fcmp(b, hx, hy, AGX_FCOND_LT, false), agx_immediate(1)),
         agx_zero(), 1, AGX_ICOND_UEQ, true, NULL),
      agx_if_fcmp(b, hx, hy, 1, AGX_FCOND_LT, true, NULL));
}
392