xref: /aosp_15_r20/external/mesa3d/src/asahi/compiler/test/test-lower-parallel-copy.cpp (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright 2021 Collabora, Ltd.
3  * SPDX-License-Identifier: MIT
4  */
5 
6 #include "agx_builder.h"
7 #include "agx_compiler.h"
8 #include "agx_test.h"
9 
10 #include <gtest/gtest.h>
11 
/*
 * Lower the parallel copies in `copies` with agx_emit_parallel_copies() into
 * one test shader, build the `expected` instruction sequence into a second
 * test shader, and assert that both shaders are equal.
 *
 * `copies` must be a real array, since its length is taken with ARRAY_SIZE.
 * `expected` is a statement (block) that emits instructions through a builder
 * named `b`. An allocation context named `mem_ctx` must be in scope at the
 * point of use. Being a macro argument, `expected` must not contain commas
 * outside of parentheses.
 */
#define CASE(copies, expected)                                                 \
   do {                                                                        \
      agx_builder *lowered = agx_test_builder(mem_ctx);                        \
      agx_builder *golden = agx_test_builder(mem_ctx);                         \
                                                                               \
      agx_emit_parallel_copies(lowered, copies, ARRAY_SIZE(copies));           \
                                                                               \
      {                                                                        \
         agx_builder *b = golden;                                              \
         expected;                                                             \
      }                                                                        \
                                                                               \
      ASSERT_SHADER_EQUAL(lowered->shader, golden->shader);                    \
   } while (0)
26 
27 static inline void
extr_swap(agx_builder * b,agx_index x)28 extr_swap(agx_builder *b, agx_index x)
29 {
30    x.size = AGX_SIZE_32;
31    agx_extr_to(b, x, x, x, agx_immediate(16), 0);
32 }
33 
34 static inline void
xor_swap(agx_builder * b,agx_index x,agx_index y)35 xor_swap(agx_builder *b, agx_index x, agx_index y)
36 {
37    agx_xor_to(b, x, x, y);
38    agx_xor_to(b, y, x, y);
39    agx_xor_to(b, x, x, y);
40 }
41 
42 class LowerParallelCopy : public testing::Test {
43  protected:
LowerParallelCopy()44    LowerParallelCopy()
45    {
46       mem_ctx = ralloc_context(NULL);
47    }
48 
~LowerParallelCopy()49    ~LowerParallelCopy()
50    {
51       ralloc_free(mem_ctx);
52    }
53 
54    void *mem_ctx;
55 };
56 
TEST_F(LowerParallelCopy, UnrelatedCopies)
{
   /* 32-bit: sources and destinations are all distinct registers. The
    * expected lowering is one move per copy, in the listed order.
    */
   const agx_index w0 = agx_register(0, AGX_SIZE_32);
   const agx_index w2 = agx_register(2, AGX_SIZE_32);
   const agx_index w4 = agx_register(4, AGX_SIZE_32);
   const agx_index w6 = agx_register(6, AGX_SIZE_32);

   struct agx_copy test_1[] = {
      {.dest = 0, .src = w2},
      {.dest = 4, .src = w6},
   };

   CASE(test_1, {
      agx_mov_to(b, w0, w2);
      agx_mov_to(b, w4, w6);
   });

   /* 16-bit variant of the same situation. */
   const agx_index h0 = agx_register(0, AGX_SIZE_16);
   const agx_index h1 = agx_register(1, AGX_SIZE_16);
   const agx_index h4 = agx_register(4, AGX_SIZE_16);
   const agx_index h5 = agx_register(5, AGX_SIZE_16);

   struct agx_copy test_2[] = {
      {.dest = 0, .src = h1},
      {.dest = 4, .src = h5},
   };

   CASE(test_2, {
      agx_mov_to(b, h0, h1);
      agx_mov_to(b, h4, h5);
   });
}
79 
TEST_F(LowerParallelCopy, RelatedSource)
{
   /* 32-bit: a single source register is copied to two destinations. The
    * expected lowering is one move per copy, in the listed order.
    */
   const agx_index w0 = agx_register(0, AGX_SIZE_32);
   const agx_index w2 = agx_register(2, AGX_SIZE_32);
   const agx_index w4 = agx_register(4, AGX_SIZE_32);

   struct agx_copy test_1[] = {
      {.dest = 0, .src = w2},
      {.dest = 4, .src = w2},
   };

   CASE(test_1, {
      agx_mov_to(b, w0, w2);
      agx_mov_to(b, w4, w2);
   });

   /* 16-bit variant: register 1 is the shared source. */
   const agx_index h0 = agx_register(0, AGX_SIZE_16);
   const agx_index h1 = agx_register(1, AGX_SIZE_16);
   const agx_index h4 = agx_register(4, AGX_SIZE_16);

   struct agx_copy test_2[] = {
      {.dest = 0, .src = h1},
      {.dest = 4, .src = h1},
   };

   CASE(test_2, {
      agx_mov_to(b, h0, h1);
      agx_mov_to(b, h4, h1);
   });
}
102 
TEST_F(LowerParallelCopy, DependentCopies)
{
   /* 32-bit: the second copy reads register 0, which the first copy
    * overwrites. The expected sequence therefore moves register 0 out to
    * register 4 before writing register 0.
    */
   const agx_index w0 = agx_register(0, AGX_SIZE_32);
   const agx_index w2 = agx_register(2, AGX_SIZE_32);
   const agx_index w4 = agx_register(4, AGX_SIZE_32);

   struct agx_copy test_1[] = {
      {.dest = 0, .src = w2},
      {.dest = 4, .src = w0},
   };

   CASE(test_1, {
      agx_mov_to(b, w4, w0);
      agx_mov_to(b, w0, w2);
   });

   /* 16-bit variant of the same dependency. */
   const agx_index h0 = agx_register(0, AGX_SIZE_16);
   const agx_index h1 = agx_register(1, AGX_SIZE_16);
   const agx_index h4 = agx_register(4, AGX_SIZE_16);

   struct agx_copy test_2[] = {
      {.dest = 0, .src = h1},
      {.dest = 4, .src = h0},
   };

   CASE(test_2, {
      agx_mov_to(b, h4, h0);
      agx_mov_to(b, h0, h1);
   });
}
125 
TEST_F(LowerParallelCopy, ManyDependentCopies)
{
   /* Shorthands for building register operands of each size. */
   auto w = [](unsigned reg) { return agx_register(reg, AGX_SIZE_32); };
   auto h = [](unsigned reg) { return agx_register(reg, AGX_SIZE_16); };

   /* 32-bit chain 2 -> 0 -> 4 -> 6 -> 8, listed out of order. The expected
    * moves start at the end of the chain so that every register is read
    * before it is overwritten.
    */
   struct agx_copy test_1[] = {
      {.dest = 0, .src = w(2)},
      {.dest = 4, .src = w(0)},
      {.dest = 8, .src = w(6)},
      {.dest = 6, .src = w(4)},
   };

   CASE(test_1, {
      agx_mov_to(b, w(8), w(6));
      agx_mov_to(b, w(6), w(4));
      agx_mov_to(b, w(4), w(0));
      agx_mov_to(b, w(0), w(2));
   });

   /* 16-bit chain 1 -> 0 -> 2 -> 3 -> 4, same idea. */
   struct agx_copy test_2[] = {
      {.dest = 0, .src = h(1)},
      {.dest = 2, .src = h(0)},
      {.dest = 4, .src = h(3)},
      {.dest = 3, .src = h(2)},
   };

   CASE(test_2, {
      agx_mov_to(b, h(4), h(3));
      agx_mov_to(b, h(3), h(2));
      agx_mov_to(b, h(2), h(0));
      agx_mov_to(b, h(0), h(1));
   });
}
156 
TEST_F(LowerParallelCopy, Swap)
{
   /* 32-bit: registers 0 and 2 exchange values. Expected as the three-XOR
    * sequence emitted by xor_swap().
    */
   const agx_index w0 = agx_register(0, AGX_SIZE_32);
   const agx_index w2 = agx_register(2, AGX_SIZE_32);

   struct agx_copy test_1[] = {
      {.dest = 0, .src = w2},
      {.dest = 2, .src = w0},
   };

   CASE(test_1, { xor_swap(b, w0, w2); });

   /* 16-bit: adjacent registers 0 and 1 exchange values. Expected as the
    * single instruction emitted by extr_swap() on register 0.
    */
   const agx_index h0 = agx_register(0, AGX_SIZE_16);
   const agx_index h1 = agx_register(1, AGX_SIZE_16);

   struct agx_copy test_2[] = {
      {.dest = 0, .src = h1},
      {.dest = 1, .src = h0},
   };

   CASE(test_2, { extr_swap(b, h0); });
}
175 
TEST_F(LowerParallelCopy, Cycle3)
{
   /* 16-bit cycle over three registers: 0 <- 1, 1 <- 2, 2 <- 0. The expected
    * lowering is two swaps: first registers 0 and 1 (via extr_swap), then
    * registers 1 and 2 (via xor_swap).
    */
   const agx_index h0 = agx_register(0, AGX_SIZE_16);
   const agx_index h1 = agx_register(1, AGX_SIZE_16);
   const agx_index h2 = agx_register(2, AGX_SIZE_16);

   struct agx_copy test[] = {
      {.dest = 0, .src = h1},
      {.dest = 1, .src = h2},
      {.dest = 2, .src = h0},
   };

   CASE(test, {
      extr_swap(b, h0);
      xor_swap(b, h1, h2);
   });
}
189 
TEST_F(LowerParallelCopy, Immediate64)
{
   /* An immediate of 10 marked as 64-bit, copied to register 4. */
   agx_index wide_imm = agx_immediate(10);
   wide_imm.size = AGX_SIZE_64;

   struct agx_copy test_1[] = {
      {.dest = 4, .src = wide_imm},
   };

   /* Expected: two 32-bit immediate moves, 10 into register 4 and 0 into
    * register 6.
    */
   const agx_index first_word = agx_register(4, AGX_SIZE_32);
   const agx_index second_word = agx_register(6, AGX_SIZE_32);

   CASE(test_1, {
      agx_mov_imm_to(b, first_word, 10);
      agx_mov_imm_to(b, second_word, 0);
   });
}
204 
205 /* Test case from Hack et al */
TEST_F(LowerParallelCopy, TwoSwaps)
{
   const agx_index w2 = agx_register(2, AGX_SIZE_32);
   const agx_index w4 = agx_register(4, AGX_SIZE_32);
   const agx_index w6 = agx_register(6, AGX_SIZE_32);
   const agx_index w8 = agx_register(8, AGX_SIZE_32);

   /* A 32-bit cycle over registers 2, 4 and 6, plus a copy of register 8 onto
    * itself.
    */
   struct agx_copy test[] = {
      {.dest = 4, .src = w2},
      {.dest = 6, .src = w4},
      {.dest = 2, .src = w6},
      {.dest = 8, .src = w8},
   };

   /* Expected: the cycle becomes two XOR swaps; no instruction is expected
    * for the self-copy of register 8.
    */
   CASE(test, {
      xor_swap(b, w4, w2);
      xor_swap(b, w6, w2);
   });
}
220 
TEST_F(LowerParallelCopy, VectorizeAlignedHalfRegs)
{
   /* Four 16-bit copies forming two adjacent pairs: registers 10, 11 into
    * registers 0, 1, and uniforms 8, 9 into registers 2, 3.
    */
   struct agx_copy test[] = {
      {.dest = 0, .src = agx_register(10, AGX_SIZE_16)},
      {.dest = 1, .src = agx_register(11, AGX_SIZE_16)},
      {.dest = 2, .src = agx_uniform(8, AGX_SIZE_16)},
      {.dest = 3, .src = agx_uniform(9, AGX_SIZE_16)},
   };

   /* Expected: each pair is merged into a single 32-bit move. */
   const agx_index reg_pair_dst = agx_register(0, AGX_SIZE_32);
   const agx_index reg_pair_src = agx_register(10, AGX_SIZE_32);
   const agx_index uni_pair_dst = agx_register(2, AGX_SIZE_32);
   const agx_index uni_pair_src = agx_uniform(8, AGX_SIZE_32);

   CASE(test, {
      agx_mov_to(b, reg_pair_dst, reg_pair_src);
      agx_mov_to(b, uni_pair_dst, uni_pair_src);
   });
}
236 
TEST_F(LowerParallelCopy,StackCopies)237 TEST_F(LowerParallelCopy, StackCopies)
238 {
239    struct agx_copy test[] = {
240       {.dest = 21, .dest_mem = true, .src = agx_register(20, AGX_SIZE_16)},
241       {.dest = 22, .dest_mem = true, .src = agx_register(22, AGX_SIZE_32)},
242       {.dest = 0, .src = agx_memory_register(10, AGX_SIZE_16)},
243       {.dest = 1, .src = agx_memory_register(11, AGX_SIZE_16)},
244       {.dest = 0, .dest_mem = true, .src = agx_memory_register(12, AGX_SIZE_16)},
245       {.dest = 1, .dest_mem = true, .src = agx_memory_register(13, AGX_SIZE_16)},
246       {.dest = 2,
247        .dest_mem = true,
248        .src = agx_memory_register(804, AGX_SIZE_32)},
249       {.dest = 804,
250        .dest_mem = true,
251        .src = agx_memory_register(2, AGX_SIZE_32)},
252       {.dest = 807,
253        .dest_mem = true,
254        .src = agx_memory_register(808, AGX_SIZE_16)},
255       {.dest = 808,
256        .dest_mem = true,
257        .src = agx_memory_register(807, AGX_SIZE_16)},
258    };
259 
260    CASE(test, {
261       /* Vectorized fill */
262       agx_mov_to(b, agx_register(0, AGX_SIZE_32),
263                  agx_memory_register(10, AGX_SIZE_32));
264 
265       /* Regular spills */
266       agx_mov_to(b, agx_memory_register(21, AGX_SIZE_16),
267                  agx_register(20, AGX_SIZE_16));
268       agx_mov_to(b, agx_memory_register(22, AGX_SIZE_32),
269                  agx_register(22, AGX_SIZE_32));
270 
271       /* Vectorized stack->stack copy */
272       agx_mov_to(b, agx_register(2, AGX_SIZE_32),
273                  agx_memory_register(12, AGX_SIZE_32));
274 
275       agx_mov_to(b, agx_memory_register(0, AGX_SIZE_32),
276                  agx_register(2, AGX_SIZE_32));
277 
278       /* Stack swap: 32-bit */
279       agx_index temp1 = agx_register(4, AGX_SIZE_32);
280       agx_index temp2 = agx_register(6, AGX_SIZE_32);
281       agx_index spilled_gpr_vec2 = agx_register(0, AGX_SIZE_32);
282       spilled_gpr_vec2.channels_m1++;
283 
284       agx_mov_to(b, temp1, agx_memory_register(2, AGX_SIZE_32));
285       agx_mov_to(b, temp2, agx_memory_register(804, AGX_SIZE_32));
286       agx_mov_to(b, agx_memory_register(804, AGX_SIZE_32), temp1);
287       agx_mov_to(b, agx_memory_register(2, AGX_SIZE_32), temp2);
288 
289       /* Stack swap: 16-bit */
290       spilled_gpr_vec2.size = AGX_SIZE_16;
291       temp1.size = AGX_SIZE_16;
292       temp2.size = AGX_SIZE_16;
293 
294       agx_mov_to(b, temp1, agx_memory_register(807, AGX_SIZE_16));
295       agx_mov_to(b, temp2, agx_memory_register(808, AGX_SIZE_16));
296       agx_mov_to(b, agx_memory_register(808, AGX_SIZE_16), temp1);
297       agx_mov_to(b, agx_memory_register(807, AGX_SIZE_16), temp2);
298    });
299 }
300 
#if 0
/* Disabled test.
 * NOTE(review): no reason for disabling it is recorded here -- confirm that
 * the expectation below is still the intended lowering before re-enabling.
 */
TEST_F(LowerParallelCopy, LooksLikeASwap)
{
   const agx_index w0 = agx_register(0, AGX_SIZE_32);
   const agx_index w2 = agx_register(2, AGX_SIZE_32);
   const agx_index w4 = agx_register(4, AGX_SIZE_32);

   /* Registers 0 and 2 exchange values, and register 2 is additionally
    * copied to register 4.
    */
   struct agx_copy test[] = {
      {.dest = 0, .src = w2},
      {.dest = 2, .src = w0},
      {.dest = 4, .src = w2},
   };

   /* Expected: three plain moves that route the old value of register 2
    * through register 4.
    */
   CASE(test, {
      agx_mov_to(b, w4, w2);
      agx_mov_to(b, w2, w0);
      agx_mov_to(b, w0, w4);
   });
}
#endif
316