1 /*
2 * Copyright (C) 2022 Collabora, Ltd.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24 #include "bi_builder.h"
25 #include "bi_test.h"
26 #include "va_compiler.h"
27 #include "valhall_enums.h"
28
29 #include <gtest/gtest.h>
30
31 static void
strip_nops(bi_context * ctx)32 strip_nops(bi_context *ctx)
33 {
34 bi_foreach_instr_global_safe(ctx, I) {
35 if (I->op == BI_OPCODE_NOP)
36 bi_remove_instruction(I);
37 }
38 }
39
/*
 * Build the instruction sequence `test` twice, each time in a fresh builder
 * whose shader stage is MESA_SHADER_<shader_stage>.
 *
 * The first copy ("processed") has its NOPs stripped and is then run through
 * va_insert_flow_control_nops(). The second copy ("reference") is left exactly
 * as written. The two shaders are then asserted equal, so a test passes only
 * when the pass re-creates the NOPs that `test` spelled out by hand.
 *
 * `test` refers to the current builder as `b`. It is expanded twice, so it
 * must produce the same instructions on both expansions.
 */
#define CASE(shader_stage, test)                                               \
   do {                                                                        \
      bi_builder *processed = bit_builder(mem_ctx);                            \
      bi_builder *reference = bit_builder(mem_ctx);                            \
      {                                                                        \
         UNUSED bi_builder *b = processed;                                     \
         processed->shader->stage = MESA_SHADER_##shader_stage;                \
         test;                                                                 \
      }                                                                        \
      strip_nops(processed->shader);                                           \
      va_insert_flow_control_nops(processed->shader);                          \
      {                                                                        \
         UNUSED bi_builder *b = reference;                                     \
         reference->shader->stage = MESA_SHADER_##shader_stage;                \
         test;                                                                 \
      }                                                                        \
      ASSERT_SHADER_EQUAL(processed->shader, reference->shader);               \
   } while (0)
58
/* Emit a NOP through the builder `b` in scope and set its flow field to
 * VA_FLOW_<f>. Parenthesized so the expansion stays a single expression. */
#define flow(f) (bi_nop(b)->flow = VA_FLOW_##f)
60
61 class InsertFlow : public testing::Test {
62 protected:
InsertFlow()63 InsertFlow()
64 {
65 mem_ctx = ralloc_context(NULL);
66 }
67
~InsertFlow()68 ~InsertFlow()
69 {
70 ralloc_free(mem_ctx);
71 }
72
73 void *mem_ctx;
74 };
75
/* A shader with no instructions must come out of the pass with nothing
 * added to it. */
TEST_F(InsertFlow, PreserveEmptyShader)
{
   CASE(FRAGMENT, { /* intentionally empty */ });
}
80
/*
 * BLEND, ST_TILE and LD_TILE are each expected to be preceded by a WAIT flow
 * NOP. The shader also opens with DISCARD and closes with END.
 */
TEST_F(InsertFlow, TilebufferWait7)
{
   const bi_index r0 = bi_register(0);

   /* BLEND */
   CASE(FRAGMENT, {
      flow(DISCARD);
      bi_fadd_f32_to(b, r0, r0, r0);
      flow(WAIT);
      bi_blend_to(b, r0, bi_register(4), bi_register(5), bi_register(6),
                  bi_register(7), bi_register(8), BI_REGISTER_FORMAT_AUTO, 4,
                  4);
      flow(END);
   });

   /* ST_TILE */
   CASE(FRAGMENT, {
      flow(DISCARD);
      bi_fadd_f32_to(b, r0, r0, r0);
      flow(WAIT);
      bi_st_tile(b, r0, bi_register(4), bi_register(5), bi_register(6),
                 BI_REGISTER_FORMAT_AUTO, BI_VECSIZE_V4);
      flow(END);
   });

   /* LD_TILE */
   CASE(FRAGMENT, {
      flow(DISCARD);
      bi_fadd_f32_to(b, r0, r0, r0);
      flow(WAIT);
      bi_ld_tile_to(b, r0, bi_register(4), bi_register(5), bi_register(6),
                    BI_REGISTER_FORMAT_AUTO, BI_VECSIZE_V4);
      flow(END);
   });
}
111
/*
 * ATEST is expected to be preceded by a WAIT0126 flow NOP and followed by a
 * WAIT0 before the shader ends.
 */
TEST_F(InsertFlow, AtestWait6AndWait0After)
{
   const bi_index r0 = bi_register(0);
   const bi_index atest_param = bi_fau(BIR_FAU_ATEST_PARAM, false);

   CASE(FRAGMENT, {
      flow(DISCARD);
      bi_fadd_f32_to(b, r0, r0, r0);
      flow(WAIT0126);
      bi_atest_to(b, r0, bi_register(4), bi_register(5), atest_param);
      flow(WAIT0);
      flow(END);
   });
}
124
/* ZS_EMIT is expected to be preceded by a WAIT0126 flow NOP. */
TEST_F(InsertFlow, ZSEmitWait6)
{
   const bi_index r0 = bi_register(0);

   CASE(FRAGMENT, {
      flow(DISCARD);
      bi_fadd_f32_to(b, r0, r0, r0);
      flow(WAIT0126);
      bi_zs_emit_to(b, r0, bi_register(4), bi_register(5), bi_register(6),
                    true, true);
      flow(END);
   });
}
136
/*
 * An attribute load writes a V4 starting at r16. The first FADD touches only
 * r0 and is expected to get no wait. The second FADD reads r19 and is
 * expected to be preceded by WAIT0.
 *
 * NOTE(review): r19 is presumably the last register of the V4 destination
 * (r16..r19) -- confirm against the LD_ATTR_IMM definition.
 */
TEST_F(InsertFlow, LoadThenUnrelatedThenUse)
{
   const bi_index r0 = bi_register(0);

   CASE(VERTEX, {
      bi_ld_attr_imm_to(b, bi_register(16), bi_register(60), bi_register(61),
                        BI_REGISTER_FORMAT_F32, BI_VECSIZE_V4, 1);
      bi_fadd_f32_to(b, r0, r0, r0);
      flow(WAIT0);
      bi_fadd_f32_to(b, r0, r0, bi_register(19));
      flow(END);
   });
}
148
/* A lone varying load is expected to be followed by WAIT0 before END. */
TEST_F(InsertFlow, SingleLdVar)
{
   const bi_index r61 = bi_register(61);

   CASE(FRAGMENT, {
      flow(DISCARD);
      bi_ld_var_buf_imm_f16_to(b, bi_register(2), r61, BI_REGISTER_FORMAT_F16,
                               BI_SAMPLE_CENTER, BI_SOURCE_FORMAT_F16,
                               BI_UPDATE_RETRIEVE, BI_VECSIZE_V4, 0);
      flow(WAIT0);
      flow(END);
   });
}
161
/*
 * Three varying loads in a row: a STORE, a RETRIEVE, then another STORE.
 * A WAIT0 is expected between the RETRIEVE and the second STORE, and again
 * before END.
 */
TEST_F(InsertFlow, SerializeLdVars)
{
   const bi_index r61 = bi_register(61);

   CASE(FRAGMENT, {
      flow(DISCARD);

      bi_ld_var_buf_imm_f16_to(b, bi_register(16), r61,
                               BI_REGISTER_FORMAT_F16, BI_SAMPLE_CENTER,
                               BI_SOURCE_FORMAT_F16, BI_UPDATE_STORE,
                               BI_VECSIZE_V4, 0);

      bi_ld_var_buf_imm_f16_to(b, bi_register(2), r61, BI_REGISTER_FORMAT_F16,
                               BI_SAMPLE_CENTER, BI_SOURCE_FORMAT_F16,
                               BI_UPDATE_RETRIEVE, BI_VECSIZE_V4, 0);
      flow(WAIT0);

      bi_ld_var_buf_imm_f16_to(b, bi_register(8), r61, BI_REGISTER_FORMAT_F16,
                               BI_SAMPLE_CENTER, BI_SOURCE_FORMAT_F16,
                               BI_UPDATE_STORE, BI_VECSIZE_V4, 1);
      flow(WAIT0);

      flow(END);
   });
}
183
/*
 * With a CLPER in the shader, the DISCARD flow NOP is expected right after
 * the CLPER rather than at the very start.
 */
TEST_F(InsertFlow, Clper)
{
   const bi_index r0 = bi_register(0);

   CASE(FRAGMENT, {
      bi_fadd_f32_to(b, r0, r0, r0);
      bi_clper_i32_to(b, r0, bi_register(4), bi_register(8),
                      BI_INACTIVE_RESULT_ZERO, BI_LANE_OP_NONE,
                      BI_SUBGROUP_SUBGROUP4);
      flow(DISCARD);
      bi_fadd_f32_to(b, r0, r0, r0);
      flow(END);
   });
}
196
/*
 * Texture fetch with BI_VA_LOD_MODE_COMPUTED_LOD: DISCARD is expected only
 * after the texture instruction, followed by WAIT0 before the result is used.
 */
TEST_F(InsertFlow, TextureImplicit)
{
   const bi_index r0 = bi_register(0);

   CASE(FRAGMENT, {
      bi_fadd_f32_to(b, r0, r0, r0);
      bi_tex_single_to(b, r0, bi_register(4), bi_register(8), bi_register(12),
                       false, BI_DIMENSION_2D, BI_REGISTER_FORMAT_F32, false,
                       false, BI_VA_LOD_MODE_COMPUTED_LOD, false,
                       BI_WRITE_MASK_RGBA, 4);
      flow(DISCARD);
      flow(WAIT0);
      bi_fadd_f32_to(b, r0, r0, r0);
      flow(END);
   });
}
211
/*
 * Texture fetch with BI_VA_LOD_MODE_ZERO_LOD: DISCARD is expected at the very
 * start of the shader, and WAIT0 after the texture instruction before the
 * result is used.
 */
TEST_F(InsertFlow, TextureExplicit)
{
   const bi_index r0 = bi_register(0);

   CASE(FRAGMENT, {
      flow(DISCARD);
      bi_fadd_f32_to(b, r0, r0, r0);
      bi_tex_single_to(b, r0, bi_register(4), bi_register(8), bi_register(12),
                       false, BI_DIMENSION_2D, BI_REGISTER_FORMAT_F32, false,
                       false, BI_VA_LOD_MODE_ZERO_LOD, false,
                       BI_WRITE_MASK_RGBA, 4);
      flow(WAIT0);
      bi_fadd_f32_to(b, r0, r0, r0);
      flow(END);
   });
}
226
227 /* A
228 * / \
229 * B C
230 * \ /
231 * D
232 */
TEST_F(InsertFlow,DiamondCFG)233 TEST_F(InsertFlow, DiamondCFG)
234 {
235 CASE(FRAGMENT, {
236 bi_block *A = bi_start_block(&b->shader->blocks);
237 bi_block *B = bit_block(b->shader);
238 bi_block *C = bit_block(b->shader);
239 bi_block *D = bit_block(b->shader);
240
241 bi_block_add_successor(A, B);
242 bi_block_add_successor(A, C);
243
244 bi_block_add_successor(B, D);
245 bi_block_add_successor(C, D);
246
247 /* B uses helper invocations, no other block does.
248 *
249 * That means B and C need to discard helpers.
250 */
251 b->cursor = bi_after_block(B);
252 bi_clper_i32_to(b, bi_register(0), bi_register(4), bi_register(8),
253 BI_INACTIVE_RESULT_ZERO, BI_LANE_OP_NONE,
254 BI_SUBGROUP_SUBGROUP4);
255 flow(DISCARD);
256 flow(RECONVERGE);
257
258 b->cursor = bi_after_block(C);
259 flow(DISCARD);
260 bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
261 flow(RECONVERGE);
262
263 b->cursor = bi_after_block(D);
264 flow(END);
265 });
266 }
267
/*
 * A store is placed on slot 2 ahead of a barrier. WAIT2 is expected before
 * the BARRIER, and a WAIT after it before END.
 */
TEST_F(InsertFlow, BarrierBug)
{
   const bi_index r10 = bi_register(10);

   CASE(KERNEL, {
      bi_instr *store = bi_store_i32(b, bi_register(0), bi_register(2),
                                     bi_register(4), BI_SEG_NONE, 0);
      store->slot = 2;

      bi_fadd_f32_to(b, r10, r10, r10);
      flow(WAIT2);
      bi_barrier(b);
      flow(WAIT);
      flow(END);
   });
}
282