1 /*
2 * Copyright © 2020 Google, Inc.
3 * SPDX-License-Identifier: MIT
4 */
5
6 #include <err.h>
7 #include <stdio.h>
8
9 #include "ir3.h"
10 #include "ir3_assembler.h"
11 #include "ir3_shader.h"
12
/*
 * A test for delay-slot calculation.  Each test specifies ir3 assembly
 * for one or more instructions, the last of which consumes the values
 * produced by the preceding ones, together with the expected number of
 * delay slots that are needed before that last instruction.  Any source
 * registers in the last instruction which are not written in a previous
 * instruction are not counted.
 */
21
22 /* clang-format off */
/* Build one tests[] entry: 'n' is the expected delay, everything after it
 * is stringified verbatim and becomes the assembly source of the test.
 */
#define TEST(n, ...) { .asmstr = #__VA_ARGS__, .expected_delay = (n) }
24 /* clang-format on */
25
/*
 * Table of test cases.  Each entry is built with TEST(), which stringifies
 * the instructions into asmstr and stores the first argument as
 * expected_delay.
 */
static const struct test {
   /* ir3 assembly source; handed to parse_asm() by main() */
   const char *asmstr;
   /* value that calc_nops() must report for the test to pass */
   unsigned expected_delay;
} tests[] = {
   /* clang-format off */
   TEST(6,
      add.f r0.x, r2.x, r2.y
      rsq r0.x, r0.x
   ),
   TEST(3,
      mov.f32f32 r0.x, c0.x
      mov.f32f32 r0.y, c0.y
      add.f r0.x, r0.x, r0.y
   ),
   TEST(2,
      mov.f32f32 r0.x, c0.x
      mov.f32f32 r0.y, c0.y
      mov.f32f32 r0.z, c0.z
      mad.f32 r0.x, r0.x, r0.y, r0.z
   ),
   TEST(0,
      mov.f32f32 r0.x, c0.x
      rcp r0.x, r0.y
      add.f r0.x, r0.x, c0.x
   ),
   TEST(2,
      mov.f32f32 r0.x, c0.x
      mov.f32f32 r0.y, c0.y
      (rpt1)add.f r0.x, (r)r0.x, (r)c0.x
   ),
   TEST(2,
      (rpt1)mov.f32f32 r0.x, c0.x
      (rpt1)add.f r0.x, (r)r0.x, (r)c0.x
   ),
   TEST(3,
      mov.f32f32 r0.y, c0.y
      mov.f32f32 r0.x, c0.x
      (rpt1)add.f r0.x, (r)r0.x, (r)c0.x
   ),
   TEST(1,
      (rpt2)mov.f32f32 r0.x, (r)c0.x
      add.f r0.x, r0.x, c0.x
   ),
   TEST(2,
      (rpt2)mov.f32f32 r0.x, (r)c0.x
      add.f r0.x, r0.x, r0.y
   ),
   TEST(2,
      (rpt1)mov.f32f32 r0.x, (r)c0.x
      (rpt1)add.f r0.x, (r)r0.x, c0.x
   ),
   TEST(1,
      (rpt1)mov.f32f32 r0.y, (r)c0.x
      (rpt1)add.f r0.x, (r)r0.x, c0.x
   ),
   TEST(3,
      (rpt1)mov.f32f32 r0.x, (r)c0.x
      (rpt1)add.f r0.x, (r)r0.y, c0.x
   ),
   /* clang-format on */
};
87
88 static struct ir3_shader *
parse_asm(struct ir3_compiler * c,const char * asmstr)89 parse_asm(struct ir3_compiler *c, const char *asmstr)
90 {
91 struct ir3_kernel_info info = {};
92 FILE *in = fmemopen((void *)asmstr, strlen(asmstr), "r");
93 struct ir3_shader *shader = ir3_parse_asm(c, &info, in);
94
95 fclose(in);
96
97 if (!shader)
98 errx(-1, "assembler failed");
99
100 return shader;
101 }
102
103 /**
104 * ir3_delay_calc_* relies on the src/dst wrmask being correct even for ALU
105 * instructions, so this sets it here.
106 *
107 * Note that this is not clever enough to know how many src/dst there are
108 * for various tex/mem instructions. But the rules for tex consuming alu
109 * are the same as sfu consuming alu.
110 */
111 static void
fixup_wrmask(struct ir3 * ir)112 fixup_wrmask(struct ir3 *ir)
113 {
114 struct ir3_block *block = ir3_start_block(ir);
115
116 foreach_instr_safe (instr, &block->instr_list) {
117 instr->dsts[0]->wrmask = MASK(instr->repeat + 1);
118 foreach_src (reg, instr) {
119 if (reg->flags & (IR3_REG_CONST | IR3_REG_IMMED))
120 continue;
121
122 if (reg->flags & IR3_REG_R)
123 reg->wrmask = MASK(instr->repeat + 1);
124 else
125 reg->wrmask = 1;
126 }
127 }
128 }
129
130 /* Calculate the number of nops added before the last instruction by
131 * ir3_legalize.
132 */
133 static unsigned
calc_nops(struct ir3_block * block,struct ir3_instruction * last)134 calc_nops(struct ir3_block *block, struct ir3_instruction *last)
135 {
136 unsigned nops = 0;
137
138 foreach_instr_rev (instr, &block->instr_list) {
139 if (instr == last)
140 continue;
141
142 if (instr->opc == OPC_NOP) {
143 nops += 1 + instr->repeat;
144 } else {
145 if (is_alu(instr))
146 nops += instr->nop;
147 break;
148 }
149 }
150
151 return nops;
152 }
153
154 int
main(int argc,char ** argv)155 main(int argc, char **argv)
156 {
157 struct ir3_compiler *c;
158 int result = 0;
159
160 struct fd_dev_id dev_id = {
161 .gpu_id = 630,
162 };
163
164 c = ir3_compiler_create(NULL, &dev_id, fd_dev_info_raw(&dev_id), &(struct ir3_compiler_options){});
165
166 for (int i = 0; i < ARRAY_SIZE(tests); i++) {
167 const struct test *test = &tests[i];
168 struct ir3_shader *shader = parse_asm(c, test->asmstr);
169 struct ir3 *ir = shader->variants->ir;
170
171 fixup_wrmask(ir);
172
173 ir3_debug_print(ir, "AFTER fixup_wrmask");
174
175 struct ir3_block *block =
176 list_first_entry(&ir->block_list, struct ir3_block, node);
177 struct ir3_instruction *last = NULL;
178
179 foreach_instr_rev (instr, &block->instr_list) {
180 if (is_meta(instr))
181 continue;
182 last = instr;
183 break;
184 }
185
186 int max_bary;
187 ir3_legalize(ir, shader->variants, &max_bary);
188
189 unsigned n = calc_nops(block, last);
190
191 if (n != test->expected_delay) {
192 printf("%d: FAIL: Expected delay %u, but got %u, for:\n%s\n", i,
193 test->expected_delay, n, test->asmstr);
194 result = -1;
195 } else {
196 printf("%d: PASS\n", i);
197 }
198
199 ir3_shader_destroy(shader);
200 }
201
202 ir3_compiler_destroy(c);
203
204 return result;
205 }
206