xref: /aosp_15_r20/external/mesa3d/src/freedreno/ir3/tests/delay.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2020 Google, Inc.
3  * SPDX-License-Identifier: MIT
4  */
5 
6 #include <err.h>
7 #include <stdio.h>
8 
9 #include "ir3.h"
10 #include "ir3_assembler.h"
11 #include "ir3_shader.h"
12 
/*
 * A test for delay-slot calculation.  Each test specifies ir3 assembly
 * for one or more instructions, the last of which consumes the values
 * produced by the earlier ones, together with the expected number of
 * delay slots needed before that last instruction.  Any source
 * registers of the last instruction which are not written by a previous
 * instruction are not counted.
 */
21 
/*
 * Build one entry of the tests[] table: the variadic arguments are
 * stringified into the assembly string (first field) and n becomes the
 * expected delay (second field).
 */
/* clang-format off */
#define TEST(n, ...) { # __VA_ARGS__, n }
/* clang-format on */
25 
/*
 * Table of test cases.  In each entry the final instruction of the
 * assembly is the consumer whose required delay is being checked.
 */
static const struct test {
   /* ir3 assembly for the test, produced by stringifying the TEST() body */
   const char *asmstr;
   /* number of delay slots expected before the last instruction */
   unsigned expected_delay;
} tests[] = {
   /* clang-format off */
   TEST(6,
      add.f r0.x, r2.x, r2.y
      rsq r0.x, r0.x
   ),
   TEST(3,
      mov.f32f32 r0.x, c0.x
      mov.f32f32 r0.y, c0.y
      add.f r0.x, r0.x, r0.y
   ),
   TEST(2,
      mov.f32f32 r0.x, c0.x
      mov.f32f32 r0.y, c0.y
      mov.f32f32 r0.z, c0.z
      mad.f32 r0.x, r0.x, r0.y, r0.z
   ),
   TEST(0,
      mov.f32f32 r0.x, c0.x
      rcp r0.x, r0.y
      add.f r0.x, r0.x, c0.x
   ),
   TEST(2,
      mov.f32f32 r0.x, c0.x
      mov.f32f32 r0.y, c0.y
      (rpt1)add.f r0.x, (r)r0.x, (r)c0.x
   ),
   TEST(2,
      (rpt1)mov.f32f32 r0.x, c0.x
      (rpt1)add.f r0.x, (r)r0.x, (r)c0.x
   ),
   TEST(3,
      mov.f32f32 r0.y, c0.y
      mov.f32f32 r0.x, c0.x
      (rpt1)add.f r0.x, (r)r0.x, (r)c0.x
   ),
   TEST(1,
      (rpt2)mov.f32f32 r0.x, (r)c0.x
      add.f r0.x, r0.x, c0.x
   ),
   TEST(2,
      (rpt2)mov.f32f32 r0.x, (r)c0.x
      add.f r0.x, r0.x, r0.y
   ),
   TEST(2,
      (rpt1)mov.f32f32 r0.x, (r)c0.x
      (rpt1)add.f r0.x, (r)r0.x, c0.x
   ),
   TEST(1,
      (rpt1)mov.f32f32 r0.y, (r)c0.x
      (rpt1)add.f r0.x, (r)r0.x, c0.x
   ),
   TEST(3,
      (rpt1)mov.f32f32 r0.x, (r)c0.x
      (rpt1)add.f r0.x, (r)r0.y, c0.x
   ),
   /* clang-format on */
};
87 
88 static struct ir3_shader *
parse_asm(struct ir3_compiler * c,const char * asmstr)89 parse_asm(struct ir3_compiler *c, const char *asmstr)
90 {
91    struct ir3_kernel_info info = {};
92    FILE *in = fmemopen((void *)asmstr, strlen(asmstr), "r");
93    struct ir3_shader *shader = ir3_parse_asm(c, &info, in);
94 
95    fclose(in);
96 
97    if (!shader)
98       errx(-1, "assembler failed");
99 
100    return shader;
101 }
102 
103 /**
104  * ir3_delay_calc_* relies on the src/dst wrmask being correct even for ALU
105  * instructions, so this sets it here.
106  *
107  * Note that this is not clever enough to know how many src/dst there are
108  * for various tex/mem instructions.  But the rules for tex consuming alu
109  * are the same as sfu consuming alu.
110  */
111 static void
fixup_wrmask(struct ir3 * ir)112 fixup_wrmask(struct ir3 *ir)
113 {
114    struct ir3_block *block = ir3_start_block(ir);
115 
116    foreach_instr_safe (instr, &block->instr_list) {
117       instr->dsts[0]->wrmask = MASK(instr->repeat + 1);
118       foreach_src (reg, instr) {
119          if (reg->flags & (IR3_REG_CONST | IR3_REG_IMMED))
120             continue;
121 
122          if (reg->flags & IR3_REG_R)
123             reg->wrmask = MASK(instr->repeat + 1);
124          else
125             reg->wrmask = 1;
126       }
127    }
128 }
129 
130 /* Calculate the number of nops added before the last instruction by
131  * ir3_legalize.
132  */
133 static unsigned
calc_nops(struct ir3_block * block,struct ir3_instruction * last)134 calc_nops(struct ir3_block *block, struct ir3_instruction *last)
135 {
136    unsigned nops = 0;
137 
138    foreach_instr_rev (instr, &block->instr_list) {
139       if (instr == last)
140          continue;
141 
142       if (instr->opc == OPC_NOP) {
143          nops += 1 + instr->repeat;
144       } else {
145          if (is_alu(instr))
146             nops += instr->nop;
147          break;
148       }
149    }
150 
151    return nops;
152 }
153 
154 int
main(int argc,char ** argv)155 main(int argc, char **argv)
156 {
157    struct ir3_compiler *c;
158    int result = 0;
159 
160    struct fd_dev_id dev_id = {
161          .gpu_id = 630,
162    };
163 
164    c = ir3_compiler_create(NULL, &dev_id, fd_dev_info_raw(&dev_id), &(struct ir3_compiler_options){});
165 
166    for (int i = 0; i < ARRAY_SIZE(tests); i++) {
167       const struct test *test = &tests[i];
168       struct ir3_shader *shader = parse_asm(c, test->asmstr);
169       struct ir3 *ir = shader->variants->ir;
170 
171       fixup_wrmask(ir);
172 
173       ir3_debug_print(ir, "AFTER fixup_wrmask");
174 
175       struct ir3_block *block =
176          list_first_entry(&ir->block_list, struct ir3_block, node);
177       struct ir3_instruction *last = NULL;
178 
179       foreach_instr_rev (instr, &block->instr_list) {
180          if (is_meta(instr))
181             continue;
182          last = instr;
183          break;
184       }
185 
186       int max_bary;
187       ir3_legalize(ir, shader->variants, &max_bary);
188 
189       unsigned n = calc_nops(block, last);
190 
191       if (n != test->expected_delay) {
192          printf("%d: FAIL: Expected delay %u, but got %u, for:\n%s\n", i,
193                 test->expected_delay, n, test->asmstr);
194          result = -1;
195       } else {
196          printf("%d: PASS\n", i);
197       }
198 
199       ir3_shader_destroy(shader);
200    }
201 
202    ir3_compiler_destroy(c);
203 
204    return result;
205 }
206