xref: /aosp_15_r20/external/mesa3d/src/gallium/drivers/freedreno/a2xx/ir2_cp.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2018 Jonathan Marek <[email protected]>
3  * SPDX-License-Identifier: MIT
4  *
5  * Authors:
6  *    Jonathan Marek <[email protected]>
7  */
8 
9 #include "ir2_private.h"
10 
11 static bool
is_mov(struct ir2_instr * instr)12 is_mov(struct ir2_instr *instr)
13 {
14    return instr->type == IR2_ALU && instr->alu.vector_opc == MAXv &&
15           instr->src_count == 1;
16 }
17 
18 static void
src_combine(struct ir2_src * src,struct ir2_src b)19 src_combine(struct ir2_src *src, struct ir2_src b)
20 {
21    src->num = b.num;
22    src->type = b.type;
23    src->swizzle = swiz_merge(b.swizzle, src->swizzle);
24    if (!src->abs) /* if we have abs we don't care about previous negate */
25       src->negate ^= b.negate;
26    src->abs |= b.abs;
27 }
28 
29 /* cp_src: replace src regs when they refer to a mov instruction
30  * example:
31  *	ALU:      MAXv    R7 = C7, C7
32  *	ALU:      MULADDv R7 = R7, R10, R0.xxxx
33  * becomes:
34  *	ALU:      MULADDv R7 = C7, R10, R0.xxxx
35  */
36 void
cp_src(struct ir2_context * ctx)37 cp_src(struct ir2_context *ctx)
38 {
39    struct ir2_instr *p;
40 
41    ir2_foreach_instr (instr, ctx) {
42       ir2_foreach_src (src, instr) {
43          /* loop to replace recursively */
44          do {
45             if (src->type != IR2_SRC_SSA)
46                break;
47 
48             p = &ctx->instr[src->num];
49             /* don't work across blocks to avoid possible issues */
50             if (p->block_idx != instr->block_idx)
51                break;
52 
53             if (!is_mov(p))
54                break;
55 
56             if (p->alu.saturate)
57                break;
58 
59             /* cant apply abs to const src, const src only for alu */
60             if (p->src[0].type == IR2_SRC_CONST &&
61                 (src->abs || instr->type != IR2_ALU))
62                break;
63 
64             src_combine(src, p->src[0]);
65          } while (1);
66       }
67    }
68 }
69 
70 /* cp_export: replace mov to export when possible
71  * in the cp_src pass we bypass any mov instructions related
72  * to the src registers, but for exports for need something different
73  * example:
74  *	ALU:      MAXv    R3.x___ = C9.x???, C9.x???
75  *	ALU:      MAXv    R3._y__ = R0.?x??, C8.?x??
76  *	ALU:      MAXv    export0 = R3.yyyx, R3.yyyx
77  * becomes:
78  *	ALU:      MAXv    export0.___w = C9.???x, C9.???x
79  *	ALU:      MAXv    export0.xyz_ = R0.xxx?, C8.xxx?
80  *
81  */
82 void
cp_export(struct ir2_context * ctx)83 cp_export(struct ir2_context *ctx)
84 {
85    struct ir2_instr *c[4], *ins[4];
86    struct ir2_src *src;
87    struct ir2_reg *reg;
88    unsigned ncomp;
89 
90    ir2_foreach_instr (instr, ctx) {
91       if (!is_export(instr)) /* TODO */
92          continue;
93 
94       if (!is_mov(instr))
95          continue;
96 
97       src = &instr->src[0];
98 
99       if (src->negate || src->abs) /* TODO handle these cases */
100          continue;
101 
102       if (src->type == IR2_SRC_INPUT || src->type == IR2_SRC_CONST)
103          continue;
104 
105       reg = get_reg_src(ctx, src);
106       ncomp = dst_ncomp(instr);
107 
108       unsigned reswiz[4] = {};
109       unsigned num_instr = 0;
110 
111       /* fill array c with pointers to instrs that write each component */
112       if (src->type == IR2_SRC_SSA) {
113          struct ir2_instr *instr = &ctx->instr[src->num];
114 
115          if (instr->type != IR2_ALU)
116             continue;
117 
118          for (int i = 0; i < ncomp; i++)
119             c[i] = instr;
120 
121          ins[num_instr++] = instr;
122          reswiz[0] = src->swizzle;
123       } else {
124          bool ok = true;
125          unsigned write_mask = 0;
126 
127          ir2_foreach_instr (instr, ctx) {
128             if (instr->is_ssa || instr->reg != reg)
129                continue;
130 
131             /* set by non-ALU */
132             if (instr->type != IR2_ALU) {
133                ok = false;
134                break;
135             }
136 
137             /* component written more than once */
138             if (write_mask & instr->alu.write_mask) {
139                ok = false;
140                break;
141             }
142 
143             write_mask |= instr->alu.write_mask;
144 
145             /* src pointers for components */
146             for (int i = 0, j = 0; i < 4; i++) {
147                unsigned k = swiz_get(src->swizzle, i);
148                if (instr->alu.write_mask & 1 << k) {
149                   c[i] = instr;
150 
151                   /* reswiz = compressed src->swizzle */
152                   unsigned x = 0;
153                   for (int i = 0; i < k; i++)
154                      x += !!(instr->alu.write_mask & 1 << i);
155 
156                   assert(src->swizzle || x == j);
157                   reswiz[num_instr] |= swiz_set(x, j++);
158                }
159             }
160             ins[num_instr++] = instr;
161          }
162          if (!ok)
163             continue;
164       }
165 
166       bool redirect = true;
167 
168       /* must all be in same block */
169       for (int i = 0; i < ncomp; i++)
170          redirect &= (c[i]->block_idx == instr->block_idx);
171 
172       /* no other instr using the value */
173       ir2_foreach_instr (p, ctx) {
174          if (p == instr)
175             continue;
176          ir2_foreach_src (src, p)
177             redirect &= reg != get_reg_src(ctx, src);
178       }
179 
180       if (!redirect)
181          continue;
182 
183       /* redirect the instructions writing to the register */
184       for (int i = 0; i < num_instr; i++) {
185          struct ir2_instr *p = ins[i];
186 
187          p->alu.export = instr->alu.export;
188          p->alu.write_mask = 0;
189          p->is_ssa = true;
190          p->ssa.ncomp = 0;
191          memset(p->ssa.comp, 0, sizeof(p->ssa.comp));
192          p->alu.saturate |= instr->alu.saturate;
193 
194          switch (p->alu.vector_opc) {
195          case PRED_SETE_PUSHv ... PRED_SETGTE_PUSHv:
196          case DOT2ADDv:
197          case DOT3v:
198          case DOT4v:
199          case CUBEv:
200             continue;
201          default:
202             break;
203          }
204          ir2_foreach_src (s, p)
205             swiz_merge_p(&s->swizzle, reswiz[i]);
206       }
207 
208       for (int i = 0; i < ncomp; i++) {
209          c[i]->alu.write_mask |= (1 << i);
210          c[i]->ssa.ncomp++;
211       }
212       instr->type = IR2_NONE;
213       instr->need_emit = false;
214    }
215 }
216