1 /*
2 * Copyright © 2018 Jonathan Marek <[email protected]>
3 * SPDX-License-Identifier: MIT
4 *
5 * Authors:
6 * Jonathan Marek <[email protected]>
7 */
8
9 #include "ir2_private.h"
10
11 static bool
is_mov(struct ir2_instr * instr)12 is_mov(struct ir2_instr *instr)
13 {
14 return instr->type == IR2_ALU && instr->alu.vector_opc == MAXv &&
15 instr->src_count == 1;
16 }
17
18 static void
src_combine(struct ir2_src * src,struct ir2_src b)19 src_combine(struct ir2_src *src, struct ir2_src b)
20 {
21 src->num = b.num;
22 src->type = b.type;
23 src->swizzle = swiz_merge(b.swizzle, src->swizzle);
24 if (!src->abs) /* if we have abs we don't care about previous negate */
25 src->negate ^= b.negate;
26 src->abs |= b.abs;
27 }
28
29 /* cp_src: replace src regs when they refer to a mov instruction
30 * example:
31 * ALU: MAXv R7 = C7, C7
32 * ALU: MULADDv R7 = R7, R10, R0.xxxx
33 * becomes:
34 * ALU: MULADDv R7 = C7, R10, R0.xxxx
35 */
36 void
cp_src(struct ir2_context * ctx)37 cp_src(struct ir2_context *ctx)
38 {
39 struct ir2_instr *p;
40
41 ir2_foreach_instr (instr, ctx) {
42 ir2_foreach_src (src, instr) {
43 /* loop to replace recursively */
44 do {
45 if (src->type != IR2_SRC_SSA)
46 break;
47
48 p = &ctx->instr[src->num];
49 /* don't work across blocks to avoid possible issues */
50 if (p->block_idx != instr->block_idx)
51 break;
52
53 if (!is_mov(p))
54 break;
55
56 if (p->alu.saturate)
57 break;
58
59 /* cant apply abs to const src, const src only for alu */
60 if (p->src[0].type == IR2_SRC_CONST &&
61 (src->abs || instr->type != IR2_ALU))
62 break;
63
64 src_combine(src, p->src[0]);
65 } while (1);
66 }
67 }
68 }
69
70 /* cp_export: replace mov to export when possible
71 * in the cp_src pass we bypass any mov instructions related
72 * to the src registers, but for exports for need something different
73 * example:
74 * ALU: MAXv R3.x___ = C9.x???, C9.x???
75 * ALU: MAXv R3._y__ = R0.?x??, C8.?x??
76 * ALU: MAXv export0 = R3.yyyx, R3.yyyx
77 * becomes:
78 * ALU: MAXv export0.___w = C9.???x, C9.???x
79 * ALU: MAXv export0.xyz_ = R0.xxx?, C8.xxx?
80 *
81 */
82 void
cp_export(struct ir2_context * ctx)83 cp_export(struct ir2_context *ctx)
84 {
85 struct ir2_instr *c[4], *ins[4];
86 struct ir2_src *src;
87 struct ir2_reg *reg;
88 unsigned ncomp;
89
90 ir2_foreach_instr (instr, ctx) {
91 if (!is_export(instr)) /* TODO */
92 continue;
93
94 if (!is_mov(instr))
95 continue;
96
97 src = &instr->src[0];
98
99 if (src->negate || src->abs) /* TODO handle these cases */
100 continue;
101
102 if (src->type == IR2_SRC_INPUT || src->type == IR2_SRC_CONST)
103 continue;
104
105 reg = get_reg_src(ctx, src);
106 ncomp = dst_ncomp(instr);
107
108 unsigned reswiz[4] = {};
109 unsigned num_instr = 0;
110
111 /* fill array c with pointers to instrs that write each component */
112 if (src->type == IR2_SRC_SSA) {
113 struct ir2_instr *instr = &ctx->instr[src->num];
114
115 if (instr->type != IR2_ALU)
116 continue;
117
118 for (int i = 0; i < ncomp; i++)
119 c[i] = instr;
120
121 ins[num_instr++] = instr;
122 reswiz[0] = src->swizzle;
123 } else {
124 bool ok = true;
125 unsigned write_mask = 0;
126
127 ir2_foreach_instr (instr, ctx) {
128 if (instr->is_ssa || instr->reg != reg)
129 continue;
130
131 /* set by non-ALU */
132 if (instr->type != IR2_ALU) {
133 ok = false;
134 break;
135 }
136
137 /* component written more than once */
138 if (write_mask & instr->alu.write_mask) {
139 ok = false;
140 break;
141 }
142
143 write_mask |= instr->alu.write_mask;
144
145 /* src pointers for components */
146 for (int i = 0, j = 0; i < 4; i++) {
147 unsigned k = swiz_get(src->swizzle, i);
148 if (instr->alu.write_mask & 1 << k) {
149 c[i] = instr;
150
151 /* reswiz = compressed src->swizzle */
152 unsigned x = 0;
153 for (int i = 0; i < k; i++)
154 x += !!(instr->alu.write_mask & 1 << i);
155
156 assert(src->swizzle || x == j);
157 reswiz[num_instr] |= swiz_set(x, j++);
158 }
159 }
160 ins[num_instr++] = instr;
161 }
162 if (!ok)
163 continue;
164 }
165
166 bool redirect = true;
167
168 /* must all be in same block */
169 for (int i = 0; i < ncomp; i++)
170 redirect &= (c[i]->block_idx == instr->block_idx);
171
172 /* no other instr using the value */
173 ir2_foreach_instr (p, ctx) {
174 if (p == instr)
175 continue;
176 ir2_foreach_src (src, p)
177 redirect &= reg != get_reg_src(ctx, src);
178 }
179
180 if (!redirect)
181 continue;
182
183 /* redirect the instructions writing to the register */
184 for (int i = 0; i < num_instr; i++) {
185 struct ir2_instr *p = ins[i];
186
187 p->alu.export = instr->alu.export;
188 p->alu.write_mask = 0;
189 p->is_ssa = true;
190 p->ssa.ncomp = 0;
191 memset(p->ssa.comp, 0, sizeof(p->ssa.comp));
192 p->alu.saturate |= instr->alu.saturate;
193
194 switch (p->alu.vector_opc) {
195 case PRED_SETE_PUSHv ... PRED_SETGTE_PUSHv:
196 case DOT2ADDv:
197 case DOT3v:
198 case DOT4v:
199 case CUBEv:
200 continue;
201 default:
202 break;
203 }
204 ir2_foreach_src (s, p)
205 swiz_merge_p(&s->swizzle, reswiz[i]);
206 }
207
208 for (int i = 0; i < ncomp; i++) {
209 c[i]->alu.write_mask |= (1 << i);
210 c[i]->ssa.ncomp++;
211 }
212 instr->type = IR2_NONE;
213 instr->need_emit = false;
214 }
215 }
216