1 /*
2 * Copyright (C) 2020 Collabora Ltd.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors (Collabora):
24 * Alyssa Rosenzweig <[email protected]>
25 */
26
27 #include "bi_builder.h"
28 #include "compiler.h"
29
30 bool
bi_has_arg(const bi_instr * ins,bi_index arg)31 bi_has_arg(const bi_instr *ins, bi_index arg)
32 {
33 if (!ins)
34 return false;
35
36 bi_foreach_src(ins, s) {
37 if (bi_is_equiv(ins->src[s], arg))
38 return true;
39 }
40
41 return false;
42 }
43
/* Precondition: valid 16-bit or 32-bit register format. Returns whether it is
 * 16-bit. Note that auto reads to 32-bit registers even if the memory format
 * is 16-bit, so it is treated as 32-bit here (i.e. returns false). */
47
48 bool
bi_is_regfmt_16(enum bi_register_format fmt)49 bi_is_regfmt_16(enum bi_register_format fmt)
50 {
51 switch (fmt) {
52 case BI_REGISTER_FORMAT_F16:
53 case BI_REGISTER_FORMAT_S16:
54 case BI_REGISTER_FORMAT_U16:
55 return true;
56 case BI_REGISTER_FORMAT_F32:
57 case BI_REGISTER_FORMAT_S32:
58 case BI_REGISTER_FORMAT_U32:
59 case BI_REGISTER_FORMAT_AUTO:
60 return false;
61 default:
62 unreachable("Invalid register format");
63 }
64 }
65
66 static unsigned
bi_count_staging_registers(const bi_instr * ins)67 bi_count_staging_registers(const bi_instr *ins)
68 {
69 enum bi_sr_count count = bi_opcode_props[ins->op].sr_count;
70 unsigned vecsize = ins->vecsize + 1; /* XXX: off-by-one */
71
72 switch (count) {
73 case BI_SR_COUNT_0 ... BI_SR_COUNT_4:
74 return count;
75 case BI_SR_COUNT_FORMAT:
76 return bi_is_regfmt_16(ins->register_format) ? DIV_ROUND_UP(vecsize, 2)
77 : vecsize;
78 case BI_SR_COUNT_VECSIZE:
79 return vecsize;
80 case BI_SR_COUNT_SR_COUNT:
81 return ins->sr_count;
82 }
83
84 unreachable("Invalid sr_count");
85 }
86
87 unsigned
bi_count_read_registers(const bi_instr * ins,unsigned s)88 bi_count_read_registers(const bi_instr *ins, unsigned s)
89 {
90 /* ATOM reads 1 but writes 2. Exception for ACMPXCHG */
91 if (s == 0 && ins->op == BI_OPCODE_ATOM_RETURN_I32)
92 return (ins->atom_opc == BI_ATOM_OPC_ACMPXCHG) ? 2 : 1;
93 else if (s == 0 && bi_opcode_props[ins->op].sr_read)
94 return bi_count_staging_registers(ins);
95 else if (s == 4 && ins->op == BI_OPCODE_BLEND)
96 return ins->sr_count_2; /* Dual source blending */
97 else if (s == 0 && ins->op == BI_OPCODE_SPLIT_I32)
98 return ins->nr_dests;
99 else
100 return 1;
101 }
102
103 unsigned
bi_count_write_registers(const bi_instr * ins,unsigned d)104 bi_count_write_registers(const bi_instr *ins, unsigned d)
105 {
106 if (d == 0 && bi_opcode_props[ins->op].sr_write) {
107 switch (ins->op) {
108 case BI_OPCODE_TEXC:
109 case BI_OPCODE_TEXC_DUAL:
110 if (ins->sr_count_2)
111 return ins->sr_count;
112 else
113 return bi_is_regfmt_16(ins->register_format) ? 2 : 4;
114
115 case BI_OPCODE_TEX_SINGLE:
116 case BI_OPCODE_TEX_FETCH:
117 case BI_OPCODE_TEX_GATHER: {
118 unsigned chans = util_bitcount(ins->write_mask);
119
120 return bi_is_regfmt_16(ins->register_format) ? DIV_ROUND_UP(chans, 2)
121 : chans;
122 }
123
124 case BI_OPCODE_ACMPXCHG_I32:
125 /* Reads 2 but writes 1 */
126 return 1;
127
128 case BI_OPCODE_ATOM1_RETURN_I32:
129 /* Allow omitting the destination for plain ATOM1 */
130 return bi_is_null(ins->dest[0]) ? 0 : ins->sr_count;
131 default:
132 return bi_count_staging_registers(ins);
133 }
134 } else if (ins->op == BI_OPCODE_SEG_ADD_I64) {
135 return 2;
136 } else if (ins->op == BI_OPCODE_TEXC_DUAL && d == 1) {
137 return ins->sr_count_2;
138 } else if (ins->op == BI_OPCODE_COLLECT_I32 && d == 0) {
139 return ins->nr_srcs;
140 }
141
142 return 1;
143 }
144
145 unsigned
bi_writemask(const bi_instr * ins,unsigned d)146 bi_writemask(const bi_instr *ins, unsigned d)
147 {
148 unsigned mask = BITFIELD_MASK(bi_count_write_registers(ins, d));
149 unsigned shift = ins->dest[d].offset;
150 return (mask << shift);
151 }
152
153 bi_clause *
bi_next_clause(bi_context * ctx,bi_block * block,bi_clause * clause)154 bi_next_clause(bi_context *ctx, bi_block *block, bi_clause *clause)
155 {
156 if (!block && !clause)
157 return NULL;
158
159 /* Try the first clause in this block if we're starting from scratch */
160 if (!clause && !list_is_empty(&block->clauses))
161 return list_first_entry(&block->clauses, bi_clause, link);
162
163 /* Try the next clause in this block */
164 if (clause && clause->link.next != &block->clauses)
165 return list_first_entry(&(clause->link), bi_clause, link);
166
167 /* Try the next block, or the one after that if it's empty, etc .*/
168 bi_block *next_block = bi_next_block(block);
169
170 bi_foreach_block_from(ctx, next_block, block) {
171 if (!list_is_empty(&block->clauses))
172 return list_first_entry(&block->clauses, bi_clause, link);
173 }
174
175 return NULL;
176 }
177
/* Does an instruction have a side effect not captured by its register
 * destination? Applies to certain message-passing instructions, +DISCARD, and
 * branching only; used in dead code elimination. Branches are characterized by
 * `last`, which applies to them as well as to some atomics, +BARRIER, and
 * +BLEND, so checking `last` implies no loss of generality. */
183
184 bool
bi_side_effects(const bi_instr * I)185 bi_side_effects(const bi_instr *I)
186 {
187 if (bi_opcode_props[I->op].last)
188 return true;
189
190 switch (I->op) {
191 case BI_OPCODE_DISCARD_F32:
192 case BI_OPCODE_DISCARD_B32:
193 return true;
194 default:
195 break;
196 }
197
198 switch (bi_opcode_props[I->op].message) {
199 case BIFROST_MESSAGE_NONE:
200 case BIFROST_MESSAGE_VARYING:
201 case BIFROST_MESSAGE_ATTRIBUTE:
202 case BIFROST_MESSAGE_TEX:
203 case BIFROST_MESSAGE_VARTEX:
204 case BIFROST_MESSAGE_LOAD:
205 case BIFROST_MESSAGE_64BIT:
206 return false;
207
208 case BIFROST_MESSAGE_STORE:
209 case BIFROST_MESSAGE_ATOMIC:
210 case BIFROST_MESSAGE_BARRIER:
211 case BIFROST_MESSAGE_BLEND:
212 case BIFROST_MESSAGE_Z_STENCIL:
213 case BIFROST_MESSAGE_ATEST:
214 case BIFROST_MESSAGE_JOB:
215 return true;
216
217 case BIFROST_MESSAGE_TILE:
218 return (I->op != BI_OPCODE_LD_TILE);
219 }
220
221 unreachable("Invalid message type");
222 }
223
224 /* Branch reconvergence is required when the execution mask may change
225 * between adjacent instructions (clauses). This occurs for conditional
226 * branches and for the last instruction (clause) in a block whose
227 * fallthrough successor has multiple predecessors.
228 */
229
230 bool
bi_reconverge_branches(bi_block * block)231 bi_reconverge_branches(bi_block *block)
232 {
233 if (bi_num_successors(block) == 1)
234 return bi_num_predecessors(block->successors[0]) > 1;
235 else
236 return true;
237 }
238
239 /*
240 * When MUX.i32 or MUX.v2i16 is used to multiplex entire sources, they can be
241 * replaced by CSEL as follows:
242 *
243 * MUX.neg(x, y, b) -> CSEL.s.lt(b, 0, x, y)
244 * MUX.int_zero(x, y, b) -> CSEL.i.eq(b, 0, x, y)
245 * MUX.fp_zero(x, y, b) -> CSEL.f.eq(b, 0, x, y)
246 *
247 * MUX.bit cannot be transformed like this.
248 *
249 * Note that MUX.v2i16 has partial support for swizzles, which CSEL.v2i16 lacks.
250 * So we must check the swizzles too.
251 */
252 bool
bi_can_replace_with_csel(bi_instr * I)253 bi_can_replace_with_csel(bi_instr *I)
254 {
255 return ((I->op == BI_OPCODE_MUX_I32) || (I->op == BI_OPCODE_MUX_V2I16)) &&
256 (I->mux != BI_MUX_BIT) && (I->src[0].swizzle == BI_SWIZZLE_H01) &&
257 (I->src[1].swizzle == BI_SWIZZLE_H01) &&
258 (I->src[2].swizzle == BI_SWIZZLE_H01);
259 }
260
261 static enum bi_opcode
bi_csel_for_mux(bool must_sign,bool b32,enum bi_mux mux)262 bi_csel_for_mux(bool must_sign, bool b32, enum bi_mux mux)
263 {
264 switch (mux) {
265 case BI_MUX_INT_ZERO:
266 if (must_sign)
267 return b32 ? BI_OPCODE_CSEL_U32 : BI_OPCODE_CSEL_V2U16;
268 else
269 return b32 ? BI_OPCODE_CSEL_I32 : BI_OPCODE_CSEL_V2I16;
270 case BI_MUX_NEG:
271 return b32 ? BI_OPCODE_CSEL_S32 : BI_OPCODE_CSEL_V2S16;
272 case BI_MUX_FP_ZERO:
273 return b32 ? BI_OPCODE_CSEL_F32 : BI_OPCODE_CSEL_V2F16;
274 default:
275 unreachable("No CSEL for MUX.bit");
276 }
277 }
278
279 bi_instr *
bi_csel_from_mux(bi_builder * b,const bi_instr * I,bool must_sign)280 bi_csel_from_mux(bi_builder *b, const bi_instr *I, bool must_sign)
281 {
282 assert(I->op == BI_OPCODE_MUX_I32 || I->op == BI_OPCODE_MUX_V2I16);
283
284 /* Build a new CSEL */
285 enum bi_cmpf cmpf = (I->mux == BI_MUX_NEG) ? BI_CMPF_LT : BI_CMPF_EQ;
286 bi_instr *csel = bi_csel_u32_to(b, I->dest[0], I->src[2], bi_zero(),
287 I->src[0], I->src[1], cmpf);
288
289 /* Fixup the opcode and use it */
290 csel->op = bi_csel_for_mux(must_sign, I->op == BI_OPCODE_MUX_I32, I->mux);
291 return csel;
292 }
293