xref: /aosp_15_r20/external/mesa3d/src/amd/compiler/aco_lower_subdword.cpp (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2024 Valve Corporation
3  *
4  * SPDX-License-Identifier: MIT
5  */
6 
7 #include "aco_builder.h"
8 #include "aco_ir.h"
9 
10 namespace aco {
11 
12 namespace {
13 
14 Temp
dword_temp(Temp tmp)15 dword_temp(Temp tmp)
16 {
17    if (!tmp.regClass().is_subdword())
18       return tmp;
19 
20    RegClass rc = RegClass(tmp.type(), tmp.size());
21    if (tmp.regClass().is_linear())
22       rc = rc.as_linear();
23    return Temp(tmp.id(), rc);
24 }
25 
26 Definition
dword_def(Program * program,Definition def)27 dword_def(Program* program, Definition def)
28 {
29    def.setTemp(dword_temp(def.getTemp()));
30 
31    if (def.isTemp())
32       program->temp_rc[def.tempId()] = def.regClass();
33 
34    return def;
35 }
36 
37 Operand
dword_op(Operand op,bool convert_const)38 dword_op(Operand op, bool convert_const)
39 {
40    if (op.isTemp() || op.isUndefined())
41       op.setTemp(dword_temp(op.getTemp()));
42    else if (convert_const && op.isConstant() && op.bytes() < 4)
43       op = Operand::c32(op.constantValue());
44    return op;
45 }
46 
47 struct op_info {
48    Operand op;
49    unsigned offset; /* byte offset into op. */
50    unsigned bytes;  /* how many bytes to use after offset. */
51 };
52 
53 void
emit_pack(Builder & bld,Definition def,std::vector<op_info> operands)54 emit_pack(Builder& bld, Definition def, std::vector<op_info> operands)
55 {
56    assert(def.regClass().type() == RegType::vgpr);
57 
58    /* split definition into dwords. */
59    if (def.size() > 1) {
60       aco_ptr<Instruction> vec{
61          create_instruction(aco_opcode::p_create_vector, Format::PSEUDO, def.size(), 1)};
62       vec->definitions[0] = def;
63 
64       unsigned op_idx = 0;
65       for (unsigned i = 0; i < def.size(); i++) {
66          std::vector<op_info> sub_operands;
67          Definition sub_def = bld.def(v1);
68          vec->operands[i] = Operand(sub_def.getTemp());
69          unsigned sub_bytes = 0;
70          while (sub_bytes < 4) {
71             unsigned new_bytes = MIN2(operands[op_idx].bytes, 4 - sub_bytes);
72             sub_bytes += new_bytes;
73 
74             sub_operands.push_back({operands[op_idx].op, operands[op_idx].offset, new_bytes});
75 
76             if (new_bytes == operands[op_idx].bytes) {
77                op_idx++;
78                if (op_idx >= operands.size())
79                   break;
80             } else {
81                operands[op_idx].offset += new_bytes;
82                operands[op_idx].bytes -= new_bytes;
83             }
84          }
85 
86          emit_pack(bld, sub_def, std::move(sub_operands));
87       }
88 
89       bld.insert(std::move(vec));
90       return;
91    }
92 
93    /* split operands into dwords. */
94    for (unsigned i = 0; i < operands.size(); i++) {
95       Operand op = operands[i].op;
96       unsigned offset = operands[i].offset;
97       unsigned bytes = operands[i].bytes;
98 
99       if (op.isUndefined() || op.isConstant()) {
100          if (op.isConstant())
101             operands[i].op = Operand::c32(op.constantValue64() >> (offset * 8));
102          else
103             operands[i].op = Operand(v1);
104          operands[i].offset = 0;
105          continue;
106       }
107 
108       if (op.size() == 1)
109          continue;
110 
111       assert(!op.isFixed());
112 
113       RegClass rc = op.isOfType(RegType::vgpr) ? v1 : s1;
114 
115       aco_ptr<Instruction> split{
116          create_instruction(aco_opcode::p_split_vector, Format::PSEUDO, 1, op.size())};
117       split->operands[0] = op;
118       for (unsigned j = 0; j < op.size(); j++)
119          split->definitions[j] = bld.def(rc);
120 
121       unsigned dword_off = offset / 4;
122       unsigned new_bytes = MIN2(4 - (offset % 4), bytes);
123       operands[i].op = Operand(split->definitions[dword_off++].getTemp());
124       operands[i].offset = offset % 4;
125       operands[i].bytes = new_bytes;
126       if (new_bytes != bytes) {
127          i++;
128          operands.insert(
129             std::next(operands.begin(), i),
130             {Operand(split->definitions[dword_off++].getTemp()), 0, bytes - new_bytes});
131       }
132 
133       bld.insert(std::move(split));
134    }
135 
136    /* remove undef operands */
137    for (unsigned i = 0; i < operands.size(); i++) {
138       Operand op = operands[i].op;
139       unsigned bytes = operands[i].bytes;
140       if (!op.isUndefined())
141          continue;
142 
143       if (i != operands.size() - 1) {
144          unsigned offset = operands[i + 1].offset;
145          operands[i + 1].offset -= MIN2(offset, bytes);
146          bytes -= MIN2(offset, bytes);
147       }
148 
149       if (i != 0) {
150          unsigned rem = 4 - (operands[i - 1].bytes + operands[i - 1].offset);
151          operands[i - 1].bytes += MIN2(rem, bytes);
152          bytes -= MIN2(rem, bytes);
153       }
154 
155       if (bytes == 0) {
156          operands.erase(std::next(operands.begin(), i));
157          i--;
158       } else {
159          operands[i].op = Operand::c32(0);
160          operands[i].bytes = bytes;
161       }
162    }
163 
164    /* combine constant operands */
165    for (unsigned i = 1; i < operands.size(); i++) {
166       if (!operands[i].op.isConstant())
167          continue;
168       assert(operands[i].offset == 0);
169 
170       if (!operands[i - 1].op.isConstant())
171          continue;
172 
173       unsigned bytes = operands[i - 1].bytes;
174       uint32_t prev = operands[i - 1].op.constantValue() & BITFIELD_MASK(bytes * 8);
175       uint32_t current = operands[i].op.constantValue() << (bytes * 8);
176 
177       operands[i - 1].op = Operand::c32(prev | current);
178       operands[i - 1].bytes += operands[i].bytes;
179       operands.erase(std::next(operands.begin(), i));
180       i--;
181    }
182 
183    if (operands.size() == 1) {
184       Operand op = operands[0].op;
185       unsigned offset = operands[0].offset;
186       if (offset != 0) {
187          if (op.isOfType(RegType::vgpr))
188             bld.vop2(aco_opcode::v_lshrrev_b32, def, Operand::c32(offset * 8), op);
189          else
190             bld.vop2_e64(aco_opcode::v_lshrrev_b32, def, Operand::c32(offset * 8), op);
191       } else {
192          bld.copy(def, op);
193       }
194       return;
195    }
196 
197    Operand curr = operands[0].op;
198    unsigned shift = (4 - (operands[0].bytes + operands[0].offset)) * 8;
199    if (shift != 0) {
200       if (curr.isConstant())
201          curr = Operand::c32(curr.constantValue() << shift);
202       else if (curr.isOfType(RegType::vgpr))
203          curr = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(shift), curr);
204       else
205          curr = bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc), curr,
206                          Operand::c32(shift));
207    }
208 
209    if (curr.isLiteral())
210       curr = bld.copy(bld.def(s1), curr);
211 
212    unsigned packed_bytes = operands[0].bytes;
213    for (unsigned i = 1; i < operands.size(); i++) {
214       Operand op = operands[i].op;
215       unsigned offset = operands[i].offset;
216 
217       if (offset) {
218          if (op.isOfType(RegType::vgpr))
219             op = bld.vop2(aco_opcode::v_lshrrev_b32, bld.def(v1), Operand::c32(offset * 8), op);
220          else
221             op = bld.sop2(aco_opcode::s_lshr_b32, bld.def(s1), bld.def(s1, scc), op,
222                           Operand::c32(offset * 8));
223       }
224 
225       if (curr.isOfType(RegType::sgpr) && (op.isOfType(RegType::sgpr) || op.isLiteral()))
226          op = bld.copy(bld.def(v1), op);
227       else if (op.isLiteral())
228          op = bld.copy(bld.def(s1), op);
229 
230       Definition next = i + 1 == operands.size() ? def : bld.def(v1);
231       unsigned bytes = i + 1 == operands.size() ? 4 - packed_bytes : operands[i].bytes;
232       curr = bld.vop3(aco_opcode::v_alignbyte_b32, next, op, curr, Operand::c32(bytes));
233       packed_bytes += bytes;
234    }
235 }
236 
237 void
emit_split_vector(Builder & bld,aco_ptr<Instruction> & instr)238 emit_split_vector(Builder& bld, aco_ptr<Instruction>& instr)
239 {
240    bool needs_lowering = false;
241    for (Definition& def : instr->definitions)
242       needs_lowering |= def.regClass().is_subdword();
243 
244    if (!needs_lowering) {
245       bld.insert(std::move(instr));
246       return;
247    }
248 
249    std::vector<op_info> operands = {{dword_op(instr->operands[0], true), 0, 0}};
250    for (Definition& def : instr->definitions) {
251       operands[0].bytes = def.bytes();
252       emit_pack(bld, dword_def(bld.program, def), operands);
253       operands[0].offset += def.bytes();
254    }
255 }
256 
257 void
emit_create_vector(Builder & bld,aco_ptr<Instruction> & instr)258 emit_create_vector(Builder& bld, aco_ptr<Instruction>& instr)
259 {
260    instr->definitions[0] = dword_def(bld.program, instr->definitions[0]);
261    bool needs_lowering = false;
262    for (Operand& op : instr->operands)
263       needs_lowering |= (op.hasRegClass() && op.regClass().is_subdword()) || op.bytes() < 4;
264 
265    if (!needs_lowering) {
266       bld.insert(std::move(instr));
267       return;
268    }
269 
270    std::vector<op_info> operands;
271    operands.reserve(instr->operands.size());
272    for (const Operand& op : instr->operands)
273       operands.push_back({dword_op(op, true), 0, op.bytes()});
274 
275    emit_pack(bld, instr->definitions[0], std::move(operands));
276 }
277 
278 void
process_block(Program * program,Block * block)279 process_block(Program* program, Block* block)
280 {
281    std::vector<aco_ptr<Instruction>> instructions;
282    instructions.reserve(block->instructions.size());
283 
284    Builder bld(program, &instructions);
285    for (unsigned idx = 0; idx < block->instructions.size(); idx++) {
286       aco_ptr<Instruction> instr = std::move(block->instructions[idx]);
287 
288       if (instr->opcode == aco_opcode::p_split_vector) {
289          emit_split_vector(bld, instr);
290       } else if (instr->opcode == aco_opcode::p_create_vector) {
291          emit_create_vector(bld, instr);
292       } else if (instr->opcode == aco_opcode::p_extract_vector &&
293                  instr->definitions[0].regClass().is_subdword()) {
294          const Definition& def = instr->definitions[0];
295          unsigned offset = def.bytes() * instr->operands[1].constantValue();
296          std::vector<op_info> operands = {
297             {dword_op(instr->operands[0], true), offset, def.bytes()}};
298          emit_pack(bld, dword_def(program, def), std::move(operands));
299       } else {
300          for (Definition& def : instr->definitions)
301             def = dword_def(program, def);
302 
303          for (Operand& op : instr->operands)
304             op = dword_op(op, instr->isPseudo());
305 
306          bld.insert(std::move(instr));
307       }
308    }
309 
310    block->instructions = std::move(instructions);
311 }
312 
313 } /* end namespace */
314 
315 void
lower_subdword(Program * program)316 lower_subdword(Program* program)
317 {
318    for (Block& block : program->blocks)
319       process_block(program, &block);
320 }
321 
322 } /* end namespace aco */
323