xref: /aosp_15_r20/external/mesa3d/src/nouveau/mme/mme_tu104_builder.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2022 Collabora Ltd.
3  * SPDX-License-Identifier: MIT
4  */
5 #include "mme_builder.h"
6 
7 #include <stdio.h>
8 #include <stdlib.h>
9 
10 #define MME_TU104_MAX_REGS 23
11 
12 void
mme_tu104_builder_init(struct mme_builder * b)13 mme_tu104_builder_init(struct mme_builder *b)
14 {
15    mme_reg_alloc_init(&b->reg_alloc, BITFIELD_MASK(MME_TU104_MAX_REGS));
16 }
17 
18 static void
mme_tu104_new_inst(struct mme_tu104_builder * tb)19 mme_tu104_new_inst(struct mme_tu104_builder *tb)
20 {
21    struct mme_tu104_inst noop = { MME_TU104_INST_DEFAULTS };
22    assert(tb->inst_count < ARRAY_SIZE(tb->insts));
23    tb->insts[tb->inst_count] = noop;
24    tb->inst_count++;
25    tb->inst_parts = 0;
26 }
27 
28 static struct mme_tu104_inst *
mme_tu104_cur_inst(struct mme_tu104_builder * tb)29 mme_tu104_cur_inst(struct mme_tu104_builder *tb)
30 {
31    assert(tb->inst_count > 0 && tb->inst_count < ARRAY_SIZE(tb->insts));
32    return &tb->insts[tb->inst_count - 1];
33 }
34 
35 static inline void
mme_tu104_set_inst_parts(struct mme_tu104_builder * tb,enum mme_tu104_instr_parts parts)36 mme_tu104_set_inst_parts(struct mme_tu104_builder *tb,
37                          enum mme_tu104_instr_parts parts)
38 {
39    assert(!(tb->inst_parts & parts));
40    tb->inst_parts |= parts;
41 }
42 
43 void
mme_tu104_add_inst(struct mme_builder * b,const struct mme_tu104_inst * inst)44 mme_tu104_add_inst(struct mme_builder *b,
45                    const struct mme_tu104_inst *inst)
46 {
47    struct mme_tu104_builder *tb = &b->tu104;
48 
49    if (tb->inst_parts || tb->inst_count == 0)
50       mme_tu104_new_inst(&b->tu104);
51    *mme_tu104_cur_inst(tb) = *inst;
52    mme_tu104_new_inst(tb);
53 }
54 
55 static unsigned
mme_tu104_reg_num_imms(enum mme_tu104_reg reg)56 mme_tu104_reg_num_imms(enum mme_tu104_reg reg)
57 {
58    switch (reg) {
59    case MME_TU104_REG_IMM:
60    case MME_TU104_REG_IMMPAIR:
61       return 1;
62    case MME_TU104_REG_IMM32:
63       return 2;
64    default:
65       return 0;
66    }
67 }
68 
69 static bool
mme_tu104_next_inst_can_add_alu(struct mme_tu104_builder * tb,const struct mme_tu104_alu * alu,bool must_be_alu0)70 mme_tu104_next_inst_can_add_alu(struct mme_tu104_builder *tb,
71                                 const struct mme_tu104_alu *alu,
72                                 bool must_be_alu0)
73 {
74    if (tb->inst_count == 0)
75       return false;
76 
77    /* Most ALU can be re-ordered with respect to outputs but a couple can't.
78     * In the case where it may depend on an output, flush if we have one.
79     */
80    if (mme_tu104_alu_op_may_depend_on_mthd(alu->op) &&
81        tb->inst_parts & (MME_TU104_INSTR_PART_MTHD0 |
82                          MME_TU104_INSTR_PART_EMIT0 |
83                          MME_TU104_INSTR_PART_MTHD1 |
84                          MME_TU104_INSTR_PART_EMIT1))
85       return false;
86 
87    if (must_be_alu0 && (tb->inst_parts & MME_TU104_INSTR_PART_ALU0))
88       return false;
89 
90    if (tb->inst_parts & MME_TU104_INSTR_PART_ALU1) {
91       assert(tb->inst_parts & MME_TU104_INSTR_PART_ALU0);
92       return false;
93    }
94 
95    assert(alu->src[0] != MME_TU104_REG_LOAD1 &&
96           alu->src[1] != MME_TU104_REG_LOAD0 &&
97           alu->src[1] != MME_TU104_REG_LOAD1);
98    if (alu->src[0] == MME_TU104_REG_LOAD0 &&
99        (tb->inst_parts & MME_TU104_INSTR_PART_LOAD1))
100       return false;
101 
102    const unsigned used_imms =
103       util_bitcount(tb->inst_parts & (MME_TU104_INSTR_PART_IMM0 |
104                                       MME_TU104_INSTR_PART_IMM1));
105 
106    const unsigned num_imms = mme_tu104_alu_op_has_implicit_imm(alu->op) +
107                              mme_tu104_reg_num_imms(alu->src[0]) +
108                              mme_tu104_reg_num_imms(alu->src[1]);
109    assert(num_imms <= 2);
110    if (num_imms + used_imms > 2)
111       return false;
112 
113    if (mme_tu104_alu_op_has_implicit_imm(alu->op) &&
114        (tb->inst_parts & MME_TU104_INSTR_PART_ALU0) &&
115        (tb->inst_parts & MME_TU104_INSTR_PART_IMM1))
116       return false;
117 
118    struct mme_tu104_inst *cur = mme_tu104_cur_inst(tb);
119 
120    if ((tb->inst_parts & MME_TU104_INSTR_PART_ALU0) &&
121        mme_tu104_alus_have_dependency(&cur->alu[0], alu))
122       return false;
123 
124    /* No idea why the HW has this rule but it does */
125    if (alu->op == MME_TU104_ALU_OP_STATE &&
126        (tb->inst_parts & MME_TU104_INSTR_PART_ALU0) &&
127        cur->alu[0].op == MME_TU104_ALU_OP_STATE)
128       return false;
129 
130    return true;
131 }
132 
133 static unsigned
mme_tu104_push_alu(struct mme_tu104_builder * tb,const struct mme_tu104_alu * alu,uint16_t imm0,uint16_t imm1,uint16_t implicit_imm,bool must_be_alu0)134 mme_tu104_push_alu(struct mme_tu104_builder *tb,
135                    const struct mme_tu104_alu *alu,
136                    uint16_t imm0, uint16_t imm1,
137                    uint16_t implicit_imm,
138                    bool must_be_alu0)
139 {
140    if (!mme_tu104_next_inst_can_add_alu(tb, alu, must_be_alu0))
141       mme_tu104_new_inst(tb);
142 
143    if (mme_tu104_alu_op_has_implicit_imm(alu->op) &&
144        (tb->inst_parts & MME_TU104_INSTR_PART_IMM0))
145       tb->inst_parts |= MME_TU104_INSTR_PART_ALU0;
146 
147    assert(mme_tu104_next_inst_can_add_alu(tb, alu, must_be_alu0));
148 
149    struct mme_tu104_inst *inst = mme_tu104_cur_inst(tb);
150    unsigned alu_idx = (tb->inst_parts & MME_TU104_INSTR_PART_ALU0) != 0;
151    assert(alu_idx == 0 || !must_be_alu0);
152 
153    switch (alu->op) {
154    case MME_TU104_ALU_OP_ADDC:
155       assert(inst->alu[0].op == MME_TU104_ALU_OP_ADD);
156       assert(alu_idx == 1);
157       break;
158    case MME_TU104_ALU_OP_SUBB:
159       assert(inst->alu[0].op == MME_TU104_ALU_OP_SUB);
160       assert(alu_idx == 1);
161       break;
162    case MME_TU104_ALU_OP_MULH:
163       assert(inst->alu[0].op == MME_TU104_ALU_OP_MUL ||
164              inst->alu[0].op == MME_TU104_ALU_OP_MULU);
165       assert(alu_idx == 1);
166       break;
167    default:
168       break;
169    }
170 
171    mme_tu104_set_inst_parts(tb, MME_TU104_INSTR_PART_ALU0 << alu_idx);
172    inst->alu[alu_idx] = *alu;
173 
174    if (alu->src[0] == MME_TU104_REG_LOAD0) {
175       unsigned next_load = (tb->inst_parts & MME_TU104_INSTR_PART_LOAD0) != 0;
176       mme_tu104_set_inst_parts(tb, MME_TU104_INSTR_PART_LOAD0 << next_load);
177       inst->alu[alu_idx].src[0] = MME_TU104_REG_LOAD0 + next_load;
178    }
179 
180    unsigned next_imm = (tb->inst_parts & MME_TU104_INSTR_PART_IMM0) != 0;
181    const unsigned num_imms = mme_tu104_reg_num_imms(alu->src[0]) +
182                              mme_tu104_reg_num_imms(alu->src[1]);
183 
184    if (mme_tu104_alu_op_has_implicit_imm(alu->op)) {
185       mme_tu104_set_inst_parts(tb, MME_TU104_INSTR_PART_IMM0 << alu_idx);
186       inst->imm[alu_idx] = implicit_imm;
187       assert(num_imms <= 1);
188       next_imm = 1 - alu_idx;
189    }
190 
191    if (num_imms == 1) {
192       mme_tu104_set_inst_parts(tb, MME_TU104_INSTR_PART_IMM0 << next_imm);
193       inst->imm[next_imm] = imm0;
194       assert(alu->src[0] != MME_TU104_REG_IMM32 &&
195              alu->src[0] != MME_TU104_REG_IMMPAIR &&
196              alu->src[1] != MME_TU104_REG_IMM32 &&
197              alu->src[1] != MME_TU104_REG_IMMPAIR);
198       if (alu->src[0] == MME_TU104_REG_IMM && alu_idx != next_imm)
199          inst->alu[alu_idx].src[0] = MME_TU104_REG_IMMPAIR;
200       if (alu->src[1] == MME_TU104_REG_IMM && alu_idx != next_imm)
201          inst->alu[alu_idx].src[1] = MME_TU104_REG_IMMPAIR;
202    } else if (num_imms == 2) {
203       mme_tu104_set_inst_parts(tb, MME_TU104_INSTR_PART_IMM0 |
204                                    MME_TU104_INSTR_PART_IMM1);
205       inst->imm[0] = imm0;
206       inst->imm[1] = imm1;
207    }
208 
209    return alu_idx;
210 }
211 
212 static inline enum mme_tu104_reg
mme_value_alu_reg(struct mme_value val)213 mme_value_alu_reg(struct mme_value val)
214 {
215    switch (val.type) {
216    case MME_VALUE_TYPE_ZERO:
217       return MME_TU104_REG_ZERO;
218    case MME_VALUE_TYPE_IMM:
219       if (val.imm == 0)
220          return MME_TU104_REG_ZERO;
221       else if (val.imm == (uint32_t)(int16_t)val.imm)
222          return MME_TU104_REG_IMM;
223       else
224          return MME_TU104_REG_IMM32;
225    case MME_VALUE_TYPE_REG:
226       assert(val.reg <= 23);
227       return MME_TU104_REG_R0 + val.reg;
228    }
229    unreachable("Invalid value type");
230 }
231 
232 static void
build_alu_to(struct mme_builder * b,struct mme_value dst,enum mme_tu104_alu_op op,struct mme_value x,struct mme_value y,uint16_t implicit_imm,bool must_be_alu0)233 build_alu_to(struct mme_builder *b,
234              struct mme_value dst,
235              enum mme_tu104_alu_op op,
236              struct mme_value x,
237              struct mme_value y,
238              uint16_t implicit_imm,
239              bool must_be_alu0)
240 {
241    assert(dst.type == MME_VALUE_TYPE_ZERO ||
242           dst.type == MME_VALUE_TYPE_REG);
243 
244    enum mme_tu104_reg x_reg = mme_value_alu_reg(x);
245    enum mme_tu104_reg y_reg = mme_value_alu_reg(y);
246 
247    unsigned num_imms = mme_tu104_alu_op_has_implicit_imm(op) +
248                        mme_tu104_reg_num_imms(x_reg) +
249                        mme_tu104_reg_num_imms(y_reg);
250    while (num_imms > 2) {
251       if (y_reg == MME_TU104_REG_IMM32) {
252          y = mme_mov(b, y);
253          y_reg = mme_value_alu_reg(y);
254          num_imms -= 2;
255       } else if (x_reg == MME_TU104_REG_IMM32) {
256          x = mme_mov(b, x);
257          x_reg = mme_value_alu_reg(x);
258          num_imms -= 2;
259       } else if (mme_tu104_reg_num_imms(y_reg) > 0) {
260          assert(mme_tu104_reg_num_imms(y_reg) == 1);
261          y = mme_mov(b, y);
262          y_reg = mme_value_alu_reg(y);
263          num_imms--;
264       } else if (mme_tu104_reg_num_imms(x_reg) > 0) {
265          assert(mme_tu104_reg_num_imms(x_reg) == 1);
266          x = mme_mov(b, x);
267          x_reg = mme_value_alu_reg(x);
268          num_imms--;
269       }
270    }
271 
272    uint16_t imm0 = 0, imm1 = 0;
273    if (x_reg == MME_TU104_REG_IMM32) {
274       assert(mme_tu104_reg_num_imms(y_reg) == 0);
275       imm0 = x.imm >> 16;
276       imm1 = x.imm;
277    } else if (y_reg == MME_TU104_REG_IMM32) {
278       assert(mme_tu104_reg_num_imms(x_reg) == 0);
279       imm0 = y.imm >> 16;
280       imm1 = y.imm;
281    } else if (x_reg == MME_TU104_REG_IMM) {
282       assert(mme_tu104_reg_num_imms(y_reg) <= 1);
283       imm0 = x.imm;
284       if (y_reg == MME_TU104_REG_IMM) {
285          imm1 = y.imm;
286          y_reg = MME_TU104_REG_IMMPAIR;
287       }
288    } else if (y_reg == MME_TU104_REG_IMM) {
289       imm0 = y.imm;
290    } else {
291       assert(mme_tu104_reg_num_imms(x_reg) == 0);
292       assert(mme_tu104_reg_num_imms(y_reg) == 0);
293    }
294 
295    struct mme_tu104_alu alu = {
296       .dst = mme_value_alu_reg(dst),
297       .op = op,
298       .src = { x_reg, y_reg },
299    };
300    mme_tu104_push_alu(&b->tu104, &alu, imm0, imm1, implicit_imm, must_be_alu0);
301 }
302 
303 static enum mme_tu104_alu_op
mme_to_tu104_alu_op(enum mme_alu_op op)304 mme_to_tu104_alu_op(enum mme_alu_op op)
305 {
306    switch (op) {
307 #define ALU_CASE(op) case MME_ALU_OP_##op: return MME_TU104_ALU_OP_##op;
308    ALU_CASE(ADD)
309    ALU_CASE(ADDC)
310    ALU_CASE(SUB)
311    ALU_CASE(SUBB)
312    ALU_CASE(MUL)
313    ALU_CASE(MULH)
314    ALU_CASE(MULU)
315    ALU_CASE(CLZ)
316    ALU_CASE(SLL)
317    ALU_CASE(SRL)
318    ALU_CASE(SRA)
319    ALU_CASE(AND)
320    ALU_CASE(NAND)
321    ALU_CASE(OR)
322    ALU_CASE(XOR)
323    ALU_CASE(SLT)
324    ALU_CASE(SLTU)
325    ALU_CASE(SLE)
326    ALU_CASE(SLEU)
327    ALU_CASE(SEQ)
328    ALU_CASE(DREAD)
329    ALU_CASE(DWRITE)
330 #undef ALU_CASE
331    default:
332       unreachable("Unsupported MME ALU op");
333    }
334 }
335 
336 void
mme_tu104_alu_to(struct mme_builder * b,struct mme_value dst,enum mme_alu_op op,struct mme_value x,struct mme_value y)337 mme_tu104_alu_to(struct mme_builder *b,
338                  struct mme_value dst,
339                  enum mme_alu_op op,
340                  struct mme_value x,
341                  struct mme_value y)
342 {
343    switch (op) {
344    case MME_ALU_OP_NOT:
345       mme_xor_to(b, dst, x, mme_imm(~(uint32_t)0));
346       break;
347 
348    case MME_ALU_OP_AND_NOT: {
349       struct mme_value not_y;
350       switch (y.type) {
351       case MME_VALUE_TYPE_ZERO:
352          not_y = mme_imm(~(uint32_t)0);
353          break;
354 
355       case MME_VALUE_TYPE_IMM:
356          if (y.imm == ~(uint32_t)0)
357             not_y = mme_zero();
358          else
359             not_y = mme_imm(~y.imm);
360          break;
361 
362       case MME_VALUE_TYPE_REG:
363          not_y = mme_not(b, y);
364          break;
365 
366       default:
367          unreachable("Unknown MME value type");
368       }
369 
370       mme_and_to(b, dst, x, not_y);
371 
372       if (not_y.type == MME_VALUE_TYPE_REG)
373          mme_free_reg(b, not_y);
374       break;
375    }
376 
377    default:
378       build_alu_to(b, dst, mme_to_tu104_alu_op(op), x, y, 0, false);
379    }
380 }
381 
382 void
mme_tu104_alu64_to(struct mme_builder * b,struct mme_value64 dst,enum mme_alu_op op_lo,enum mme_alu_op op_hi,struct mme_value64 x,struct mme_value64 y)383 mme_tu104_alu64_to(struct mme_builder *b,
384                    struct mme_value64 dst,
385                    enum mme_alu_op op_lo,
386                    enum mme_alu_op op_hi,
387                    struct mme_value64 x,
388                    struct mme_value64 y)
389 {
390    assert(dst.lo.type == MME_VALUE_TYPE_REG);
391    assert(dst.hi.type == MME_VALUE_TYPE_REG);
392 
393    /* We can't have any non-zero immediates in the high part or else we might
394     * get half-way through emitting and realize we've run out.
395     */
396    if (x.hi.type == MME_VALUE_TYPE_IMM && x.hi.imm != 0)
397       x.hi = mme_mov(b, x.hi);
398    if (y.hi.type == MME_VALUE_TYPE_IMM && y.hi.imm != 0)
399       y.hi = mme_mov(b, y.hi);
400 
401    build_alu_to(b, dst.lo, mme_to_tu104_alu_op(op_lo), x.lo, y.lo, 0, true);
402    build_alu_to(b, dst.hi, mme_to_tu104_alu_op(op_hi), x.hi, y.hi, 0, false);
403 }
404 
405 void
mme_tu104_merge_to(struct mme_builder * b,struct mme_value dst,struct mme_value x,struct mme_value y,uint16_t dst_pos,uint16_t bits,uint16_t src_pos)406 mme_tu104_merge_to(struct mme_builder *b, struct mme_value dst,
407                    struct mme_value x, struct mme_value y,
408                    uint16_t dst_pos, uint16_t bits, uint16_t src_pos)
409 {
410    assert(dst_pos < 32);
411    assert(bits < 32);
412    assert(src_pos < 32);
413    uint32_t ctrl = (dst_pos << 10) | (bits << 5) | src_pos;
414    build_alu_to(b, dst, MME_TU104_ALU_OP_MERGE, x, y, ctrl, false);
415 }
416 
417 void
mme_tu104_state_arr_to(struct mme_builder * b,struct mme_value dst,uint16_t state,struct mme_value index)418 mme_tu104_state_arr_to(struct mme_builder *b, struct mme_value dst,
419                        uint16_t state, struct mme_value index)
420 {
421    assert(state % 4 == 0);
422    build_alu_to(b, dst, MME_TU104_ALU_OP_STATE,
423                 mme_imm(state >> 2), index, 0, false);
424 }
425 
426 void
mme_tu104_load_barrier(struct mme_builder * b)427 mme_tu104_load_barrier(struct mme_builder *b)
428 {
429    build_alu_to(b, mme_zero(), MME_TU104_ALU_OP_EXTENDED,
430                 mme_imm(0x1000), mme_imm(1), 0, false);
431 }
432 
433 void
mme_tu104_load_to(struct mme_builder * b,struct mme_value dst)434 mme_tu104_load_to(struct mme_builder *b, struct mme_value dst)
435 {
436    assert(dst.type == MME_VALUE_TYPE_REG ||
437           dst.type == MME_VALUE_TYPE_ZERO);
438 
439    struct mme_tu104_alu alu = {
440       .dst = mme_value_alu_reg(dst),
441       .op = MME_TU104_ALU_OP_ADD,
442       .src = {
443          MME_TU104_REG_LOAD0,
444          MME_TU104_REG_ZERO,
445       },
446    };
447    mme_tu104_push_alu(&b->tu104, &alu, 0, 0, 0, 0);
448 }
449 
450 static bool
mme_tu104_next_inst_can_add_mthd(struct mme_tu104_builder * tb,enum mme_tu104_out_op out)451 mme_tu104_next_inst_can_add_mthd(struct mme_tu104_builder *tb,
452                                  enum mme_tu104_out_op out)
453 {
454    if (tb->inst_count == 0)
455       return false;
456 
457    if (tb->inst_parts & MME_TU104_INSTR_PART_MTHD1) {
458       assert(tb->inst_parts & MME_TU104_INSTR_PART_MTHD0);
459       return false;
460    }
461 
462    /* We can't have a mthd in an op with STATE */
463    struct mme_tu104_inst *cur = mme_tu104_cur_inst(tb);
464    if (((tb->inst_parts & MME_TU104_INSTR_PART_ALU0) &&
465         cur->alu[0].op == MME_TU104_ALU_OP_STATE) ||
466        ((tb->inst_parts & MME_TU104_INSTR_PART_ALU1) &&
467         cur->alu[1].op == MME_TU104_ALU_OP_STATE))
468       return false;
469 
470    if (out == MME_TU104_OUT_OP_IMM0 &&
471        (tb->inst_parts & MME_TU104_INSTR_PART_IMM0) &&
472        (tb->inst_parts & MME_TU104_INSTR_PART_IMM1))
473       return false;
474 
475    return true;
476 }
477 
478 static void
mme_tu104_push_mthd(struct mme_tu104_builder * tb,enum mme_tu104_out_op out,uint16_t imm)479 mme_tu104_push_mthd(struct mme_tu104_builder *tb,
480                     enum mme_tu104_out_op out,
481                     uint16_t imm)
482 {
483    struct mme_tu104_inst *inst = mme_tu104_cur_inst(tb);
484    if (out == MME_TU104_OUT_OP_IMM0) {
485       unsigned imm_idx = (tb->inst_parts & MME_TU104_INSTR_PART_IMM0) != 0;
486       mme_tu104_set_inst_parts(tb, MME_TU104_INSTR_PART_IMM0 << imm_idx);
487       out = MME_TU104_OUT_OP_IMM0 + imm_idx;
488       inst->imm[imm_idx] = imm;
489    }
490    unsigned mthd_idx = (tb->inst_parts & MME_TU104_INSTR_PART_MTHD0) != 0;
491    /* If we're pushing mthd1, the next emit MUST be emit1 */
492    if (mthd_idx > 0 && !(tb->inst_parts & MME_TU104_INSTR_PART_EMIT0))
493       mme_tu104_set_inst_parts(tb, MME_TU104_INSTR_PART_EMIT0);
494    mme_tu104_set_inst_parts(tb, MME_TU104_INSTR_PART_MTHD0 << mthd_idx);
495    inst->out[mthd_idx].mthd = out;
496 }
497 
498 void
mme_tu104_mthd(struct mme_builder * b,uint16_t mthd,struct mme_value index)499 mme_tu104_mthd(struct mme_builder *b, uint16_t mthd, struct mme_value index)
500 {
501    struct mme_tu104_builder *tb = &b->tu104;
502 
503    assert(mthd % 4 == 0);
504    uint32_t mthd_imm = (1 << 12) | (mthd >> 2);
505 
506    if (index.type == MME_VALUE_TYPE_REG) {
507       if (!mme_tu104_next_inst_can_add_mthd(tb, MME_TU104_OUT_OP_ALU0))
508          mme_tu104_new_inst(tb);
509 
510       const struct mme_tu104_alu alu = {
511          .dst = MME_TU104_REG_ZERO,
512          .op = MME_TU104_ALU_OP_ADD,
513          .src = {
514             MME_TU104_REG_IMM,
515             mme_value_alu_reg(index),
516          },
517       };
518       unsigned alu_idx = mme_tu104_push_alu(tb, &alu, mthd_imm, 0, 0, false);
519       mme_tu104_push_mthd(tb, MME_TU104_OUT_OP_ALU0 + alu_idx, 0);
520    } else {
521       if (!mme_tu104_next_inst_can_add_mthd(tb, MME_TU104_OUT_OP_IMM0))
522          mme_tu104_new_inst(tb);
523 
524       if (index.type == MME_VALUE_TYPE_IMM)
525          mthd_imm += index.imm;
526 
527       mme_tu104_push_mthd(tb, MME_TU104_OUT_OP_IMM0, mthd_imm);
528    }
529 }
530 
531 static bool
mme_tu104_next_inst_can_add_emit(struct mme_tu104_builder * tb,enum mme_tu104_out_op out,uint32_t imm)532 mme_tu104_next_inst_can_add_emit(struct mme_tu104_builder *tb,
533                                  enum mme_tu104_out_op out,
534                                  uint32_t imm)
535 {
536    assert(tb->inst_count > 0);
537 
538    if (tb->inst_parts & MME_TU104_INSTR_PART_EMIT1) {
539       assert(tb->inst_parts & MME_TU104_INSTR_PART_EMIT0);
540       return false;
541    }
542 
543    const unsigned used_imms =
544       util_bitcount(tb->inst_parts & (MME_TU104_INSTR_PART_IMM0 |
545                                       MME_TU104_INSTR_PART_IMM1));
546    if (out == MME_TU104_OUT_OP_IMM0 && used_imms > 1)
547       return false;
548    if (out == MME_TU104_OUT_OP_IMM32 && used_imms > 0)
549       return false;
550 
551    return true;
552 }
553 
554 static void
mme_tu104_push_emit(struct mme_tu104_builder * tb,enum mme_tu104_out_op out,uint32_t imm)555 mme_tu104_push_emit(struct mme_tu104_builder *tb,
556                     enum mme_tu104_out_op out,
557                     uint32_t imm)
558 {
559    struct mme_tu104_inst *inst = mme_tu104_cur_inst(tb);
560    if (out == MME_TU104_OUT_OP_IMM0) {
561       unsigned imm_idx = (tb->inst_parts & MME_TU104_INSTR_PART_IMM0) != 0;
562       mme_tu104_set_inst_parts(tb, MME_TU104_INSTR_PART_IMM0 << imm_idx);
563       out = MME_TU104_OUT_OP_IMM0 + imm_idx;
564       inst->imm[imm_idx] = imm;
565    } else if (out == MME_TU104_OUT_OP_IMM32) {
566       mme_tu104_set_inst_parts(tb, MME_TU104_INSTR_PART_IMM0 |
567                                    MME_TU104_INSTR_PART_IMM1);
568       inst->imm[0] = imm >> 16;
569       inst->imm[1] = imm;
570    }
571    unsigned emit_idx = (tb->inst_parts & MME_TU104_INSTR_PART_EMIT0) != 0;
572    mme_tu104_set_inst_parts(tb, MME_TU104_INSTR_PART_EMIT0 << emit_idx);
573    /* If we're pushing emitN, the next mthd MUST be mthdN+1 */
574    if (!(tb->inst_parts & (MME_TU104_INSTR_PART_MTHD0 << emit_idx)))
575       mme_tu104_set_inst_parts(tb, MME_TU104_INSTR_PART_MTHD0 << emit_idx);
576    inst->out[emit_idx].emit = out;
577 }
578 
579 static int
find_alu_idx_for_dst(const struct mme_tu104_inst * inst,struct mme_value dst)580 find_alu_idx_for_dst(const struct mme_tu104_inst *inst,
581                      struct mme_value dst)
582 {
583    assert(dst.type == MME_VALUE_TYPE_REG);
584    for (int i = 0; i < 2; i++) {
585       if (inst->alu[i].dst == mme_value_alu_reg(dst))
586          return i;
587    }
588    return -1;
589 }
590 
591 void
mme_tu104_emit(struct mme_builder * b,struct mme_value data)592 mme_tu104_emit(struct mme_builder *b, struct mme_value data)
593 {
594    struct mme_tu104_builder *tb = &b->tu104;
595 
596    if (data.type == MME_VALUE_TYPE_REG) {
597       if (!mme_tu104_next_inst_can_add_emit(tb, MME_TU104_OUT_OP_ALU0, 0))
598          mme_tu104_new_inst(tb);
599 
600       struct mme_tu104_inst *inst = mme_tu104_cur_inst(tb);
601       int alu_idx = find_alu_idx_for_dst(inst, data);
602       if (alu_idx < 0) {
603          const struct mme_tu104_alu alu = {
604             .dst = MME_TU104_REG_ZERO,
605             .op = MME_TU104_ALU_OP_ADD,
606             .src = {
607                mme_value_alu_reg(data),
608                MME_TU104_REG_ZERO,
609             },
610          };
611          alu_idx = mme_tu104_push_alu(tb, &alu, 0, 0, 0, false);
612       }
613       mme_tu104_push_emit(tb, MME_TU104_OUT_OP_ALU0 + alu_idx, 0);
614    } else {
615       enum mme_tu104_out_op out;
616       uint32_t imm;
617       if (data.type == MME_VALUE_TYPE_ZERO) {
618          out = MME_TU104_OUT_OP_IMM0;
619          imm = 0;
620       } else {
621          assert(data.type == MME_VALUE_TYPE_IMM);
622          imm = data.imm;
623          out = data.imm == (uint16_t)data.imm ? MME_TU104_OUT_OP_IMM0 :
624                                                 MME_TU104_OUT_OP_IMM32;
625       }
626       if (!mme_tu104_next_inst_can_add_emit(tb, out, 0))
627          mme_tu104_new_inst(tb);
628 
629       mme_tu104_push_emit(tb, out, imm);
630    }
631 }
632 
633 static enum mme_tu104_alu_op
mme_cmp_to_tu104_branch_op(enum mme_cmp_op op)634 mme_cmp_to_tu104_branch_op(enum mme_cmp_op op)
635 {
636    switch (op) {
637 #define CMP_CASE(op) case MME_CMP_OP_##op: return MME_TU104_ALU_OP_B##op;
638    CMP_CASE(LT)
639    CMP_CASE(LTU)
640    CMP_CASE(LE)
641    CMP_CASE(LEU)
642    CMP_CASE(EQ)
643 #undef CMP_CASE
644    default:
645       unreachable("Unsupported MME CMP op");
646    }
647 }
648 
649 static void
mme_tu104_start_cf(struct mme_builder * b,enum mme_cf_type type,enum mme_tu104_alu_op op,struct mme_value x,struct mme_value y,uint16_t control)650 mme_tu104_start_cf(struct mme_builder *b,
651                    enum mme_cf_type type,
652                    enum mme_tu104_alu_op op,
653                    struct mme_value x,
654                    struct mme_value y,
655                    uint16_t control)
656 {
657    struct mme_tu104_builder *tb = &b->tu104;
658 
659    /* The HW seems to want at least LOOP to always be in alu0 */
660    build_alu_to(b, mme_zero(), op, x, y, control, true);
661 
662    uint16_t ip = tb->inst_count - 1;
663    assert(tb->insts[ip].alu[0].op == op);
664 
665    tb->cf_stack[tb->cf_depth++] = (struct mme_cf) {
666       .type = type,
667       .start_ip = ip,
668    };
669 
670    /* The inside of control-flow needs to start with a new instruction */
671    mme_tu104_new_inst(tb);
672 }
673 
674 static struct mme_cf
mme_tu104_end_cf(struct mme_builder * b,enum mme_cf_type type)675 mme_tu104_end_cf(struct mme_builder *b, enum mme_cf_type type)
676 {
677    struct mme_tu104_builder *tb = &b->tu104;
678 
679    if (tb->inst_parts)
680       mme_tu104_new_inst(tb);
681 
682    assert(tb->cf_depth > 0);
683    struct mme_cf cf = tb->cf_stack[--tb->cf_depth];
684    assert(cf.type == type);
685 
686    int delta = tb->inst_count - cf.start_ip - 1;
687    assert(delta > 0 && delta < (1 << 13));
688    tb->insts[cf.start_ip].imm[0] |= delta;
689 
690    return cf;
691 }
692 
693 void
mme_tu104_start_loop(struct mme_builder * b,struct mme_value count)694 mme_tu104_start_loop(struct mme_builder *b, struct mme_value count)
695 {
696    mme_tu104_start_cf(b, MME_CF_TYPE_LOOP, MME_TU104_ALU_OP_LOOP,
697                       count, mme_zero(), 0);
698 }
699 
700 void
mme_tu104_end_loop(struct mme_builder * b)701 mme_tu104_end_loop(struct mme_builder *b)
702 {
703    mme_tu104_end_cf(b, MME_CF_TYPE_LOOP);
704 }
705 
706 void
mme_tu104_start_if(struct mme_builder * b,enum mme_cmp_op op,bool if_true,struct mme_value x,struct mme_value y)707 mme_tu104_start_if(struct mme_builder *b,
708                    enum mme_cmp_op op, bool if_true,
709                    struct mme_value x, struct mme_value y)
710 {
711    uint16_t control = if_true ? 0 : BITFIELD_BIT(15);
712    mme_tu104_start_cf(b, MME_CF_TYPE_IF, mme_cmp_to_tu104_branch_op(op),
713                       x, y, control);
714 }
715 
716 void
mme_tu104_end_if(struct mme_builder * b)717 mme_tu104_end_if(struct mme_builder *b)
718 {
719    mme_tu104_end_cf(b, MME_CF_TYPE_IF);
720 }
721 
722 void
mme_tu104_start_while(struct mme_builder * b)723 mme_tu104_start_while(struct mme_builder *b)
724 {
725    mme_tu104_start_cf(b, MME_CF_TYPE_WHILE, MME_TU104_ALU_OP_JAL,
726                       mme_zero(), mme_zero(), BITFIELD_BIT(15));
727 }
728 
729 void
mme_tu104_end_while(struct mme_builder * b,enum mme_cmp_op cmp,bool if_true,struct mme_value x,struct mme_value y)730 mme_tu104_end_while(struct mme_builder *b,
731                     enum mme_cmp_op cmp,
732                     bool if_true,
733                     struct mme_value x,
734                     struct mme_value y)
735 {
736    struct mme_tu104_builder *tb = &b->tu104;
737 
738    struct mme_cf cf = mme_tu104_end_cf(b, MME_CF_TYPE_WHILE);
739 
740    int delta = tb->inst_count - cf.start_ip - 2;
741    uint16_t control = (-delta & BITFIELD_MASK(13)) |
742                       (if_true ? BITFIELD_BIT(15) : 0);
743    build_alu_to(b, mme_zero(), mme_cmp_to_tu104_branch_op(cmp),
744                 x, y, control, true);
745 
746    /* Start a new instruction so next thing to come along doesn't end up being
747     * the 2nd half of of our back-edge while.
748     */
749    mme_tu104_new_inst(tb);
750 }
751 
mme_tu104_exit_if(struct mme_builder * b,enum mme_cmp_op op,bool if_true,struct mme_value x,struct mme_value y)752 void mme_tu104_exit_if(struct mme_builder *b,
753                        enum mme_cmp_op op,
754                        bool if_true,
755                        struct mme_value x,
756                        struct mme_value y)
757 {
758    struct mme_tu104_builder *tb = &b->tu104;
759 
760    /* we reverse it as we want to take the branch if the condition is true */
761    uint16_t control = if_true ? BITFIELD_BIT(15) : 0;
762    /* magic offset to exit the macro */
763    control |= 0x1000;
764    build_alu_to(b, mme_zero(), mme_cmp_to_tu104_branch_op(op), x, y, control,
765                 true);
766 
767    mme_tu104_new_inst(tb);
768 }
769 
770 uint32_t *
mme_tu104_builder_finish(struct mme_tu104_builder * tb,size_t * size_out)771 mme_tu104_builder_finish(struct mme_tu104_builder *tb, size_t *size_out)
772 {
773    assert(tb->cf_depth == 0);
774 
775    /* TODO: If there are at least two instructions and we can guarantee the
776     * last two instructions get exeucted (not in control-flow), we don't need
777     * to add a pair of NOPs.
778     */
779    mme_tu104_new_inst(tb);
780    mme_tu104_new_inst(tb);
781    tb->insts[tb->inst_count - 2].end_next = true;
782 
783    if (0)
784       mme_tu104_print(stderr, tb->insts, tb->inst_count);
785 
786    size_t enc_size = tb->inst_count * 3 * sizeof(uint32_t);
787    uint32_t *enc = malloc(enc_size);
788    if (enc != NULL) {
789       mme_tu104_encode(enc, tb->inst_count, tb->insts);
790       *size_out = enc_size;
791    }
792    return enc;
793 }
794 
795 void
mme_tu104_builder_dump(struct mme_builder * b,FILE * fp)796 mme_tu104_builder_dump(struct mme_builder *b, FILE *fp)
797 {
798    struct mme_tu104_builder *tb = &b->tu104;
799 
800    mme_tu104_print(stderr, tb->insts, tb->inst_count);
801 }
802