xref: /aosp_15_r20/external/mesa3d/src/panfrost/compiler/bifrost/disassemble.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright (C) 2019 Connor Abbott <[email protected]>
3  * Copyright (C) 2019 Lyude Paul <[email protected]>
4  * Copyright (C) 2019 Ryan Houdek <[email protected]>
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the next
14  * paragraph) shall be included in all copies or substantial portions of the
15  * Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23  * SOFTWARE.
24  */
25 
26 #include <assert.h>
27 #include <inttypes.h>
28 #include <stdbool.h>
29 #include <stdint.h>
30 #include <stdio.h>
31 #include <string.h>
32 
33 #include "util/compiler.h"
34 #include "util/macros.h"
35 #include "bi_disasm.h"
36 #include "bi_print_common.h"
37 #include "bifrost.h"
38 #include "disassemble.h"
39 #include "../bifrost.h"
40 
/*
 * Extract the bit-field [lo, high) from a 32-bit word: bit `lo` of `word`
 * becomes bit 0 of the result and bit `high` is the first bit NOT included.
 *
 * Requires lo < high <= 32. high == 32 is special-cased because a 32-bit
 * mask cannot be built by shifting a 32-bit one by 32.
 */
static uint64_t
bits(uint32_t word, unsigned lo, unsigned high)
{
   assert(lo < high && high <= 32);

   if (high == 32)
      return word >> lo;

   /* Build the mask with an unsigned one: (1 << 31) on a signed int is
    * undefined behaviour, which the previous form hit for high == 31. */
   const uint32_t mask = (UINT32_C(1) << high) - 1;
   return (word & mask) >> lo;
}
49 
/*
 * One decoded instruction pair, i.e. everything dispatched in a single
 * cycle. Inside a clause these fields are scattered across the quadwords in
 * format-specific ways, so the clause decoder gathers them into this
 * unpacked form first.
 */
struct bifrost_alu_inst {
   uint32_t fma_bits; /* raw encoding handed to bi_disasm_fma() */
   uint32_t add_bits; /* raw encoding handed to bi_disasm_add() */
   uint64_t reg_bits; /* raw bits reinterpreted as struct bifrost_regs */
};
58 
59 static unsigned
get_reg0(struct bifrost_regs regs)60 get_reg0(struct bifrost_regs regs)
61 {
62    if (regs.ctrl == 0)
63       return regs.reg0 | ((regs.reg1 & 0x1) << 5);
64 
65    return regs.reg0 <= regs.reg1 ? regs.reg0 : 63 - regs.reg0;
66 }
67 
68 static unsigned
get_reg1(struct bifrost_regs regs)69 get_reg1(struct bifrost_regs regs)
70 {
71    return regs.reg0 <= regs.reg1 ? regs.reg1 : 63 - regs.reg1;
72 }
73 
74 // this represents the decoded version of the ctrl register field.
75 struct bifrost_reg_ctrl {
76    bool read_reg0;
77    bool read_reg1;
78    struct bifrost_reg_ctrl_23 slot23;
79 };
80 
81 static void
dump_header(FILE * fp,struct bifrost_header header,bool verbose)82 dump_header(FILE *fp, struct bifrost_header header, bool verbose)
83 {
84    fprintf(fp, "ds(%u) ", header.dependency_slot);
85 
86    if (header.staging_barrier)
87       fprintf(fp, "osrb ");
88 
89    fprintf(fp, "%s ", bi_flow_control_name(header.flow_control));
90 
91    if (header.suppress_inf)
92       fprintf(fp, "inf_suppress ");
93    if (header.suppress_nan)
94       fprintf(fp, "nan_suppress ");
95 
96    if (header.flush_to_zero == BIFROST_FTZ_DX11)
97       fprintf(fp, "ftz_dx11 ");
98    else if (header.flush_to_zero == BIFROST_FTZ_ALWAYS)
99       fprintf(fp, "ftz_hsa ");
100    if (header.flush_to_zero == BIFROST_FTZ_ABRUPT)
101       fprintf(fp, "ftz_au ");
102 
103    assert(!header.zero1);
104    assert(!header.zero2);
105 
106    if (header.float_exceptions == BIFROST_EXCEPTIONS_DISABLED)
107       fprintf(fp, "fpe_ts ");
108    else if (header.float_exceptions == BIFROST_EXCEPTIONS_PRECISE_DIVISION)
109       fprintf(fp, "fpe_pd ");
110    else if (header.float_exceptions == BIFROST_EXCEPTIONS_PRECISE_SQRT)
111       fprintf(fp, "fpe_psqr ");
112 
113    if (header.message_type)
114       fprintf(fp, "%s ", bi_message_type_name(header.message_type));
115 
116    if (header.terminate_discarded_threads)
117       fprintf(fp, "td ");
118 
119    if (header.next_clause_prefetch)
120       fprintf(fp, "ncph ");
121 
122    if (header.next_message_type)
123       fprintf(fp, "next_%s ", bi_message_type_name(header.next_message_type));
124    if (header.dependency_wait != 0) {
125       fprintf(fp, "dwb(");
126       bool first = true;
127       for (unsigned i = 0; i < 8; i++) {
128          if (header.dependency_wait & (1 << i)) {
129             if (!first) {
130                fprintf(fp, ", ");
131             }
132             fprintf(fp, "%u", i);
133             first = false;
134          }
135       }
136       fprintf(fp, ") ");
137    }
138 
139    fprintf(fp, "\n");
140 }
141 
142 static struct bifrost_reg_ctrl
DecodeRegCtrl(FILE * fp,struct bifrost_regs regs,bool first)143 DecodeRegCtrl(FILE *fp, struct bifrost_regs regs, bool first)
144 {
145    struct bifrost_reg_ctrl decoded = {};
146    unsigned ctrl;
147    if (regs.ctrl == 0) {
148       ctrl = regs.reg1 >> 2;
149       decoded.read_reg0 = !(regs.reg1 & 0x2);
150       decoded.read_reg1 = false;
151    } else {
152       ctrl = regs.ctrl;
153       decoded.read_reg0 = decoded.read_reg1 = true;
154    }
155 
156    /* Modify control based on state */
157    if (first)
158       ctrl = (ctrl & 0x7) | ((ctrl & 0x8) << 1);
159    else if (regs.reg2 == regs.reg3)
160       ctrl += 16;
161 
162    decoded.slot23 = bifrost_reg_ctrl_lut[ctrl];
163    ASSERTED struct bifrost_reg_ctrl_23 reserved = {0};
164    assert(memcmp(&decoded.slot23, &reserved, sizeof(reserved)));
165 
166    return decoded;
167 }
168 
169 static void
dump_regs(FILE * fp,struct bifrost_regs srcs,bool first)170 dump_regs(FILE *fp, struct bifrost_regs srcs, bool first)
171 {
172    struct bifrost_reg_ctrl ctrl = DecodeRegCtrl(fp, srcs, first);
173    fprintf(fp, "    # ");
174    if (ctrl.read_reg0)
175       fprintf(fp, "slot 0: r%u ", get_reg0(srcs));
176    if (ctrl.read_reg1)
177       fprintf(fp, "slot 1: r%u ", get_reg1(srcs));
178 
179    const char *slot3_fma = ctrl.slot23.slot3_fma ? "FMA" : "ADD";
180 
181    if (ctrl.slot23.slot2 == BIFROST_OP_WRITE)
182       fprintf(fp, "slot 2: r%u (write FMA) ", srcs.reg2);
183    else if (ctrl.slot23.slot2 == BIFROST_OP_WRITE_LO)
184       fprintf(fp, "slot 2: r%u (write lo FMA) ", srcs.reg2);
185    else if (ctrl.slot23.slot2 == BIFROST_OP_WRITE_HI)
186       fprintf(fp, "slot 2: r%u (write hi FMA) ", srcs.reg2);
187    else if (ctrl.slot23.slot2 == BIFROST_OP_READ)
188       fprintf(fp, "slot 2: r%u (read) ", srcs.reg2);
189 
190    if (ctrl.slot23.slot3 == BIFROST_OP_WRITE)
191       fprintf(fp, "slot 3: r%u (write %s) ", srcs.reg3, slot3_fma);
192    else if (ctrl.slot23.slot3 == BIFROST_OP_WRITE_LO)
193       fprintf(fp, "slot 3: r%u (write lo %s) ", srcs.reg3, slot3_fma);
194    else if (ctrl.slot23.slot3 == BIFROST_OP_WRITE_HI)
195       fprintf(fp, "slot 3: r%u (write hi %s) ", srcs.reg3, slot3_fma);
196 
197    if (srcs.fau_idx)
198       fprintf(fp, "fau %X ", srcs.fau_idx);
199 
200    fprintf(fp, "\n");
201 }
202 
203 static void
bi_disasm_dest_mask(FILE * fp,enum bifrost_reg_op op)204 bi_disasm_dest_mask(FILE *fp, enum bifrost_reg_op op)
205 {
206    if (op == BIFROST_OP_WRITE_LO)
207       fprintf(fp, ".h0");
208    else if (op == BIFROST_OP_WRITE_HI)
209       fprintf(fp, ".h1");
210 }
211 
212 void
bi_disasm_dest_fma(FILE * fp,struct bifrost_regs * next_regs,bool last)213 bi_disasm_dest_fma(FILE *fp, struct bifrost_regs *next_regs, bool last)
214 {
215    /* If this is the last instruction, next_regs points to the first reg entry. */
216    struct bifrost_reg_ctrl ctrl = DecodeRegCtrl(fp, *next_regs, last);
217    if (ctrl.slot23.slot2 >= BIFROST_OP_WRITE) {
218       fprintf(fp, "r%u:t0", next_regs->reg2);
219       bi_disasm_dest_mask(fp, ctrl.slot23.slot2);
220    } else if (ctrl.slot23.slot3 >= BIFROST_OP_WRITE && ctrl.slot23.slot3_fma) {
221       fprintf(fp, "r%u:t0", next_regs->reg3);
222       bi_disasm_dest_mask(fp, ctrl.slot23.slot3);
223    } else
224       fprintf(fp, "t0");
225 }
226 
227 void
bi_disasm_dest_add(FILE * fp,struct bifrost_regs * next_regs,bool last)228 bi_disasm_dest_add(FILE *fp, struct bifrost_regs *next_regs, bool last)
229 {
230    /* If this is the last instruction, next_regs points to the first reg entry. */
231    struct bifrost_reg_ctrl ctrl = DecodeRegCtrl(fp, *next_regs, last);
232 
233    if (ctrl.slot23.slot3 >= BIFROST_OP_WRITE && !ctrl.slot23.slot3_fma) {
234       fprintf(fp, "r%u:t1", next_regs->reg3);
235       bi_disasm_dest_mask(fp, ctrl.slot23.slot3);
236    } else
237       fprintf(fp, "t1");
238 }
239 
/*
 * Print a 32-bit immediate as hex, followed by a comment showing the same
 * bit pattern reinterpreted as a float.
 */
static void
dump_const_imm(FILE *fp, uint32_t imm)
{
   float as_float;

   /* Reinterpret the raw bits; no numeric conversion is intended. */
   memcpy(&as_float, &imm, sizeof(as_float));

   fprintf(fp, "0x%08x /* %f */", imm, as_float);
}
250 
251 static void
dump_pc_imm(FILE * fp,uint64_t imm,unsigned branch_offset,enum bi_constmod mod,bool high32)252 dump_pc_imm(FILE *fp, uint64_t imm, unsigned branch_offset,
253             enum bi_constmod mod, bool high32)
254 {
255    if (mod == BI_CONSTMOD_PC_HI && !high32) {
256       dump_const_imm(fp, imm);
257       return;
258    }
259 
260    /* 60-bit sign-extend */
261    uint64_t zx64 = (imm << 4);
262    int64_t sx64 = zx64;
263    sx64 >>= 4;
264 
265    /* 28-bit sign extend x 2 */
266    uint32_t imm32[2] = {(uint32_t)imm, (uint32_t)(imm >> 32)};
267    uint32_t zx32[2] = {imm32[0] << 4, imm32[1] << 4};
268    int32_t sx32[2] = {zx32[0], zx32[1]};
269    sx32[0] >>= 4;
270    sx32[1] >>= 4;
271 
272    int64_t offs = 0;
273 
274    switch (mod) {
275    case BI_CONSTMOD_PC_LO:
276       offs = sx64;
277       break;
278    case BI_CONSTMOD_PC_HI:
279       offs = sx32[1];
280       break;
281    case BI_CONSTMOD_PC_LO_HI:
282       offs = sx32[high32];
283       break;
284    default:
285       unreachable("Invalid PC modifier");
286    }
287 
288    assert((offs & 15) == 0);
289    fprintf(fp, "clause_%" PRId64, branch_offset + (offs / 16));
290 
291    if (mod == BI_CONSTMOD_PC_LO && high32)
292       fprintf(fp, " >> 32");
293 
294    /* While technically in spec, referencing the current clause as (pc +
295     * 0) likely indicates an unintended infinite loop  */
296    if (offs == 0)
297       fprintf(fp, " /* XXX: likely an infinite loop */");
298 }
299 
/*
 * Convert the FAU selector of an embedded constant (the FAU index shifted
 * right by 4, as passed by dump_fau_src) into the index of that constant
 * within the clause. No, it's not in order. Yes, really.
 *
 * Selectors 0 and 1 do not name an embedded constant and are rejected by
 * assertion, as is any selector outside the 8-entry table.
 */
static unsigned
const_fau_to_idx(unsigned fau_value)
{
   static const unsigned map[8] = {~0u, ~0u, 4, 5, 0, 1, 2, 3};

   /* Check the selector before using it as an index. */
   assert(fau_value < sizeof(map) / sizeof(map[0]));
   assert(map[fau_value] < 6);

   return map[fau_value];
}
311 
312 static void
dump_fau_src(FILE * fp,struct bifrost_regs srcs,unsigned branch_offset,struct bi_constants * consts,bool high32)313 dump_fau_src(FILE *fp, struct bifrost_regs srcs, unsigned branch_offset,
314              struct bi_constants *consts, bool high32)
315 {
316    if (srcs.fau_idx & 0x80) {
317       unsigned uniform = (srcs.fau_idx & 0x7f);
318       fprintf(fp, "u%u.w%u", uniform, high32);
319    } else if (srcs.fau_idx >= 0x20) {
320       unsigned idx = const_fau_to_idx(srcs.fau_idx >> 4);
321       uint64_t imm = consts->raw[idx];
322       imm |= (srcs.fau_idx & 0xf);
323       if (consts->mods[idx] != BI_CONSTMOD_NONE)
324          dump_pc_imm(fp, imm, branch_offset, consts->mods[idx], high32);
325       else if (high32)
326          dump_const_imm(fp, imm >> 32);
327       else
328          dump_const_imm(fp, imm);
329    } else {
330       switch (srcs.fau_idx) {
331       case 0:
332          fprintf(fp, "#0");
333          break;
334       case 1:
335          fprintf(fp, "lane_id");
336          break;
337       case 2:
338          fprintf(fp, "warp_id");
339          break;
340       case 3:
341          fprintf(fp, "core_id");
342          break;
343       case 4:
344          fprintf(fp, "framebuffer_size");
345          break;
346       case 5:
347          fprintf(fp, "atest_datum");
348          break;
349       case 6:
350          fprintf(fp, "sample");
351          break;
352       case 8:
353       case 9:
354       case 10:
355       case 11:
356       case 12:
357       case 13:
358       case 14:
359       case 15:
360          fprintf(fp, "blend_descriptor_%u", (unsigned)srcs.fau_idx - 8);
361          break;
362       default:
363          fprintf(fp, "XXX - reserved%u", (unsigned)srcs.fau_idx);
364          break;
365       }
366 
367       if (high32)
368          fprintf(fp, ".y");
369       else
370          fprintf(fp, ".x");
371    }
372 }
373 
374 void
dump_src(FILE * fp,unsigned src,struct bifrost_regs srcs,unsigned branch_offset,struct bi_constants * consts,bool isFMA)375 dump_src(FILE *fp, unsigned src, struct bifrost_regs srcs,
376          unsigned branch_offset, struct bi_constants *consts, bool isFMA)
377 {
378    switch (src) {
379    case 0:
380       fprintf(fp, "r%u", get_reg0(srcs));
381       break;
382    case 1:
383       fprintf(fp, "r%u", get_reg1(srcs));
384       break;
385    case 2:
386       fprintf(fp, "r%u", srcs.reg2);
387       break;
388    case 3:
389       if (isFMA)
390          fprintf(fp, "#0");
391       else
392          fprintf(fp, "t"); // i.e. the output of FMA this cycle
393       break;
394    case 4:
395       dump_fau_src(fp, srcs, branch_offset, consts, false);
396       break;
397    case 5:
398       dump_fau_src(fp, srcs, branch_offset, consts, true);
399       break;
400    case 6:
401       fprintf(fp, "t0");
402       break;
403    case 7:
404       fprintf(fp, "t1");
405       break;
406    }
407 }
408 
/* Tables for decoding M1, or if M1 == 7, M2 respectively.
 *
 * XXX: It's not clear if the third entry of M2_table, corresponding to
 * (M1, M2) = (7, 2), should have PC_LO_HI in the EC1 slot, or if it's a
 * weird hybrid mode. This needs testing, but no code should ever actually
 * use this mode.
 */
415 
416 static const enum bi_constmod M1_table[7][2] = {
417    {BI_CONSTMOD_NONE, BI_CONSTMOD_NONE},
418    {BI_CONSTMOD_PC_LO, BI_CONSTMOD_NONE},
419    {BI_CONSTMOD_PC_LO, BI_CONSTMOD_PC_LO},
420    {~0, ~0},
421    {BI_CONSTMOD_PC_HI, BI_CONSTMOD_NONE},
422    {BI_CONSTMOD_PC_HI, BI_CONSTMOD_PC_HI},
423    {BI_CONSTMOD_PC_LO, BI_CONSTMOD_NONE},
424 };
425 
426 static const enum bi_constmod M2_table[4][2] = {
427    {BI_CONSTMOD_PC_LO_HI, BI_CONSTMOD_NONE},
428    {BI_CONSTMOD_PC_LO_HI, BI_CONSTMOD_PC_HI},
429    {BI_CONSTMOD_PC_LO_HI, BI_CONSTMOD_PC_LO_HI},
430    {BI_CONSTMOD_PC_LO_HI, BI_CONSTMOD_PC_HI},
431 };
432 
433 static void
decode_M(enum bi_constmod * mod,unsigned M1,unsigned M2,bool single)434 decode_M(enum bi_constmod *mod, unsigned M1, unsigned M2, bool single)
435 {
436    if (M1 >= 8) {
437       mod[0] = BI_CONSTMOD_NONE;
438 
439       if (!single)
440          mod[1] = BI_CONSTMOD_NONE;
441 
442       return;
443    } else if (M1 == 7) {
444       assert(M2 < 4);
445       memcpy(mod, M2_table[M2], sizeof(*mod) * (single ? 1 : 2));
446    } else {
447       assert(M1 != 3);
448       memcpy(mod, M1_table[M1], sizeof(*mod) * (single ? 1 : 2));
449    }
450 }
451 
452 static void
dump_clause(FILE * fp,const uint32_t * words,unsigned * size,unsigned offset,bool verbose)453 dump_clause(FILE *fp, const uint32_t *words, unsigned *size, unsigned offset,
454             bool verbose)
455 {
456    // State for a decoded clause
457    struct bifrost_alu_inst instrs[8] = {};
458    struct bi_constants consts = {};
459    unsigned num_instrs = 0;
460    unsigned num_consts = 0;
461    uint64_t header_bits = 0;
462 
463    unsigned i;
464    for (i = 0;; i++, words += 4) {
465       if (verbose) {
466          fprintf(fp, "# ");
467          for (int j = 0; j < 4; j++)
468             fprintf(fp, "%08x ", words[3 - j]); // low bit on the right
469          fprintf(fp, "\n");
470       }
471       unsigned tag = bits(words[0], 0, 8);
472 
473       // speculatively decode some things that are common between many formats,
474       // so we can share some code
475       struct bifrost_alu_inst main_instr = {};
476       // 20 bits
477       main_instr.add_bits = bits(words[2], 2, 32 - 13);
478       // 23 bits
479       main_instr.fma_bits = bits(words[1], 11, 32) | bits(words[2], 0, 2)
480                                                         << (32 - 11);
481       // 35 bits
482       main_instr.reg_bits = ((uint64_t)bits(words[1], 0, 11)) << 24 |
483                             (uint64_t)bits(words[0], 8, 32);
484 
485       uint64_t const0 = bits(words[0], 8, 32) << 4 | (uint64_t)words[1] << 28 |
486                         bits(words[2], 0, 4) << 60;
487       uint64_t const1 = bits(words[2], 4, 32) << 4 | (uint64_t)words[3] << 32;
488 
489       /* Z-bit */
490       bool stop = tag & 0x40;
491 
492       if (verbose) {
493          fprintf(fp, "# tag: 0x%02x\n", tag);
494       }
495       if (tag & 0x80) {
496          /* Format 5 or 10 */
497          unsigned idx = stop ? 5 : 2;
498          main_instr.add_bits |= ((tag >> 3) & 0x7) << 17;
499          instrs[idx + 1] = main_instr;
500          instrs[idx].add_bits = bits(words[3], 0, 17) | ((tag & 0x7) << 17);
501          instrs[idx].fma_bits |= bits(words[2], 19, 32) << 10;
502          consts.raw[0] = bits(words[3], 17, 32) << 4;
503       } else {
504          bool done = false;
505          switch ((tag >> 3) & 0x7) {
506          case 0x0:
507             switch (tag & 0x7) {
508             case 0x3:
509                /* Format 1 */
510                main_instr.add_bits |= bits(words[3], 29, 32) << 17;
511                instrs[1] = main_instr;
512                num_instrs = 2;
513                done = stop;
514                break;
515             case 0x4:
516                /* Format 3 */
517                instrs[2].add_bits =
518                   bits(words[3], 0, 17) | bits(words[3], 29, 32) << 17;
519                instrs[2].fma_bits |= bits(words[2], 19, 32) << 10;
520                consts.raw[0] = const0;
521                decode_M(&consts.mods[0], bits(words[2], 4, 8),
522                         bits(words[2], 8, 12), true);
523                num_instrs = 3;
524                num_consts = 1;
525                done = stop;
526                break;
527             case 0x1:
528             case 0x5:
529                /* Format 4 */
530                instrs[2].add_bits =
531                   bits(words[3], 0, 17) | bits(words[3], 29, 32) << 17;
532                instrs[2].fma_bits |= bits(words[2], 19, 32) << 10;
533                main_instr.add_bits |= bits(words[3], 26, 29) << 17;
534                instrs[3] = main_instr;
535                if ((tag & 0x7) == 0x5) {
536                   num_instrs = 4;
537                   done = stop;
538                }
539                break;
540             case 0x6:
541                /* Format 8 */
542                instrs[5].add_bits =
543                   bits(words[3], 0, 17) | bits(words[3], 29, 32) << 17;
544                instrs[5].fma_bits |= bits(words[2], 19, 32) << 10;
545                consts.raw[0] = const0;
546                decode_M(&consts.mods[0], bits(words[2], 4, 8),
547                         bits(words[2], 8, 12), true);
548                num_instrs = 6;
549                num_consts = 1;
550                done = stop;
551                break;
552             case 0x7:
553                /* Format 9 */
554                instrs[5].add_bits =
555                   bits(words[3], 0, 17) | bits(words[3], 29, 32) << 17;
556                instrs[5].fma_bits |= bits(words[2], 19, 32) << 10;
557                main_instr.add_bits |= bits(words[3], 26, 29) << 17;
558                instrs[6] = main_instr;
559                num_instrs = 7;
560                done = stop;
561                break;
562             default:
563                unreachable("[INSTR_INVALID_ENC] Invalid tag bits");
564             }
565             break;
566          case 0x2:
567          case 0x3: {
568             /* Format 6 or 11 */
569             unsigned idx = ((tag >> 3) & 0x7) == 2 ? 4 : 7;
570             main_instr.add_bits |= (tag & 0x7) << 17;
571             instrs[idx] = main_instr;
572             consts.raw[0] |=
573                (bits(words[2], 19, 32) | ((uint64_t)words[3] << 13)) << 19;
574             num_consts = 1;
575             num_instrs = idx + 1;
576             done = stop;
577             break;
578          }
579          case 0x4: {
580             /* Format 2 */
581             unsigned idx = stop ? 4 : 1;
582             main_instr.add_bits |= (tag & 0x7) << 17;
583             instrs[idx] = main_instr;
584             instrs[idx + 1].fma_bits |= bits(words[3], 22, 32);
585             instrs[idx + 1].reg_bits =
586                bits(words[2], 19, 32) | (bits(words[3], 0, 22) << (32 - 19));
587             break;
588          }
589          case 0x1:
590             /* Format 0 - followed by constants */
591             num_instrs = 1;
592             done = stop;
593             FALLTHROUGH;
594          case 0x5:
595             /* Format 0 - followed by instructions */
596             header_bits =
597                bits(words[2], 19, 32) | ((uint64_t)words[3] << (32 - 19));
598             main_instr.add_bits |= (tag & 0x7) << 17;
599             instrs[0] = main_instr;
600             break;
601          case 0x6:
602          case 0x7: {
603             /* Format 12 */
604             unsigned pos = tag & 0xf;
605 
606             struct {
607                unsigned const_idx;
608                unsigned nr_tuples;
609             } pos_table[0x10] = {{0, 1}, {0, 2}, {0, 4}, {1, 3},
610                                  {1, 5}, {2, 4}, {0, 7}, {1, 6},
611                                  {3, 5}, {1, 8}, {2, 7}, {3, 6},
612                                  {3, 8}, {4, 7}, {5, 6}, {~0, ~0}};
613 
614             ASSERTED bool valid_count = pos_table[pos].nr_tuples == num_instrs;
615             assert(valid_count && "INSTR_INVALID_ENC");
616 
617             unsigned const_idx = pos_table[pos].const_idx;
618 
619             if (num_consts < const_idx + 2)
620                num_consts = const_idx + 2;
621 
622             consts.raw[const_idx] = const0;
623             consts.raw[const_idx + 1] = const1;
624 
625             /* Calculate M values from A, B and 4-bit
626              * unsigned arithmetic. Mathematically it
627              * should be (A - B) % 16 but we use this
628              * alternate form to avoid sign issues */
629 
630             unsigned A1 = bits(words[2], 0, 4);
631             unsigned B1 = bits(words[3], 28, 32);
632             unsigned A2 = bits(words[1], 0, 4);
633             unsigned B2 = bits(words[2], 28, 32);
634 
635             unsigned M1 = (16 + A1 - B1) & 0xF;
636             unsigned M2 = (16 + A2 - B2) & 0xF;
637 
638             decode_M(&consts.mods[const_idx], M1, M2, false);
639 
640             done = stop;
641             break;
642          }
643          default:
644             break;
645          }
646 
647          if (done)
648             break;
649       }
650    }
651 
652    *size = i + 1;
653 
654    if (verbose) {
655       fprintf(fp, "# header: %012" PRIx64 "\n", header_bits);
656    }
657 
658    struct bifrost_header header;
659    memcpy((char *)&header, (char *)&header_bits, sizeof(struct bifrost_header));
660    dump_header(fp, header, verbose);
661 
662    fprintf(fp, "{\n");
663    for (i = 0; i < num_instrs; i++) {
664       struct bifrost_regs regs, next_regs;
665       if (i + 1 == num_instrs) {
666          memcpy((char *)&next_regs, (char *)&instrs[0].reg_bits,
667                 sizeof(next_regs));
668       } else {
669          memcpy((char *)&next_regs, (char *)&instrs[i + 1].reg_bits,
670                 sizeof(next_regs));
671       }
672 
673       memcpy((char *)&regs, (char *)&instrs[i].reg_bits, sizeof(regs));
674 
675       if (verbose) {
676          fprintf(fp, "    # regs: %016" PRIx64 "\n", instrs[i].reg_bits);
677          dump_regs(fp, regs, i == 0);
678       }
679 
680       bi_disasm_fma(fp, instrs[i].fma_bits, &regs, &next_regs,
681                     header.staging_register, offset, &consts,
682                     i + 1 == num_instrs);
683 
684       bi_disasm_add(fp, instrs[i].add_bits, &regs, &next_regs,
685                     header.staging_register, offset, &consts,
686                     i + 1 == num_instrs);
687    }
688    fprintf(fp, "}\n");
689 
690    if (verbose) {
691       for (unsigned i = 0; i < num_consts; i++) {
692          fprintf(fp, "# const%d: %08" PRIx64 "\n", 2 * i,
693                  consts.raw[i] & 0xffffffff);
694          fprintf(fp, "# const%d: %08" PRIx64 "\n", 2 * i + 1,
695                  consts.raw[i] >> 32);
696       }
697    }
698 
699    fprintf(fp, "\n");
700    return;
701 }
702 
/*
 * Disassemble a Bifrost shader binary to fp, one clause at a time.
 *
 * fp      - output stream
 * code    - start of the shader binary
 * size    - length of the binary in bytes
 * verbose - forwarded to dump_clause for raw-encoding output
 *
 * Each clause is labelled "clause_N", where N is the clause's offset from
 * the start of the binary in quadwords. Disassembly stops at the end of
 * the buffer or at the first clause whose first word is zero.
 */
void
disassemble_bifrost(FILE *fp, const void *code, size_t size, bool verbose)
{
   const uint32_t *words = (const uint32_t *)code;
   /* Whole 32-bit words still to be decoded. */
   size_t remaining = size / 4;
   // used for displaying branch targets
   unsigned offset = 0;

   /* A clause needs at least one full quadword. Counting down the words
    * left, rather than testing for pointer equality with the end, also
    * terminates when the size is not a multiple of 16 bytes or a clause
    * claims more quadwords than remain. */
   while (remaining >= 4) {
      /* Shaders have zero bytes at the end for padding; stop
       * disassembling when we hit them. */
      if (*words == 0)
         break;

      fprintf(fp, "clause_%u:\n", offset);

      unsigned clause_quads;
      dump_clause(fp, words, &clause_quads, offset, verbose);

      const size_t consumed = (size_t)clause_quads * 4;
      if (consumed >= remaining)
         break;

      words += consumed;
      remaining -= consumed;
      offset += clause_quads;
   }
}
725