/*
 * Copyright © 2010 Intel Corporation
 * SPDX-License-Identifier: MIT
 */

#include "brw_cfg.h"
#include "brw_disasm.h"
#include "brw_fs.h"
#include "brw_private.h"
#include "dev/intel_debug.h"
#include "util/half_float.h"

using namespace brw;

/**
 * Print every instruction of the shader \p s to \p file.
 *
 * One of three layouts is chosen:
 *
 *  - If a CFG exists and s.grf_used is zero, each basic block is wrapped in
 *    "START Bn" / "END Bn" lines listing its parent and child links ('-' for
 *    logical links, '~' otherwise), instructions are indented by the current
 *    control-flow nesting depth, and the def analysis is passed down to the
 *    per-instruction printer.  When the DEBUG_REG_PRESSURE debug flag is set,
 *    each line is additionally prefixed with the number of live registers at
 *    that IP and a final summary line reports the maximum.
 *
 *  - Otherwise, if a CFG exists and the flat instruction list is empty, the
 *    instructions are printed block by block with no decoration.
 *
 *  - Otherwise the flat instruction list is printed.
 */
void
brw_print_instructions_to_file(const fs_visitor &s, FILE *file)
{
   if (s.cfg && s.grf_used == 0) {
      const brw::def_analysis &defs = s.def_analysis.require();
      const register_pressure *rp =
         INTEL_DEBUG(DEBUG_REG_PRESSURE) ? &s.regpressure_analysis.require() : NULL;

      unsigned ip = 0, max_pressure = 0;
      unsigned cf_count = 0;
      foreach_block(block, s.cfg) {
         fprintf(file, "START B%d", block->num);
         foreach_list_typed(bblock_link, link, link, &block->parents) {
            fprintf(file, " <%cB%d",
                    link->kind == bblock_link_logical ? '-' : '~',
                    link->block->num);
         }
         fprintf(file, "\n");

         foreach_inst_in_block(fs_inst, inst, block) {
            /* cf_count is unsigned: an unmatched control-flow end (e.g. when
             * dumping partial or malformed IR) must not wrap it around, or
             * the indentation loop below would run for billions of
             * iterations.
             */
            if (inst->is_control_flow_end() && cf_count > 0)
               cf_count -= 1;

            if (rp) {
               const unsigned live = rp->regs_live_at_ip[ip];
               max_pressure = MAX2(max_pressure, live);
               fprintf(file, "{%3u} ", live);
            }

            /* Indent by the current control-flow nesting depth. */
            for (unsigned i = 0; i < cf_count; i++)
               fprintf(file, " ");
            brw_print_instruction(s, inst, file, &defs);
            ip++;

            /* The opening instruction itself is printed at the outer depth;
             * only what follows it is indented further.
             */
            if (inst->is_control_flow_begin())
               cf_count += 1;
         }

         fprintf(file, "END B%d", block->num);
         foreach_list_typed(bblock_link, link, link, &block->children) {
            fprintf(file, " %c>B%d",
                    link->kind == bblock_link_logical ? '-' : '~',
                    link->block->num);
         }
         fprintf(file, "\n");
      }

      if (rp)
         fprintf(file, "Maximum %3u registers live at once.\n", max_pressure);
   } else if (s.cfg && exec_list_is_empty(&s.instructions)) {
      foreach_block_and_inst(block, fs_inst, inst, s.cfg) {
         brw_print_instruction(s, inst, file);
      }
   } else {
      foreach_in_list(fs_inst, inst, &s.instructions) {
         brw_print_instruction(s, inst, file);
      }
   }
}

/**
 * Print the shader's instructions to the file called \p name, or to stderr.
 *
 * stderr is used when \p name is NULL, when __normal_user() returns false
 * (NOTE(review): presumably a guard against creating files from a privileged
 * process -- confirm against its definition), or when the file cannot be
 * opened for writing.  A file opened here is closed before returning.
 */
void
brw_print_instructions(const fs_visitor &s, const char *name)
{
   FILE *file = stderr;
   if (name && __normal_user()) {
      file = fopen(name, "w");
      /* Best effort: fall back to stderr rather than losing the dump. */
      if (!file)
         file = stderr;
   }

   brw_print_instructions_to_file(s, file);

   if (file != stderr) {
      fclose(file);
   }
}

/**
 * Return the mnemonic used when printing opcode \p op.
 *
 * Opcodes in the range [0, NUM_BRW_OPCODES) are looked up through
 * brw_opcode_desc(), with two exceptions handled by hand (DO, and DPAS on
 * devices with verx10 < 125).  Every other opcode handled here maps to a
 * fixed string.  An opcode not covered by the switch hits unreachable().
 */
static const char *
brw_instruction_name(const struct brw_isa_info *isa, enum opcode op)
{
   const struct intel_device_info *devinfo = isa->devinfo;

   switch (op) {
   case 0 ... NUM_BRW_OPCODES - 1:
      /* The DO instruction doesn't exist on Gfx9+, but we use it to mark the
       * start of a loop in the IR.
       */
      if (op == BRW_OPCODE_DO)
         return "do";

      /* DPAS instructions may transiently exist on platforms that do not
       * support DPAS. They will eventually be lowered, but in the meantime it
       * must be possible to query the instruction name.
       */
      if (devinfo->verx10 < 125 && op == BRW_OPCODE_DPAS)
         return "dpas";

      assert(brw_opcode_desc(isa, op)->name);
      return brw_opcode_desc(isa, op)->name;
   case FS_OPCODE_FB_WRITE_LOGICAL:
      return "fb_write_logical";
   case FS_OPCODE_FB_READ_LOGICAL:
      return "fb_read_logical";

   case SHADER_OPCODE_RCP:
      return "rcp";
   case SHADER_OPCODE_RSQ:
      return "rsq";
   case SHADER_OPCODE_SQRT:
      return "sqrt";
   case SHADER_OPCODE_EXP2:
      return "exp2";
   case SHADER_OPCODE_LOG2:
      return "log2";
   case SHADER_OPCODE_POW:
      return "pow";
   case SHADER_OPCODE_INT_QUOTIENT:
      return "int_quot";
   case SHADER_OPCODE_INT_REMAINDER:
      return "int_rem";
   case SHADER_OPCODE_SIN:
      return "sin";
   case SHADER_OPCODE_COS:
      return "cos";

   case SHADER_OPCODE_SEND:
      return "send";

   case SHADER_OPCODE_UNDEF:
      return "undef";

   case SHADER_OPCODE_TEX_LOGICAL:
      return "tex_logical";
   case SHADER_OPCODE_TXD_LOGICAL:
      return "txd_logical";
   case SHADER_OPCODE_TXF_LOGICAL:
      return "txf_logical";
   case SHADER_OPCODE_TXL_LOGICAL:
      return "txl_logical";
   case SHADER_OPCODE_TXS_LOGICAL:
      return "txs_logical";
   case FS_OPCODE_TXB_LOGICAL:
      return "txb_logical";
   case SHADER_OPCODE_TXF_CMS_W_LOGICAL:
      return "txf_cms_w_logical";
   case SHADER_OPCODE_TXF_CMS_W_GFX12_LOGICAL:
      return "txf_cms_w_gfx12_logical";
   case SHADER_OPCODE_TXF_MCS_LOGICAL:
      return "txf_mcs_logical";
   case SHADER_OPCODE_LOD_LOGICAL:
      return "lod_logical";
   case SHADER_OPCODE_TG4_LOGICAL:
      return "tg4_logical";
   case SHADER_OPCODE_TG4_OFFSET_LOGICAL:
      return "tg4_offset_logical";
   case SHADER_OPCODE_TG4_OFFSET_LOD_LOGICAL:
      return "tg4_offset_lod_logical";
   case SHADER_OPCODE_TG4_OFFSET_BIAS_LOGICAL:
      return "tg4_offset_bias_logical";
   case SHADER_OPCODE_TG4_BIAS_LOGICAL:
      return "tg4_b_logical";
   case SHADER_OPCODE_TG4_EXPLICIT_LOD_LOGICAL:
      return "tg4_l_logical";
   case SHADER_OPCODE_TG4_IMPLICIT_LOD_LOGICAL:
      return "tg4_i_logical";
   case SHADER_OPCODE_SAMPLEINFO_LOGICAL:
      return "sampleinfo_logical";

   case SHADER_OPCODE_IMAGE_SIZE_LOGICAL:
      return "image_size_logical";

   case SHADER_OPCODE_MEMORY_FENCE:
      return "memory_fence";
   case FS_OPCODE_SCHEDULING_FENCE:
      return "scheduling_fence";
   case SHADER_OPCODE_INTERLOCK:
      /* For an interlock we actually issue a memory fence via sendc. */
      return "interlock";

   case SHADER_OPCODE_LOAD_PAYLOAD:
      return "load_payload";
   case FS_OPCODE_PACK:
      return "pack";

   case SHADER_OPCODE_SCRATCH_HEADER:
      return "scratch_header";

   case SHADER_OPCODE_URB_WRITE_LOGICAL:
      return "urb_write_logical";
   case SHADER_OPCODE_URB_READ_LOGICAL:
      return "urb_read_logical";

   case SHADER_OPCODE_FIND_LIVE_CHANNEL:
      return "find_live_channel";
   case SHADER_OPCODE_FIND_LAST_LIVE_CHANNEL:
      return "find_last_live_channel";
   case SHADER_OPCODE_LOAD_LIVE_CHANNELS:
      return "load_live_channels";
   case FS_OPCODE_LOAD_LIVE_CHANNELS:
      return "fs_load_live_channels";

   case SHADER_OPCODE_BROADCAST:
      return "broadcast";
   case SHADER_OPCODE_SHUFFLE:
      return "shuffle";
   case SHADER_OPCODE_SEL_EXEC:
      return "sel_exec";
   case SHADER_OPCODE_QUAD_SWIZZLE:
      return "quad_swizzle";
   case SHADER_OPCODE_CLUSTER_BROADCAST:
      return "cluster_broadcast";

   case SHADER_OPCODE_GET_BUFFER_SIZE:
      return "get_buffer_size";

   case FS_OPCODE_DDX_COARSE:
      return "ddx_coarse";
   case FS_OPCODE_DDX_FINE:
      return "ddx_fine";
   case FS_OPCODE_DDY_COARSE:
      return "ddy_coarse";
   case FS_OPCODE_DDY_FINE:
      return "ddy_fine";

   case FS_OPCODE_PIXEL_X:
      return "pixel_x";
   case FS_OPCODE_PIXEL_Y:
      return "pixel_y";

   case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
      return "uniform_pull_const";
   case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL:
      return "varying_pull_const_logical";

   case FS_OPCODE_PACK_HALF_2x16_SPLIT:
      return "pack_half_2x16_split";

   case SHADER_OPCODE_HALT_TARGET:
      return "halt_target";

   case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
      return "interp_sample";
   case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
      return "interp_shared_offset";
   case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
      return "interp_per_slot_offset";

   case SHADER_OPCODE_BARRIER:
      return "barrier";
   case SHADER_OPCODE_MULH:
      return "mulh";
   case SHADER_OPCODE_ISUB_SAT:
      return "isub_sat";
   case SHADER_OPCODE_USUB_SAT:
      return "usub_sat";
   case SHADER_OPCODE_MOV_INDIRECT:
      return "mov_indirect";
   case SHADER_OPCODE_MOV_RELOC_IMM:
      return "mov_reloc_imm";

   case RT_OPCODE_TRACE_RAY_LOGICAL:
      return "rt_trace_ray_logical";

   case SHADER_OPCODE_RND_MODE:
      return "rnd_mode";
   case SHADER_OPCODE_FLOAT_CONTROL_MODE:
      return "float_control_mode";
   case SHADER_OPCODE_BTD_SPAWN_LOGICAL:
      return "btd_spawn_logical";
   case SHADER_OPCODE_BTD_RETIRE_LOGICAL:
      return "btd_retire_logical";
   case SHADER_OPCODE_READ_ARCH_REG:
      return "read_arch_reg";
   case SHADER_OPCODE_LOAD_SUBGROUP_INVOCATION:
      return "load_subgroup_invocation";
   case SHADER_OPCODE_MEMORY_LOAD_LOGICAL:
      return "memory_load";
   case SHADER_OPCODE_MEMORY_STORE_LOGICAL:
      return "memory_store";
   case SHADER_OPCODE_MEMORY_ATOMIC_LOGICAL:
      return "memory_atomic";
   }

   unreachable("not reached");
}

/**
 * Pretty-print a source for a SHADER_OPCODE_MEMORY_LOGICAL instruction.
 *
 * Sources that inst->is_control_source() reports as control sources are
 * asserted to be unmodified UD immediates before being decoded.
 *
 * Returns true if the value is fully printed (i.e. an enum) and false if
 * we only printed a label, and the actual source value still needs printing.
 * True is also returned, with nothing printed, for a BAD_FILE binding or
 * BAD_FILE data1 source, so that the caller skips them entirely.
 */
static bool
print_memory_logical_source(FILE *file, const fs_inst *inst, unsigned i)
{
   if (inst->is_control_source(i)) {
      assert(inst->src[i].file == IMM && inst->src[i].type == BRW_TYPE_UD);
      assert(!inst->src[i].negate);
      assert(!inst->src[i].abs);
   }

   switch (i) {
   case MEMORY_LOGICAL_OPCODE:
      fprintf(file, " %s", brw_lsc_op_to_string(inst->src[i].ud));
      return true;
   case MEMORY_LOGICAL_MODE: {
      static const char *modes[] = {
         [MEMORY_MODE_TYPED]        = "typed",
         [MEMORY_MODE_UNTYPED]      = "untyped",
         [MEMORY_MODE_SHARED_LOCAL] = "shared",
         [MEMORY_MODE_SCRATCH]      = "scratch",
      };
      assert(inst->src[i].ud < ARRAY_SIZE(modes));
      fprintf(file, " %s", modes[inst->src[i].ud]);
      return true;
   }
   case MEMORY_LOGICAL_BINDING_TYPE:
      fprintf(file, " %s", brw_lsc_addr_surftype_to_string(inst->src[i].ud));
      /* The ':' joins the surface type to the binding printed next; flat
       * addressing gets no such separator.
       */
      if (inst->src[i].ud != LSC_ADDR_SURFTYPE_FLAT)
         fprintf(file, ":");
      return true;
   case MEMORY_LOGICAL_BINDING:
      /* Nothing to print for an absent binding; otherwise let the caller
       * print the register right after the ':' emitted above.
       */
      return inst->src[i].file == BAD_FILE;
   case MEMORY_LOGICAL_ADDRESS:
      fprintf(file, " addr: ");
      return false;
   case MEMORY_LOGICAL_COORD_COMPONENTS:
      fprintf(file, " coord_comps:");
      return false;
   case MEMORY_LOGICAL_ALIGNMENT:
      fprintf(file, " align:");
      return false;
   case MEMORY_LOGICAL_DATA_SIZE:
      fprintf(file, " %s", brw_lsc_data_size_to_string(inst->src[i].ud));
      return true;
   case MEMORY_LOGICAL_COMPONENTS:
      fprintf(file, " comps:");
      return false;
   case MEMORY_LOGICAL_FLAGS:
      if (inst->src[i].ud & MEMORY_FLAG_TRANSPOSE)
         fprintf(file, " transpose");
      if (inst->src[i].ud & MEMORY_FLAG_INCLUDE_HELPERS)
         fprintf(file, " helpers");
      return true;
   case MEMORY_LOGICAL_DATA0:
      fprintf(file, " data0: ");
      return false;
   case MEMORY_LOGICAL_DATA1:
      if (inst->src[i].file == BAD_FILE)
         return true;
      fprintf(file, " data1: ");
      return false;
   default:
      unreachable("invalid source");
   }
}

/**
 * Print a single instruction, followed by a newline, to \p file.
 *
 * The line is assembled in this order: optional predicate, mnemonic with
 * ".sat" / conditional-modifier suffixes, execution size, message lengths
 * and EOT marker, the destination, each source, and finally trailing
 * annotations (NoMask, group, NoMaskParams, SWSB).
 *
 * \param s     Shader the instruction belongs to; supplies the ISA info,
 *              VGRF allocation sizes, grf_used, dispatch width and devinfo.
 * \param inst  Instruction to print.
 * \param file  Output stream.
 * \param defs  Optional def analysis.  When non-NULL, a VGRF for which
 *              defs->get() returns non-NULL is printed as "%N" rather
 *              than "vN".
 */
void
brw_print_instruction_to_file(const fs_visitor &s, const fs_inst *inst, FILE *file,
                              const brw::def_analysis *defs)
{
   if (inst->predicate) {
      fprintf(file, "(%cf%d.%d) ",
              inst->predicate_inverse ? '-' : '+',
              inst->flag_subreg / 2,
              inst->flag_subreg % 2);
   }

   fprintf(file, "%s", brw_instruction_name(&s.compiler->isa, inst->opcode));
   if (inst->saturate)
      fprintf(file, ".sat");
   if (inst->conditional_mod) {
      fprintf(file, "%s", conditional_modifier[inst->conditional_mod]);
      /* A predicated instruction already showed its flag register in the
       * predicate prefix, and the listed opcodes never get the suffix.
       */
      if (!inst->predicate &&
          (inst->opcode != BRW_OPCODE_SEL &&
           inst->opcode != BRW_OPCODE_CSEL &&
           inst->opcode != BRW_OPCODE_IF &&
           inst->opcode != BRW_OPCODE_WHILE)) {
         fprintf(file, ".f%d.%d", inst->flag_subreg / 2,
                 inst->flag_subreg % 2);
      }
   }
   fprintf(file, "(%d) ", inst->exec_size);

   if (inst->mlen) {
      fprintf(file, "(mlen: %d) ", inst->mlen);
   }

   if (inst->ex_mlen) {
      fprintf(file, "(ex_mlen: %d) ", inst->ex_mlen);
   }

   if (inst->eot) {
      fprintf(file, "(EOT) ");
   }

   /* Destination register. */
   switch (inst->dst.file) {
   case VGRF:
      if (defs && defs->get(inst->dst))
         fprintf(file, "%%%d", inst->dst.nr);
      else
         fprintf(file, "v%d", inst->dst.nr);
      break;
   case FIXED_GRF:
      fprintf(file, "g%d", inst->dst.nr);
      /* subnr is divided by the type size so the suffix counts elements. */
      if (inst->dst.subnr != 0)
         fprintf(file, ".%d", inst->dst.subnr / brw_type_size_bytes(inst->dst.type));
      break;
   case BAD_FILE:
      fprintf(file, "(null)");
      break;
   case UNIFORM:
      fprintf(file, "***u%d***", inst->dst.nr);
      break;
   case ATTR:
      fprintf(file, "***attr%d***", inst->dst.nr);
      break;
   case ARF:
      /* The high nibble of nr selects the ARF kind, the low nibble the
       * register index within that kind.
       */
      switch (inst->dst.nr & 0xF0) {
      case BRW_ARF_NULL:
         fprintf(file, "null");
         break;
      case BRW_ARF_ADDRESS:
         fprintf(file, "a0.%d", inst->dst.subnr);
         break;
      case BRW_ARF_ACCUMULATOR:
         if (inst->dst.subnr == 0)
            fprintf(file, "acc%d", inst->dst.nr & 0x0F);
         else
            fprintf(file, "acc%d.%d", inst->dst.nr & 0x0F, inst->dst.subnr);

         break;
      case BRW_ARF_FLAG:
         fprintf(file, "f%d.%d", inst->dst.nr & 0xf, inst->dst.subnr);
         break;
      default:
         fprintf(file, "arf%d.%d", inst->dst.nr & 0xf, inst->dst.subnr);
         break;
      }
      break;
   case IMM:
      unreachable("not reached");
   }

   /* Show the offset when it is non-zero, or (while grf_used is still zero)
    * when the write does not cover the whole VGRF allocation.
    */
   if (inst->dst.offset ||
       (!s.grf_used && inst->dst.file == VGRF &&
        s.alloc.sizes[inst->dst.nr] * REG_SIZE != inst->size_written)) {
      const unsigned reg_size = (inst->dst.file == UNIFORM ? 4 : REG_SIZE);
      fprintf(file, "+%u.%u", inst->dst.offset / reg_size,
              inst->dst.offset % reg_size);
   }

   if (inst->dst.stride != 1)
      fprintf(file, "<%u>", inst->dst.stride);
   fprintf(file, ":%s", brw_reg_type_to_letters(inst->dst.type));

   /* Sources. */
   for (int i = 0; i < inst->sources; i++) {
      if (inst->opcode == SHADER_OPCODE_MEMORY_LOAD_LOGICAL ||
          inst->opcode == SHADER_OPCODE_MEMORY_STORE_LOGICAL ||
          inst->opcode == SHADER_OPCODE_MEMORY_ATOMIC_LOGICAL) {
         /* Memory opcodes label their sources instead of comma-separating
          * them; a true return means the source is already fully printed.
          */
         if (print_memory_logical_source(file, inst, i))
            continue;
      } else {
         fprintf(file, ", ");
      }

      if (inst->src[i].negate)
         fprintf(file, "-");
      if (inst->src[i].abs)
         fprintf(file, "|");
      switch (inst->src[i].file) {
      case VGRF:
         if (defs && defs->get(inst->src[i]))
            fprintf(file, "%%%d", inst->src[i].nr);
         else
            fprintf(file, "v%d", inst->src[i].nr);
         break;
      case FIXED_GRF:
         fprintf(file, "g%d", inst->src[i].nr);
         break;
      case ATTR:
         fprintf(file, "attr%d", inst->src[i].nr);
         break;
      case UNIFORM:
         fprintf(file, "u%d", inst->src[i].nr);
         break;
      case BAD_FILE:
         fprintf(file, "(null)");
         break;
      case IMM:
         /* Immediates carry their type as a suffix on the value. */
         switch (inst->src[i].type) {
         case BRW_TYPE_HF:
            fprintf(file, "%-ghf", _mesa_half_to_float(inst->src[i].ud & 0xffff));
            break;
         case BRW_TYPE_F:
            fprintf(file, "%-gf", inst->src[i].f);
            break;
         case BRW_TYPE_DF:
            fprintf(file, "%fdf", inst->src[i].df);
            break;
         case BRW_TYPE_W:
            fprintf(file, "%dw", (int)(int16_t)inst->src[i].d);
            break;
         case BRW_TYPE_D:
            fprintf(file, "%dd", inst->src[i].d);
            break;
         case BRW_TYPE_UW:
            fprintf(file, "%uuw", inst->src[i].ud & 0xffff);
            break;
         case BRW_TYPE_UD:
            fprintf(file, "%uu", inst->src[i].ud);
            break;
         case BRW_TYPE_Q:
            fprintf(file, "%" PRId64 "q", inst->src[i].d64);
            break;
         case BRW_TYPE_UQ:
            fprintf(file, "%" PRIu64 "uq", inst->src[i].u64);
            break;
         case BRW_TYPE_VF:
            /* Four packed 8-bit values, lowest byte first. */
            fprintf(file, "[%-gF, %-gF, %-gF, %-gF]",
                    brw_vf_to_float((inst->src[i].ud >>  0) & 0xff),
                    brw_vf_to_float((inst->src[i].ud >>  8) & 0xff),
                    brw_vf_to_float((inst->src[i].ud >> 16) & 0xff),
                    brw_vf_to_float((inst->src[i].ud >> 24) & 0xff));
            break;
         case BRW_TYPE_V:
         case BRW_TYPE_UV:
            fprintf(file, "%08x%s", inst->src[i].ud,
                    inst->src[i].type == BRW_TYPE_V ? "V" : "UV");
            break;
         default:
            fprintf(file, "???");
            break;
         }
         break;
      case ARF:
         switch (inst->src[i].nr & 0xF0) {
         case BRW_ARF_NULL:
            fprintf(file, "null");
            break;
         case BRW_ARF_ADDRESS:
            fprintf(file, "a0.%d", inst->src[i].subnr);
            break;
         case BRW_ARF_ACCUMULATOR:
            if (inst->src[i].subnr == 0)
               fprintf(file, "acc%d", inst->src[i].nr & 0x0F);
            else
               fprintf(file, "acc%d.%d", inst->src[i].nr & 0x0F, inst->src[i].subnr);

            break;
         case BRW_ARF_FLAG:
            fprintf(file, "f%d.%d", inst->src[i].nr & 0xf, inst->src[i].subnr);
            break;
         default:
            fprintf(file, "arf%d.%d", inst->src[i].nr & 0xf, inst->src[i].subnr);
            break;
         }
         break;
      }

      if (inst->src[i].file == FIXED_GRF && inst->src[i].subnr != 0) {
         assert(inst->src[i].offset == 0);

         fprintf(file, ".%d", inst->src[i].subnr / brw_type_size_bytes(inst->src[i].type));
      } else if (inst->src[i].offset ||
          (!s.grf_used && inst->src[i].file == VGRF &&
           s.alloc.sizes[inst->src[i].nr] * REG_SIZE != inst->size_read(i))) {
         const unsigned reg_size = (inst->src[i].file == UNIFORM ? 4 : REG_SIZE);
         fprintf(file, "+%u.%u", inst->src[i].offset / reg_size,
                 inst->src[i].offset % reg_size);
      }

      if (inst->src[i].abs)
         fprintf(file, "|");

      if (inst->src[i].file != IMM) {
         unsigned stride;
         if (inst->src[i].file == ARF || inst->src[i].file == FIXED_GRF) {
            /* hstride is stored encoded: 0 means stride 0, otherwise the
             * stride is 2^(hstride - 1).
             */
            unsigned hstride = inst->src[i].hstride;
            stride = (hstride == 0 ? 0 : (1 << (hstride - 1)));
         } else {
            stride = inst->src[i].stride;
         }
         if (stride != 1)
            fprintf(file, "<%u>", stride);

         fprintf(file, ":%s", brw_reg_type_to_letters(inst->src[i].type));
      }
   }

   fprintf(file, " ");

   if (inst->force_writemask_all)
      fprintf(file, "NoMask ");

   if (inst->exec_size != s.dispatch_width)
      fprintf(file, "group%d ", inst->group);

   if (inst->has_no_mask_send_params)
      fprintf(file, "NoMaskParams ");

   if (inst->sched.pipe != TGL_PIPE_NONE) {
      fprintf(file, "{ ");
      brw_print_swsb(file, s.devinfo, inst->sched);
      fprintf(file, " } ");
   }

   fprintf(file, "\n");
}

/**
 * Print the software-scoreboard annotation \p swsb to \p f.
 *
 * Nothing is printed when swsb.pipe is TGL_PIPE_NONE.  Otherwise a non-zero
 * register distance is printed as "<pipe>@<dist>", where the pipe letter
 * (F, I, L, A or M) is omitted when \p devinfo is non-NULL and reports
 * verx10 < 125, and a non-zero mode is printed as "$<sbid>" followed by
 * ".dst" or ".src" unless TGL_SBID_SET is set.  The two parts are separated
 * by a single space when both are present.
 */
void
brw_print_swsb(FILE *f, const struct intel_device_info *devinfo, const tgl_swsb swsb)
{
   if (swsb.pipe == TGL_PIPE_NONE)
      return;

   if (swsb.regdist) {
      fprintf(f, "%s@%d",
              (devinfo && devinfo->verx10 < 125 ? "" :
               swsb.pipe == TGL_PIPE_FLOAT ? "F" :
               swsb.pipe == TGL_PIPE_INT ? "I" :
               swsb.pipe == TGL_PIPE_LONG ? "L" :
               swsb.pipe == TGL_PIPE_ALL ? "A" :
               swsb.pipe == TGL_PIPE_MATH ? "M" : "" ),
              swsb.regdist);
   }

   if (swsb.mode) {
      if (swsb.regdist)
         fprintf(f, " ");

      fprintf(f, "$%d%s", swsb.sbid,
              (swsb.mode & TGL_SBID_SET ? "" :
               swsb.mode & TGL_SBID_DST ? ".dst" : ".src"));
   }
}