/***************************************************************************************
 * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
 * Copyright (c) 2020-2021 Peng Cheng Laboratory
 *
 * XiangShan is licensed under Mulan PSL v2.
 * You can use this software according to the terms and conditions of the Mulan PSL v2.
 * You may obtain a copy of Mulan PSL v2 at:
 *          http://license.coscl.org.cn/MulanPSL2
 *
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 *
 * See the Mulan PSL v2 for more details.
 ***************************************************************************************/

package xiangshan.backend.decode

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import utility._
import utils._
import xiangshan._
import xiangshan.backend.rename.RatReadPort
import xiangshan.backend.Bundles._
import xiangshan.backend.fu.vector.Bundles.{VType, Vl}
import xiangshan.backend.fu.FuType
import xiangshan.backend.fu.wrapper.CSRToDecode
import yunsuan.VpermType
import xiangshan.ExceptionNO.{illegalInstr, virtualInstr}
import xiangshan.frontend.FtqPtr

/**
 * IO bundle of [[DecodeStage]].
 *
 * Carries the instruction input/output channels, the RAT read ports, CSR control inputs,
 * vtype update information from the ROB, stall-reason pass-through and trap information to CSR.
 */
class DecodeStageIO(implicit p: Parameters) extends XSBundle {
  // params alias
  private val numIntRegSrc = backendParams.numIntRegSrc
  private val numIntRatPorts = numIntRegSrc
  private val numFpRegSrc = backendParams.numFpRegSrc
  private val numFpRatPorts = numFpRegSrc
  private val numVecRegSrc = backendParams.numVecRegSrc
  private val numVecRatPorts = numVecRegSrc

  val redirect = Input(Bool())
  val canAccept = Output(Bool())
  // from Ibuffer
  val in = Vec(DecodeWidth, Flipped(DecoupledIO(new StaticInst)))
  // to Rename
  val out = Vec(DecodeWidth, DecoupledIO(new DecodedInst))
  // RAT read
  val intRat = Vec(RenameWidth, Vec(numIntRatPorts, Flipped(new RatReadPort(IntLogicRegs))))
  val fpRat = Vec(RenameWidth, Vec(numFpRatPorts, Flipped(new RatReadPort(FpLogicRegs))))
  val vecRat = Vec(RenameWidth, Vec(numVecRatPorts, Flipped(new RatReadPort(VecLogicRegs))))
  val v0Rat = Vec(RenameWidth, Flipped(new RatReadPort(V0LogicRegs)))
  val vlRat = Vec(RenameWidth, Flipped(new RatReadPort(VlLogicRegs)))
  // csr control
  val csrCtrl = Input(new CustomCSRCtrlIO)
  val fromCSR = Input(new CSRToDecode)
  val fusion = Vec(DecodeWidth - 1, Input(Bool()))

  // vtype update
  val fromRob = new Bundle {
    val isResumeVType = Input(Bool())
    val walkToArchVType = Input(Bool())
    val commitVType = new Bundle {
      val vtype = Flipped(Valid(new VType))
      val hasVsetvl = Input(Bool())
    }
    val walkVType = Flipped(Valid(new VType))
  }
  // stall reason pass-through (in: towards the input side, out: towards the output side)
  val stallReason = new Bundle {
    val in = Flipped(new StallReasonIO(DecodeWidth))
    val out = new StallReasonIO(DecodeWidth)
  }
  val vsetvlVType = Input(VType())
  val vstart = Input(Vl())

  val toCSR = new Bundle {
    val trapInstInfo = ValidIO(new TrapInstInfo)
  }
}

/**
 * Decode stage.
 *
 * Instantiates `DecodeWidth` simple decoders ([[DecodeUnit]]), one complex decoder
 * ([[DecodeUnitComp]]) and a vtype generator ([[VTypeGen]]). The outputs of the complex decoder
 * are placed first on `io.out`, followed by the results of the simple decoders. The stage also
 * drives the RAT read addresses from the decoded logical source registers.
 */
class DecodeStage(implicit p: Parameters) extends XSModule
  with HasPerfEvents
  with VectorConstants {

  val io = IO(new DecodeStageIO)

  // record that every valid input instruction has reached the decode stage
  io.in.foreach { d =>
    PerfCCT.updateInstPos(d.bits.debug_seqNum, PerfCCT.InstPos.AtDecode.id.U, d.valid, clock, reset)
  }

  // io alias
  private val outReadys = io.out.map(_.ready)
  private val inValids = io.in.map(_.valid)
  private val inValid = VecInit(inValids).asUInt.orR
  private val outValids = io.out.map(_.valid)
  private val outValid = VecInit(outValids).asUInt.orR
  // readyFromRename Counter
  /** Assume the number of ready channels is "RenameWidth" if the first output channel is ready.
   * Otherwise, assume it is 0. */
  val readyCounter = Mux(outReadys.head, RenameWidth.U, 0.U)

  /** complex decoder */
  val decoderComp = Module(new DecodeUnitComp)
  /** simple decoders in Seq of DecodeWidth */
  val decoders = Seq.fill(DecodeWidth)(Module(new DecodeUnit))
  /** vtype generation module */
  val vtypeGen = Module(new VTypeGen)

  /** running count of fired output instructions (debug only) */
  val debug_globalCounter = RegInit(0.U(XLEN.W))

  /** whether DecodeStage can accept new requests from frontend (CtrlBlock) */
  val canAccept = Wire(Bool())

  // Simple decoders: one per input channel
  decoders.zip(io.in).foreach { case (dst, src) =>
    dst.io.enq.ctrlFlow := src.bits
    dst.io.csrCtrl := io.csrCtrl
    dst.io.fromCSR := io.fromCSR
    dst.io.enq.vtype := vtypeGen.io.vtype
    dst.io.enq.vstart := io.vstart
  }

  /** whether instructions decoded by simple decoders require complex decoding */
  val isComplexVec = VecInit(inValids.zip(decoders.map(_.io.deq.isComplex)).map { case (valid, isComplex) => valid && isComplex })
  /** whether instructions decoded by simple decoders don't require complex decoding */
  val isSimpleVec = VecInit(inValids.zip(decoders.map(_.io.deq.isComplex)).map { case (valid, isComplex) => valid && !isComplex })
  /** instructions decoded by simple decoders */
  val simpleDecodedInst = VecInit(decoders.map(_.io.deq.decodedInst))

  /** whether each fired (valid && ready) output instruction carries an EX_II or EX_VI exception */
  val isIllegalInstVec = VecInit((outValids lazyZip outReadys lazyZip io.out.map(_.bits)).map {
    case (valid, ready, decodedInst) =>
      valid && ready && (decodedInst.exceptionVec(ExceptionNO.EX_II) || decodedInst.exceptionVec(ExceptionNO.EX_VI))
  })
  /** at least 1 fired output instruction is illegal */
  val hasIllegalInst = Cat(isIllegalInstVec).orR
  /** the illegal output instruction picked by priority; all-zero DecodedInst is the default */
  val illegalInst = PriorityMuxDefault(isIllegalInstVec.zip(io.out.map(_.bits)), 0.U.asTypeOf(new DecodedInst))

  /** number of instructions generated by complex decoder */
  val complexNum = Wire(UInt(3.W))
  // (0, 1, 2, 3, 4, 5) + complexNum
  /** Order of simple decoders' result (in output of DecodeStage) considering complex decoder's. Since complex decoder's
   * results will be arranged before simple decoders' */
  val complexNumAddLocation: Vec[UInt] = VecInit((0 until DecodeWidth).map(x => (x.U +& complexNum)))
  /** mask off decoded instructions that can not be accepted */
  val noMoreThanRenameReady: Vec[Bool] = VecInit(complexNumAddLocation.map(x => x <= readyCounter))
  /** existence of complex instructions among first few simple decoders' results, which needs decoding */
  val complexValid = VecInit((isComplexVec zip noMoreThanRenameReady).map(x => x._1 & x._2)).asUInt.orR
  /** selected complex instruction for complex decoder */
  val complexInst = PriorityMuxDefault(isComplexVec.zip(decoders.map(_.io.deq.decodedInst)), 0.U.asTypeOf(new DecodedInst))
  /** selected complex micro operation information for complex decoder */
  val complexUopInfo = PriorityMuxDefault(isComplexVec.zip(decoders.map(_.io.deq.uopInfo)), 0.U.asTypeOf(new UopInfo))

  vtypeGen.io.insts.zip(io.in).foreach { case (inst, in) =>
    inst.valid := in.valid
    inst.bits := in.bits.instr
  }
  // when io.redirect is True, never update vtype
  vtypeGen.io.canUpdateVType := decoderComp.io.in.fire && decoderComp.io.in.bits.simpleDecodedInst.isVset && !io.redirect
  vtypeGen.io.walkToArchVType := io.fromRob.walkToArchVType
  vtypeGen.io.commitVType := io.fromRob.commitVType
  vtypeGen.io.walkVType := io.fromRob.walkVType
  vtypeGen.io.vsetvlVType := io.vsetvlVType

  // Complex decoder (single instance)
  decoderComp.io.redirect := io.redirect
  decoderComp.io.csrCtrl := io.csrCtrl
  decoderComp.io.vtypeBypass := vtypeGen.io.vtype
  // The input inst of decoderComp is latched last cycle.
  // Set input empty, if there is no complex inst latched last cycle.
  decoderComp.io.in.valid := complexValid && !io.fromRob.isResumeVType
  decoderComp.io.in.bits.simpleDecodedInst := complexInst
  decoderComp.io.in.bits.uopInfo := complexUopInfo
  decoderComp.io.out.complexDecodedInsts.zipWithIndex.foreach { case (out, i) => out.ready := io.out(i).ready }

  /** instructions decoded by complex decoders */
  val complexDecodedInst = VecInit(decoderComp.io.out.complexDecodedInsts.map(_.bits))
  /** whether instructions decoded by complex decoders are valid */
  val complexDecodedInstValid = VecInit(decoderComp.io.out.complexDecodedInsts.map(_.valid))
  complexNum := decoderComp.io.complexNum

  // Vec(S,S,S,C,S,C) -> Vec(1,1,1,0,0,0)
  /** whether every instruction up to and including this position is a valid simple instruction,
   * i.e. no complex (or invalid) instruction sits at or before this position */
  val simplePrefixVec = VecInit((0 until DecodeWidth).map(i => VecInit(isSimpleVec.take(i + 1)).asUInt.andR))

  // Vec(S,S,S,C,S,S) -> Vec(0,0,0,1,0,0)
  /** one-hot representation of the first complex instruction */
  val firstComplexOH: Vec[Bool] = VecInit(PriorityEncoderOH(isComplexVec))

  // block vector inst when vtype is resuming
  val hasVectorInst = VecInit(decoders.map(x => FuType.FuTypeOrR(x.io.deq.decodedInst.fuType, FuType.vecArithOrMem ++ FuType.vecVSET))).asUInt.orR

  /** condition of acceptation: no redirection, ready from rename/complex decoder, no resumeVType */
  canAccept := !io.redirect && (io.out.head.ready || decoderComp.io.in.ready) && !io.fromRob.isResumeVType

  io.canAccept := canAccept

  /**
   * Assign ready signal for DecodeStage's input. Ready signal in i-th channel:
   *
   * It must hold that no redirection and no isResumeVType is caught.
   * One situation for set up ready signal is that first "i" instructions are all simple instructions, and these "i"
   * instructions can be passed down to rename together with complex decoder's result.
   * Another situation is that first "i-1" instructions are all simple instructions, and the "i-th" instructions needs
   * to be sent to complex decoder, with complex decoder ready for new input.
   */
  io.in.zipWithIndex.foreach { case (in, i) =>
    in.ready := !io.redirect && (
      simplePrefixVec(i) && (i.U +& complexNum) < readyCounter ||
      firstComplexOH(i) && (i.U +& complexNum) <= readyCounter && decoderComp.io.in.ready
    ) && !io.fromRob.isResumeVType
  }

  /** final instruction decoding result */
  val finalDecodedInst = Wire(Vec(DecodeWidth, new DecodedInst))
  /** valid signs of final instruction decoding result */
  val finalDecodedInstValid = Wire(Vec(DecodeWidth, Bool()))

  /**
   * Select final result of DecodeStage. Select all complex decoded insts results at the beginning of final result, and
   * use simple decoded insts to fill the rest space in DecodeWidth.
   */
  for (i <- 0 until DecodeWidth) {
    finalDecodedInst(i) := Mux(complexNum > i.U, complexDecodedInst(i), simpleDecodedInst(i.U - complexNum))
    finalDecodedInstValid(i) := Mux(complexNum > i.U, complexDecodedInstValid(i), simplePrefixVec(i.U - complexNum))
  }

  /**
   * Generate output of DecodeStage. Pass finalDecodedInst to output as decoded instructions.
   * Note that finalDecodedInst is generated in order.
   */
  io.out.zipWithIndex.foreach { case (inst, i) =>
    inst.valid := finalDecodedInstValid(i) && !io.fromRob.isResumeVType
    inst.bits := finalDecodedInst(i)
    // swap source 0 and source 1 (register index and type) when vpu.isReverse is set
    inst.bits.lsrc(0) := Mux(finalDecodedInst(i).vpu.isReverse, finalDecodedInst(i).lsrc(1), finalDecodedInst(i).lsrc(0))
    inst.bits.lsrc(1) := Mux(finalDecodedInst(i).vpu.isReverse, finalDecodedInst(i).lsrc(0), finalDecodedInst(i).lsrc(1))
    inst.bits.srcType(0) := Mux(finalDecodedInst(i).vpu.isReverse, finalDecodedInst(i).srcType(1), finalDecodedInst(i).srcType(0))
    inst.bits.srcType(1) := Mux(finalDecodedInst(i).vpu.isReverse, finalDecodedInst(i).srcType(0), finalDecodedInst(i).srcType(1))
    // a vector write to logical register 0 is reported as a v0 write instead of a vec write
    inst.bits.v0Wen := finalDecodedInst(i).vecWen && finalDecodedInst(i).ldest === 0.U || finalDecodedInst(i).v0Wen
    inst.bits.vecWen := finalDecodedInst(i).vecWen && finalDecodedInst(i).ldest =/= 0.U
    // when any of the first four sources is a vector source with logical index 0, src3 reads V0
    val srcType0123HasV0 = finalDecodedInst(i).srcType.zip(finalDecodedInst(i).lsrc).take(4).map { case (s, l) =>
      SrcType.isVp(s) && (l === 0.U)
    }.reduce(_ || _)
    inst.bits.srcType(3) := Mux(srcType0123HasV0, SrcType.v0, finalDecodedInst(i).srcType(3))
    // only the uop with uopIdx 0 keeps its debug sequence number
    when (inst.bits.uopIdx =/= 0.U) {
      inst.bits.debug_seqNum := 0.U
    }
  }

  // every valid output uop may write at most one register file
  io.out.foreach { x =>
    when(x.valid) {
      assert(PopCount(VecInit(x.bits.rfWen, x.bits.fpWen, x.bits.vecWen, x.bits.v0Wen, x.bits.vlWen)) < 2.U,
        "DecodeOut: can't wirte two regfile in one uop/instruction")
    }
  }

  /**
   * Prepare address and hold for output to Rat (Rename Alias Table)
   */
  // NOTE(review): the RAT Vecs in DecodeStageIO are sized by RenameWidth while this loop runs over
  // DecodeWidth; presumably DecodeWidth == RenameWidth - confirm.
  for (i <- 0 until DecodeWidth) {

    // We use the lsrc/ldest before fusion decoder to read RAT for better timing.
    io.intRat(i)(0).addr := io.out(i).bits.lsrc(0)
    io.intRat(i)(1).addr := io.out(i).bits.lsrc(1)
    io.intRat(i).foreach(_.hold := !io.out(i).ready)

    // Floating-point instructions can not be fused now.
    io.fpRat(i)(0).addr := io.out(i).bits.lsrc(0)
    io.fpRat(i)(1).addr := io.out(i).bits.lsrc(1)
    io.fpRat(i)(2).addr := io.out(i).bits.lsrc(2)
    io.fpRat(i).foreach(_.hold := !io.out(i).ready)

    // Vec instructions
    // TODO: vec uop dividers need change this
    io.vecRat(i)(0).addr := io.out(i).bits.lsrc(0) // vs1
    io.vecRat(i)(1).addr := io.out(i).bits.lsrc(1) // vs2
    io.vecRat(i)(2).addr := io.out(i).bits.lsrc(2) // old_vd
    io.vecRat(i).foreach(_.hold := !io.out(i).ready)

    io.v0Rat(i).addr := V0_IDX.U // v0
    io.v0Rat(i).hold := !io.out(i).ready

    io.vlRat(i).addr := Vl_IDX.U // vl
    io.vlRat(i).hold := !io.out(i).ready
  }

  /** whether valid input requests from frontend exists */
  val hasValid = VecInit(io.in.map(_.valid)).asUInt.orR

  debug_globalCounter := debug_globalCounter + PopCount(io.out.map(_.fire))

  // forward the back reason towards the input side; override each per-channel reason with it when valid
  io.stallReason.in.backReason := io.stallReason.out.backReason
  io.stallReason.out.reason.zip(io.stallReason.in.reason).zip(io.in.map(_.valid)).foreach { case ((out, in), _) =>
    out := Mux(io.stallReason.out.backReason.valid,
               io.stallReason.out.backReason.bits,
               in)
  }

  io.toCSR.trapInstInfo.valid := hasIllegalInst && !io.redirect
  io.toCSR.trapInstInfo.bits.fromDecodedInst(illegalInst)

  // set on redirect, cleared once any input channel fires again
  val recoveryFlag = RegInit(false.B)
  when(io.redirect) {
    recoveryFlag := true.B
  }.elsewhen(io.in.map(_.fire).reduce(_ || _)) {
    recoveryFlag := false.B
  }

  XSPerfAccumulate("in_valid_count", PopCount(io.in.map(_.valid)))
  XSPerfAccumulate("in_fire_count", PopCount(io.in.map(_.fire)))
  XSPerfAccumulate("in_valid_not_ready_count", PopCount(io.in.map(x => x.valid && !x.ready)))
  XSPerfAccumulate("stall_cycle", io.in.head.valid && !io.in.head.ready)
  XSPerfAccumulate("wait_cycle", !io.in.head.valid && io.out.head.ready)
  XSPerfAccumulate("inst_spec", PopCount(io.in.map(_.fire)))
  XSPerfAccumulate("recovery_bubble", recoveryFlag)

  XSPerfHistogram("in_valid_range", PopCount(io.in.map(_.valid)), true.B, 0, DecodeWidth + 1, 1)
  XSPerfHistogram("in_fire_range", PopCount(io.in.map(_.fire)), true.B, 0, DecodeWidth + 1, 1)
  XSPerfHistogram("out_valid_range", PopCount(io.out.map(_.valid)), true.B, 0, DecodeWidth + 1, 1)
  XSPerfHistogram("out_fire_range", PopCount(io.out.map(_.fire)), true.B, 0, DecodeWidth + 1, 1)

  val fusionValid = VecInit(io.fusion.map(x => GatedValidRegNext(x)))
  val inValidNotReady = io.in.map(in => GatedValidRegNext(in.valid && !in.ready))
  val perfEvents = Seq(
    ("decoder_fused_instr", PopCount(fusionValid)       ),
    ("decoder_waitInstr",   PopCount(inValidNotReady)   ),
    ("decoder_stall_cycle", hasValid && !io.out(0).ready),
    ("decoder_utilization", PopCount(io.in.map(_.valid))),
    ("INST_SPEC",           PopCount(io.in.map(_.fire))),
    ("RECOVERY_BUBBLE",     recoveryFlag)
  )
  generatePerfEvent()

  // for more readable verilog
  dontTouch(isSimpleVec)
  dontTouch(isComplexVec)
  dontTouch(simplePrefixVec)
  dontTouch(complexValid)
  dontTouch(complexNum)
  dontTouch(readyCounter)
  dontTouch(firstComplexOH)
}