// xref: /XiangShan/src/main/scala/xiangshan/backend/decode/DecodeStage.scala (revision 1592abd11eecf7bec0f1453ffe4a7617167f8ba9)
/***************************************************************************************
 * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
 * Copyright (c) 2020-2021 Peng Cheng Laboratory
 *
 * XiangShan is licensed under Mulan PSL v2.
 * You can use this software according to the terms and conditions of the Mulan PSL v2.
 * You may obtain a copy of Mulan PSL v2 at:
 *          http://license.coscl.org.cn/MulanPSL2
 *
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 *
 * See the Mulan PSL v2 for more details.
 ***************************************************************************************/
16
17package xiangshan.backend.decode
18
19import org.chipsalliance.cde.config.Parameters
20import chisel3._
21import chisel3.util._
22import utility._
23import utils._
24import xiangshan._
25import xiangshan.backend.rename.RatReadPort
26import xiangshan.backend.Bundles._
27import xiangshan.backend.fu.vector.Bundles.{VType, Vl}
28import xiangshan.backend.fu.FuType
29import xiangshan.backend.fu.wrapper.CSRToDecode
30import yunsuan.VpermType
31import xiangshan.ExceptionNO.{illegalInstr, virtualInstr}
32import xiangshan.frontend.FtqPtr
33
/**
 * Port bundle of [[DecodeStage]].
 *
 * Field declaration order is significant for a Chisel bundle and must be kept as is.
 */
class DecodeStageIO(implicit p: Parameters) extends XSBundle {
  // Number of RAT read ports per instruction slot: one per register source of each kind.
  private val numIntRatPorts = backendParams.numIntRegSrc
  private val numFpRatPorts = backendParams.numFpRegSrc
  private val numVecRatPorts = backendParams.numVecRegSrc

  /** redirect indication; DecodeStage de-asserts input ready and canAccept while it is high */
  val redirect = Input(Bool())
  /** whether DecodeStage can take new requests this cycle */
  val canAccept = Output(Bool())
  /** instructions coming from Ibuffer */
  val in = Vec(DecodeWidth, Flipped(DecoupledIO(new StaticInst)))
  /** decoded instructions going to Rename */
  val out = Vec(DecodeWidth, DecoupledIO(new DecodedInst))

  // RAT read ports, one group per rename slot
  val intRat = Vec(RenameWidth, Vec(numIntRatPorts, Flipped(new RatReadPort(IntLogicRegs))))
  val fpRat = Vec(RenameWidth, Vec(numFpRatPorts, Flipped(new RatReadPort(FpLogicRegs))))
  val vecRat = Vec(RenameWidth, Vec(numVecRatPorts, Flipped(new RatReadPort(VecLogicRegs))))
  val v0Rat = Vec(RenameWidth, Flipped(new RatReadPort(V0LogicRegs)))
  val vlRat = Vec(RenameWidth, Flipped(new RatReadPort(VlLogicRegs)))

  // CSR-side control inputs, forwarded to the decoders
  val csrCtrl = Input(new CustomCSRCtrlIO)
  val fromCSR = Input(new CSRToDecode)
  /** per adjacent-slot-pair fusion flag (DecodeWidth - 1 entries) */
  val fusion = Vec(DecodeWidth - 1, Input(Bool()))

  /** vtype update information coming from ROB */
  val fromRob = new Bundle {
    val isResumeVType = Input(Bool())
    val walkToArchVType = Input(Bool())
    val commitVType = new Bundle {
      val vtype = Flipped(Valid(new VType))
      val hasVsetvl = Input(Bool())
    }
    val walkVType = Flipped(Valid(new VType))
  }

  /** stall reason pass-through: `in` faces the producer side, `out` faces the consumer side */
  val stallReason = new Bundle {
    val in = Flipped(new StallReasonIO(DecodeWidth))
    val out = new StallReasonIO(DecodeWidth)
  }

  /** vtype value supplied for vsetvl, forwarded to the vtype generator */
  val vsetvlVType = Input(VType())
  /** current vstart, forwarded to every simple decoder */
  val vstart = Input(Vl())

  /** information reported to CSR */
  val toCSR = new Bundle {
    val trapInstInfo = ValidIO(new TrapInstInfo)
  }
}
81
/**
 * Decode stage.
 *
 * Instantiates DecodeWidth simple decoders ([[DecodeUnit]]), one complex decoder
 * ([[DecodeUnitComp]]) and a vtype generator ([[VTypeGen]]). Every input slot is first decoded by
 * a simple decoder; the first input flagged as complex is handed to the complex decoder. In the
 * output, the complex decoder's results occupy the lowest `complexNum` slots and simple-decoded
 * instructions fill the slots after them, in order.
 */
class DecodeStage(implicit p: Parameters) extends XSModule
  with HasPerfEvents
  with VectorConstants {

  val io = IO(new DecodeStageIO)

  // Report each input instruction's position (AtDecode) to PerfCCT.
  io.in.foreach { in =>
    PerfCCT.updateInstPos(in.bits.debug_seqNum, PerfCCT.InstPos.AtDecode.id.U, in.valid, clock, reset)
  }

  // io alias
  private val outReadys = io.out.map(_.ready)
  private val inValids = io.in.map(_.valid)
  private val outValids = io.out.map(_.valid)

  // readyFromRename counter
  /** Assume number of ready channels be "RenameWidth" if the first output channel is ready. If not, assume that be 0 */
  val readyCounter = Mux(outReadys.head, RenameWidth.U, 0.U)

  /** complex decoder */
  val decoderComp = Module(new DecodeUnitComp)
  /** simple decoders, one per input slot (DecodeWidth in total) */
  val decoders = Seq.fill(DecodeWidth)(Module(new DecodeUnit))
  /** vtype generation module */
  val vtypeGen = Module(new VTypeGen)

  /** running count of fired output uops/instructions (debug only) */
  val debug_globalCounter = RegInit(0.U(XLEN.W))

  /** whether DecodeStage can accept new requests from frontend (CtrlBlock) */
  val canAccept = Wire(Bool())

  // Feed every simple decoder with its input slot plus the shared CSR / vtype / vstart context.
  decoders.zip(io.in).foreach { case (dst, src) =>
    dst.io.enq.ctrlFlow := src.bits
    dst.io.csrCtrl := io.csrCtrl
    dst.io.fromCSR := io.fromCSR
    dst.io.enq.vtype := vtypeGen.io.vtype
    dst.io.enq.vstart := io.vstart
  }

  /** whether the (valid) instruction in each slot requires complex decoding */
  val isComplexVec = VecInit(inValids.zip(decoders.map(_.io.deq.isComplex)).map { case (valid, isComplex) => valid && isComplex })
  /** whether the (valid) instruction in each slot does not require complex decoding */
  val isSimpleVec = VecInit(inValids.zip(decoders.map(_.io.deq.isComplex)).map { case (valid, isComplex) => valid && !isComplex })
  /** instructions decoded by simple decoders */
  val simpleDecodedInst = VecInit(decoders.map(_.io.deq.decodedInst))

  /** per output slot: the slot fires (valid && ready) and carries an illegal- or virtual-instruction exception */
  val isIllegalInstVec = VecInit((outValids lazyZip outReadys lazyZip io.out.map(_.bits)).map {
    case (valid, ready, decodedInst) =>
      valid && ready && (decodedInst.exceptionVec(ExceptionNO.EX_II) || decodedInst.exceptionVec(ExceptionNO.EX_VI))
  })
  /** at least 1 fired output instruction is illegal */
  val hasIllegalInst = Cat(isIllegalInstVec).orR
  /** the illegal output instruction picked by PriorityMuxDefault over the output slots; all-zero when there is none */
  val illegalInst = PriorityMuxDefault(isIllegalInstVec.zip(io.out.map(_.bits)), 0.U.asTypeOf(new DecodedInst))

  /** number of instructions generated by complex decoder */
  val complexNum = Wire(UInt(3.W))
  // (0, 1, ..., DecodeWidth - 1) + complexNum
  /** Order of simple decoders' result (in output of DecodeStage) considering complex decoder's. Since complex decoder's
   * results will be arranged before simple decoders' */
  val complexNumAddLocation: Vec[UInt] = VecInit((0 until DecodeWidth).map(x => (x.U +& complexNum)))
  /** per slot: shifted position (i + complexNum) is not greater than readyCounter */
  val noMoreThanRenameReady: Vec[Bool] = VecInit(complexNumAddLocation.map(x => x <= readyCounter))
  /** existence of a valid complex instruction in a slot that passes the noMoreThanRenameReady check */
  val complexValid = VecInit((isComplexVec zip noMoreThanRenameReady).map(x => x._1 & x._2)).asUInt.orR
  /** selected complex instruction for complex decoder */
  val complexInst = PriorityMuxDefault(isComplexVec.zip(decoders.map(_.io.deq.decodedInst)), 0.U.asTypeOf(new DecodedInst))
  /** selected complex micro operation information for complex decoder */
  val complexUopInfo = PriorityMuxDefault(isComplexVec.zip(decoders.map(_.io.deq.uopInfo)), 0.U.asTypeOf(new UopInfo))

  // The vtype generator observes the raw instruction bits of every input slot.
  vtypeGen.io.insts.zip(io.in).foreach { case (inst, in) =>
    inst.valid := in.valid
    inst.bits := in.bits.instr
  }
  // when io.redirect is True, never update vtype
  vtypeGen.io.canUpdateVType := decoderComp.io.in.fire && decoderComp.io.in.bits.simpleDecodedInst.isVset && !io.redirect
  vtypeGen.io.walkToArchVType := io.fromRob.walkToArchVType
  vtypeGen.io.commitVType := io.fromRob.commitVType
  vtypeGen.io.walkVType := io.fromRob.walkVType
  vtypeGen.io.vsetvlVType := io.vsetvlVType

  // Complex decoder hook-up
  decoderComp.io.redirect := io.redirect
  decoderComp.io.csrCtrl := io.csrCtrl
  decoderComp.io.vtypeBypass := vtypeGen.io.vtype
  // NOTE(review): the previous comment here said the complex decoder's input "is latched last
  // cycle"; within this module complexInst/complexUopInfo are combinational, so any latching
  // would happen inside DecodeUnitComp - confirm there.
  // No request is issued to the complex decoder while vtype is being resumed.
  decoderComp.io.in.valid := complexValid && !io.fromRob.isResumeVType
  decoderComp.io.in.bits.simpleDecodedInst := complexInst
  decoderComp.io.in.bits.uopInfo := complexUopInfo
  decoderComp.io.out.complexDecodedInsts.zipWithIndex.foreach { case (out, i) => out.ready := io.out(i).ready }

  /** instructions decoded by complex decoders */
  val complexDecodedInst = VecInit(decoderComp.io.out.complexDecodedInsts.map(_.bits))
  /** whether instructions decoded by complex decoders are valid */
  val complexDecodedInstValid = VecInit(decoderComp.io.out.complexDecodedInsts.map(_.valid))
  complexNum := decoderComp.io.complexNum

  // Vec(S,S,S,C,S,C) -> Vec(1,1,1,0,0,0)
  /** whether every slot from 0 up to and including this position holds a valid simple instruction,
   * i.e. no complex (or invalid) slot appears before or at this position */
  val simplePrefixVec = VecInit((0 until DecodeWidth).map(i => VecInit(isSimpleVec.take(i + 1)).asUInt.andR))

  // Vec(S,S,S,C,S,S) -> Vec(0,0,0,1,0,0)
  /** one-hot representation of the first complex instruction */
  val firstComplexOH: Vec[Bool] = VecInit(PriorityEncoderOH(isComplexVec))

  /** whether any simple decoder reports a vector arith/mem or vset function unit type.
   * NOTE(review): the original comment reads "block vector inst when vtype is resuming", but this
   * signal is not referenced anywhere else in this class. */
  val hasVectorInst = VecInit(decoders.map(x => FuType.FuTypeOrR(x.io.deq.decodedInst.fuType, FuType.vecArithOrMem ++ FuType.vecVSET))).asUInt.orR

  /** condition of acceptation: no redirection, ready from rename/complex decoder, no resumeVType */
  canAccept := !io.redirect && (io.out.head.ready || decoderComp.io.in.ready) && !io.fromRob.isResumeVType

  io.canAccept := canAccept

  /**
   * Assign ready signal for DecodeStage's input. Ready signal in i-th channel:
   *
   * It must hold that no redirection and no isResumeVType is caught.
   * One situation is that slots 0..i all hold valid simple instructions and the shifted position
   * (i + complexNum) is strictly below readyCounter, so the instruction can be passed down to
   * rename together with complex decoder's result.
   * Another situation is that slot i holds the first complex instruction, its shifted position is
   * not above readyCounter, and complex decoder is ready for new input.
   */
  io.in.zipWithIndex.foreach { case (in, i) =>
    in.ready := !io.redirect && (
      simplePrefixVec(i) && (i.U +& complexNum) < readyCounter ||
      firstComplexOH(i) && (i.U +& complexNum) <= readyCounter && decoderComp.io.in.ready
    ) && !io.fromRob.isResumeVType
  }

  /** final instruction decoding result */
  val finalDecodedInst = Wire(Vec(DecodeWidth, new DecodedInst))
  /** valid signs of final instruction decoding result */
  val finalDecodedInstValid = Wire(Vec(DecodeWidth, Bool()))

  /**
   * Select final result of DecodeStage. Select all complex decoded insts results at the beginning of final result, and
   * use simple decoded insts to fill the rest space in DecodeWidth.
   * The index `i.U - complexNum` is only selected by the Mux when complexNum <= i.
   */
  for (i <- 0 until DecodeWidth) {
    finalDecodedInst(i) := Mux(complexNum > i.U, complexDecodedInst(i), simpleDecodedInst(i.U - complexNum))
    finalDecodedInstValid(i) := Mux(complexNum > i.U, complexDecodedInstValid(i), simplePrefixVec(i.U - complexNum))
  }

  /**
   * Generate output of DecodeStage. Pass finalDecodedInst to output as decoded instructions.
   * Note that finalDecodedInst is generated in order.
   */
  io.out.zipWithIndex.foreach { case (inst, i) =>
    // Nothing is presented to rename while vtype is being resumed.
    inst.valid := finalDecodedInstValid(i) && !io.fromRob.isResumeVType
    inst.bits := finalDecodedInst(i)
    // For reversed vector operations, swap source 0 and source 1 (register index and source type).
    inst.bits.lsrc(0) := Mux(finalDecodedInst(i).vpu.isReverse, finalDecodedInst(i).lsrc(1), finalDecodedInst(i).lsrc(0))
    inst.bits.lsrc(1) := Mux(finalDecodedInst(i).vpu.isReverse, finalDecodedInst(i).lsrc(0), finalDecodedInst(i).lsrc(1))
    inst.bits.srcType(0) := Mux(finalDecodedInst(i).vpu.isReverse, finalDecodedInst(i).srcType(1), finalDecodedInst(i).srcType(0))
    inst.bits.srcType(1) := Mux(finalDecodedInst(i).vpu.isReverse, finalDecodedInst(i).srcType(0), finalDecodedInst(i).srcType(1))
    // A vector write whose destination index is 0 is turned into a v0 write, so that vecWen and
    // v0Wen are never both set because of it.
    inst.bits.v0Wen := finalDecodedInst(i).vecWen && finalDecodedInst(i).ldest === 0.U || finalDecodedInst(i).v0Wen
    inst.bits.vecWen := finalDecodedInst(i).vecWen && finalDecodedInst(i).ldest =/= 0.U
    // When any of the first four sources is a vector source with logical index 0, src3 reads V0.
    val srcType0123HasV0 = finalDecodedInst(i).srcType.zip(finalDecodedInst(i).lsrc).take(4).map { case (s, l) =>
      SrcType.isVp(s) && (l === 0.U)
    }.reduce(_ || _)
    inst.bits.srcType(3) := Mux(srcType0123HasV0, SrcType.v0, finalDecodedInst(i).srcType(3))
    // Only the uop with index 0 keeps the debug sequence number.
    when (inst.bits.uopIdx =/= 0.U) {
      inst.bits.debug_seqNum := 0.U
    }
  }

  // A valid output may enable at most one of the register-file write flags.
  io.out.foreach { x =>
    when(x.valid){
      assert(PopCount(VecInit(x.bits.rfWen, x.bits.fpWen, x.bits.vecWen, x.bits.v0Wen, x.bits.vlWen)) < 2.U,
        "DecodeOut: can't wirte two regfile in one uop/instruction")
    }
  }

  /**
   * Prepare address and hold for output to Rat (Rename Alias Table).
   * Every read port of slot i is held while output slot i is not ready.
   */
  for (i <- 0 until DecodeWidth) {

    // We use the lsrc/ldest before fusion decoder to read RAT for better timing.
    io.intRat(i)(0).addr := io.out(i).bits.lsrc(0)
    io.intRat(i)(1).addr := io.out(i).bits.lsrc(1)
    io.intRat(i).foreach(_.hold := !io.out(i).ready)

    // Floating-point instructions can not be fused now.
    io.fpRat(i)(0).addr := io.out(i).bits.lsrc(0)
    io.fpRat(i)(1).addr := io.out(i).bits.lsrc(1)
    io.fpRat(i)(2).addr := io.out(i).bits.lsrc(2)
    io.fpRat(i).foreach(_.hold := !io.out(i).ready)

    // Vec instructions
    // TODO: vec uop dividers need change this
    io.vecRat(i)(0).addr := io.out(i).bits.lsrc(0) // vs1
    io.vecRat(i)(1).addr := io.out(i).bits.lsrc(1) // vs2
    io.vecRat(i)(2).addr := io.out(i).bits.lsrc(2) // old_vd
    io.vecRat(i).foreach(_.hold := !io.out(i).ready)

    io.v0Rat(i).addr := V0_IDX.U // v0
    io.v0Rat(i).hold := !io.out(i).ready

    io.vlRat(i).addr := Vl_IDX.U // vl
    io.vlRat(i).hold := !io.out(i).ready
  }

  /** whether valid input requests from frontend exists */
  val hasValid = VecInit(io.in.map(_.valid)).asUInt.orR

  debug_globalCounter := debug_globalCounter + PopCount(io.out.map(_.fire))

  // Stall reasons: the back reason is passed upstream unchanged; when it is valid it also
  // overrides every per-slot reason passed downstream.
  io.stallReason.in.backReason := io.stallReason.out.backReason
  io.stallReason.out.reason.zip(io.stallReason.in.reason).foreach { case (out, in) =>
    out := Mux(io.stallReason.out.backReason.valid,
               io.stallReason.out.backReason.bits,
               in)
  }

  // Report the selected illegal instruction to CSR unless a redirect is in progress.
  io.toCSR.trapInstInfo.valid := hasIllegalInst && !io.redirect
  io.toCSR.trapInstInfo.bits.fromDecodedInst(illegalInst)

  /** set by a redirect, cleared once any input slot fires afterwards (redirect takes priority) */
  val recoveryFlag = RegInit(false.B)
  when(io.redirect) {
    recoveryFlag := true.B
  }.elsewhen(io.in.map(_.fire).reduce(_ || _)) {
    recoveryFlag := false.B
  }

  XSPerfAccumulate("in_valid_count", PopCount(io.in.map(_.valid)))
  XSPerfAccumulate("in_fire_count", PopCount(io.in.map(_.fire)))
  XSPerfAccumulate("in_valid_not_ready_count", PopCount(io.in.map(x => x.valid && !x.ready)))
  XSPerfAccumulate("stall_cycle", io.in.head.valid && !io.in.head.ready)
  XSPerfAccumulate("wait_cycle", !io.in.head.valid && io.out.head.ready)
  XSPerfAccumulate("inst_spec", PopCount(io.in.map(_.fire)))
  XSPerfAccumulate("recovery_bubble", recoveryFlag)

  XSPerfHistogram("in_valid_range", PopCount(io.in.map(_.valid)), true.B, 0, DecodeWidth + 1, 1)
  XSPerfHistogram("in_fire_range", PopCount(io.in.map(_.fire)), true.B, 0, DecodeWidth + 1, 1)
  XSPerfHistogram("out_valid_range", PopCount(io.out.map(_.valid)), true.B, 0, DecodeWidth + 1, 1)
  XSPerfHistogram("out_fire_range", PopCount(io.out.map(_.fire)), true.B, 0, DecodeWidth + 1, 1)

  // Registered copies of the fusion flags and of "valid but not ready" inputs, used by the perf events.
  val fusionValid = VecInit(io.fusion.map(x => GatedValidRegNext(x)))
  val inValidNotReady = io.in.map(in => GatedValidRegNext(in.valid && !in.ready))
  val perfEvents = Seq(
    ("decoder_fused_instr", PopCount(fusionValid)       ),
    ("decoder_waitInstr",   PopCount(inValidNotReady)   ),
    ("decoder_stall_cycle", hasValid && !io.out(0).ready),
    ("decoder_utilization", PopCount(io.in.map(_.valid))),
    ("INST_SPEC",           PopCount(io.in.map(_.fire))),
    ("RECOVERY_BUBBLE",     recoveryFlag)
  )
  generatePerfEvent()

  // for more readable verilog
  dontTouch(isSimpleVec)
  dontTouch(isComplexVec)
  dontTouch(simplePrefixVec)
  dontTouch(complexValid)
  dontTouch(complexNum)
  dontTouch(readyCounter)
  dontTouch(firstComplexOH)
}
345