xref: /XiangShan/src/main/scala/xiangshan/backend/dispatch/NewDispatch.scala (revision f9daee6677f06fa86072ef1ec22fe8d7b0eb4afe)
1/***************************************************************************************
2* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
3* Copyright (c) 2020-2021 Peng Cheng Laboratory
4*
5* XiangShan is licensed under Mulan PSL v2.
6* You can use this software according to the terms and conditions of the Mulan PSL v2.
7* You may obtain a copy of Mulan PSL v2 at:
8*          http://license.coscl.org.cn/MulanPSL2
9*
10* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13*
14* See the Mulan PSL v2 for more details.
15***************************************************************************************/
16
17package xiangshan.backend.dispatch
18
19import org.chipsalliance.cde.config.Parameters
20import chisel3._
21import chisel3.util._
22import chisel3.util.experimental.decode._
23import freechips.rocketchip.diplomacy.{LazyModule, LazyModuleImp}
24import utility._
25import xiangshan.ExceptionNO._
26import xiangshan._
27import xiangshan.backend.rob.{RobDispatchTopDownIO, RobEnqIO}
28import xiangshan.backend.Bundles.{DecodedInst, DynInst, ExuVec, IssueQueueIQWakeUpBundle}
29import xiangshan.backend.fu.{FuConfig, FuType}
30import xiangshan.backend.rename.{BusyTable, VlBusyTable}
32import xiangshan.backend.rename.BusyTableReadIO
33import xiangshan.backend.datapath.DataConfig._
34import xiangshan.backend.datapath.WbConfig._
35import xiangshan.backend.datapath.DataSource
36import xiangshan.backend.datapath.WbConfig.VfWB
37import xiangshan.backend.fu.FuType.FuTypeOrR
38import xiangshan.backend.regcache.{RCTagTableReadPort, RegCacheTagTable}
39import xiangshan.mem.MemCoreTopDownIO
40import xiangshan.mem.mdp._
41import xiangshan.mem.{HasVLSUParameters, _}
42
43class CoreDispatchTopDownIO extends Bundle {
44  val l2MissMatch = Input(Bool())
45  val l3MissMatch = Input(Bool())
46  val fromMem = Flipped(new MemCoreTopDownIO)
47}
48// TODO: delete the trigger message passed from frontend to the IQ
49class NewDispatch(implicit p: Parameters) extends XSModule with HasPerfEvents with HasVLSUParameters {
50  // The std IQ does not need its own dispatch: it only mirrors the sta IQ, but dispatch must check both the sta IQ's ready and the std IQ's ready
51  val allIssueParams = backendParams.allIssueParams.filter(_.StdCnt == 0)
52  val allExuParams = allIssueParams.map(_.exuBlockParams).flatten
53  val allFuConfigs = allExuParams.map(_.fuConfigs).flatten.toSet.toSeq
54  val sortedFuConfigs = allFuConfigs.sortBy(_.fuType.id)
55  println(s"[NewDispatch] ${allExuParams.map(_.name)}")
56  println(s"[NewDispatch] ${allFuConfigs.map(_.name)}")
57  println(s"[NewDispatch] ${allFuConfigs.map(_.fuType.id)}")
58  println(s"[NewDispatch] ${sortedFuConfigs.map(_.name)}")
59  println(s"[NewDispatch] ${sortedFuConfigs.map(_.fuType.id)}")
60  val fuConfigsInIssueParams = allIssueParams.map(_.allExuParams.map(_.fuConfigs).flatten.toSet.toSeq)
61  val fuMapIQIdx = sortedFuConfigs.map( fu => {
62    val fuInIQIdx = fuConfigsInIssueParams.zipWithIndex.filter { case (f, i) => f.contains(fu) }.map(_._2)
63    (fu -> fuInIQIdx)
64   }
65  )
66  fuMapIQIdx.map { case (fu, iqidx) =>
67    println(s"[NewDispatch] ${fu.name} $iqidx")
68  }
69  val sameIQIdxFus = fuMapIQIdx.map{ case (fu, iqidx) =>
70    fuMapIQIdx.filter(_._2 == iqidx).map(_._1) -> iqidx
71  }.toSet.toSeq
72  val needMultiIQ = sameIQIdxFus.sortBy(_._1.head.fuType.id).filter(_._2.size > 1)
73  val needSingleIQ = sameIQIdxFus.sortBy(_._1.head.fuType.id).filter(_._2.size == 1)
74  needMultiIQ.map { case (fus, iqidx) =>
75    println(s"[NewDispatch] needMultiIQ: ${fus.map(_.name)} $iqidx")
76  }
77  needSingleIQ.map { case (fus, iqidx) =>
78    println(s"[NewDispatch] needSingleIQ: ${fus.map(_.name)} $iqidx")
79  }
80  val fuConfigsInExuParams = allExuParams.map(_.fuConfigs)
81  val fuMapExuIdx = sortedFuConfigs.map { case fu => {
82    val fuInExuIdx = fuConfigsInExuParams.zipWithIndex.filter { case (f, i) => f.contains(fu) }.map(_._2)
83    (fu -> fuInExuIdx)
84    }
85  }
86  val sameExuIdxFus = fuMapExuIdx.map { case (fu, exuidx) =>
87    fuMapExuIdx.filter(_._2 == exuidx).map(_._1) -> exuidx
88  }.toSet.toSeq
89  val needMultiExu = sameExuIdxFus.sortBy(_._1.head.fuType.id).filter(_._2.size > 1).filter{ x =>
90    x._1.map(y => fuMapIQIdx.filter(_._1 == y).head._2.size > 1).reduce(_ && _)
91  }
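  // needMultiExu: FU groups that map to more than one EXU and whose FUs each map to more than one
  // IQ; these are the groups that need the IQ load-balancing logic (minIQSelAll) below, while the
  // needSingleIQ groups always dispatch to their single fixed IQ.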
92
93  val exuNum = allExuParams.size
94  val maxIQSize = allIssueParams.map(_.numEntries).max
95  val IQEnqSum = allIssueParams.map(_.numEnq).sum
96
97  val io = IO(new Bundle {
98    // from rename
99    val renameIn = Vec(RenameWidth, Flipped(ValidIO(new DecodedInst)))
100    val fromRename = Vec(RenameWidth, Flipped(DecoupledIO(new DynInst)))
101    val toRenameAllFire = Output(Bool())
102    // enq Rob
103    val enqRob = Flipped(new RobEnqIO)
104    // IssueQueues
105    val IQValidNumVec = Vec(exuNum, Input(UInt(maxIQSize.U.getWidth.W)))
106    val toIssueQueues = Vec(IQEnqSum, DecoupledIO(new DynInst))
107    // to busyTable
108    // set preg state to ready (write back regfile)
109    val wbPregsInt = Vec(backendParams.numPregWb(IntData()), Flipped(ValidIO(UInt(PhyRegIdxWidth.W))))
110    val wbPregsFp = Vec(backendParams.numPregWb(FpData()), Flipped(ValidIO(UInt(PhyRegIdxWidth.W))))
111    val wbPregsVec = Vec(backendParams.numPregWb(VecData()), Flipped(ValidIO(UInt(PhyRegIdxWidth.W))))
112    val wbPregsV0 = Vec(backendParams.numPregWb(V0Data()), Flipped(ValidIO(UInt(PhyRegIdxWidth.W))))
113    val wbPregsVl = Vec(backendParams.numPregWb(VlData()), Flipped(ValidIO(UInt(PhyRegIdxWidth.W))))
114    val wakeUpAll = new Bundle {
115      val wakeUpInt: MixedVec[ValidIO[IssueQueueIQWakeUpBundle]] = Flipped(backendParams.intSchdParams.get.genIQWakeUpOutValidBundle)
116      val wakeUpFp: MixedVec[ValidIO[IssueQueueIQWakeUpBundle]] = Flipped(backendParams.fpSchdParams.get.genIQWakeUpOutValidBundle)
117      val wakeUpVec: MixedVec[ValidIO[IssueQueueIQWakeUpBundle]] = Flipped(backendParams.vfSchdParams.get.genIQWakeUpOutValidBundle)
118      val wakeUpMem: MixedVec[ValidIO[IssueQueueIQWakeUpBundle]] = Flipped(backendParams.memSchdParams.get.genIQWakeUpOutValidBundle)
119    }
120    val og0Cancel = Input(ExuVec())
121    val ldCancel = Vec(backendParams.LdExuCnt, Flipped(new LoadCancelIO))
122    // to vlbusytable
123    val vlWriteBackInfo = new Bundle {
124      val vlFromIntIsZero  = Input(Bool())
125      val vlFromIntIsVlmax = Input(Bool())
126      val vlFromVfIsZero   = Input(Bool())
127      val vlFromVfIsVlmax  = Input(Bool())
128    }
129    // from MemBlock
130    val fromMem = new Bundle {
131      val lcommit = Input(UInt(log2Up(CommitWidth + 1).W))
132      val scommit = Input(UInt(log2Ceil(EnsbufferWidth + 1).W)) // connected to `memBlock.io.sqDeq` instead of ROB
133      val lqDeqPtr = Input(new LqPtr)
134      val sqDeqPtr = Input(new SqPtr)
135      // from lsq
136      val lqCancelCnt = Input(UInt(log2Up(VirtualLoadQueueSize + 1).W))
137      val sqCancelCnt = Input(UInt(log2Up(StoreQueueSize + 1).W))
138    }
139    //toMem
140    val toMem = new Bundle {
141      val lsqEnqIO = Flipped(new LsqEnqIO)
142    }
143    // redirect
144    val redirect = Flipped(ValidIO(new Redirect))
145    // singleStep
146    val singleStep = Input(Bool())
147    // lfst
148    val lfst = new DispatchLFSTIO
149
150    // perf only
151    val robHead = Input(new DynInst)
152    val stallReason = Flipped(new StallReasonIO(RenameWidth))
153    val lqCanAccept = Input(Bool())
154    val sqCanAccept = Input(Bool())
155    val robHeadNotReady = Input(Bool())
156    val robFull = Input(Bool())
157    val debugTopDown = new Bundle {
158      val fromRob = Flipped(new RobDispatchTopDownIO)
159      val fromCore = new CoreDispatchTopDownIO
160    }
161  })
162  // Deq for the std IQ is not assigned in Dispatch2Iq, so one more src is added for it.
163  val issueBlockParams = backendParams.allIssueParams
164  val renameIn = io.renameIn
165  val fromRename = io.fromRename
166  io.toRenameAllFire := io.fromRename.map(x => !x.valid || x.fire).reduce(_ && _)
167  val fromRenameUpdate = Wire(Vec(RenameWidth, Flipped(ValidIO(new DynInst))))
168  fromRenameUpdate := fromRename
169  val renameWidth = io.fromRename.size
170  val issueQueueCount = io.IQValidNumVec
171  val issueQueueNum = allIssueParams.size
172  // register types: int, fp, vec, v0, vl
173  val numRegType = 5
174  val idxRegTypeInt = allFuConfigs.map(x => {
175    x.srcData.map(xx => {
176      xx.zipWithIndex.filter(y => IntRegSrcDataSet.contains(y._1)).map(_._2)
177    }).flatten
178  }).flatten.toSet.toSeq.sorted
179  val idxRegTypeFp = allFuConfigs.map(x => {
180    x.srcData.map(xx => {
181      xx.zipWithIndex.filter(y => FpRegSrcDataSet.contains(y._1)).map(_._2)
182    }).flatten
183  }).flatten.toSet.toSeq.sorted
184  val idxRegTypeVec = allFuConfigs.map(x => {
185    x.srcData.map(xx => {
186      xx.zipWithIndex.filter(y => VecRegSrcDataSet.contains(y._1)).map(_._2)
187    }).flatten
188  }).flatten.toSet.toSeq.sorted
189  val idxRegTypeV0 = allFuConfigs.map(x => {
190    x.srcData.map(xx => {
191      xx.zipWithIndex.filter(y => V0RegSrcDataSet.contains(y._1)).map(_._2)
192    }).flatten
193  }).flatten.toSet.toSeq.sorted
194  val idxRegTypeVl = allFuConfigs.map(x => {
195    x.srcData.map(xx => {
196      xx.zipWithIndex.filter(y => VlRegSrcDataSet.contains(y._1)).map(_._2)
197    }).flatten
198  }).flatten.toSet.toSeq.sorted
199  println(s"[NewDispatch] idxRegTypeInt: $idxRegTypeInt")
200  println(s"[NewDispatch] idxRegTypeFp: $idxRegTypeFp")
201  println(s"[NewDispatch] idxRegTypeVec: $idxRegTypeVec")
202  println(s"[NewDispatch] idxRegTypeV0: $idxRegTypeV0")
203  println(s"[NewDispatch] idxRegTypeVl: $idxRegTypeVl")
204  val numRegSrc: Int = issueBlockParams.map(_.exuBlockParams.map(
205    x => if (x.hasStdFu) x.numRegSrc + 1 else x.numRegSrc
206  ).max).max
207
208  val numRegSrcInt: Int = issueBlockParams.map(_.exuBlockParams.map(
209    x => if (x.hasStdFu) x.numIntSrc + 1 else x.numIntSrc
210  ).max).max
211  val numRegSrcFp: Int = issueBlockParams.map(_.exuBlockParams.map(
212    x => if (x.hasStdFu) x.numFpSrc + 1 else x.numFpSrc
213  ).max).max
214  val numRegSrcVf: Int = issueBlockParams.map(_.exuBlockParams.map(
215    x => x.numVecSrc
216  ).max).max
217  val numRegSrcV0: Int = issueBlockParams.map(_.exuBlockParams.map(
218    x => x.numV0Src
219  ).max).max
220  val numRegSrcVl: Int = issueBlockParams.map(_.exuBlockParams.map(
221    x => x.numVlSrc
222  ).max).max
223
224  println(s"[Dispatch2Iq] numRegSrc: ${numRegSrc}, numRegSrcInt: ${numRegSrcInt}, numRegSrcFp: ${numRegSrcFp}, " +
225    s"numRegSrcVf: ${numRegSrcVf}, numRegSrcV0: ${numRegSrcV0}, numRegSrcVl: ${numRegSrcVl}")
226
227  // RegCacheTagTable Module
228  val rcTagTable = Module(new RegCacheTagTable(numRegSrcInt * renameWidth))
229  // BusyTable Modules
230  val intBusyTable = Module(new BusyTable(numRegSrcInt * renameWidth, backendParams.numPregWb(IntData()), IntPhyRegs, IntWB()))
231  val fpBusyTable = Module(new BusyTable(numRegSrcFp * renameWidth, backendParams.numPregWb(FpData()), FpPhyRegs, FpWB()))
232  val vecBusyTable = Module(new BusyTable(numRegSrcVf * renameWidth, backendParams.numPregWb(VecData()), VfPhyRegs, VfWB()))
233  val v0BusyTable = Module(new BusyTable(numRegSrcV0 * renameWidth, backendParams.numPregWb(V0Data()), V0PhyRegs, V0WB()))
234  val vlBusyTable = Module(new VlBusyTable(numRegSrcVl * renameWidth, backendParams.numPregWb(VlData()), VlPhyRegs, VlWB()))
235  vlBusyTable.io_vl_Wb.vlWriteBackInfo := io.vlWriteBackInfo
236  val busyTables = Seq(intBusyTable, fpBusyTable, vecBusyTable, v0BusyTable, vlBusyTable)
237  val wbPregs = Seq(io.wbPregsInt, io.wbPregsFp, io.wbPregsVec, io.wbPregsV0, io.wbPregsVl)
238  val idxRegType = Seq(idxRegTypeInt, idxRegTypeFp, idxRegTypeVec, idxRegTypeV0, idxRegTypeVl)
239  val allocPregsValid = Wire(Vec(busyTables.size, Vec(RenameWidth, Bool())))
240  allocPregsValid(0) := VecInit(fromRename.map(x => x.valid && x.bits.rfWen && !x.bits.eliminatedMove))
241  allocPregsValid(1) := VecInit(fromRename.map(x => x.valid && x.bits.fpWen))
242  allocPregsValid(2) := VecInit(fromRename.map(x => x.valid && x.bits.vecWen))
243  allocPregsValid(3) := VecInit(fromRename.map(x => x.valid && x.bits.v0Wen))
244  allocPregsValid(4) := VecInit(fromRename.map(x => x.valid && x.bits.vlWen))
245  val allocPregs = Wire(Vec(busyTables.size, Vec(RenameWidth, ValidIO(UInt(PhyRegIdxWidth.W)))))
246  allocPregs.zip(allocPregsValid).map(x =>{
247    x._1.zip(x._2).zipWithIndex.map{case ((sink, source), i) => {
248      sink.valid := source
249      sink.bits := fromRename(i).bits.pdest
250    }}
251  })
252  val wakeUp = io.wakeUpAll.wakeUpInt ++ io.wakeUpAll.wakeUpFp ++ io.wakeUpAll.wakeUpVec ++ io.wakeUpAll.wakeUpMem
253  busyTables.zip(wbPregs).zip(allocPregs).map{ case ((b, w), a) => {
254    b.io.wakeUpInt := io.wakeUpAll.wakeUpInt
255    b.io.wakeUpFp  := io.wakeUpAll.wakeUpFp
256    b.io.wakeUpVec := io.wakeUpAll.wakeUpVec
257    b.io.wakeUpMem := io.wakeUpAll.wakeUpMem
258    b.io.og0Cancel := io.og0Cancel
259    b.io.ldCancel := io.ldCancel
260    b.io.wbPregs := w
261    b.io.allocPregs := a
262  }}
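  // The RegCacheTagTable only tracks int pregs: it is updated by int allocations and by int/mem
  // wake-ups, and its read ports later provide the regCacheIdx / useRegCache hints for int sources.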
263  rcTagTable.io.allocPregs.zip(allocPregs(0)).map(x => x._1 := x._2)
264  rcTagTable.io.wakeupFromIQ := io.wakeUpAll.wakeUpInt ++ io.wakeUpAll.wakeUpMem
265  rcTagTable.io.og0Cancel := io.og0Cancel
266  rcTagTable.io.ldCancel := io.ldCancel
267  busyTables.zip(idxRegType).zipWithIndex.map { case ((b, idxseq), i) => {
268    val readAddr = VecInit(fromRename.map(x => x.bits.psrc.zipWithIndex.filter(xx => idxseq.contains(xx._2)).map(_._1)).flatten)
269    val readValid = VecInit(fromRename.map(x => x.bits.psrc.zipWithIndex.filter(xx => idxseq.contains(xx._2)).map(y => x.valid && SrcType.isXp(x.bits.srcType(y._2)))).flatten)
270    b.io.read.map(_.req).zip(readAddr).map(x => x._1 := x._2)
271    // only int srcs (src0, src1) need srcLoadDependency
272    if (i == 0) {
273      val srcLoadDependencyUpdate = fromRenameUpdate.map(x => x.bits.srcLoadDependency.zipWithIndex.filter(x => idxseq.contains(x._2)).map(_._1)).flatten
274      val srcType = fromRenameUpdate.map(x => x.bits.srcType.zipWithIndex.filter(x => idxseq.contains(x._2)).map(_._1)).flatten
275      // for std, the int src needs srcLoadDependency while the fp src does not
276      srcLoadDependencyUpdate.lazyZip(b.io.read.map(_.loadDependency)).lazyZip(srcType).map{ case (sink, source, srctype) =>
277        sink := Mux(SrcType.isXp(srctype), source, 0.U.asTypeOf(sink))
278      }
279      // only int srcs need the RegCache tag
280      val rcTagUpdate = fromRenameUpdate.map(x => x.bits.regCacheIdx.zipWithIndex.filter(x => idxseq.contains(x._2)).map(_._1)).flatten
281      rcTagUpdate.zip(rcTagTable.io.readPorts.map(_.addr)).map(x => x._1 := x._2)
282      val useRegCacheUpdate = fromRenameUpdate.map(x => x.bits.useRegCache.zipWithIndex.filter(x => idxseq.contains(x._2)).map(_._1)).flatten
283      useRegCacheUpdate.zip(rcTagTable.io.readPorts.map(_.valid)).map(x => x._1 := x._2)
284      rcTagTable.io.readPorts.map(_.ren).zip(readValid).map(x => x._1 := x._2)
285      rcTagTable.io.readPorts.map(_.tag).zip(readAddr).map(x => x._1 := x._2)
286    }
287  }}
288  val allSrcState = Wire(Vec(renameWidth, Vec(numRegSrc, Vec(numRegType, Bool()))))
289  for (i <- 0 until renameWidth){
290    for (j <- 0 until numRegSrc){
291      for (k <- 0 until numRegType){
292        if (!idxRegType(k).contains(j)) {
293          allSrcState(i)(j)(k) := false.B
294        }
295        else {
296          val readidx = i * idxRegType(k).size + idxRegType(k).indexOf(j)
297          val readEn = k match {
298            case 0 => SrcType.isXp(fromRename(i).bits.srcType(j))
299            case 1 => SrcType.isFp(fromRename(i).bits.srcType(j))
300            case 2 => SrcType.isVp(fromRename(i).bits.srcType(j))
301            case 3 => SrcType.isV0(fromRename(i).bits.srcType(j))
302            case 4 => true.B
303          }
304          allSrcState(i)(j)(k) := readEn && busyTables(k).io.read(readidx).resp || SrcType.isImm(fromRename(i).bits.srcType(j))
305        }
306      }
307    }
308  }
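  // allSrcState(i)(j)(k): readiness of uop i's source j as reported by the busy table of register
  // type k (int/fp/vec/v0/vl), gated by the source type (vl reads are always enabled); immediate
  // sources are always treated as ready.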
309
310  // eliminate the old vd dependency when it can be ignored
311  val ignoreOldVdVec = Wire(Vec(renameWidth, Bool()))
312  for (i <- 0 until renameWidth){
313    // src index numRegSrcVf - 1 is the old vd
314    var j = numRegSrcVf - 1
315    // register-type index 2 is the vec type
316    var k = 2
317    val readidx = i * idxRegType(k).size + idxRegType(k).indexOf(j)
318    val readEn = SrcType.isVp(fromRename(i).bits.srcType(j))
319    val isDependOldVd = fromRename(i).bits.vpu.isDependOldVd
320    val isWritePartVd = fromRename(i).bits.vpu.isWritePartVd
321    val vta = fromRename(i).bits.vpu.vta
322    val vma = fromRename(i).bits.vpu.vma
323    val vm = fromRename(i).bits.vpu.vm
324    val vlIsVlmax = vlBusyTable.io_vl_read.vlReadInfo(i).is_vlmax
325    val vlIsNonZero = vlBusyTable.io_vl_read.vlReadInfo(i).is_nonzero
326    val ignoreTail = vlIsVlmax && (vm =/= 0.U || vma) && !isWritePartVd
327    val ignoreWhole = (vm =/= 0.U || vma) && vta
328    val ignoreOldVd = vlBusyTable.io.read(i).resp && vlIsNonZero && !isDependOldVd && (ignoreTail || ignoreWhole)
329    ignoreOldVdVec(i) := readEn && ignoreOldVd
330    allSrcState(i)(j)(k) := readEn && (busyTables(k).io.read(readidx).resp || ignoreOldVd) || SrcType.isImm(fromRename(i).bits.srcType(j))
331  }
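  // When vl is known to be non-zero, the uop does not truly depend on the old vd, and the tail or
  // masked-off elements may be treated as agnostic (ignoreTail / ignoreWhole), the old-vd source is
  // marked ready here and its srcType is later cleared to SrcType.no in fromRenameUpdate.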
332
333  // Single-step should commit only one machine instruction after dret; the hart then enters debug mode via the single-step exception.
334  val s_holdRobidx :: s_updateRobidx :: Nil = Enum(2)
335  val singleStepState = RegInit(s_updateRobidx)
336
337  val robidxStepNext  = WireInit(0.U.asTypeOf(fromRename(0).bits.robIdx))
338  val robidxStepReg   = RegInit(0.U.asTypeOf(fromRename(0).bits.robIdx))
339  val robidxCanCommitStepping = WireInit(0.U.asTypeOf(fromRename(0).bits.robIdx))
340
341  when(!io.singleStep) {
342    singleStepState := s_updateRobidx
343  }.elsewhen(io.singleStep && fromRename(0).fire && io.enqRob.req(0).valid) {
344    singleStepState := s_holdRobidx
345    robidxStepNext := fromRename(0).bits.robIdx
346  }
347
348  when(singleStepState === s_updateRobidx) {
349    robidxStepReg := robidxStepNext
350    robidxCanCommitStepping := robidxStepNext
351  }.elsewhen(singleStepState === s_holdRobidx) {
352    robidxStepReg := robidxStepReg
353    robidxCanCommitStepping := robidxStepReg
354  }
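  // robidxCanCommitStepping holds the robIdx of the single instruction allowed to commit while
  // single-stepping; any uop with a different robIdx gets its singleStep flag set below, which is
  // reported as an exception at ROB enqueue.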
355
356  val minIQSelAll = Wire(Vec(needMultiExu.size, Vec(renameWidth, Vec(issueQueueNum, Bool()))))
357  needMultiExu.zipWithIndex.map{ case ((fus, exuidx), needMultiExuidx) => {
358    val suffix = fus.map(_.name).mkString("_")
359    val iqNum = exuidx.size
360    val iqidx = allIssueParams.map(_.exuBlockParams.map(_.fuConfigs).flatten.toSet.toSeq).zipWithIndex.filter{x => fus.toSet.subsetOf(x._1.toSet)}.map(_._2)
361    println(s"[NewDispatch] ${fus.map(_.name)};iqidx:$iqidx;exuIdx:$exuidx")
362    val compareMatrix = Wire(Vec(iqNum, Vec(iqNum, Bool()))).suggestName(s"compareMatrix_$suffix")
363    for (i <- 0 until iqNum) {
364      for (j <- 0 until iqNum) {
365        if (i == j) compareMatrix(i)(j) := false.B
366        else if (i < j) compareMatrix(i)(j) := issueQueueCount(exuidx(i)) < issueQueueCount(exuidx(j))
367        else compareMatrix(i)(j) := !compareMatrix(j)(i)
368      }
369    }
370    val IQSort = Reg(Vec(iqNum, Vec(iqNum, Bool()))).suggestName(s"IQSort_$suffix")
371    for (i <- 0 until iqNum){
372      // i = 0 -> the IQ with the fewest valid entries, i = iqNum - 1 -> the fullest one
373      IQSort(i) := compareMatrix.map(x => PopCount(x) === (iqNum - 1 - i).U)
374    }
375    val minIQSel = Wire(Vec(renameWidth, Vec(issueQueueNum, Bool()))).suggestName(s"minIQSel_$suffix")
376    for (i <- 0 until renameWidth){
377      val minIQSel_ith = IQSort(i % iqNum)
378      println(s"minIQSel_${i}th_$suffix = IQSort(${i % iqNum})")
379      for (j <- 0 until issueQueueNum){
380        minIQSel(i)(j) := false.B
381        if (iqidx.contains(j)){
382          minIQSel(i)(j) := minIQSel_ith(iqidx.indexOf(j))
383          println(s"minIQSel_${suffix}_${i}_${j} = minIQSel_ith(iqidx.indexOf(${j}))")
384        }
385      }
386    }
387    minIQSelAll(needMultiExuidx) := minIQSel
388    if (backendParams.debugEn){
389      dontTouch(compareMatrix)
390      dontTouch(IQSort)
391      dontTouch(minIQSel)
392    }
393  }
394  }
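  // Example with two candidate IQs (iqNum = 2): compareMatrix(0)(1) is true when the first EXU's IQ
  // holds fewer valid entries than the second's, so IQSort(0) one-hot encodes the emptier IQ and
  // IQSort(1) the fuller one; uop i of the rename group then uses IQSort(i % iqNum), spreading
  // consecutive uops of the same FU group from the emptiest IQ to the fullest.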
395  val fuConfigSeq = needMultiExu.map(_._1)
396  val fuTypeOH = Wire(Vec(renameWidth, Vec(needMultiExu.size, Bool())))
397  fuTypeOH.zip(renameIn).map{ case(oh, in) => {
398    oh := fuConfigSeq.map(x => x.map(xx => in.bits.fuType(xx.fuType.id)).reduce(_ || _) && in.valid)
399  }
400  }
401  // count matching older uops in this rename group; the uop itself is not counted
402  val popFuTypeOH = Wire(Vec(renameWidth, Vec(needMultiExu.size, UInt((renameWidth-1).U.getWidth.W))))
403  popFuTypeOH.zipWithIndex.map{ case (pop, idx) => {
404    if (idx == 0){
405      pop := 0.U.asTypeOf(pop)
406    }
407    else {
408      pop.zipWithIndex.map{ case (p, i) => {
409        p := PopCount(fuTypeOH.take(idx).map(x => x(i)))
410        }
411      }
412    }
413  }}
414  val uopSelIQ = Reg(Vec(renameWidth, Vec(issueQueueNum, Bool())))
415  val fuTypeOHSingle = Wire(Vec(renameWidth, Vec(needSingleIQ.size, Bool())))
416  fuTypeOHSingle.zip(renameIn).map{ case (oh, in) => {
417    oh := needSingleIQ.map(_._1).map(x => x.map(xx => in.valid && in.bits.fuType(xx.fuType.id)).reduce(_ || _))
418  }}
419  val uopSelIQSingle = Wire(Vec(needSingleIQ.size, Vec(issueQueueNum, Bool())))
420  uopSelIQSingle := VecInit(needSingleIQ.map(_._2).flatten.map(x => VecInit((1.U(issueQueueNum.W) << x)(issueQueueNum-1, 0).asBools)))
421  uopSelIQ.zipWithIndex.map{ case (u, i) => {
422    when(io.toRenameAllFire){
423      u := Mux(renameIn(i).valid,
424                Mux(fuTypeOH(i).asUInt.orR,
425                  Mux1H(fuTypeOH(i), minIQSelAll)(Mux1H(fuTypeOH(i), popFuTypeOH(i))),
426                  Mux1H(fuTypeOHSingle(i), uopSelIQSingle)),
427                0.U.asTypeOf(u)
428              )
429    }.elsewhen(io.fromRename(i).fire){
430      u := 0.U.asTypeOf(u)
431    }
432  }}
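  // uopSelIQ is updated when the whole rename group is handed over (toRenameAllFire): a uop of a
  // load-balanced FU group indexes minIQSelAll with popFuTypeOH, so the n-th such uop picks the
  // n-th least-occupied candidate IQ, while single-IQ FU groups use their fixed selection; the
  // entry is cleared once the corresponding uop fires towards the IQs.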
433  val uopSelIQMatrix = Wire(Vec(renameWidth, Vec(issueQueueNum, UInt(renameWidth.U.getWidth.W))))
434  uopSelIQMatrix.zipWithIndex.map{ case (u, i) => {
435    u.zipWithIndex.map{ case (uu, j) => {
436     uu := PopCount(uopSelIQ.take(i+1).map(x => x.zipWithIndex.filter(_._2 == j).map(_._1)).flatten)
437    }}
438  }}
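  // uopSelIQMatrix(i)(j) counts how many of uops 0..i selected IQ j; this prefix count steers the
  // n-th uop targeting an IQ to that IQ's n-th enq port below and detects when an IQ receives more
  // uops than it has enq ports (uopBlockByIQ).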
439  val IQSelUop = Wire(Vec(IQEnqSum, ValidIO(new DynInst)))
440  val uopBlockByIQ = Wire(Vec(renameWidth, Bool()))
441  val allowDispatch = Wire(Vec(renameWidth, Bool()))
442  val thisCanActualOut = Wire(Vec(renameWidth, Bool()))
443  val lsqCanAccept = Wire(Bool())
444  for (i <- 0 until RenameWidth){
445    // update valid logic
446    fromRenameUpdate(i).valid := fromRename(i).valid && allowDispatch(i) && !uopBlockByIQ(i) && thisCanActualOut(i) &&
447      lsqCanAccept && !fromRename(i).bits.eliminatedMove && !fromRename(i).bits.hasException && !fromRenameUpdate(i).bits.singleStep
448    fromRename(i).ready := allowDispatch(i) && !uopBlockByIQ(i) && thisCanActualOut(i) && lsqCanAccept
449    // clear the old-vd src type when the old vd is eliminated
450    fromRenameUpdate(i).bits.srcType(numRegSrcVf - 1) := Mux(ignoreOldVdVec(i), SrcType.no, fromRename(i).bits.srcType(numRegSrcVf - 1))
451  }
452  for (i <- 0 until RenameWidth){
453    // check whether this is an AMOCAS uop whose sta part should be dropped
454    fromRenameUpdate(i).bits.isDropAmocasSta := fromRename(i).bits.isAMOCAS && fromRename(i).bits.uopIdx(0) === 0.U
455    // update singleStep
456    fromRenameUpdate(i).bits.singleStep := io.singleStep && (fromRename(i).bits.robIdx =/= robidxCanCommitStepping)
457  }
458  var temp = 0
459  allIssueParams.zipWithIndex.map{ case(issue, iqidx) => {
460    for (i <- 0 until issue.numEnq){
461      val oh = Wire(Vec(renameWidth, Bool())).suggestName(s"oh_IQSelUop_$temp")
462      oh := uopSelIQMatrix.map(_(iqidx)).map(_ === (i+1).U)
463      IQSelUop(temp) := PriorityMux(oh, fromRenameUpdate)
464      // only valid uses PriorityMuxDefault here; bits use plain PriorityMux (no default) for better timing
465      IQSelUop(temp).valid := PriorityMuxDefault(oh.zip(fromRenameUpdate.map(_.valid)), false.B)
466      val allFuThisIQ = issue.exuBlockParams.map(_.fuConfigs).flatten.toSet.toSeq
467      val hasStaFu = !allFuThisIQ.filter(_.name == "sta").isEmpty
468      for (j <- 0 until numRegSrc){
469        val maskForStd = hasStaFu && (j == 1)
470        val thisSrcHasInt = allFuThisIQ.map(x => {x.srcData.map(xx => {if (j < xx.size) IntRegSrcDataSet.contains(xx(j)) else false}).reduce(_ || _)}).reduce(_ || _)
471        val thisSrcHasFp  = allFuThisIQ.map(x => {x.srcData.map(xx => {if (j < xx.size) FpRegSrcDataSet.contains(xx(j))  else false}).reduce(_ || _)}).reduce(_ || _)
472        val thisSrcHasVec = allFuThisIQ.map(x => {x.srcData.map(xx => {if (j < xx.size) VecRegSrcDataSet.contains(xx(j)) else false}).reduce(_ || _)}).reduce(_ || _)
473        val thisSrcHasV0  = allFuThisIQ.map(x => {x.srcData.map(xx => {if (j < xx.size) V0RegSrcDataSet.contains(xx(j))  else false}).reduce(_ || _)}).reduce(_ || _)
474        val thisSrcHasVl  = allFuThisIQ.map(x => {x.srcData.map(xx => {if (j < xx.size) VlRegSrcDataSet.contains(xx(j))  else false}).reduce(_ || _)}).reduce(_ || _)
475        val selSrcState = Seq(thisSrcHasInt || maskForStd, thisSrcHasFp || maskForStd, thisSrcHasVec, thisSrcHasV0, thisSrcHasVl)
476        IQSelUop(temp).bits.srcState(j) := PriorityMux(oh, allSrcState)(j).zip(selSrcState).filter(_._2 == true).map(_._1).foldLeft(false.B)(_ || _).asUInt
477      }
478      temp = temp + 1
479      if (backendParams.debugEn){
480        dontTouch(oh)
481      }
482    }
483  }}
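  // For each enq port, srcState(j) ORs only the per-type ready bits of the register types this IQ
  // can actually read for source j; for store IQs (hasStaFu) source 1 is the std data source, so
  // both int and fp readiness are accepted there via maskForStd.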
484  temp = 0
485  val uopBlockMatrix = Wire(Vec(renameWidth, Vec(issueQueueNum, Bool())))
486  val uopBlockMatrixForAssign = allIssueParams.zipWithIndex.map { case (issue, iqidx) => {
487    val result = uopSelIQMatrix.map(_(iqidx)).map(x => Mux(io.toIssueQueues(temp).ready, x > issue.numEnq.U, x.orR))
488    temp = temp + issue.numEnq
489    result
490  }}.transpose
491  uopBlockMatrix.zip(uopBlockMatrixForAssign).map(x => x._1 := VecInit(x._2))
492  uopBlockByIQ := uopBlockMatrix.map(_.reduce(_ || _))
493  io.toIssueQueues.zip(IQSelUop).map(x => {
494    x._1.valid := x._2.valid
495    x._1.bits := x._2.bits
496  })
497  if (backendParams.debugEn){
498    dontTouch(uopSelIQMatrix)
499    dontTouch(IQSelUop)
500    dontTouch(fromRenameUpdate)
501    dontTouch(uopBlockByIQ)
502    dontTouch(allowDispatch)
503    dontTouch(thisCanActualOut)
504    dontTouch(popFuTypeOH)
505    dontTouch(fuTypeOH)
506    dontTouch(fuTypeOHSingle)
507    dontTouch(minIQSelAll)
508  }
509  ///////////////////////////////////////////////////////////
510
511  val lsqEnqCtrl = Module(new LsqEnqCtrl)
512
513  // TODO: check lsqEnqCtrl redirect logic
514  // RegNext is used here because Dispatch2Iq uses s2_s4_redirect while NewDispatch uses s1_s3_redirect
515  lsqEnqCtrl.io.redirect := RegNext(io.redirect)
516  lsqEnqCtrl.io.lcommit := io.fromMem.lcommit
517  lsqEnqCtrl.io.scommit := io.fromMem.scommit
518  lsqEnqCtrl.io.lqCancelCnt := io.fromMem.lqCancelCnt
519  lsqEnqCtrl.io.sqCancelCnt := io.fromMem.sqCancelCnt
520  lsqEnqCtrl.io.enq.iqAccept := io.fromRename.map(x => !x.valid || x.fire)
521  io.toMem.lsqEnqIO <> lsqEnqCtrl.io.enqLsq
522
523  private val enqLsqIO = lsqEnqCtrl.io.enq
524  private val lqFreeCount = lsqEnqCtrl.io.lqFreeCount
525  private val sqFreeCount = lsqEnqCtrl.io.sqFreeCount
526
527  private val numLoadDeq = LSQLdEnqWidth
528  private val numStoreAMODeq = LSQStEnqWidth
529  private val numVLoadDeq = LoadPipelineWidth
530  private val numDeq = enqLsqIO.req.size
531  lsqCanAccept := enqLsqIO.canAccept
532
533  private val isLoadVec = VecInit(fromRename.map(x => x.valid && FuType.isLoad(x.bits.fuType)))
534  private val isStoreVec = VecInit(fromRename.map(x => x.valid && FuType.isStore(x.bits.fuType)))
535  private val isAMOVec = fromRename.map(x => x.valid && FuType.isAMO(x.bits.fuType))
536  private val isStoreAMOVec = fromRename.map(x => x.valid && (FuType.isStore(x.bits.fuType) || FuType.isAMO(x.bits.fuType)))
537  private val isVLoadVec = VecInit(fromRename.map(x => x.valid && FuType.isVLoad(x.bits.fuType)))
538  private val isVStoreVec = VecInit(fromRename.map(x => x.valid && FuType.isVStore(x.bits.fuType)))
539
540  private val loadCntVec = VecInit(isLoadVec.indices.map(x => PopCount(isLoadVec.slice(0, x + 1))))
541  private val storeAMOCntVec = VecInit(isStoreAMOVec.indices.map(x => PopCount(isStoreAMOVec.slice(0, x + 1))))
542  private val vloadCntVec = VecInit(isVLoadVec.indices.map(x => PopCount(isVLoadVec.slice(0, x + 1))))
543
544  private val s0_enqLsq_resp = Wire(enqLsqIO.resp.cloneType)
545  for (i <- 0 until RenameWidth) {
546    // update lqIdx and sqIdx from the LSQ allocation response
547    fromRenameUpdate(i).bits.lqIdx := s0_enqLsq_resp(i).lqIdx
548    fromRenameUpdate(i).bits.sqIdx := s0_enqLsq_resp(i).sqIdx
549  }
550
551  val loadBlockVec = VecInit(loadCntVec.map(_ > numLoadDeq.U))
552  val storeAMOBlockVec = VecInit(storeAMOCntVec.map(_ > numStoreAMODeq.U))
553  val vloadBlockVec = VecInit(vloadCntVec.map(_ > numVLoadDeq.U))
554  val lsStructBlockVec = VecInit((loadBlockVec.zip(storeAMOBlockVec)).zip(vloadBlockVec).map(x => x._1._1 || x._1._2 || x._2))
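  // loadBlockVec / storeAMOBlockVec / vloadBlockVec flag slots where the running count of scalar
  // loads, stores+AMOs, or vector loads within this rename group already exceeds the per-cycle LSQ
  // enqueue width (LSQLdEnqWidth / LSQStEnqWidth / LoadPipelineWidth); lsStructBlockVec ORs them.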
555  if (backendParams.debugEn) {
556    dontTouch(loadBlockVec)
557    dontTouch(storeAMOBlockVec)
558    dontTouch(lsStructBlockVec)
559    dontTouch(vloadBlockVec)
560    dontTouch(isLoadVec)
561    dontTouch(isVLoadVec)
562    dontTouch(loadCntVec)
563  }
564
565  private val uop = fromRename.map(_.bits)
566  private val fuType = uop.map(_.fuType)
567  private val fuOpType = uop.map(_.fuOpType)
568  private val vtype = uop.map(_.vpu.vtype)
569  private val sew = vtype.map(_.vsew)
570  private val lmul = vtype.map(_.vlmul)
571  private val eew = uop.map(_.vpu.veew)
572  private val mop = fuOpType.map(fuOpTypeItem => LSUOpType.getVecLSMop(fuOpTypeItem))
573  private val nf = fuOpType.zip(uop.map(_.vpu.nf)).map { case (fuOpTypeItem, nfItem) => Mux(LSUOpType.isWhole(fuOpTypeItem), 0.U, nfItem) }
574  private val emul = fuOpType.zipWithIndex.map { case (fuOpTypeItem, index) =>
575    Mux(
576      LSUOpType.isWhole(fuOpTypeItem),
577      GenUSWholeEmul(nf(index)),
578      Mux(
579        LSUOpType.isMasked(fuOpTypeItem),
580        0.U(mulBits.W),
581        EewLog2(eew(index)) - sew(index) + lmul(index)
582      )
583    )
584  }
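  // emul is kept in log2 form: whole-register accesses derive it from nf, mask loads/stores use 0
  // (i.e. emul = 1), and all other vector accesses use eewLog2 - sew + lmul, i.e. emul = lmul * eew / sew.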
585
586  private val isVlsType = fuType.map(fuTypeItem => FuType.isVls(fuTypeItem)).zip(fromRename.map(_.valid)).map(x => x._1 && x._2)
587  private val isLSType = fuType.map(fuTypeItem => FuType.isLoad(fuTypeItem) || FuType.isStore(fuTypeItem)).zip(fromRename.map(_.valid)).map(x => x._1 && x._2)
588  private val isSegment = fuType.map(fuTypeItem => FuType.isVsegls(fuTypeItem)).zip(fromRename.map(_.valid)).map(x => x._1 && x._2)
589  // TODO
590  private val isUnitStride = fuOpType.map(fuOpTypeItem => LSUOpType.isAllUS(fuOpTypeItem))
591  private val isVecUnitType = isVlsType.zip(isUnitStride).map { case (isVlsTypeItem, isUnitStrideItem) =>
592    isVlsTypeItem && isUnitStrideItem
593  }
594  private val isfofFixVlUop = uop.map { x => x.vpu.isVleff && x.lastUop }
595  private val instType = isSegment.zip(mop).map { case (isSegmentItem, mopItem) => Cat(isSegmentItem, mopItem) }
596  // There is no way to calculate the 'flow' for 'unit-stride' exactly:
597  //  Whether 'unit-stride' needs to be split can only be known after obtaining the address.
598  // For scalar instructions, this is not handled here, and different assignments are done later according to the situation.
599  private val numLsElem = VecInit(uop.map(_.numLsElem))
600
601  // The maximum 'numLsElem' number that can be emitted per port is:
602  //    16 2 2 2 2 2.
603  // The 'allowDispatch' calculations are done conservatively for timing purposes:
604  //   The Flow of scalar instructions is considered 1,
605  //   The flow of vector 'unit-stride' instructions is considered 2, and the flow of other vector instructions is considered 16.
606  private val conserveFlows = VecInit(isVlsType.zip(isLSType).zipWithIndex.map { case ((isVlsTypeItem, isLSTypeItem), index) =>
607    Mux(
608      isVlsTypeItem,
609      Mux(isUnitStride(index), VecMemUnitStrideMaxFlowNum.U, 16.U),
610      Mux(isLSTypeItem, 1.U, 0.U)
611    )
612  })
613
614  private val conserveFlowsIs16 = VecInit(isVlsType.zipWithIndex.map { case (isVlsTypeItem, index) =>
615    isVlsTypeItem && !isUnitStride(index)
616  })
617  private val conserveFlowsIs2 = VecInit(isVlsType.zipWithIndex.map { case (isVlsTypeItem, index) =>
618    isVlsTypeItem && isUnitStride(index)
619  })
620  private val conserveFlowsIs1 = VecInit(isLSType.zipWithIndex.map { case (isLSTypeItem, index) =>
621    isLSTypeItem
622  })
623  private val flowTotalWidth = (VecMemLSQEnqIteratorNumberSeq.max * RenameWidth).U.getWidth
624  private val conserveFlowTotalDispatch = Wire(Vec(RenameWidth, UInt(flowTotalWidth.W)))
625  private val lowCountMaxWidth = (2 * RenameWidth).U.getWidth
626  conserveFlowTotalDispatch.zipWithIndex.map{ case (flowTotal, idx) =>
627    val highCount = PopCount(conserveFlowsIs16.take(idx + 1))
628    val conserveFlowsIs2Or1 = VecInit(conserveFlowsIs2.zip(conserveFlowsIs1).map(x => Cat(x._1, x._2)))
629    val lowCount = conserveFlowsIs2Or1.take(idx + 1).reduce(_ +& _).asTypeOf(0.U(lowCountMaxWidth.W))
630    flowTotal := (if (RenameWidth == 6) Cat(highCount, lowCount) else ((highCount << 4).asUInt + lowCount))
631  }
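  // Each conserveFlowTotalDispatch entry is a prefix sum over slots 0..idx, packed as
  // {count of 16-flow uops, sum of 2/1 flows}: Cat(is2, is1) encodes a per-uop flow of 2, 1 or 0,
  // and for RenameWidth == 6 the low field is 4 bits wide, so Cat(highCount, lowCount) equals
  // highCount * 16 + lowCount.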
632  // renameIn
633  private val isVlsTypeRename = io.renameIn.map(x => x.valid && FuType.isVls(x.bits.fuType))
634  private val isLSTypeRename = io.renameIn.map(x => x.valid && (FuType.isLoad(x.bits.fuType) || FuType.isStore(x.bits.fuType)))
635  private val isUnitStrideRename = io.renameIn.map(x => LSUOpType.isAllUS(x.bits.fuOpType))
636  private val conserveFlowsIs16Rename = VecInit(isVlsTypeRename.zipWithIndex.map { case (isVlsTypeItem, index) =>
637    isVlsTypeItem && !isUnitStrideRename(index)
638  })
639  private val conserveFlowsIs2Rename = VecInit(isVlsTypeRename.zipWithIndex.map { case (isVlsTypeItem, index) =>
640    isVlsTypeItem && isUnitStrideRename(index)
641  })
642  private val conserveFlowsIs1Rename = VecInit(isLSTypeRename.zipWithIndex.map { case (isLSTypeItem, index) =>
643    isLSTypeItem
644  })
645  private val conserveFlowTotalRename = Wire(Vec(RenameWidth, UInt(flowTotalWidth.W)))
646  conserveFlowTotalRename.zipWithIndex.map { case (flowTotal, idx) =>
647    val highCount = PopCount(conserveFlowsIs16Rename.take(idx + 1))
648    val conserveFlowsIs2Or1 = VecInit(conserveFlowsIs2Rename.zip(conserveFlowsIs1Rename).map(x => Cat(x._1, x._2)))
649    val lowCount = conserveFlowsIs2Or1.take(idx + 1).reduce(_ +& _).asTypeOf(0.U(lowCountMaxWidth.W))
650    flowTotal := (if (RenameWidth == 6) Cat(highCount, lowCount) else ((highCount << 4).asUInt + lowCount))
651  }
652
653
654  private val conserveFlowTotal = Reg(Vec(RenameWidth, UInt(flowTotalWidth.W)))
655  when(io.toRenameAllFire){
656    conserveFlowTotal := conserveFlowTotalRename
657  }.otherwise(
658    conserveFlowTotal := conserveFlowTotalDispatch
659  )
660  // A conservative allocation strategy is adopted here.
661  // Vector 'unit-stride' instructions and scalar instructions can be issued from all six ports,
662  // while other vector instructions can only be issued from the first port.
663  // Segment instructions must be dispatched to Vldst_RS0, so every port except port 0 is stalled for them.
664  // The allocation needs to meet a few conditions:
665  //  1) The LSQ has enough entries.
666  //  2) The number of flows accumulated does not exceed VecMemDispatchMaxNumber.
667  //  3) Vector instructions other than 'unit-stride' can only be issued on the first port.
668
669
670  for (index <- allowDispatch.indices) {
671    val flowTotal = conserveFlowTotal(index)
672    val allowDispatchPrevious = if (index == 0) true.B else allowDispatch(index - 1)
673    when(isStoreVec(index) || isVStoreVec(index)) {
674      allowDispatch(index) := (sqFreeCount > flowTotal) && allowDispatchPrevious
675    }.elsewhen(isLoadVec(index) || isVLoadVec(index)) {
676      allowDispatch(index) := (lqFreeCount > flowTotal) && allowDispatchPrevious
677    }.elsewhen(isAMOVec(index)) {
678      allowDispatch(index) := allowDispatchPrevious
679    }.otherwise {
680      allowDispatch(index) := allowDispatchPrevious
681    }
682  }
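  // allowDispatch is monotone within the rename group: slot i may dispatch only if every earlier
  // slot may, and loads/stores additionally require the free LQ/SQ entry count to exceed the
  // conservative flow total accumulated up to slot i.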
683
684
685  // enqLsq io
686  require(enqLsqIO.req.size == enqLsqIO.resp.size)
687  for (i <- enqLsqIO.req.indices) {
688    when(!io.fromRename(i).fire) {
689      enqLsqIO.needAlloc(i) := 0.U
690    }.elsewhen(isStoreVec(i) || isVStoreVec(i)) {
691      enqLsqIO.needAlloc(i) := 2.U // store | vstore
692    }.elsewhen(isLoadVec(i) || isVLoadVec(i)){
693      enqLsqIO.needAlloc(i) := 1.U // load | vload
694    }.otherwise {
695      enqLsqIO.needAlloc(i) := 0.U
696    }
697    enqLsqIO.req(i).valid := io.fromRename(i).fire && !isAMOVec(i) && !isSegment(i) && !isfofFixVlUop(i)
698    enqLsqIO.req(i).bits := io.fromRename(i).bits
699
700    // This is to make it easier to calculate in LSQ.
701    // Both scalar instructions and vector instructions with FLOW equal to 1 have a NUM value of 1.
702    // However, 'numLsElem' of non-vector instructions is set to 0 when passed to the IQ.
703    enqLsqIO.req(i).bits.numLsElem := Mux(isVlsType(i), numLsElem(i), 1.U)
704    s0_enqLsq_resp(i) := enqLsqIO.resp(i)
705  }
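  // AMO, segment and vleff fix-vl uops do not request LSQ allocation through this interface
  // (req.valid is forced low above); vector uops request their numLsElem entries while scalar
  // memory uops request exactly one.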
706
707  val isFp = VecInit(fromRename.map(req => FuType.isFArith(req.bits.fuType)))
708  val isVec     = VecInit(fromRename.map(req => FuType.isVArith (req.bits.fuType) ||
709                                                  FuType.isVsetRvfWvf(req.bits.fuType)))
710  val isMem    = VecInit(fromRename.map(req => FuType.isMem(req.bits.fuType) ||
711                                                  FuType.isVls (req.bits.fuType)))
712  val isLs     = VecInit(fromRename.map(req => FuType.isLoadStore(req.bits.fuType)))
713  val isVls    = VecInit(fromRename.map(req => FuType.isVls (req.bits.fuType)))
714  val isStore  = VecInit(fromRename.map(req => FuType.isStore(req.bits.fuType)))
715  val isVStore = VecInit(fromRename.map(req => FuType.isVStore(req.bits.fuType)))
716  val isAMO    = VecInit(fromRename.map(req => FuType.isAMO(req.bits.fuType)))
717  val isBlockBackward  = VecInit(fromRename.map(x => x.valid && x.bits.blockBackward))
718  val isWaitForward    = VecInit(fromRename.map(x => x.valid && x.bits.waitForward))
719
720  val updatedUop = Wire(Vec(RenameWidth, new DynInst))
721  val checkpoint_id = RegInit(0.U(64.W))
722  checkpoint_id := checkpoint_id + PopCount((0 until RenameWidth).map(i =>
723    fromRename(i).fire
724  ))
725
726
727  for (i <- 0 until RenameWidth) {
728
729    updatedUop(i) := fromRename(i).bits
730    updatedUop(i).debugInfo.eliminatedMove := fromRename(i).bits.eliminatedMove
731    // For the LUI instruction: psrc(0) is from register file and should always be zero.
732    when (fromRename(i).bits.isLUI) {
733      updatedUop(i).psrc(0) := 0.U
734    }
735    //TODO: vec ls mdp
736    io.lfst.req(i).valid := fromRename(i).fire && updatedUop(i).storeSetHit
737    io.lfst.req(i).bits.isstore := isStore(i)
738    io.lfst.req(i).bits.ssid := updatedUop(i).ssid
739    io.lfst.req(i).bits.robIdx := updatedUop(i).robIdx // speculatively assigned in rename
740
741    // override load delay ctrl signal with store set result
742    if(StoreSetEnable) {
743      updatedUop(i).loadWaitBit := io.lfst.resp(i).bits.shouldWait
744      updatedUop(i).waitForRobIdx := io.lfst.resp(i).bits.robIdx
745    } else {
746      updatedUop(i).loadWaitBit := isLs(i) && !isStore(i) && fromRename(i).bits.loadWaitBit
747    }
748    // update singleStep; the single-step exception is only enabled for the next machine instruction.
749    updatedUop(i).singleStep := io.singleStep && (fromRename(i).bits.robIdx =/= robidxCanCommitStepping)
750    XSDebug(
751      fromRename(i).fire &&
752        (TriggerAction.isDmode(updatedUop(i).trigger) || updatedUop(i).exceptionVec(breakPoint)), s"Debug Mode: inst ${i} has frontend trigger exception\n")
753    XSDebug(fromRename(i).fire && updatedUop(i).singleStep, s"Debug Mode: inst ${i} has single step exception\n")
754    if (env.EnableDifftest) {
755      // debug runahead hint
756      val debug_runahead_checkpoint_id = Wire(checkpoint_id.cloneType)
757      if(i == 0){
758        debug_runahead_checkpoint_id := checkpoint_id
759      } else {
760        debug_runahead_checkpoint_id := checkpoint_id + PopCount((0 until i).map(i =>
761          fromRename(i).fire
762        ))
763      }
764    }
765  }
766
767  // store set perf count
768  XSPerfAccumulate("waittable_load_wait", PopCount((0 until RenameWidth).map(i =>
769    fromRename(i).fire && fromRename(i).bits.loadWaitBit && !isStore(i) && isLs(i)
770  )))
771  XSPerfAccumulate("storeset_load_wait", PopCount((0 until RenameWidth).map(i =>
772    fromRename(i).fire && updatedUop(i).loadWaitBit && !isStore(i) && isLs(i)
773  )))
774  XSPerfAccumulate("storeset_load_strict_wait", PopCount((0 until RenameWidth).map(i =>
775    fromRename(i).fire && updatedUop(i).loadWaitBit && updatedUop(i).loadWaitStrict && !isStore(i) && isLs(i)
776  )))
777  XSPerfAccumulate("storeset_store_wait", PopCount((0 until RenameWidth).map(i =>
778    fromRename(i).fire && updatedUop(i).loadWaitBit && isStore(i)
779  )))
780
781  val allResourceReady = io.enqRob.canAccept
782
783  // Instructions should be dispatched in order.
784  // blockedByWaitForward: this instruction is blocked by itself (based on waitForward)
785  // nextCanOut: next instructions can out (based on blockBackward)
786  // notBlockedByPrevious: previous instructions can enqueue
787  val hasException = VecInit(fromRename.zip(updatedUop).map {
788    case (fromRename: DecoupledIO[DynInst], uop: DynInst) =>
789      fromRename.bits.hasException || uop.singleStep
790  })
791
792  private val blockedByWaitForward = Wire(Vec(RenameWidth, Bool()))
793  blockedByWaitForward(0) := !io.enqRob.isEmpty && isWaitForward(0)
794  for (i <- 1 until RenameWidth) {
795    blockedByWaitForward(i) := blockedByWaitForward(i - 1) || (!io.enqRob.isEmpty || Cat(fromRename.take(i).map(_.valid)).orR) && isWaitForward(i)
796  }
797  if(backendParams.debugEn){
798    dontTouch(blockedByWaitForward)
799    dontTouch(conserveFlows)
800  }
801
802  // Only the uop with block backward flag will block the next uop
803  val nextCanOut = VecInit((0 until RenameWidth).map(i =>
804    !isBlockBackward(i)
805  ))
806  val notBlockedByPrevious = VecInit((0 until RenameWidth).map(i =>
807    if (i == 0) true.B
808    else Cat((0 until i).map(j => nextCanOut(j))).andR
809  ))
810
811  // for noSpecExec: (robEmpty || !this.noSpecExec) && !previous.noSpecExec
812  // For blockBackward:
813  // this instruction can actually dequeue when 3 conditions hold:
814  // (1) it is not blocked by waitForward, (2) previous instructions do not block it (blockBackward),
815  // (3) the ROB can accept it
816  thisCanActualOut := VecInit((0 until RenameWidth).map(i => !blockedByWaitForward(i) && notBlockedByPrevious(i) && io.enqRob.canAccept))
817  val thisActualOut = (0 until RenameWidth).map(i => io.enqRob.req(i).valid && io.enqRob.canAccept)
818
819  // input for ROB, LSQ
820  for (i <- 0 until RenameWidth) {
821    // needAlloc is unused and should be removed
822    io.enqRob.needAlloc(i) := fromRename(i).valid
823    io.enqRob.req(i).valid := fromRename(i).fire
824    io.enqRob.req(i).bits := updatedUop(i)
825    io.enqRob.req(i).bits.hasException := updatedUop(i).hasException || updatedUop(i).singleStep
826    io.enqRob.req(i).bits.numWB := Mux(updatedUop(i).singleStep, 0.U, updatedUop(i).numWB)
827  }
828
829  val hasValidInstr = VecInit(fromRename.map(_.valid)).asUInt.orR
830  val hasSpecialInstr = Cat((0 until RenameWidth).map(i => isBlockBackward(i))).orR
831
832  private val canAccept = !hasValidInstr || !hasSpecialInstr && io.enqRob.canAccept
833
834  val isWaitForwardOrBlockBackward = isWaitForward.asUInt.orR || isBlockBackward.asUInt.orR
835  val renameFireCnt = PopCount(fromRename.map(_.fire))
836
837  val stall_rob = hasValidInstr && !io.enqRob.canAccept
838  val stall_int_dq = hasValidInstr && io.enqRob.canAccept
839  val stall_int_dq0 = hasValidInstr && io.enqRob.canAccept
840  val stall_int_dq1 = hasValidInstr && io.enqRob.canAccept
841  val stall_fp_dq = hasValidInstr && io.enqRob.canAccept
842  val stall_ls_dq = hasValidInstr && io.enqRob.canAccept
843
844  XSPerfAccumulate("in_valid_count", PopCount(fromRename.map(_.valid)))
845  XSPerfAccumulate("in_fire_count", PopCount(fromRename.map(_.fire)))
846  XSPerfAccumulate("in_valid_not_ready_count", PopCount(fromRename.map(x => x.valid && !x.ready)))
847  XSPerfAccumulate("wait_cycle", !fromRename.head.valid && allResourceReady)
848
849  XSPerfAccumulate("stall_cycle_rob", stall_rob)
850  XSPerfAccumulate("stall_cycle_int_dq0", stall_int_dq0)
851  XSPerfAccumulate("stall_cycle_int_dq1", stall_int_dq1)
852  XSPerfAccumulate("stall_cycle_fp_dq", stall_fp_dq)
853  XSPerfAccumulate("stall_cycle_ls_dq", stall_ls_dq)
854
855  val notIssue = !io.debugTopDown.fromRob.robHeadLsIssue
856  val tlbReplay = io.debugTopDown.fromCore.fromMem.robHeadTlbReplay
857  val tlbMiss = io.debugTopDown.fromCore.fromMem.robHeadTlbMiss
858  val vioReplay = io.debugTopDown.fromCore.fromMem.robHeadLoadVio
859  val mshrReplay = io.debugTopDown.fromCore.fromMem.robHeadLoadMSHR
860  val l1Miss = io.debugTopDown.fromCore.fromMem.robHeadMissInDCache
861  val l2Miss = io.debugTopDown.fromCore.l2MissMatch
862  val l3Miss = io.debugTopDown.fromCore.l3MissMatch
863
864  val ldReason = Mux(l3Miss, TopDownCounters.LoadMemStall.id.U,
865  Mux(l2Miss, TopDownCounters.LoadL3Stall.id.U,
866  Mux(l1Miss, TopDownCounters.LoadL2Stall.id.U,
867  Mux(notIssue, TopDownCounters.MemNotReadyStall.id.U,
868  Mux(tlbMiss, TopDownCounters.LoadTLBStall.id.U,
869  Mux(tlbReplay, TopDownCounters.LoadTLBStall.id.U,
870  Mux(mshrReplay, TopDownCounters.LoadMSHRReplayStall.id.U,
871  Mux(vioReplay, TopDownCounters.LoadVioReplayStall.id.U,
872  TopDownCounters.LoadL1Stall.id.U))))))))
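  // ldReason picks the furthest confirmed miss level first (L3 miss -> memory stall, L2 miss -> L3
  // stall, L1 miss -> L2 stall) and otherwise falls back to not-issued, TLB, MSHR-replay and
  // violation-replay reasons, defaulting to an L1 load stall.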
873
874  val fusedVec = (0 until RenameWidth).map{ case i =>
875    if (i == 0) false.B
876    else (io.fromRename(i-1).fire && !io.fromRename(i).valid &&
877         CommitType.isFused(io.fromRename(i-1).bits.commitType))
878  }
879
880  val decodeReason = RegNextN(io.stallReason.reason, 2)
881  val renameReason = RegNext(io.stallReason.reason)
882
883  val stallReason = Wire(chiselTypeOf(io.stallReason.reason))
884  val firedVec = fromRename.map(_.fire)
885  io.stallReason.backReason.valid := !canAccept
886  io.stallReason.backReason.bits := TopDownCounters.OtherCoreStall.id.U
887  stallReason.zip(io.stallReason.reason).zip(firedVec).zipWithIndex.zip(fusedVec).map { case ((((update, in), fire), idx), fused) =>
888    val headIsInt = FuType.isInt(io.robHead.getDebugFuType)  && io.robHeadNotReady
889    val headIsFp  = FuType.isFArith(io.robHead.getDebugFuType)   && io.robHeadNotReady
890    val headIsDiv = FuType.isDivSqrt(io.robHead.getDebugFuType) && io.robHeadNotReady
891    val headIsLd  = io.robHead.getDebugFuType === FuType.ldu.U && io.robHeadNotReady || !io.lqCanAccept
892    val headIsSt  = io.robHead.getDebugFuType === FuType.stu.U && io.robHeadNotReady || !io.sqCanAccept
893    val headIsAmo = io.robHead.getDebugFuType === FuType.mou.U && io.robHeadNotReady
894    val headIsLs  = headIsLd || headIsSt
895    val robLsFull = io.robFull || !io.lqCanAccept || !io.sqCanAccept
896
897    import TopDownCounters._
898    update := MuxCase(OtherCoreStall.id.U, Seq(
899      // fire
900      (fire || fused                                     ) -> NoStall.id.U          ,
901      // keep a specific stall reason already reported by decode or rename
902      (in =/= OtherCoreStall.id.U && in =/= NoStall.id.U ) -> in                    ,
903      // rob stall
904      (headIsAmo                                         ) -> AtomicStall.id.U      ,
905      (headIsSt                                          ) -> StoreStall.id.U       ,
906      (headIsLd                                          ) -> ldReason              ,
907      (headIsDiv                                         ) -> DivStall.id.U         ,
908      (headIsInt                                         ) -> IntNotReadyStall.id.U ,
909      (headIsFp                                          ) -> FPNotReadyStall.id.U  ,
910      (renameReason(idx) =/= NoStall.id.U                ) -> renameReason(idx)     ,
911      (decodeReason(idx) =/= NoStall.id.U                ) -> decodeReason(idx)     ,
912    ))
913  }
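  // Priority of the per-slot stall reason: a slot that fired (or was fused away) reports NoStall;
  // otherwise a specific reason forwarded from decode/rename is kept, then the ROB-head based
  // reasons, then the registered rename/decode reasons, with OtherCoreStall as the default.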
914
915  TopDownCounters.values.foreach(ctr => XSPerfAccumulate(ctr.toString(), PopCount(stallReason.map(_ === ctr.id.U)), XSPerfLevel.CRITICAL))
916
917  val robTrueCommit = io.debugTopDown.fromRob.robTrueCommit
918  TopDownCounters.values.foreach(ctr => XSPerfRolling("td_"+ctr.toString(), PopCount(stallReason.map(_ === ctr.id.U)),
919                                                      robTrueCommit, 1000, clock, reset))
920
921  XSPerfHistogram("slots_fire", PopCount(thisActualOut), true.B, 0, RenameWidth+1, 1)
922  // Explanation: when out(0) does not fire, PopCount(valid) is not meaningful
923  XSPerfHistogram("slots_valid_pure", PopCount(io.enqRob.req.map(_.valid)), thisActualOut(0), 0, RenameWidth+1, 1)
924  XSPerfHistogram("slots_valid_rough", PopCount(io.enqRob.req.map(_.valid)), true.B, 0, RenameWidth+1, 1)
925
926  val perfEvents = Seq(
927    ("dispatch_in",                 PopCount(fromRename.map(_.valid && fromRename(0).ready))                       ),
928    ("dispatch_empty",              !hasValidInstr                                                                 ),
929    ("dispatch_utili",              PopCount(fromRename.map(_.valid))                                              ),
930    ("dispatch_waitinstr",          PopCount(fromRename.map(!_.valid && canAccept))                                ),
931    ("dispatch_stall_cycle_lsq",    false.B                                                                        ),
932    ("dispatch_stall_cycle_rob",    stall_rob                                                                      ),
933    ("dispatch_stall_cycle_int_dq", stall_int_dq                                                                   ),
934    ("dispatch_stall_cycle_fp_dq",  stall_fp_dq                                                                    ),
935    ("dispatch_stall_cycle_ls_dq",  stall_ls_dq                                                                    )
936  )
937  generatePerfEvent()
938}
939