xref: /XiangShan/src/main/scala/xiangshan/mem/lsqueue/LoadQueue.scala (revision 60ebee385ce85a25a994f6da0c84ecce9bb91bca)
/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/
16c6d43980SLemover
17c7658a75SYinan Xupackage xiangshan.mem
18c7658a75SYinan Xu
192225d46eSJiawei Linimport chipsalliance.rocketchip.config.Parameters
20c7658a75SYinan Xuimport chisel3._
21c7658a75SYinan Xuimport chisel3.util._
22c7658a75SYinan Xuimport utils._
233c02ee8fSwakafaimport utility._
24c7658a75SYinan Xuimport xiangshan._
25dc597826SJiawei Linimport xiangshan.backend.fu.fpu.FPU
266ab6918fSYinan Xuimport xiangshan.backend.rob.RobLsqIO
276ab6918fSYinan Xuimport xiangshan.cache._
286ab6918fSYinan Xuimport xiangshan.frontend.FtqPtr
296786cfb7SWilliam Wangimport xiangshan.ExceptionNO._
30e4f69d78Ssfencevmaimport xiangshan.mem.mdp._
31e4f69d78Ssfencevmaimport xiangshan.backend.rob.RobPtr
32c7658a75SYinan Xu
/**
 * Circular-queue pointer used to index the load queue.
 *
 * The number of entries it wraps over is read from the core parameters
 * (`VirtualLoadQueueSize`).
 */
class LqPtr(implicit p: Parameters)
  extends CircularQueuePtr[LqPtr](params => params(XSCoreParamsKey).VirtualLoadQueueSize)
37c7658a75SYinan Xu
object LqPtr {
  /**
   * Builds an [[LqPtr]] wire from its two components.
   *
   * @param f value driven onto the pointer's `flag` field
   * @param v value driven onto the pointer's `value` field
   * @return a freshly created `Wire` of type [[LqPtr]] driven by `f` and `v`
   */
  def apply(f: Bool, v: UInt)(implicit p: Parameters): LqPtr = {
    val lqPtr = Wire(new LqPtr)
    lqPtr.value := v
    lqPtr.flag := f
    lqPtr
  }
}
46c7658a75SYinan Xu
/** Mix-in for modules that need to format raw load data for write-back. */
trait HasLoadHelper { this: XSModule =>
  /**
   * Selects and extends the low bits of `rdata` according to the load opcode.
   *
   * @param uop   micro-op of the load; `ctrl.fuOpType` picks the format and
   *              `ctrl.fpWen` tells whether `lw`/`ld` results are boxed via `FPU.box`
   * @param rdata raw load data; bits up to 63 are read
   * @return the data extended to `XLEN` bits (or the `FPU.box` result for
   *         `lw`/`ld` when `fpWen` is set)
   */
  def rdataHelper(uop: MicroOp, rdata: UInt): UInt = {
    val toFpReg = uop.ctrl.fpWen

    // Low slices of the raw data, one per access width.
    val lo8  = rdata(7, 0)
    val lo16 = rdata(15, 0)
    val lo32 = rdata(31, 0)
    val lo64 = rdata(63, 0)

    LookupTree(uop.ctrl.fuOpType, List(
      LSUOpType.lb   -> SignExt(lo8, XLEN),
      LSUOpType.lh   -> SignExt(lo16, XLEN),
      // riscv-spec-20191213, 12.2 "NaN Boxing of Narrower Values": an operation
      // that writes a narrower result to an f register must set the uppermost
      // FLEN-n bits to all 1s, hence the FPU.box path when the load targets an
      // FP register.
      LSUOpType.lw   -> Mux(toFpReg, FPU.box(rdata, FPU.S), SignExt(lo32, XLEN)),
      LSUOpType.ld   -> Mux(toFpReg, FPU.box(rdata, FPU.D), SignExt(lo64, XLEN)),
      LSUOpType.lbu  -> ZeroExt(lo8, XLEN),
      LSUOpType.lhu  -> ZeroExt(lo16, XLEN),
      LSUOpType.lwu  -> ZeroExt(lo32, XLEN)
    ))
  }
}
66579b9f28SLinJiawei
/**
 * Enqueue (allocation) interface of the load queue.
 *
 * Directions are given from the load queue's point of view: it reports
 * `canAccept` and returns one [[LqPtr]] per request slot in `resp`, while the
 * requester supplies `sqCanAccept`, `needAlloc` and the micro-ops in `req`.
 * Every vector has `exuParameters.LsExuCnt` slots.
 */
class LqEnqIO(implicit p: Parameters) extends XSBundle {
  val canAccept = Output(Bool())
  val sqCanAccept = Input(Bool())
  val needAlloc = Vec(exuParameters.LsExuCnt, Input(Bool()))
  val req = Vec(exuParameters.LsExuCnt, Flipped(Valid(new MicroOp)))
  val resp = Vec(exuParameters.LsExuCnt, Output(new LqPtr))
}
74c7658a75SYinan Xu
/**
 * Load-address trigger hit vectors exchanged with the load queue.
 *
 * Both vectors have 3 entries.
 * NOTE(review): 3 is presumably the number of load-address triggers; confirm
 * against the trigger configuration before changing it.
 */
class LqTriggerIO(implicit p: Parameters) extends XSBundle {
  // Hit vector supplied to the load queue.
  val hitLoadAddrTriggerHitVec = Input(Vec(3, Bool()))
  // Hit vector reported by the load queue.
  val lqLoadAddrTriggerHitVec = Output(Vec(3, Bool()))
}
79b978565cSWilliam Wang
/**
 * Debug interface used for top-down analysis of the load at the ROB head.
 *
 * The virtual address of the ROB-head instruction and `robHeadMissInDTlb`
 * come in; the remaining flags are reported back. In [[LoadQueue]] this
 * bundle is connected straight to the replay queue.
 */
class LoadQueueTopDownIO(implicit p: Parameters) extends XSBundle {
  // Inputs: virtual address of the ROB head (with valid).
  val robHeadVaddr = Flipped(Valid(UInt(VAddrBits.W)))
  // Outputs: per-cause flags for the ROB-head load.
  val robHeadTlbReplay = Output(Bool())
  val robHeadTlbMiss = Output(Bool())
  val robHeadLoadVio = Output(Bool())
  val robHeadLoadMSHR = Output(Bool())
  // Input: DTLB-miss indication for the ROB head.
  val robHeadMissInDTlb = Input(Bool())
  val robHeadOtherReplay = Output(Bool())
}
89e4f69d78Ssfencevma
/**
 * Top level of the load queue.
 *
 * The module holds no queue storage itself. It instantiates the sub-queues
 * (RAR / RAW violation queues, replay queue, virtual load queue, exception
 * buffer, uncache buffer), wires them to the module IO, merges the two
 * rollback sources into `io.rollback`, and exports performance events.
 */
class LoadQueue(implicit p: Parameters) extends XSModule
  with HasDCacheParameters
  with HasCircularQueuePtrHelper
  with HasLoadHelper
  with HasPerfEvents {

  val io = IO(new Bundle() {
    val redirect = Flipped(Valid(new Redirect))
    val enq = new LqEnqIO
    // Load-unit facing ports.
    val ldu = new Bundle() {
      val stld_nuke_query = Vec(LoadPipelineWidth, Flipped(new LoadNukeQueryIO)) // from load_s2
      val ldld_nuke_query = Vec(LoadPipelineWidth, Flipped(new LoadNukeQueryIO)) // from load_s2
      val ldin            = Vec(LoadPipelineWidth, Flipped(Decoupled(new LqWriteBundle))) // from load_s3
    }
    // Store-address pipeline.
    val sta = new Bundle() {
      val storeAddrIn = Vec(StorePipelineWidth, Flipped(Valid(new LsPipelineBundle))) // from store_s1
    }
    // Store-data pipeline.
    val std = new Bundle() {
      val storeDataIn = Vec(StorePipelineWidth, Flipped(Valid(new ExuOutput))) // from store_s0, store data, send to sq from rs
    }
    // Store-queue status.
    val sq = new Bundle() {
      val stAddrReadySqPtr = Input(new SqPtr)
      val stAddrReadyVec   = Input(Vec(StoreQueueSize, Bool()))
      val stDataReadySqPtr = Input(new SqPtr)
      val stDataReadyVec   = Input(Vec(StoreQueueSize, Bool()))
      val stIssuePtr       = Input(new SqPtr)
      val sqEmpty          = Input(Bool())
    }
    val ldout = Vec(LoadPipelineWidth, DecoupledIO(new ExuOutput))
    val ld_raw_data = Vec(LoadPipelineWidth, Output(new LoadDataFromLQBundle))
    val replay = Vec(LoadPipelineWidth, Decoupled(new LsPipelineBundle))
    val refill = Flipped(ValidIO(new Refill))
    val tl_d_channel = Input(new DcacheToLduForwardIO)
    val release = Flipped(Valid(new Release))
    val rollback = Output(Valid(new Redirect))
    val rob = Flipped(new RobLsqIO)
    val uncache = new UncacheWordIO
    val trigger = Vec(LoadPipelineWidth, new LqTriggerIO)
    val exceptionAddr = new ExceptionAddrIO
    val lqFull = Output(Bool())
    val lqDeq = Output(UInt(log2Up(CommitWidth + 1).W))
    val lqCancelCnt = Output(UInt(log2Up(VirtualLoadQueueSize + 1).W))
    val lq_rep_full = Output(Bool())
    val tlbReplayDelayCycleCtrl = Vec(4, Input(UInt(ReSelectLen.W)))
    val l2_hint = Input(Valid(new L2ToL1Hint()))
    val lqEmpty = Output(Bool())
    val debugTopDown = new LoadQueueTopDownIO
  })

  // Sub-modules. Instance names are kept stable because they appear in the
  // generated hierarchy.
  val loadQueueRAR = Module(new LoadQueueRAR)          // read-after-read violation
  val loadQueueRAW = Module(new LoadQueueRAW)          // read-after-write violation
  val loadQueueReplay = Module(new LoadQueueReplay)    // enqueue if need replay
  val virtualLoadQueue = Module(new VirtualLoadQueue)  // control state
  val exceptionBuffer = Module(new LqExceptionBuffer)  // exception buffer
  val uncacheBuffer = Module(new UncacheBuffer)        // uncache buffer

  /**
   * LoadQueueRAR: load-load nuke queries from the load pipelines.
   *
   * NOTE(review): the original per-line comments here said req is "from
   * load_s1" and resp is "to load_s2", while the port declarations above say
   * "from load_s2" -- confirm the actual pipeline stages.
   */
  loadQueueRAR.io.redirect <> io.redirect
  loadQueueRAR.io.release  <> io.release
  loadQueueRAR.io.ldWbPtr  <> virtualLoadQueue.io.ldWbPtr
  (0 until LoadPipelineWidth).foreach { w =>
    loadQueueRAR.io.query(w).req    <> io.ldu.ldld_nuke_query(w).req
    loadQueueRAR.io.query(w).resp   <> io.ldu.ldld_nuke_query(w).resp
    loadQueueRAR.io.query(w).revoke := io.ldu.ldld_nuke_query(w).revoke // from load_s3
  }

  /**
   * LoadQueueRAW: store-load nuke queries plus store-address information.
   *
   * NOTE(review): same stage-comment discrepancy as for LoadQueueRAR above.
   */
  loadQueueRAW.io.redirect         <> io.redirect
  loadQueueRAW.io.storeIn          <> io.sta.storeAddrIn
  loadQueueRAW.io.stAddrReadySqPtr <> io.sq.stAddrReadySqPtr
  loadQueueRAW.io.stIssuePtr       <> io.sq.stIssuePtr
  (0 until LoadPipelineWidth).foreach { w =>
    loadQueueRAW.io.query(w).req    <> io.ldu.stld_nuke_query(w).req
    loadQueueRAW.io.query(w).resp   <> io.ldu.stld_nuke_query(w).resp
    loadQueueRAW.io.query(w).revoke := io.ldu.stld_nuke_query(w).revoke // from load_s3
  }

  /**
   * VirtualLoadQueue: allocation and the full / empty / dequeue status outputs.
   */
  virtualLoadQueue.io.redirect    <> io.redirect
  virtualLoadQueue.io.enq         <> io.enq
  virtualLoadQueue.io.ldin        <> io.ldu.ldin // from load_s3
  virtualLoadQueue.io.lqFull      <> io.lqFull
  virtualLoadQueue.io.lqDeq       <> io.lqDeq
  virtualLoadQueue.io.lqCancelCnt <> io.lqCancelCnt
  virtualLoadQueue.io.lqEmpty     <> io.lqEmpty

  /**
   * Load queue exception buffer: observes every load write-back (valid + bits
   * only, it does not take part in the ready handshake).
   */
  exceptionBuffer.io.redirect <> io.redirect
  exceptionBuffer.io.req.zipWithIndex.foreach { case (req, w) =>
    req.valid := io.ldu.ldin(w).valid // from load_s3
    req.bits := io.ldu.ldin(w).bits
  }
  io.exceptionAddr <> exceptionBuffer.io.exceptionAddr

  /**
   * Load uncache buffer: drives the write-back, raw-data, ROB, uncache and
   * trigger ports, and observes every load write-back like the exception buffer.
   */
  uncacheBuffer.io.redirect    <> io.redirect
  uncacheBuffer.io.ldout       <> io.ldout
  uncacheBuffer.io.ld_raw_data <> io.ld_raw_data
  uncacheBuffer.io.rob         <> io.rob
  uncacheBuffer.io.uncache     <> io.uncache
  uncacheBuffer.io.trigger     <> io.trigger
  uncacheBuffer.io.req.zipWithIndex.foreach { case (req, w) =>
    req.valid := io.ldu.ldin(w).valid // from load_s3
    req.bits := io.ldu.ldin(w).bits // from load_s3
  }

  /**
   * Reduces a list of redirect candidates to the oldest valid one.
   *
   * Lists of length 0 or 1 are returned unchanged. For two candidates a mux
   * picks the one that is not after the other (by `robIdx`) when both are
   * valid, the valid one when only one is valid, and the second candidate
   * (whose valid is low) when neither is. Longer lists are split in half,
   * reduced recursively and the two winners reduced again.
   *
   * @param valid valid flag of each candidate
   * @param bits  payload of each candidate; must have the same length as `valid`
   * @return a pair of sequences holding the selected valid flag(s) and payload(s)
   */
  def selectOldest[T <: Redirect](valid: Seq[Bool], bits: Seq[T]): (Seq[Bool], Seq[T]) = {
    assert(valid.length == bits.length)
    valid.length match {
      case 0 | 1 =>
        (valid, bits)

      case 2 =>
        // Bundle valid + payload so that one Mux tree can select both at once.
        val cand = Seq.fill(2)(Wire(ValidIO(chiselTypeOf(bits(0)))))
        cand.zipWithIndex.foreach { case (c, i) =>
          c.valid := valid(i)
          c.bits := bits(i)
        }
        val bothValid = valid(0) && valid(1)
        val firstIsYounger = isAfter(bits(0).robIdx, bits(1).robIdx)
        val onlyFirstValid = valid(0) && !valid(1)
        val oldest = Mux(
          bothValid,
          Mux(firstIsYounger, cand(1), cand(0)),
          Mux(onlyFirstValid, cand(0), cand(1))
        )
        (Seq(oldest.valid), Seq(oldest.bits))

      case n =>
        val half = n / 2
        val (lowValid, highValid) = valid.splitAt(half)
        val (lowBits, highBits) = bits.splitAt(half)
        val (lowSelV, lowSelBits) = selectOldest(lowValid, lowBits)
        val (highSelV, highSelBits) = selectOldest(highValid, highBits)
        selectOldest(lowSelV ++ highSelV, lowSelBits ++ highSelBits)
    }
  }

  // Rollback: the older of the RAW-violation rollback and the uncache-buffer rollback.
  val (rollbackSelV, rollbackSelBits) = selectOldest(
    Seq(loadQueueRAW.io.rollback.valid, uncacheBuffer.io.rollback.valid),
    Seq(loadQueueRAW.io.rollback.bits, uncacheBuffer.io.rollback.bits)
  )
  io.rollback.valid := rollbackSelV.head
  io.rollback.bits := rollbackSelBits.head

  /* <------- DANGEROUS: Don't change sequence here ! -------> */
  // NOTE(review): io.ldu.ldin is bulk-connected both to virtualLoadQueue.io.ldin
  // above and to loadQueueReplay.io.enq below; presumably the ordering matters
  // because the later connection decides the final driver of the shared
  // signals -- confirm before moving anything.

  /**
   * LoadQueueReplay: loads that need to be replayed.
   */
  loadQueueReplay.io.redirect         <> io.redirect
  loadQueueReplay.io.enq              <> io.ldu.ldin // from load_s3
  loadQueueReplay.io.storeAddrIn      <> io.sta.storeAddrIn // from store_s1
  loadQueueReplay.io.storeDataIn      <> io.std.storeDataIn // from store_s0
  loadQueueReplay.io.replay           <> io.replay
  loadQueueReplay.io.refill           <> io.refill
  loadQueueReplay.io.tl_d_channel     <> io.tl_d_channel
  loadQueueReplay.io.stAddrReadySqPtr <> io.sq.stAddrReadySqPtr
  loadQueueReplay.io.stAddrReadyVec   <> io.sq.stAddrReadyVec
  loadQueueReplay.io.stDataReadySqPtr <> io.sq.stDataReadySqPtr
  loadQueueReplay.io.stDataReadyVec   <> io.sq.stDataReadyVec
  loadQueueReplay.io.sqEmpty          <> io.sq.sqEmpty
  loadQueueReplay.io.lqFull           <> io.lq_rep_full
  loadQueueReplay.io.ldWbPtr          <> virtualLoadQueue.io.ldWbPtr
  loadQueueReplay.io.rarFull          <> loadQueueRAR.io.lqFull
  loadQueueReplay.io.rawFull          <> loadQueueRAW.io.lqFull
  loadQueueReplay.io.l2_hint          <> io.l2_hint
  loadQueueReplay.io.tlbReplayDelayCycleCtrl <> io.tlbReplayDelayCycleCtrl

  loadQueueReplay.io.debugTopDown <> io.debugTopDown

  // {RAR full, RAW full, Replay full}, RAR in the most significant bit.
  val full_mask = Cat(loadQueueRAR.io.lqFull, loadQueueRAW.io.lqFull, loadQueueReplay.io.lqFull)

  // One event per value of full_mask; the position in this list is the value
  // the mask is compared against.
  private val fullMaskNames = Seq(
    "full_mask_000",
    "full_mask_001",
    "full_mask_010",
    "full_mask_011",
    "full_mask_100",
    "full_mask_101",
    "full_mask_110",
    "full_mask_111"
  )
  private val fullMaskEvents = fullMaskNames.zipWithIndex.map { case (name, code) =>
    (name, full_mask === code.U)
  }
  private val rollbackEvent = ("rollback", io.rollback.valid)

  (fullMaskEvents :+ rollbackEvent).foreach { case (name, cond) =>
    XSPerfAccumulate(name, cond)
  }

  // perf cnt: events of the sub-queues followed by the local ones.
  val perfEvents =
    Seq(virtualLoadQueue, loadQueueRAR, loadQueueRAW, loadQueueReplay).flatMap(_.getPerfEvents) ++
      (fullMaskEvents :+ rollbackEvent)
  generatePerfEvent()
  // end
}