// xref: /XiangShan/src/main/scala/xiangshan/mem/lsqueue/LoadQueue.scala (revision d0de7e4a4bcd4633260dda99dfedc2a5e543b8b4)
/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.mem

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import utils._
import utility._
import xiangshan._
import xiangshan.backend.fu.fpu.FPU
import xiangshan.backend.rob.RobLsqIO
import xiangshan.cache._
import xiangshan.cache.mmu._
import xiangshan.frontend.FtqPtr
import xiangshan.ExceptionNO._
import xiangshan.mem.mdp._
import xiangshan.backend.rob.RobPtr

/**
 * Circular-queue pointer type used to index the load queue.
 *
 * The queue size handed to [[CircularQueuePtr]] is read from the core
 * parameters (`VirtualLoadQueueSize`).
 */
class LqPtr(implicit p: Parameters) extends CircularQueuePtr[LqPtr](
  p => p(XSCoreParamsKey).VirtualLoadQueueSize
)

/** Companion factory for [[LqPtr]]. */
object LqPtr {
  /**
   * Builds an [[LqPtr]] wire whose fields are driven by the given signals.
   *
   * @param f signal connected to the pointer's `flag` field
   * @param v signal connected to the pointer's `value` field
   * @return a new `Wire(new LqPtr)` with both fields connected
   */
  def apply(f: Bool, v: UInt)(implicit p: Parameters): LqPtr = {
    val ptr = Wire(new LqPtr)
    ptr.flag := f
    ptr.value := v
    ptr
  }
}

/** Mix-in for [[XSModule]]s that need to format raw load data for write-back. */
trait HasLoadHelper { this: XSModule =>
  /**
   * Selects and extends the relevant low bits of `rdata` according to the
   * load operation encoded in `uop.ctrl.fuOpType`.
   *
   *  - `lb`/`lh` and `hlvb`/`hlvh`/`hlvw`/`hlvd`: sign-extend to `XLEN`.
   *  - `lbu`/`lhu`/`lwu` and `hlvbu`/`hlvhu`/`hlvwu`/`hlvxhu`/`hlvxwu`: zero-extend to `XLEN`.
   *  - `lw`/`ld`: when `uop.ctrl.fpWen` is set the data is passed through
   *    `FPU.box` (single / double respectively); otherwise it is sign-extended.
   *
   * @param uop   micro-op whose `ctrl.fuOpType` and `ctrl.fpWen` pick the format
   * @param rdata raw load data; only the low 8/16/32/64 bits are consumed
   * @return the formatted value chosen by `LookupTree` on the op type
   */
  def rdataHelper(uop: MicroOp, rdata: UInt): UInt = {
    val fpWen = uop.ctrl.fpWen
    LookupTree(uop.ctrl.fuOpType, List(
      LSUOpType.lb     -> SignExt(rdata(7, 0), XLEN),
      LSUOpType.lh     -> SignExt(rdata(15, 0), XLEN),
      /*
          riscv-spec-20191213: 12.2 NaN Boxing of Narrower Values
          Any operation that writes a narrower result to an f register must write
          all 1s to the uppermost FLEN-n bits to yield a legal NaN-boxed value.
      */
      LSUOpType.lw     -> Mux(fpWen, FPU.box(rdata, FPU.S), SignExt(rdata(31, 0), XLEN)),
      LSUOpType.ld     -> Mux(fpWen, FPU.box(rdata, FPU.D), SignExt(rdata(63, 0), XLEN)),
      LSUOpType.lbu    -> ZeroExt(rdata(7, 0), XLEN),
      LSUOpType.lhu    -> ZeroExt(rdata(15, 0), XLEN),
      LSUOpType.lwu    -> ZeroExt(rdata(31, 0), XLEN),

      // hypervisor
      LSUOpType.hlvb   -> SignExt(rdata(7, 0), XLEN),
      LSUOpType.hlvh   -> SignExt(rdata(15, 0), XLEN),
      LSUOpType.hlvw   -> SignExt(rdata(31, 0), XLEN),
      LSUOpType.hlvd   -> SignExt(rdata(63, 0), XLEN),
      LSUOpType.hlvbu  -> ZeroExt(rdata(7, 0), XLEN),
      LSUOpType.hlvhu  -> ZeroExt(rdata(15, 0), XLEN),
      LSUOpType.hlvwu  -> ZeroExt(rdata(31, 0), XLEN),
      LSUOpType.hlvxhu -> ZeroExt(rdata(15, 0), XLEN),
      LSUOpType.hlvxwu -> ZeroExt(rdata(31, 0), XLEN),
    ))
  }
}

/**
 * Enqueue interface of the load queue.
 *
 * In [[LoadQueue]] this bundle is bulk-connected to `virtualLoadQueue.io.enq`.
 * All vectors have `exuParameters.LsExuCnt` entries.
 */
class LqEnqIO(implicit p: Parameters) extends XSBundle {
  val canAccept = Output(Bool())
  val sqCanAccept = Input(Bool())
  val needAlloc = Vec(exuParameters.LsExuCnt, Input(Bool()))
  // Incoming micro-ops; Flipped so that valid/bits are inputs to the queue.
  val req = Vec(exuParameters.LsExuCnt, Flipped(ValidIO(new MicroOp)))
  // Load-queue pointer returned for each request slot.
  val resp = Vec(exuParameters.LsExuCnt, Output(new LqPtr))
}

/**
 * Pair of 3-entry hit vectors for load-address triggers: one input and one output.
 *
 * NOTE(review): presumably used for debug-trigger address matching; this
 * bundle is not referenced in the visible part of this file, so confirm
 * the exact meaning with its users.
 */
class LqTriggerIO(implicit p: Parameters) extends XSBundle {
  val hitLoadAddrTriggerHitVec = Input(Vec(3, Bool()))
  val lqLoadAddrTriggerHitVec = Output(Vec(3, Bool()))
}

/**
 * Top-down debug interface of the load queue.
 *
 * In [[LoadQueue]] this bundle is bulk-connected to
 * `loadQueueReplay.io.debugTopDown`. It takes the virtual address at the
 * ROB head plus a DTLB-miss flag as inputs, and reports a set of
 * per-cause status flags as outputs.
 */
class LoadQueueTopDownIO(implicit p: Parameters) extends XSBundle {
  // Flipped Valid: valid/bits are inputs to the load queue.
  val robHeadVaddr = Flipped(Valid(UInt(VAddrBits.W)))
  val robHeadTlbReplay = Output(Bool())
  val robHeadTlbMiss = Output(Bool())
  val robHeadLoadVio = Output(Bool())
  val robHeadLoadMSHR = Output(Bool())
  val robHeadMissInDTlb = Input(Bool())
  val robHeadOtherReplay = Output(Bool())
}

/**
 * Top-level load queue.
 *
 * This module contains no logic of its own beyond perf counters: it
 * instantiates the sub-queues below and wires them to `io` and to each other.
 *
 *  - [[LoadQueueRAR]]      read-after-read violation check
 *  - [[LoadQueueRAW]]      read-after-write violation check
 *  - [[LoadQueueReplay]]   holds loads that need to be replayed
 *  - [[VirtualLoadQueue]]  control state (enqueue / dequeue pointers)
 *  - [[LqExceptionBuffer]] exception buffer
 *  - [[UncacheBuffer]]     uncache buffer
 *
 * The order of the connection statements is significant (see the
 * "DANGEROUS" marker in the body) and must not be changed.
 */
class LoadQueue(implicit p: Parameters) extends XSModule
  with HasDCacheParameters
  with HasCircularQueuePtrHelper
  with HasLoadHelper
  with HasPerfEvents
{
  val io = IO(new Bundle() {
    val redirect = Flipped(Valid(new Redirect))
    val enq = new LqEnqIO
    val ldu = new Bundle() {
      val stld_nuke_query = Vec(LoadPipelineWidth, Flipped(new LoadNukeQueryIO)) // from load_s2
      val ldld_nuke_query = Vec(LoadPipelineWidth, Flipped(new LoadNukeQueryIO)) // from load_s2
      val ldin            = Vec(LoadPipelineWidth, Flipped(Decoupled(new LqWriteBundle))) // from load_s3
    }
    val sta = new Bundle() {
      val storeAddrIn = Vec(StorePipelineWidth, Flipped(Valid(new LsPipelineBundle))) // from store_s1
    }
    val std = new Bundle() {
      val storeDataIn = Vec(StorePipelineWidth, Flipped(Valid(new ExuOutput))) // from store_s0, store data, send to sq from rs
    }
    val sq = new Bundle() {
      val stAddrReadySqPtr = Input(new SqPtr)
      val stAddrReadyVec   = Input(Vec(StoreQueueSize, Bool()))
      val stDataReadySqPtr = Input(new SqPtr)
      val stDataReadyVec   = Input(Vec(StoreQueueSize, Bool()))
      val stIssuePtr       = Input(new SqPtr)
      val sqEmpty          = Input(Bool())
    }
    val ldout = Vec(LoadPipelineWidth, DecoupledIO(new ExuOutput))
    val ld_raw_data = Vec(LoadPipelineWidth, Output(new LoadDataFromLQBundle))
    val replay = Vec(LoadPipelineWidth, Decoupled(new LsPipelineBundle))
  //  val refill = Flipped(ValidIO(new Refill))
    val tl_d_channel  = Input(new DcacheToLduForwardIO)
    val release = Flipped(Valid(new Release))
    val nuke_rollback = Output(Valid(new Redirect))
    val nack_rollback = Output(Valid(new Redirect))
    val rob = Flipped(new RobLsqIO)
    val uncache = new UncacheWordIO
    val exceptionAddr = new ExceptionAddrIO
    val lqFull = Output(Bool())
    val lqDeq = Output(UInt(log2Up(CommitWidth + 1).W))
    val lqCancelCnt = Output(UInt(log2Up(VirtualLoadQueueSize+1).W))
    val lq_rep_full = Output(Bool())
    val tlbReplayDelayCycleCtrl = Vec(4, Input(UInt(ReSelectLen.W)))
    val l2_hint = Input(Valid(new L2ToL1Hint()))
    val tlb_hint = Flipped(new TlbHintIO)
    val lqEmpty = Output(Bool())
    val debugTopDown = new LoadQueueTopDownIO
  })

  val loadQueueRAR = Module(new LoadQueueRAR)  //  read-after-read violation
  val loadQueueRAW = Module(new LoadQueueRAW)  //  read-after-write violation
  val loadQueueReplay = Module(new LoadQueueReplay)  //  enqueue if need replay
  val virtualLoadQueue = Module(new VirtualLoadQueue)  //  control state
  val exceptionBuffer = Module(new LqExceptionBuffer) // exception buffer
  val uncacheBuffer = Module(new UncacheBuffer) // uncache buffer

  /**
   * LoadQueueRAR
   *
   * NOTE(review): the stage tags on the query connections below (s1 / s2 / s3)
   * do not match the "from load_s2" tags on the IO declaration; confirm which
   * set is current.
   */
  loadQueueRAR.io.redirect <> io.redirect
  loadQueueRAR.io.release  <> io.release
  loadQueueRAR.io.ldWbPtr  <> virtualLoadQueue.io.ldWbPtr
  for (w <- 0 until LoadPipelineWidth) {
    loadQueueRAR.io.query(w).req    <> io.ldu.ldld_nuke_query(w).req // from load_s1
    loadQueueRAR.io.query(w).resp   <> io.ldu.ldld_nuke_query(w).resp // to load_s2
    loadQueueRAR.io.query(w).revoke := io.ldu.ldld_nuke_query(w).revoke // from load_s3
  }

  /**
   * LoadQueueRAW
   *
   * NOTE(review): same stage-tag mismatch as in the LoadQueueRAR section.
   */
  loadQueueRAW.io.redirect         <> io.redirect
  loadQueueRAW.io.storeIn          <> io.sta.storeAddrIn
  loadQueueRAW.io.stAddrReadySqPtr <> io.sq.stAddrReadySqPtr
  loadQueueRAW.io.stIssuePtr       <> io.sq.stIssuePtr
  for (w <- 0 until LoadPipelineWidth) {
    loadQueueRAW.io.query(w).req    <> io.ldu.stld_nuke_query(w).req // from load_s1
    loadQueueRAW.io.query(w).resp   <> io.ldu.stld_nuke_query(w).resp // to load_s2
    loadQueueRAW.io.query(w).revoke := io.ldu.stld_nuke_query(w).revoke // from load_s3
  }

  /**
   * VirtualLoadQueue
   */
  virtualLoadQueue.io.redirect    <> io.redirect
  virtualLoadQueue.io.enq         <> io.enq
  virtualLoadQueue.io.ldin        <> io.ldu.ldin // from load_s3
  virtualLoadQueue.io.lqFull      <> io.lqFull
  virtualLoadQueue.io.lqDeq       <> io.lqDeq
  virtualLoadQueue.io.lqCancelCnt <> io.lqCancelCnt
  virtualLoadQueue.io.lqEmpty     <> io.lqEmpty

  /**
   * Load queue exception buffer
   */
  exceptionBuffer.io.redirect <> io.redirect
  // Only valid/bits are tapped here; the buffer does not drive ldin.ready.
  for ((buff, w) <- exceptionBuffer.io.req.zipWithIndex) {
    buff.valid := io.ldu.ldin(w).valid // from load_s3
    buff.bits := io.ldu.ldin(w).bits
  }
  io.exceptionAddr <> exceptionBuffer.io.exceptionAddr

  /**
   * Load uncache buffer
   */
  uncacheBuffer.io.redirect    <> io.redirect
  uncacheBuffer.io.ldout       <> io.ldout
  uncacheBuffer.io.ld_raw_data <> io.ld_raw_data
  uncacheBuffer.io.rob         <> io.rob
  uncacheBuffer.io.uncache     <> io.uncache
  // Only valid/bits are tapped here; the buffer does not drive ldin.ready.
  for ((buff, w) <- uncacheBuffer.io.req.zipWithIndex) {
    buff.valid := io.ldu.ldin(w).valid // from load_s3
    buff.bits := io.ldu.ldin(w).bits // from load_s3
  }


  io.nuke_rollback := loadQueueRAW.io.rollback
  io.nack_rollback := uncacheBuffer.io.rollback

  /* <------- DANGEROUS: Don't change sequence here ! -------> */
  // io.ldu.ldin is bulk-connected to more than one sub-module (VirtualLoadQueue
  // above, LoadQueueReplay below). Under Chisel's last-connect semantics the
  // later `<>` decides any signal driven back toward io, which is presumably
  // why the statement order must be kept.

  /**
   * LoadQueueReplay
   */
  loadQueueReplay.io.redirect         <> io.redirect
  loadQueueReplay.io.enq              <> io.ldu.ldin // from load_s3
  loadQueueReplay.io.storeAddrIn      <> io.sta.storeAddrIn // from store_s1
  loadQueueReplay.io.storeDataIn      <> io.std.storeDataIn // from store_s0
  loadQueueReplay.io.replay           <> io.replay
  //loadQueueReplay.io.refill           <> io.refill
  loadQueueReplay.io.tl_d_channel     <> io.tl_d_channel
  loadQueueReplay.io.stAddrReadySqPtr <> io.sq.stAddrReadySqPtr
  loadQueueReplay.io.stAddrReadyVec   <> io.sq.stAddrReadyVec
  loadQueueReplay.io.stDataReadySqPtr <> io.sq.stDataReadySqPtr
  loadQueueReplay.io.stDataReadyVec   <> io.sq.stDataReadyVec
  loadQueueReplay.io.sqEmpty          <> io.sq.sqEmpty
  loadQueueReplay.io.lqFull           <> io.lq_rep_full
  loadQueueReplay.io.ldWbPtr          <> virtualLoadQueue.io.ldWbPtr
  loadQueueReplay.io.rarFull          <> loadQueueRAR.io.lqFull
  loadQueueReplay.io.rawFull          <> loadQueueRAW.io.lqFull
  loadQueueReplay.io.l2_hint          <> io.l2_hint
  loadQueueReplay.io.tlb_hint         <> io.tlb_hint
  loadQueueReplay.io.tlbReplayDelayCycleCtrl <> io.tlbReplayDelayCycleCtrl

  loadQueueReplay.io.debugTopDown <> io.debugTopDown

  // Bit 2 = RAR queue full, bit 1 = RAW queue full, bit 0 = replay queue full.
  val full_mask = Cat(loadQueueRAR.io.lqFull, loadQueueRAW.io.lqFull, loadQueueReplay.io.lqFull)
  XSPerfAccumulate("full_mask_000", full_mask === 0.U)
  XSPerfAccumulate("full_mask_001", full_mask === 1.U)
  XSPerfAccumulate("full_mask_010", full_mask === 2.U)
  XSPerfAccumulate("full_mask_011", full_mask === 3.U)
  XSPerfAccumulate("full_mask_100", full_mask === 4.U)
  XSPerfAccumulate("full_mask_101", full_mask === 5.U)
  XSPerfAccumulate("full_mask_110", full_mask === 6.U)
  XSPerfAccumulate("full_mask_111", full_mask === 7.U)
  XSPerfAccumulate("nuke_rollback", io.nuke_rollback.valid)
  XSPerfAccumulate("nack_rollback", io.nack_rollback.valid)

  // perf cnt
  val perfEvents = Seq(virtualLoadQueue, loadQueueRAR, loadQueueRAW, loadQueueReplay).flatMap(_.getPerfEvents) ++
    Seq(
      ("full_mask_000", full_mask === 0.U),
      ("full_mask_001", full_mask === 1.U),
      ("full_mask_010", full_mask === 2.U),
      ("full_mask_011", full_mask === 3.U),
      ("full_mask_100", full_mask === 4.U),
      ("full_mask_101", full_mask === 5.U),
      ("full_mask_110", full_mask === 6.U),
      ("full_mask_111", full_mask === 7.U),
      ("nuke_rollback", io.nuke_rollback.valid),
      ("nack_rollback", io.nack_rollback.valid)
    )
  generatePerfEvent()
  // end
}