/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.mem

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import utils._
import utility._
import xiangshan._
import xiangshan.ExceptionNO._
import xiangshan.frontend.FtqPtr
import xiangshan.backend._
import xiangshan.backend.fu.fpu._
import xiangshan.backend.rob.RobLsqIO
import xiangshan.backend.Bundles.{DynInst, MemExuOutput, MemMicroOpRbExt}
import xiangshan.backend.rob.RobPtr
import xiangshan.mem.mdp._
import xiangshan.mem.Bundles._
import xiangshan.cache._
import xiangshan.cache.mmu._

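// Load queue pointer: a circular queue pointer into the virtual load queue.
// Per the CircularQueuePtr convention, `flag` toggles on every wrap-around, so
// two pointers with equal `value` can still distinguish full from empty.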
class LqPtr(implicit p: Parameters) extends CircularQueuePtr[LqPtr](
  p => p(XSCoreParamsKey).VirtualLoadQueueSize
){
}

object LqPtr {
  def apply(f: Bool, v: UInt)(implicit p: Parameters): LqPtr = {
    val ptr = Wire(new LqPtr)
    ptr.flag := f
    ptr.value := v
    ptr
  }
}

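// Helpers that extend raw load data to full register width: sign/zero
// extension for integer loads, NaN boxing for narrower FP loads, and
// element-width extension for vector loads.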
trait HasLoadHelper { this: XSModule =>
  def rdataHelper(uop: DynInst, rdata: UInt): UInt = {
    val fpWen = uop.fpWen
    LookupTree(uop.fuOpType, List(
      LSUOpType.lb   -> SignExt(rdata(7, 0) , XLEN),
      LSUOpType.lh   -> SignExt(rdata(15, 0), XLEN),
      /*
          riscv-spec-20191213: 12.2 NaN Boxing of Narrower Values
          Any operation that writes a narrower result to an f register must write
          all 1s to the uppermost FLEN−n bits to yield a legal NaN-boxed value.
      */
      LSUOpType.lw   -> Mux(fpWen, FPU.box(rdata, FPU.S), SignExt(rdata(31, 0), XLEN)),
      LSUOpType.ld   -> Mux(fpWen, FPU.box(rdata, FPU.D), SignExt(rdata(63, 0), XLEN)),
      LSUOpType.lbu  -> ZeroExt(rdata(7, 0) , XLEN),
      LSUOpType.lhu  -> ZeroExt(rdata(15, 0), XLEN),
      LSUOpType.lwu  -> ZeroExt(rdata(31, 0), XLEN),

      // hypervisor
      LSUOpType.hlvb -> SignExt(rdata(7, 0), XLEN),
      LSUOpType.hlvh -> SignExt(rdata(15, 0), XLEN),
      LSUOpType.hlvw -> SignExt(rdata(31, 0), XLEN),
      LSUOpType.hlvd -> SignExt(rdata(63, 0), XLEN),
      LSUOpType.hlvbu -> ZeroExt(rdata(7, 0), XLEN),
      LSUOpType.hlvhu -> ZeroExt(rdata(15, 0), XLEN),
      LSUOpType.hlvwu -> ZeroExt(rdata(31, 0), XLEN),
      LSUOpType.hlvxhu -> ZeroExt(rdata(15, 0), XLEN),
      LSUOpType.hlvxwu -> ZeroExt(rdata(31, 0), XLEN),
    ))
  }
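  // rdataHelper example: `lb` sign-extends a data byte 0x80 to
  // 0xFFFF_FFFF_FFFF_FF80, while `lbu` zero-extends it to 0x0000_0000_0000_0080.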

  def genRdataOH(uop: DynInst): UInt = {
    val fuOpType = uop.fuOpType
    val fpWen    = uop.fpWen
    val result = Cat(
      (fuOpType === LSUOpType.lw && fpWen),
      (fuOpType === LSUOpType.lh && fpWen),
      (fuOpType === LSUOpType.lw && !fpWen) || (fuOpType === LSUOpType.hlvw),
      (fuOpType === LSUOpType.lh && !fpWen) || (fuOpType === LSUOpType.hlvh),
      (fuOpType === LSUOpType.lb)           || (fuOpType === LSUOpType.hlvb),
      (fuOpType === LSUOpType.ld)           || (fuOpType === LSUOpType.hlvd),
      (fuOpType === LSUOpType.lwu)          || (fuOpType === LSUOpType.hlvwu) || (fuOpType === LSUOpType.hlvxwu),
      (fuOpType === LSUOpType.lhu)          || (fuOpType === LSUOpType.hlvhu) || (fuOpType === LSUOpType.hlvxhu),
      (fuOpType === LSUOpType.lbu)          || (fuOpType === LSUOpType.hlvbu),
    )
    result
  }

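  // Consumes the one-hot selector built by genRdataOH above. Cat places its
  // first argument in the MSB, so select bit 0 corresponds to the last Cat
  // entry (lbu-style zero-extended byte) and bit 8 to the first (fp lw,
  // NaN-boxed single); selData below is ordered to match.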
  def newRdataHelper(select: UInt, rdata: UInt): UInt = {
    XSError(PopCount(select) > 1.U, "data selector must be One-Hot!\n")
    val selData = Seq(
      ZeroExt(rdata(7, 0), XLEN),
      ZeroExt(rdata(15, 0), XLEN),
      ZeroExt(rdata(31, 0), XLEN),
      rdata(63, 0),
      SignExt(rdata(7, 0) , XLEN),
      SignExt(rdata(15, 0) , XLEN),
      SignExt(rdata(31, 0) , XLEN),
      FPU.box(rdata, FPU.H),
      FPU.box(rdata, FPU.S)
    )
    Mux1H(select, selData)
  }

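  // One-hot decode of the low 4 address bits: bit i is set when the access
  // starts at byte offset i of the 16-byte data beat.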
  def genDataSelectByOffset(addrOffset: UInt): Vec[Bool] = {
    require(addrOffset.getWidth == 4)
    VecInit((0 until 16).map { i =>
      addrOffset === i.U
    })
  }

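  // For vector loads: alignedType encodes the element width (b00 = 8-bit,
  // b01 = 16-bit, b10 = 32-bit, b11 = 64-bit); the selected element is
  // zero-extended to VLEN.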
  def rdataVecHelper(alignedType: UInt, rdata: UInt): UInt = {
    LookupTree(alignedType, List(
      "b00".U -> ZeroExt(rdata(7, 0), VLEN),
      "b01".U -> ZeroExt(rdata(15, 0), VLEN),
      "b10".U -> ZeroExt(rdata(31, 0), VLEN),
      "b11".U -> ZeroExt(rdata(63, 0), VLEN)
    ))
  }
}

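// Enqueue interface seen by dispatch. Allocation is expected to take effect
// only when both the load queue (canAccept) and the store queue (sqCanAccept)
// can accept the dispatch group; resp returns the LqPtr allocated per request.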
class LqEnqIO(implicit p: Parameters) extends MemBlockBundle {
  val canAccept = Output(Bool())
  val sqCanAccept = Input(Bool())
  val needAlloc = Vec(LSQEnqWidth, Input(Bool()))
  val req = Vec(LSQEnqWidth, Flipped(ValidIO(new DynInst)))
  val resp = Vec(LSQEnqWidth, Output(new LqPtr))
}

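// Debug-mode trigger (hardware breakpoint) address-match vectors exchanged
// between the load pipeline and the load queue, one bit per trigger.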
class LqTriggerIO(implicit p: Parameters) extends XSBundle {
  val hitLoadAddrTriggerHitVec = Input(Vec(TriggerNum, Bool()))
  val lqLoadAddrTriggerHitVec = Output(Vec(TriggerNum, Bool()))
}

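// Top-down performance-analysis hooks: given the vaddr of the load blocking
// the ROB head, report why it stalls (TLB replay/miss, load violation,
// outstanding MSHR, or other replay reasons).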
class LoadQueueTopDownIO(implicit p: Parameters) extends XSBundle {
  val robHeadVaddr = Flipped(Valid(UInt(VAddrBits.W)))
  val robHeadTlbReplay = Output(Bool())
  val robHeadTlbMiss = Output(Bool())
  val robHeadLoadVio = Output(Bool())
  val robHeadLoadMSHR = Output(Bool())
  val robHeadMissInDTlb = Input(Bool())
  val robHeadOtherReplay = Output(Bool())
}

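// Top-level load queue wrapper, composed of the sub-queues instantiated
// below: VirtualLoadQueue (allocation and writeback control), LoadQueueRAR
// and LoadQueueRAW (memory-ordering violation checks), LoadQueueReplay
// (loads waiting to be replayed), LqExceptionBuffer, and LoadQueueUncache
// (MMIO/NC accesses).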
class LoadQueue(implicit p: Parameters) extends XSModule
  with HasDCacheParameters
  with HasCircularQueuePtrHelper
  with HasLoadHelper
  with HasPerfEvents
{
  val io = IO(new Bundle() {
    val redirect = Flipped(Valid(new Redirect))
    val vecFeedback = Vec(VecLoadPipelineWidth, Flipped(ValidIO(new FeedbackToLsqIO)))
    val enq = new LqEnqIO
    val ldu = new Bundle() {
      val stld_nuke_query = Vec(LoadPipelineWidth, Flipped(new LoadNukeQueryIO)) // req from load_s1, resp to load_s2
      val ldld_nuke_query = Vec(LoadPipelineWidth, Flipped(new LoadNukeQueryIO)) // req from load_s1, resp to load_s2
      val ldin            = Vec(LoadPipelineWidth, Flipped(Decoupled(new LqWriteBundle))) // from load_s3
    }
    val sta = new Bundle() {
      val storeAddrIn = Vec(StorePipelineWidth, Flipped(Valid(new LsPipelineBundle))) // from store_s1
    }
    val std = new Bundle() {
      val storeDataIn = Vec(StorePipelineWidth, Flipped(Valid(new MemExuOutput(isVector = true)))) // from store_s0: store data issued from RS to SQ
    }
    val sq = new Bundle() {
      val stAddrReadySqPtr = Input(new SqPtr)
      val stAddrReadyVec   = Input(Vec(StoreQueueSize, Bool()))
      val stDataReadySqPtr = Input(new SqPtr)
      val stDataReadyVec   = Input(Vec(StoreQueueSize, Bool()))
      val stIssuePtr       = Input(new SqPtr)
      val sqEmpty          = Input(Bool())
    }
    val ldout = Vec(LoadPipelineWidth, DecoupledIO(new MemExuOutput))
    val ld_raw_data = Vec(LoadPipelineWidth, Output(new LoadDataFromLQBundle))
    val ncOut = Vec(LoadPipelineWidth, DecoupledIO(new LsPipelineBundle))
    val replay = Vec(LoadPipelineWidth, Decoupled(new LsPipelineBundle))
    // val refill = Flipped(ValidIO(new Refill))
    val tl_d_channel  = Input(new DcacheToLduForwardIO)
    val release = Flipped(Valid(new Release))
    val nuke_rollback = Vec(StorePipelineWidth, Output(Valid(new Redirect)))
    val nack_rollback = Vec(1, Output(Valid(new Redirect))) // from uncache buffer
    val rob = Flipped(new RobLsqIO)
    val uncache = new UncacheWordIO
    val exceptionAddr = new ExceptionAddrIO
    val loadMisalignFull = Input(Bool())
    val misalignAllowSpec = Input(Bool())
    val lqFull = Output(Bool())
    val lqDeq = Output(UInt(log2Up(CommitWidth + 1).W))
    val lqCancelCnt = Output(UInt(log2Up(VirtualLoadQueueSize + 1).W))
    val lq_rep_full = Output(Bool())
    val tlbReplayDelayCycleCtrl = Vec(4, Input(UInt(ReSelectLen.W)))
    val l2_hint = Input(Valid(new L2ToL1Hint()))
    val tlb_hint = Flipped(new TlbHintIO)
    val lqEmpty = Output(Bool())

    val lqDeqPtr = Output(new LqPtr)

    val rarValidCount = Output(UInt())

    val debugTopDown = new LoadQueueTopDownIO
    val noUopsIssued = Input(Bool())
  })

  val loadQueueRAR = Module(new LoadQueueRAR)  //  read-after-read violation
  val loadQueueRAW = Module(new LoadQueueRAW)  //  read-after-write violation
  val loadQueueReplay = Module(new LoadQueueReplay)  //  enqueue if need replay
  val virtualLoadQueue = Module(new VirtualLoadQueue)  //  control state
  val exceptionBuffer = Module(new LqExceptionBuffer) // exception buffer
  val uncacheBuffer = Module(new LoadQueueUncache) // uncache
  /**
   * LoadQueueRAR
   */
  loadQueueRAR.io.redirect   <> io.redirect
  loadQueueRAR.io.release    <> io.release
  loadQueueRAR.io.ldWbPtr    <> virtualLoadQueue.io.ldWbPtr
  loadQueueRAR.io.validCount <> io.rarValidCount
  for (w <- 0 until LoadPipelineWidth) {
    loadQueueRAR.io.query(w).req    <> io.ldu.ldld_nuke_query(w).req // from load_s1
    loadQueueRAR.io.query(w).resp   <> io.ldu.ldld_nuke_query(w).resp // to load_s2
    loadQueueRAR.io.query(w).revoke := io.ldu.ldld_nuke_query(w).revoke // from load_s3
  }

  /**
   * LoadQueueRAW
   */
  loadQueueRAW.io.redirect         <> io.redirect
  loadQueueRAW.io.storeIn          <> io.sta.storeAddrIn
  loadQueueRAW.io.stAddrReadySqPtr <> io.sq.stAddrReadySqPtr
  loadQueueRAW.io.stIssuePtr       <> io.sq.stIssuePtr
  for (w <- 0 until LoadPipelineWidth) {
    loadQueueRAW.io.query(w).req    <> io.ldu.stld_nuke_query(w).req // from load_s1
    loadQueueRAW.io.query(w).resp   <> io.ldu.stld_nuke_query(w).resp // to load_s2
    loadQueueRAW.io.query(w).revoke := io.ldu.stld_nuke_query(w).revoke // from load_s3
  }

  /**
   * VirtualLoadQueue
   */
  virtualLoadQueue.io.redirect      <> io.redirect
  virtualLoadQueue.io.vecCommit     <> io.vecFeedback
  virtualLoadQueue.io.enq           <> io.enq
  virtualLoadQueue.io.ldin          <> io.ldu.ldin // from load_s3
  virtualLoadQueue.io.lqFull        <> io.lqFull
  virtualLoadQueue.io.lqDeq         <> io.lqDeq
  virtualLoadQueue.io.lqCancelCnt   <> io.lqCancelCnt
  virtualLoadQueue.io.lqEmpty       <> io.lqEmpty
  virtualLoadQueue.io.ldWbPtr       <> io.lqDeqPtr

  /**
   * Load queue exception buffer
   */
  exceptionBuffer.io.redirect <> io.redirect
  for (i <- 0 until LoadPipelineWidth) {
    exceptionBuffer.io.req(i).valid := io.ldu.ldin(i).valid && !io.ldu.ldin(i).bits.isvec // from load_s3
    exceptionBuffer.io.req(i).bits := io.ldu.ldin(i).bits
  }
  // vlsu exception!
  for (i <- 0 until VecLoadPipelineWidth) {
    exceptionBuffer.io.req(LoadPipelineWidth + i).valid                 := io.vecFeedback(i).valid && io.vecFeedback(i).bits.feedback(VecFeedbacks.FLUSH) // have exception
    exceptionBuffer.io.req(LoadPipelineWidth + i).bits                  := DontCare
    exceptionBuffer.io.req(LoadPipelineWidth + i).bits.vaddr            := io.vecFeedback(i).bits.vaddr
    exceptionBuffer.io.req(LoadPipelineWidth + i).bits.fullva           := io.vecFeedback(i).bits.vaddr
    exceptionBuffer.io.req(LoadPipelineWidth + i).bits.vaNeedExt        := io.vecFeedback(i).bits.vaNeedExt
    exceptionBuffer.io.req(LoadPipelineWidth + i).bits.gpaddr           := io.vecFeedback(i).bits.gpaddr
    exceptionBuffer.io.req(LoadPipelineWidth + i).bits.uop.uopIdx       := io.vecFeedback(i).bits.uopidx
    exceptionBuffer.io.req(LoadPipelineWidth + i).bits.uop.robIdx       := io.vecFeedback(i).bits.robidx
    exceptionBuffer.io.req(LoadPipelineWidth + i).bits.uop.vpu.vstart   := io.vecFeedback(i).bits.vstart
    exceptionBuffer.io.req(LoadPipelineWidth + i).bits.uop.vpu.vl       := io.vecFeedback(i).bits.vl
    exceptionBuffer.io.req(LoadPipelineWidth + i).bits.uop.exceptionVec := io.vecFeedback(i).bits.exceptionVec
  }
  // mmio non-data error exception
  exceptionBuffer.io.req(LoadPipelineWidth + VecLoadPipelineWidth) := uncacheBuffer.io.exception
  exceptionBuffer.io.req(LoadPipelineWidth + VecLoadPipelineWidth).bits.vaNeedExt := true.B

  loadQueueReplay.io.loadMisalignFull := io.loadMisalignFull
  loadQueueReplay.io.misalignAllowSpec := io.misalignAllowSpec

  io.exceptionAddr <> exceptionBuffer.io.exceptionAddr

  /**
   * Load uncache buffer
   */
  uncacheBuffer.io.redirect <> io.redirect
  uncacheBuffer.io.mmioOut <> io.ldout
  uncacheBuffer.io.ncOut <> io.ncOut
  uncacheBuffer.io.mmioRawData <> io.ld_raw_data
  uncacheBuffer.io.rob <> io.rob
  uncacheBuffer.io.uncache <> io.uncache

  for ((buff, w) <- uncacheBuffer.io.req.zipWithIndex) {
    // from load_s3
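    // NC loads that already returned data in the pipeline (nc_with_data) are
    // filtered out and do not occupy an uncache buffer entry.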
    val ldinBits = io.ldu.ldin(w).bits
    buff.valid := io.ldu.ldin(w).valid && !ldinBits.nc_with_data
    buff.bits := ldinBits
  }

  io.uncache.resp.ready := true.B

  io.nuke_rollback := loadQueueRAW.io.rollback
  io.nack_rollback(0) := uncacheBuffer.io.rollback

  /* <------- DANGEROUS: Don't change sequence here ! -------> */

  /**
   * LoadQueueReplay
   */
  loadQueueReplay.io.redirect         <> io.redirect
  loadQueueReplay.io.enq              <> io.ldu.ldin // from load_s3
  loadQueueReplay.io.storeAddrIn      <> io.sta.storeAddrIn // from store_s1
  loadQueueReplay.io.storeDataIn      <> io.std.storeDataIn // from store_s0
  loadQueueReplay.io.replay           <> io.replay
  // loadQueueReplay.io.refill        <> io.refill
  loadQueueReplay.io.tl_d_channel     <> io.tl_d_channel
  loadQueueReplay.io.stAddrReadySqPtr <> io.sq.stAddrReadySqPtr
  loadQueueReplay.io.stAddrReadyVec   <> io.sq.stAddrReadyVec
  loadQueueReplay.io.stDataReadySqPtr <> io.sq.stDataReadySqPtr
  loadQueueReplay.io.stDataReadyVec   <> io.sq.stDataReadyVec
  loadQueueReplay.io.sqEmpty          <> io.sq.sqEmpty
  loadQueueReplay.io.lqFull           <> io.lq_rep_full
  loadQueueReplay.io.ldWbPtr          <> virtualLoadQueue.io.ldWbPtr
  loadQueueReplay.io.rarFull          <> loadQueueRAR.io.lqFull
  loadQueueReplay.io.rawFull          <> loadQueueRAW.io.lqFull
  loadQueueReplay.io.l2_hint          <> io.l2_hint
  loadQueueReplay.io.tlb_hint         <> io.tlb_hint
  loadQueueReplay.io.tlbReplayDelayCycleCtrl <> io.tlbReplayDelayCycleCtrl

  // TODO: implement it!
  loadQueueReplay.io.vecFeedback := io.vecFeedback

  loadQueueReplay.io.debugTopDown <> io.debugTopDown

  virtualLoadQueue.io.noUopsIssued := io.noUopsIssued

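  // full_mask bits, MSB to LSB: {RAR full, RAW full, replay queue full};
  // e.g. full_mask_100 counts cycles in which only the RAR queue is full.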
  val full_mask = Cat(loadQueueRAR.io.lqFull, loadQueueRAW.io.lqFull, loadQueueReplay.io.lqFull)
  XSPerfAccumulate("full_mask_000", full_mask === 0.U)
  XSPerfAccumulate("full_mask_001", full_mask === 1.U)
  XSPerfAccumulate("full_mask_010", full_mask === 2.U)
  XSPerfAccumulate("full_mask_011", full_mask === 3.U)
  XSPerfAccumulate("full_mask_100", full_mask === 4.U)
  XSPerfAccumulate("full_mask_101", full_mask === 5.U)
  XSPerfAccumulate("full_mask_110", full_mask === 6.U)
  XSPerfAccumulate("full_mask_111", full_mask === 7.U)
  XSPerfAccumulate("nuke_rollback", io.nuke_rollback.map(_.valid).reduce(_ || _).asUInt)
  XSPerfAccumulate("nack_rollback", io.nack_rollback.map(_.valid).reduce(_ || _).asUInt)

  // perf cnt
  val perfEvents = Seq(virtualLoadQueue, loadQueueRAR, loadQueueRAW, loadQueueReplay).flatMap(_.getPerfEvents) ++
  Seq(
    ("full_mask_000", full_mask === 0.U),
    ("full_mask_001", full_mask === 1.U),
    ("full_mask_010", full_mask === 2.U),
    ("full_mask_011", full_mask === 3.U),
    ("full_mask_100", full_mask === 4.U),
    ("full_mask_101", full_mask === 5.U),
    ("full_mask_110", full_mask === 6.U),
    ("full_mask_111", full_mask === 7.U),
    ("nuke_rollback", io.nuke_rollback.map(_.valid).reduce(_ || _).asUInt),
    ("nack_rollback", io.nack_rollback.map(_.valid).reduce(_ || _).asUInt)
  )
  generatePerfEvent()
  // end
}