xref: /XiangShan/src/main/scala/xiangshan/mem/lsqueue/LoadQueueRAW.scala (revision d2b20d1a96e238e36a849bd253f65ec7b6a5db38)
/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.mem

import chisel3._
import chisel3.util._
import chipsalliance.rocketchip.config._
import xiangshan._
import xiangshan.backend.rob.RobPtr
import xiangshan.cache._
import xiangshan.frontend.FtqPtr
import xiangshan.mem.mdp._
import utils._
import utility._

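//  LoadQueueRAW tracks loads that were issued while an older store address was
//  still unknown. When a store address writes back, the queue is CAM-searched for
//  younger loads to the same (8-byte-aligned) address that have already received
//  data; if one is found, a rollback redirect is raised from the violating load.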
class LoadQueueRAW(implicit p: Parameters) extends XSModule
  with HasDCacheParameters
  with HasCircularQueuePtrHelper
  with HasLoadHelper
  with HasPerfEvents
{
  val io = IO(new Bundle() {
    val redirect = Flipped(ValidIO(new Redirect))
    val query = Vec(LoadPipelineWidth, Flipped(new LoadViolationQueryIO))
    val storeIn = Vec(StorePipelineWidth, Flipped(Valid(new LsPipelineBundle)))
    val rollback = Output(Valid(new Redirect))
    val stAddrReadySqPtr = Input(new SqPtr)
    val stIssuePtr = Input(new SqPtr)
    val lqFull = Output(Bool())
  })

  println("LoadQueueRAW: size " + LoadQueueRAWSize)
  //  LoadQueueRAW field
  //  +-------+--------+-------+-------+-----------+
  //  | Valid |  uop   | PAddr | Mask  | Datavalid |
  //  +-------+--------+-------+-------+-----------+
  //
  //  Field descriptions:
  //  Allocated   : entry has been allocated already
  //  MicroOp     : inst's microOp
  //  PAddr       : physical address.
  //  Mask        : data mask
  //  Datavalid   : data valid
  //
  val allocated = RegInit(VecInit(List.fill(LoadQueueRAWSize)(false.B))) // The control signals need to explicitly indicate the initial value
  val uop = Reg(Vec(LoadQueueRAWSize, new MicroOp))
  val paddrModule = Module(new LqPAddrModule(
    gen = UInt(PAddrBits.W),
    numEntries = LoadQueueRAWSize,
    numRead = LoadPipelineWidth,
    numWrite = LoadPipelineWidth,
    numWBank = LoadQueueNWriteBanks,
    numWDelay = 2,
    numCamPort = StorePipelineWidth
  ))
  paddrModule.io := DontCare
  val maskModule = Module(new LqMaskModule(
    gen = UInt(8.W),
    numEntries = LoadQueueRAWSize,
    numRead = LoadPipelineWidth,
    numWrite = LoadPipelineWidth,
    numWBank = LoadQueueNWriteBanks,
    numWDelay = 2,
    numCamPort = StorePipelineWidth
  ))
  maskModule.io := DontCare
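  // datavalid: the load had already received its data when it queried this queue;
  // only such loads can have consumed stale data, so only they can trigger a rollback.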
  val datavalid = RegInit(VecInit(List.fill(LoadQueueRAWSize)(false.B)))

  // freeList: tracks which entry indices are free for allocation.
  // +---+---+--------------+-----+-----+
  // | 0 | 1 |      ......  | n-2 | n-1 |
  // +---+---+--------------+-----+-----+
  val freeList = Module(new FreeList(
    size = LoadQueueRAWSize,
    allocWidth = LoadPipelineWidth,
    freeWidth = 4,
    moduleName = "LoadQueueRAW freelist"
  ))
  freeList.io := DontCare

  //  LoadQueueRAW enqueue
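  //  A load only needs a RAW entry if at least one store older than it (in the
  //  store queue) still has an unresolved address, and the load itself is not
  //  being flushed by the current redirect.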
  val canEnqueue = io.query.map(_.req.valid)
  val cancelEnqueue = io.query.map(_.req.bits.uop.robIdx.needFlush(io.redirect))
  val allAddrCheck = io.stIssuePtr === io.stAddrReadySqPtr
  val hasAddrInvalidStore = io.query.map(_.req.bits.uop.sqIdx).map(sqIdx => {
    Mux(!allAddrCheck, isBefore(io.stAddrReadySqPtr, sqIdx), false.B)
  })
  val needEnqueue = canEnqueue.zip(hasAddrInvalidStore).zip(cancelEnqueue).map { case ((v, r), c) => v && r && !c }
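  //  Writes into paddrModule/maskModule take a couple of cycles (numWDelay = 2) before
  //  they are visible to the store-side CAM search. bypassPAddr/bypassMask keep the most
  //  recently enqueued paddr/mask per load pipe so a store search in that window still sees them.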
  val bypassPAddr = Reg(Vec(LoadPipelineWidth, UInt(PAddrBits.W)))
  val bypassMask = Reg(Vec(LoadPipelineWidth, UInt(8.W)))

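  //  Allocation compacts the enqueue requests: enqOffset(w) counts how many
  //  lower-numbered load pipes also enqueue this cycle, so the requests map onto
  //  consecutive free-list slots. Hypothetical three-lane example: needEnqueue =
  //  (1, 0, 1) gives offsets (0, 1, 1), so lane 0 takes allocateSlot(0) and lane 2
  //  takes allocateSlot(1).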
  // Allocate logic
  val enqValidVec = Wire(Vec(LoadPipelineWidth, Bool()))
  val enqIndexVec = Wire(Vec(LoadPipelineWidth, UInt()))
  val enqOffset = Wire(Vec(LoadPipelineWidth, UInt()))

  // Enqueue
  for ((enq, w) <- io.query.map(_.req).zipWithIndex) {
    paddrModule.io.wen(w) := false.B
    maskModule.io.wen(w) := false.B
    freeList.io.doAllocate(w) := false.B

    enqOffset(w) := PopCount(needEnqueue.take(w))
    freeList.io.allocateReq(w) := needEnqueue(w)

    //  Allocate ready
    enqValidVec(w) := freeList.io.canAllocate(enqOffset(w))
    enqIndexVec(w) := freeList.io.allocateSlot(enqOffset(w))
    enq.ready := Mux(needEnqueue(w), enqValidVec(w), true.B)

    val enqIndex = enqIndexVec(w)
    when (needEnqueue(w) && enq.ready) {
      val debug_robIdx = enq.bits.uop.robIdx.asUInt
      XSError(allocated(enqIndex), p"LoadQueueRAW: You can not write a valid entry! check: ldu $w, robIdx $debug_robIdx")

      freeList.io.doAllocate(w) := true.B

      //  Allocate new entry
      allocated(enqIndex) := true.B

      //  Write paddr
      paddrModule.io.wen(w) := true.B
      paddrModule.io.waddr(w) := enqIndex
      paddrModule.io.wdata(w) := enq.bits.paddr
      bypassPAddr(w) := enq.bits.paddr

      //  Write mask
      maskModule.io.wen(w) := true.B
      maskModule.io.waddr(w) := enqIndex
      maskModule.io.wdata(w) := enq.bits.mask
      bypassMask(w) := enq.bits.mask

      //  Fill info
      uop(enqIndex) := enq.bits.uop
      datavalid(enqIndex) := enq.bits.datavalid
    }
  }

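  //  The query response is returned one cycle after the request. In this scheme a
  //  detected violation is handled by the rollback redirect below, so the response
  //  never asks the load to replay from fetch.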
  for ((query, w) <- io.query.map(_.resp).zipWithIndex) {
    query.valid := RegNext(io.query(w).req.valid)
    query.bits.replayFromFetch := RegNext(false.B)
  }

  //  LoadQueueRAW deallocate
  val freeMaskVec = Wire(Vec(LoadQueueRAWSize, Bool()))

  // init
  freeMaskVec.map(e => e := false.B)

  // Release an entry once all stores older than the load have their addresses ready,
  // or when the load is flushed by a redirect.
  for (i <- 0 until LoadQueueRAWSize) {
    val deqNotBlock = Mux(!allAddrCheck, !isBefore(io.stAddrReadySqPtr, uop(i).sqIdx), true.B)
    val needCancel = uop(i).robIdx.needFlush(io.redirect)

    when (allocated(i) && (deqNotBlock || needCancel)) {
      allocated(i) := false.B
      freeMaskVec(i) := true.B
    }
  }

  // If the query side releases the load (e.g. it must be replayed), deallocate the entry allocated for it last cycle.
  val lastCanAccept = RegNext(VecInit(needEnqueue.zip(enqValidVec).map(x => x._1 && x._2)))
  val lastAllocIndex = RegNext(enqIndexVec)

  for ((release, w) <- io.query.map(_.release).zipWithIndex) {
    val releaseValid = release && lastCanAccept(w)
    val releaseIndex = lastAllocIndex(w)

    when (allocated(releaseIndex) && releaseValid) {
      allocated(releaseIndex) := false.B
      freeMaskVec(releaseIndex) := true.B
    }
  }
  freeList.io.free := freeMaskVec.asUInt

  io.lqFull := freeList.io.empty

  /**
    * Store-Load Memory violation detection
    * Scheme 1 (current scheme): flush the pipeline, then re-fetch from the load instruction (like the old load queue).
    * Scheme 2                 : re-fetch instructions from the first instruction after the store instruction.
    *
    * When a store address writes back, it searches this queue for younger load instructions
    * with the same physical address. Those loads got wrong data and need re-execution.
    *
    * Cycle 0: Store Writeback
    *   CAM the store address and mask against all valid entries to generate the match vector.
    * Cycle 1: Select the oldest load within each select group.
    * Cycle x: Redirect Fire
    *   Choose the oldest load among the winners of each select group.
    *   Prepare the redirect request according to the detected violation.
    *   Fire the redirect request (if valid).
    */
  //              SelectGroup 0         SelectGroup 1          SelectGroup y
  // stage 0:       lq  lq  lq  ......    lq  lq  lq  .......    lq  lq  lq
  //                |   |   |             |   |   |              |   |   |
  // stage 1:       lq  lq  lq  ......    lq  lq  lq  .......    lq  lq  lq
  //                 \  |  /    ......     \  |  /    .......     \  |  /
  // stage 2:           lq                    lq                     lq
  //                     \  |  /  .......  \  |  /   ........  \  |  /
  // stage 3:               lq                lq                  lq
  //                                          ...
  //                                          ...
  //                                           |
  // stage x:                                  lq
  //                                           |
  //                                       rollback req

  // select logic
  val SelectGroupSize = RollbackGroupSize
  val lgSelectGroupSize = log2Ceil(SelectGroupSize)
  val TotalSelectCycles = scala.math.ceil(log2Ceil(LoadQueueRAWSize).toFloat / lgSelectGroupSize).toInt + 1
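  // One register stage for the CAM match plus one per select-tree level; e.g.
  // (hypothetically) LoadQueueRAWSize = 64 with SelectGroupSize = 8 gives
  // ceil(6 / 3) + 1 = 3 cycles from store writeback to a selected rollback candidate.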

  def selectPartialOldest[T <: XSBundleWithMicroOp](valid: Seq[Bool], bits: Seq[T]): (Seq[Bool], Seq[T]) = {
    assert(valid.length == bits.length)
    if (valid.length == 0 || valid.length == 1) {
      (valid, bits)
    } else if (valid.length == 2) {
      val res = Seq.fill(2)(Wire(ValidIO(chiselTypeOf(bits(0)))))
      for (i <- res.indices) {
        res(i).valid := valid(i)
        res(i).bits := bits(i)
      }
      val oldest = Mux(valid(0) && valid(1), Mux(isAfter(bits(0).uop.robIdx, bits(1).uop.robIdx), res(1), res(0)), Mux(valid(0) && !valid(1), res(0), res(1)))
      (Seq(oldest.valid), Seq(oldest.bits))
    } else {
      val left = selectPartialOldest(valid.take(valid.length / 2), bits.take(bits.length / 2))
      val right = selectPartialOldest(valid.takeRight(valid.length - (valid.length / 2)), bits.takeRight(bits.length - (bits.length / 2)))
      selectPartialOldest(left._1 ++ right._1, left._2 ++ right._2)
    }
  }

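  // selectOldest builds a pipelined tree on top of selectPartialOldest (a purely
  // combinational tournament on robIdx): the candidates are split into groups of
  // SelectGroupSize, the oldest of each group is picked, the per-group winners are
  // registered (dropping any that the redirect flushes), and the function recurses,
  // adding one pipeline stage per level until a single candidate remains.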
  def selectOldest[T <: XSBundleWithMicroOp](valid: Seq[Bool], bits: Seq[T]): (Seq[Bool], Seq[T]) = {
    assert(valid.length == bits.length)
    val numSelectGroups = scala.math.ceil(valid.length.toFloat / SelectGroupSize).toInt

    // group info
    val selectValidGroups =
      if (valid.length <= SelectGroupSize) {
        Seq(valid)
      } else {
        (0 until numSelectGroups).map(g => {
          if (valid.length < (g + 1) * SelectGroupSize) {
            valid.takeRight(valid.length - g * SelectGroupSize)
          } else {
            (0 until SelectGroupSize).map(j => valid(g * SelectGroupSize + j))
          }
        })
      }
    val selectBitsGroups =
      if (bits.length <= SelectGroupSize) {
        Seq(bits)
      } else {
        (0 until numSelectGroups).map(g => {
          if (bits.length < (g + 1) * SelectGroupSize) {
            bits.takeRight(bits.length - g * SelectGroupSize)
          } else {
            (0 until SelectGroupSize).map(j => bits(g * SelectGroupSize + j))
          }
        })
      }

    // select logic
    if (valid.length <= SelectGroupSize) {
      val (selValid, selBits) = selectPartialOldest(valid, bits)
      (Seq(RegNext(selValid(0) && !selBits(0).uop.robIdx.needFlush(io.redirect))), Seq(RegNext(selBits(0))))
    } else {
      val select = (0 until numSelectGroups).map(g => {
        val (selValid, selBits) = selectPartialOldest(selectValidGroups(g), selectBitsGroups(g))
        (RegNext(selValid(0) && !selBits(0).uop.robIdx.needFlush(io.redirect)), RegNext(selBits(0)))
      })
      selectOldest(select.map(_._1), select.map(_._2))
    }
  }

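  // detectRollback(i): for store pipeline i, CAM-match the store paddr/mask against
  // all entries (plus the bypass registers covering entries whose CAM write is still
  // in flight), keep entries that are younger than the store, already have data and
  // are not being flushed, then pick the oldest such load through the select tree.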
  def detectRollback(i: Int) = {
    paddrModule.io.violationMdata(i) := io.storeIn(i).bits.paddr
    maskModule.io.violationMdata(i) := io.storeIn(i).bits.mask

    val bypassPaddrMask = RegNext(VecInit((0 until LoadPipelineWidth).map(j => bypassPAddr(j)(PAddrBits-1, 3) === io.storeIn(i).bits.paddr(PAddrBits-1, 3))))
    val bypassMMask = RegNext(VecInit((0 until LoadPipelineWidth).map(j => (bypassMask(j) & io.storeIn(i).bits.mask).orR)))
    val bypassMaskUInt = (0 until LoadPipelineWidth).map(j =>
      Fill(LoadQueueRAWSize, RegNext(RegNext(io.query(j).req.fire))) & Mux(bypassPaddrMask(j) && bypassMMask(j), UIntToOH(RegNext(RegNext(enqIndexVec(j)))), 0.U(LoadQueueRAWSize.W))
    ).reduce(_|_)

    val addrMaskMatch = RegNext(paddrModule.io.violationMmask(i).asUInt & maskModule.io.violationMmask(i).asUInt) | bypassMaskUInt
    val entryNeedCheck = RegNext(VecInit((0 until LoadQueueRAWSize).map(j => {
      allocated(j) && isAfter(uop(j).robIdx, io.storeIn(i).bits.uop.robIdx) && datavalid(j) && !uop(j).robIdx.needFlush(io.redirect)
    })))
    val lqViolationSelVec = VecInit((0 until LoadQueueRAWSize).map(j => {
      addrMaskMatch(j) && entryNeedCheck(j)
    }))

    val lqViolationSelUopExts = uop.map(uop => {
      val wrapper = Wire(new XSBundleWithMicroOp)
      wrapper.uop := uop
      wrapper
    })

    // select logic
    val lqSelect = selectOldest(lqViolationSelVec, lqViolationSelUopExts)

    // select one inst
    val lqViolation = lqSelect._1(0)
    val lqViolationUop = lqSelect._2(0).uop

    XSDebug(
      lqViolation,
      "need rollback (ld wb before store) pc %x robidx %d target %x\n",
      io.storeIn(i).bits.uop.cf.pc, io.storeIn(i).bits.uop.robIdx.asUInt, lqViolationUop.robIdx.asUInt
    )

    (lqViolation, lqViolationUop)
  }

  // select rollback (part1) and generate rollback request
  // rollback check
  // Lq rollback seq check is done in s3 (next stage), as getting rollbackLq MicroOp is slow
  val rollbackLqWb = Wire(Vec(StorePipelineWidth, Valid(new MicroOpRbExt)))
  val stFtqIdx = Wire(Vec(StorePipelineWidth, new FtqPtr))
  val stFtqOffset = Wire(Vec(StorePipelineWidth, UInt(log2Up(PredictWidth).W)))
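  // The store-side qualifiers (valid, ftqPtr, ftqOffset) are delayed by TotalSelectCycles
  // so that they line up with the output of the select tree inside detectRollback.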
  for (w <- 0 until StorePipelineWidth) {
    val detectedRollback = detectRollback(w)
    rollbackLqWb(w).valid := detectedRollback._1 && DelayN(io.storeIn(w).valid && !io.storeIn(w).bits.miss, TotalSelectCycles)
    rollbackLqWb(w).bits.uop := detectedRollback._2
    rollbackLqWb(w).bits.flag := w.U
    stFtqIdx(w) := DelayN(io.storeIn(w).bits.uop.cf.ftqPtr, TotalSelectCycles)
    stFtqOffset(w) := DelayN(io.storeIn(w).bits.uop.cf.ftqOffset, TotalSelectCycles)
  }

  val rollbackLqWbValid = rollbackLqWb.map(x => x.valid && !x.bits.uop.robIdx.needFlush(io.redirect))
  val rollbackLqWbBits = rollbackLqWb.map(x => x.bits)

  // select rollback (part2), generate rollback request, then fire rollback request
  // Note that the violating load itself must be flushed and re-executed, so the
  // rollback redirect uses RedirectLevel.flush.

  // select uop in parallel
  val lqs = selectPartialOldest(rollbackLqWbValid, rollbackLqWbBits)
  val rollbackUopExt = lqs._2(0)
  val rollbackUop = rollbackUopExt.uop
  val rollbackStFtqIdx = stFtqIdx(rollbackUopExt.flag)
  val rollbackStFtqOffset = stFtqOffset(rollbackUopExt.flag)

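  // Build the redirect: it targets the violating load itself (cfiUpdate.target is the
  // load's pc), and flag selects which store pipe's ftqIdx/ftqOffset to report.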
  // check if rollback request is still valid in parallel
  io.rollback.bits := DontCare
  io.rollback.bits.robIdx := rollbackUop.robIdx
  io.rollback.bits.ftqIdx := rollbackUop.cf.ftqPtr
  io.rollback.bits.stFtqIdx := rollbackStFtqIdx
  io.rollback.bits.ftqOffset := rollbackUop.cf.ftqOffset
  io.rollback.bits.stFtqOffset := rollbackStFtqOffset
  io.rollback.bits.level := RedirectLevel.flush
  io.rollback.bits.interrupt := DontCare
  io.rollback.bits.cfiUpdate := DontCare
  io.rollback.bits.cfiUpdate.target := rollbackUop.cf.pc
  io.rollback.bits.debug_runahead_checkpoint_id := rollbackUop.debugInfo.runahead_checkpoint_id
  // io.rollback.bits.pc := DontCare

  io.rollback.valid := VecInit(rollbackLqWbValid).asUInt.orR

  // perf cnt
  val canEnqCount = PopCount(io.query.map(_.req.fire))
  val validCount = freeList.io.validCount
  val allowEnqueue = validCount <= (LoadQueueRAWSize - LoadPipelineWidth).U

  QueuePerf(LoadQueueRAWSize, validCount, !allowEnqueue)
  XSPerfAccumulate("enqs", canEnqCount)
  XSPerfAccumulate("stld_rollback", io.rollback.valid)
  val perfEvents: Seq[(String, UInt)] = Seq(
    ("enq ", canEnqCount),
    ("stld_rollback", io.rollback.valid),
  )
  generatePerfEvent()
  // end
}