xref: /XiangShan/src/main/scala/xiangshan/cache/dcache/Uncache.scala (revision dd3d70bad8def5b15d6c20ec5888b367b0575198)
1/***************************************************************************************
2* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
3* Copyright (c) 2020-2021 Peng Cheng Laboratory
4*
5* XiangShan is licensed under Mulan PSL v2.
6* You can use this software according to the terms and conditions of the Mulan PSL v2.
7* You may obtain a copy of Mulan PSL v2 at:
8*          http://license.coscl.org.cn/MulanPSL2
9*
10* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13*
14* See the Mulan PSL v2 for more details.
15***************************************************************************************/
16
17package xiangshan.cache
18
19import org.chipsalliance.cde.config.Parameters
20import chisel3._
21import chisel3.util._
22import utils._
23import utility._
24import freechips.rocketchip.diplomacy.{IdRange, LazyModule, LazyModuleImp, TransferSizes}
25import freechips.rocketchip.tilelink.{TLArbiter, TLBundleA, TLBundleD, TLClientNode, TLEdgeOut, TLMasterParameters, TLMasterPortParameters}
26import xiangshan._
27import xiangshan.mem._
28import xiangshan.mem.Bundles._
29import coupledL2.{MemBackTypeMM, MemBackTypeMMField, MemPageTypeNC, MemPageTypeNCField}
30import difftest._
31
trait HasUncacheBufferParameters extends HasXSParameter with HasDCacheParameters {

  /** Byte-granular merge of a new (data, mask) pair onto an old one.
    * Bytes selected by `newMask` are taken from `newData`; all other bytes
    * keep their `oldData` value. Returns the merged data together with the
    * union of both byte masks.
    */
  def doMerge(oldData: UInt, oldMask: UInt, newData: UInt, newMask: UInt): (UInt, UInt) = {
    val mergedBytes = Seq.tabulate(DataBytes) { i =>
      Mux(newMask(i), newData(8 * i + 7, 8 * i), oldData(8 * i + 7, 8 * i))
    }
    (VecInit(mergedBytes).asUInt, newMask | oldMask)
  }

  // Width of an index into the uncache buffer.
  def INDEX_WIDTH = log2Up(UncacheBufferSize)
  // Number of offset bits inside one XLEN-wide block.
  def BLOCK_OFFSET = log2Up(XLEN / 8)
  // Strip the in-block offset, keeping only the block address bits.
  def getBlockAddr(x: UInt) = x >> BLOCK_OFFSET
}
46
47abstract class UncacheBundle(implicit p: Parameters) extends XSBundle with HasUncacheBufferParameters
48
49abstract class UncacheModule(implicit p: Parameters) extends XSModule with HasUncacheBufferParameters
50
51
/** Drain handshake: `valid` requests that the uncache buffer be flushed,
  * `empty` reports back that the buffer no longer holds any valid entry. */
class UncacheFlushBundle extends Bundle {
  val valid = Output(Bool())
  val empty = Input(Bool())
}
56
/** One slot of the uncache buffer: the buffered request payload plus the
  * error status captured from the bus response. */
class UncacheEntry(implicit p: Parameters) extends UncacheBundle {
  val cmd = UInt(M_SZ.W)
  val addr = UInt(PAddrBits.W)
  val vaddr = UInt(VAddrBits.W)
  val data = UInt(XLEN.W)
  val mask = UInt(DataBytes.W)
  val nc = Bool()
  val atomic = Bool()
  val memBackTypeMM = Bool()

  val resp_nderr = Bool()

  /* NOTE: if internal forward logic is supported, uncomment these */
  // val fwd_data = UInt(XLEN.W)
  // val fwd_mask = UInt(DataBytes.W)

  /** Initialize this entry from a fresh LSQ request. */
  def set(x: UncacheWordReq): Unit = {
    addr := x.addr
    vaddr := x.vaddr
    cmd := x.cmd
    data := x.data
    mask := x.mask
    atomic := x.atomic
    nc := x.nc
    memBackTypeMM := x.memBackTypeMM
    resp_nderr := false.B
    // fwd_data := 0.U
    // fwd_mask := 0.U
  }

  /** Merge a same-block request into this entry, then realign addr/vaddr to
    * the first masked byte so the resulting bus access stays aligned. */
  def update(x: UncacheWordReq): Unit = {
    val (mergedData, mergedMask) = doMerge(data, mask, x.data, x.mask)
    // the lowest set mask bit yields the new in-block byte offset
    val (firstOffset, maskNonEmpty) = PriorityEncoderWithFlag(mergedMask)
    data := mergedData
    mask := mergedMask
    when(maskNonEmpty) {
      addr := (getBlockAddr(addr) << BLOCK_OFFSET) | firstOffset
      vaddr := (getBlockAddr(vaddr) << BLOCK_OFFSET) | firstOffset
    }
  }

  /** Capture the TileLink D-channel response: load data and error flags. */
  def update(x: TLBundleD): Unit = {
    resp_nderr := x.denied || x.corrupt
    when(cmd === MemoryOpConstants.M_XRD) {
      data := x.data
    }
  }

  // def update(forwardData: UInt, forwardMask: UInt): Unit = {
  //   fwd_data := forwardData
  //   fwd_mask := forwardMask
  // }

  /** Build the response word sent back to the LSQ for entry id `eid`.
    * NOTE: internal forward data could be merged into `data` here if the
    * forward logic above is ever enabled. */
  def toUncacheWordResp(eid: UInt): UncacheWordResp = {
    val resp = Wire(new UncacheWordResp)
    resp := DontCare
    resp.id := eid
    resp.data := data
    resp.nc := nc
    resp.nderr := resp_nderr
    resp.is2lq := cmd === MemoryOpConstants.M_XRD
    resp.miss := false.B
    resp.replay := false.B
    resp.tag_error := false.B
    resp.error := false.B
    resp
  }
}
130
/** Lifecycle tracker for one uncache-buffer entry.
  * Flow: valid (optionally waitSame) -> inflight -> waitReturn -> free. */
class UncacheEntryState(implicit p: Parameters) extends DCacheBundle {
  val valid = Bool()
  val inflight = Bool() // request issued to L2, response pending
  val waitSame = Bool() // blocked behind an inflight same-block entry
  val waitReturn = Bool() // response received, waiting to go back to LSQ

  def init: Unit = {
    valid := false.B
    inflight := false.B
    waitSame := false.B
    waitReturn := false.B
  }

  def isValid(): Bool = valid
  def isInflight(): Bool = valid && inflight
  def isWaitReturn(): Bool = valid && waitReturn
  def isWaitSame(): Bool = valid && waitSame
  // ready for the bus: allocated and not in any transient state
  def can2Bus(): Bool = valid && !(inflight || waitSame || waitReturn)
  def can2Lsq(): Bool = valid && waitReturn
  def canMerge(): Bool = valid && !inflight
  def isFwdOld(): Bool = valid && (inflight || waitReturn)
  def isFwdNew(): Bool = valid && !(inflight || waitReturn)

  def setValid(x: Bool): Unit = { valid := x }
  def setInflight(x: Bool): Unit = { inflight := x }
  def setWaitReturn(x: Bool): Unit = { waitReturn := x }
  def setWaitSame(x: Bool): Unit = { waitSame := x }

  /** Bus response arrived: leave inflight, wait to return to the LSQ. */
  def updateUncacheResp(): Unit = {
    assert(inflight, "The request was not sent and a response was received")
    inflight := false.B
    waitReturn := true.B
  }

  /** Response handed back to the LSQ: release the slot entirely. */
  def updateReturn(): Unit = {
    valid := false.B
    inflight := false.B
    waitSame := false.B
    waitReturn := false.B
  }
}
172
/** Top-level IO of the uncache buffer. */
class UncacheIO(implicit p: Parameters) extends DCacheBundle {
  val hartId = Input(UInt())
  val enableOutstanding = Input(Bool()) // allow multiple inflight bus requests
  val flush = Flipped(new UncacheFlushBundle)
  val lsq = Flipped(new UncacheWordIO)
  val forward = Vec(LoadPipelineWidth, Flipped(new LoadForwardQueryIO))
}
180
181// convert DCacheIO to TileLink
182// for Now, we only deal with TL-UL
183
/** Diplomacy wrapper for the uncache buffer: declares one TL-UL master port
  * with one source id per buffer entry. */
class Uncache()(implicit p: Parameters) extends LazyModule with HasXSParameter {
  override def shouldBeInlined: Boolean = false
  // one TileLink source id per buffer entry
  def idRange: Int = UncacheBufferSize

  val clientParameters = TLMasterPortParameters.v1(
    clients = Seq(TLMasterParameters.v1(
      "uncache",
      sourceId = IdRange(0, idRange)
    )),
    // extra request user fields: memory-backed flag and non-cacheable page flag
    requestFields = Seq(MemBackTypeMMField(), MemPageTypeNCField())
  )
  val clientNode = TLClientNode(Seq(clientParameters))

  lazy val module = new UncacheImp(this)
}
199
200/* Uncache Buffer */
/* Uncache Buffer implementation.
 * Buffers MMIO/NC requests from the LSQ, issues them over TL-UL, merges
 * mergeable NC accesses into existing entries, returns responses to the LSQ,
 * and forwards buffered store data to the load units.
 * Fixes vs. previous revision: corrected "Uncahe" typo in the elaboration
 * log and made the response debug print show resp data instead of req data.
 */
class UncacheImp(outer: Uncache) extends LazyModuleImp(outer)
  with HasTLDump
  with HasXSParameter
  with HasUncacheBufferParameters
  with HasPerfEvents
{
  println(s"Uncache Buffer Size: $UncacheBufferSize entries")
  val io = IO(new UncacheIO)

  val (bus, edge) = outer.clientNode.out.head

  val req  = io.lsq.req
  val resp = io.lsq.resp
  val mem_acquire = bus.a
  val mem_grant   = bus.d
  val req_ready = WireInit(false.B)

  // assign default values to output signals
  bus.b.ready := false.B
  bus.c.valid := false.B
  bus.c.bits  := DontCare
  bus.d.ready := false.B
  bus.e.valid := false.B
  bus.e.bits  := DontCare
  io.lsq.req.ready := req_ready
  io.lsq.resp.valid := false.B
  io.lsq.resp.bits := DontCare


  /******************************************************************
   * Data Structure
   ******************************************************************/

  val entries = Reg(Vec(UncacheBufferSize, new UncacheEntry))
  val states = RegInit(VecInit(Seq.fill(UncacheBufferSize)(0.U.asTypeOf(new UncacheEntryState))))
  val s_idle :: s_inflight :: s_wait_return :: Nil = Enum(3)
  val uState = RegInit(s_idle)

  // drain buffer: a one-cycle pulse whenever a drain trigger fires while
  // the buffer still holds entries (the elsewhen/otherwise both clear it)
  val empty = Wire(Bool())
  val f1_needDrain = Wire(Bool())
  val do_uarch_drain = RegInit(false.B)
  when((f1_needDrain || io.flush.valid) && !empty){
    do_uarch_drain := true.B
  }.elsewhen(empty){
    do_uarch_drain := false.B
  }.otherwise{
    do_uarch_drain := false.B
  }

  val q0_entry = Wire(new UncacheEntry)
  val q0_canSentIdx = Wire(UInt(INDEX_WIDTH.W))
  val q0_canSent = Wire(Bool())


  /******************************************************************
   * Functions
   ******************************************************************/
  def sizeMap[T <: Data](f: Int => T) = VecInit((0 until UncacheBufferSize).map(f))
  def sizeForeach[T <: Data](f: Int => Unit) = (0 until UncacheBufferSize).map(f)
  def isStore(e: UncacheEntry): Bool = e.cmd === MemoryOpConstants.M_XWR
  def isStore(x: UInt): Bool = x === MemoryOpConstants.M_XWR
  def addrMatch(x: UncacheEntry, y: UncacheWordReq) : Bool = getBlockAddr(x.addr) === getBlockAddr(y.addr)
  def addrMatch(x: UncacheWordReq, y: UncacheEntry) : Bool = getBlockAddr(x.addr) === getBlockAddr(y.addr)
  def addrMatch(x: UncacheEntry, y: UncacheEntry) : Bool = getBlockAddr(x.addr) === getBlockAddr(y.addr)
  def addrMatch(x: UInt, y: UInt) : Bool = getBlockAddr(x) === getBlockAddr(y)

  // mask must describe a naturally aligned, power-of-two-sized access
  def continueAndAlign(mask: UInt): Bool = {
    val res =
      PopCount(mask) === 1.U ||
      mask === 0b00000011.U ||
      mask === 0b00001100.U ||
      mask === 0b00110000.U ||
      mask === 0b11000000.U ||
      mask === 0b00001111.U ||
      mask === 0b11110000.U ||
      mask === 0b11111111.U
    res
  }

  def canMergePrimary(x: UncacheWordReq, e: UncacheEntry, eid: UInt): Bool = {
    // vaddr same, properties same
    getBlockAddr(x.vaddr) === getBlockAddr(e.vaddr) &&
      x.cmd === e.cmd && x.nc && e.nc &&
      x.memBackTypeMM === e.memBackTypeMM && !x.atomic && !e.atomic &&
      continueAndAlign(x.mask | e.mask) &&
    // not receiving uncache response, not waitReturn -> no wake-up signal in these cases
      !(mem_grant.fire && mem_grant.bits.source === eid || states(eid).isWaitReturn())
  }

  def canMergeSecondary(eid: UInt): Bool = {
    // old entry is not inflight and not being selected for sending this cycle
    states(eid).canMerge() && !(q0_canSent && q0_canSentIdx === eid)
  }

  /******************************************************************
   * uState for non-outstanding
   ******************************************************************/

  switch(uState){
    is(s_idle){
      when(mem_acquire.fire){
        uState := s_inflight
      }
    }
    is(s_inflight){
      when(mem_grant.fire){
        uState := s_wait_return
      }
    }
    is(s_wait_return){
      when(resp.fire){
        uState := s_idle
      }
    }
  }


  /******************************************************************
   * Enter Buffer
   *  Version 0 (better timing)
   *    e0 judge: alloc/merge write vec
   *    e1 alloc
   *
   *  Version 1 (better performance)
   *    e0: solved in one cycle for achieving the original performance.
   *    e1: return idResp to set sid for handshake
   ******************************************************************/

  /* e0: merge/alloc */
  val e0_fire = req.fire
  val e0_req_valid = req.valid
  val e0_req = req.bits

  // For each entry, classify an address-matching incoming request as
  // reject / merge / alloc-but-wait-for-same-block.
  val e0_rejectVec = Wire(Vec(UncacheBufferSize, Bool()))
  val e0_mergeVec = Wire(Vec(UncacheBufferSize, Bool()))
  val e0_allocWaitSameVec = Wire(Vec(UncacheBufferSize, Bool()))
  sizeForeach(i => {
    val valid = e0_req_valid && states(i).isValid()
    val isAddrMatch = addrMatch(e0_req, entries(i))
    val canMerge1 = canMergePrimary(e0_req, entries(i), i.U)
    val canMerge2 = canMergeSecondary(i.U)
    e0_rejectVec(i) := valid && isAddrMatch && !canMerge1
    e0_mergeVec(i) := valid && isAddrMatch && canMerge1 && canMerge2
    e0_allocWaitSameVec(i) := valid && isAddrMatch && canMerge1 && !canMerge2
  })
  assert(PopCount(e0_mergeVec) <= 1.U, "Uncache buffer should not merge multiple entries")

  val e0_invalidVec = sizeMap(i => !states(i).isValid())
  val (e0_mergeIdx, e0_canMerge) = PriorityEncoderWithFlag(e0_mergeVec)
  val (e0_allocIdx, e0_canAlloc) = PriorityEncoderWithFlag(e0_invalidVec)
  val e0_allocWaitSame = e0_allocWaitSameVec.reduce(_ || _)
  val e0_sid = Mux(e0_canMerge, e0_mergeIdx, e0_allocIdx)
  val e0_reject = do_uarch_drain || (!e0_canMerge && !e0_invalidVec.asUInt.orR) || e0_rejectVec.reduce(_ || _)

  // e0_fire is used to guarantee that it will not be rejected
  when(e0_canMerge && e0_req_valid){
    entries(e0_mergeIdx).update(e0_req)
  }.elsewhen(e0_canAlloc && e0_fire){
    entries(e0_allocIdx).set(e0_req)
    states(e0_allocIdx).setValid(true.B)
    when(e0_allocWaitSame){
      states(e0_allocIdx).setWaitSame(true.B)
    }
  }

  req_ready := !e0_reject

  /* e1: return accept */
  io.lsq.idResp.valid := RegNext(e0_fire)
  io.lsq.idResp.bits.mid := RegEnable(e0_req.id, e0_fire)
  io.lsq.idResp.bits.sid := RegEnable(e0_sid, e0_fire)
  io.lsq.idResp.bits.is2lq := RegEnable(!isStore(e0_req.cmd), e0_fire)
  io.lsq.idResp.bits.nc := RegEnable(e0_req.nc, e0_fire)

  /******************************************************************
   * Uncache Req
   *  Version 0 (better timing)
   *    q0: choose which one is sent
   *    q0: sent
   *
   *  Version 1 (better performance)
   *    solved in one cycle for achieving the original performance.
   *    NOTE: "Enter Buffer" & "Uncache Req" not a continuous pipeline,
   *          because there is no guarantee that mem_acquire will be always ready.
   ******************************************************************/

  val q0_canSentVec = sizeMap(i =>
    (io.enableOutstanding || uState === s_idle) &&
    states(i).can2Bus()
  )
  val q0_res = PriorityEncoderWithFlag(q0_canSentVec)
  q0_canSentIdx := q0_res._1
  q0_canSent := q0_res._2
  q0_entry := entries(q0_canSentIdx)

  // derive log2(size) of the access from the byte-mask population count
  val size = PopCount(q0_entry.mask)
  val (lgSize, legal) = PriorityMuxWithFlag(Seq(
    1.U -> 0.U,
    2.U -> 1.U,
    4.U -> 2.U,
    8.U -> 3.U
  ).map(m => (size===m._1) -> m._2))
  assert(!(q0_canSent && !legal))

  val q0_load = edge.Get(
    fromSource      = q0_canSentIdx,
    toAddress       = q0_entry.addr,
    lgSize          = lgSize
  )._2

  val q0_store = edge.Put(
    fromSource      = q0_canSentIdx,
    toAddress       = q0_entry.addr,
    lgSize          = lgSize,
    data            = q0_entry.data,
    mask            = q0_entry.mask
  )._2

  val q0_isStore = q0_entry.cmd === MemoryOpConstants.M_XWR

  mem_acquire.valid := q0_canSent
  mem_acquire.bits := Mux(q0_isStore, q0_store, q0_load)
  mem_acquire.bits.user.lift(MemBackTypeMM).foreach(_ := q0_entry.memBackTypeMM)
  mem_acquire.bits.user.lift(MemPageTypeNC).foreach(_ := q0_entry.nc)
  when(mem_acquire.fire){
    states(q0_canSentIdx).setInflight(true.B)

    // q0 should judge whether wait same block
    (0 until UncacheBufferSize).map(j =>
      when(q0_canSentIdx =/= j.U && states(j).isValid() && !states(j).isWaitReturn() && addrMatch(q0_entry, entries(j))){
        states(j).setWaitSame(true.B)
      }
    )
  }

  // uncache store but memBackTypeMM should update the golden memory
  if (env.EnableDifftest) {
    val difftest = DifftestModule(new DiffUncacheMMStoreEvent, delay = 1)
    difftest.coreid := io.hartId
    difftest.index  := 0.U
    difftest.valid  := mem_acquire.fire && isStore(entries(q0_canSentIdx)) && entries(q0_canSentIdx).memBackTypeMM
    difftest.addr   := entries(q0_canSentIdx).addr
    difftest.data   := entries(q0_canSentIdx).data.asTypeOf(Vec(DataBytes, UInt(8.W)))
    difftest.mask   := entries(q0_canSentIdx).mask
  }

  /******************************************************************
   * Uncache Resp
   ******************************************************************/

  val (_, _, refill_done, _) = edge.addr_inc(mem_grant)

  mem_grant.ready := true.B
  when (mem_grant.fire) {
    val id = mem_grant.bits.source
    entries(id).update(mem_grant.bits)
    states(id).updateUncacheResp()
    assert(refill_done, "Uncache response should be one beat only!")

    // remove state of wait same block
    (0 until UncacheBufferSize).map(j =>
      when(id =/= j.U && states(j).isValid() && states(j).isWaitSame() && addrMatch(entries(id), entries(j))){
        states(j).setWaitSame(false.B)
      }
    )
  }

  /******************************************************************
   * Return to LSQ
   ******************************************************************/

  val r0_canSentVec = sizeMap(i => states(i).can2Lsq())
  val (r0_canSentIdx, r0_canSent) = PriorityEncoderWithFlag(r0_canSentVec)
  resp.valid := r0_canSent
  resp.bits := entries(r0_canSentIdx).toUncacheWordResp(r0_canSentIdx)
  when(resp.fire){
    states(r0_canSentIdx).updateReturn()
  }


  /******************************************************************
   * Buffer Flush
   * 1. when io.flush.valid is true: drain store queue and ubuffer
   * 2. when io.lsq.req.bits.atomic is true: not support temporarily
   ******************************************************************/
  empty := !VecInit(states.map(_.isValid())).asUInt.orR
  io.flush.empty := empty


  /******************************************************************
   * Load Data Forward to loadunit
   *  f0: vaddr match, fast resp
   *  f1: mask & data select, merge; paddr match; resp
   *      NOTE: forward.paddr from dtlb, which is far from uncache f0
   ******************************************************************/

  val f0_validMask = sizeMap(i => isStore(entries(i)) && states(i).isValid())
  val f0_fwdMaskCandidates = VecInit(entries.map(e => e.mask))
  val f0_fwdDataCandidates = VecInit(entries.map(e => e.data))
  val f1_fwdMaskCandidates = sizeMap(i => RegEnable(entries(i).mask, f0_validMask(i)))
  val f1_fwdDataCandidates = sizeMap(i => RegEnable(entries(i).data, f0_validMask(i)))
  val f1_tagMismatchVec = Wire(Vec(LoadPipelineWidth, Bool()))
  f1_needDrain := f1_tagMismatchVec.asUInt.orR && !empty

  for ((forward, i) <- io.forward.zipWithIndex) {
    val f0_fwdValid = forward.valid
    val f1_fwdValid = RegNext(f0_fwdValid)

    /* f0 */
    // vaddr match
    val f0_vtagMatches = sizeMap(w => addrMatch(entries(w).vaddr, forward.vaddr))
    val f0_flyTagMatches = sizeMap(w => f0_vtagMatches(w) && f0_validMask(w) && f0_fwdValid && states(w).isFwdOld())
    val f0_idleTagMatches = sizeMap(w => f0_vtagMatches(w) && f0_validMask(w) && f0_fwdValid && states(w).isFwdNew())
    // ONLY for fast use to get better timing
    val f0_flyMaskFast = shiftMaskToHigh(
      forward.vaddr,
      Mux1H(f0_flyTagMatches, f0_fwdMaskCandidates)
    ).asTypeOf(Vec(VDataBytes, Bool()))
    val f0_idleMaskFast = shiftMaskToHigh(
      forward.vaddr,
      Mux1H(f0_idleTagMatches, f0_fwdMaskCandidates)
    ).asTypeOf(Vec(VDataBytes, Bool()))

    /* f1 */
    val f1_flyTagMatches = RegEnable(f0_flyTagMatches, f0_fwdValid)
    val f1_idleTagMatches = RegEnable(f0_idleTagMatches, f0_fwdValid)
    val f1_fwdPAddr = RegEnable(forward.paddr, f0_fwdValid)
    // select
    val f1_flyMask = Mux1H(f1_flyTagMatches, f1_fwdMaskCandidates)
    val f1_flyData = Mux1H(f1_flyTagMatches, f1_fwdDataCandidates)
    val f1_idleMask = Mux1H(f1_idleTagMatches, f1_fwdMaskCandidates)
    val f1_idleData = Mux1H(f1_idleTagMatches, f1_fwdDataCandidates)
    // merge old(inflight) and new(idle)
    val (f1_fwdDataTmp, f1_fwdMaskTmp) = doMerge(f1_flyData, f1_flyMask, f1_idleData, f1_idleMask)
    val f1_fwdMask = shiftMaskToHigh(f1_fwdPAddr, f1_fwdMaskTmp).asTypeOf(Vec(VDataBytes, Bool()))
    val f1_fwdData = shiftDataToHigh(f1_fwdPAddr, f1_fwdDataTmp).asTypeOf(Vec(VDataBytes, UInt(8.W)))
    // paddr match and mismatch judge
    val f1_ptagMatches = sizeMap(w => addrMatch(RegEnable(entries(w).addr, f0_fwdValid), f1_fwdPAddr))
    f1_tagMismatchVec(i) := sizeMap(w =>
      RegEnable(f0_vtagMatches(w), f0_fwdValid) =/= f1_ptagMatches(w) && RegEnable(f0_validMask(w), f0_fwdValid) && f1_fwdValid
    ).asUInt.orR
    XSDebug(
      f1_tagMismatchVec(i),
      "forward tag mismatch: pmatch %x vmatch %x vaddr %x paddr %x\n",
      f1_ptagMatches.asUInt,
      RegEnable(f0_vtagMatches.asUInt, f0_fwdValid),
      RegEnable(forward.vaddr, f0_fwdValid),
      RegEnable(forward.paddr, f0_fwdValid)
    )
    // response
    forward.addrInvalid := false.B // addr in ubuffer is always ready
    forward.dataInvalid := false.B // data in ubuffer is always ready
    forward.matchInvalid := f1_tagMismatchVec(i) // paddr / vaddr cam result does not match
    for (j <- 0 until VDataBytes) {
      forward.forwardMaskFast(j) := f0_flyMaskFast(j) || f0_idleMaskFast(j)

      forward.forwardData(j) := f1_fwdData(j)
      forward.forwardMask(j) := false.B
      when(f1_fwdMask(j) && f1_fwdValid) {
        forward.forwardMask(j) := true.B
      }
    }

  }


  /******************************************************************
   * Debug / Performance
   ******************************************************************/

  /* Debug Counters */
  // print all input/output requests for debug purpose
  // print req/resp
  XSDebug(req.fire, "req cmd: %x addr: %x data: %x mask: %x\n",
    req.bits.cmd, req.bits.addr, req.bits.data, req.bits.mask)
  // print the data actually returned to the LSQ (was req.bits.data before)
  XSDebug(resp.fire, "data: %x\n", resp.bits.data)
  // print tilelink messages
  XSDebug(mem_acquire.valid, "mem_acquire valid, ready=%d ", mem_acquire.ready)
  mem_acquire.bits.dump(mem_acquire.valid)

  XSDebug(mem_grant.fire, "mem_grant fire ")
  mem_grant.bits.dump(mem_grant.fire)

  /* Performance Counters */
  XSPerfAccumulate("e0_reject", e0_reject && e0_req_valid)
  XSPerfAccumulate("e0_total_enter", e0_fire)
  XSPerfAccumulate("e0_merge", e0_fire && e0_canMerge)
  XSPerfAccumulate("e0_alloc_simple", e0_fire && e0_canAlloc && !e0_allocWaitSame)
  XSPerfAccumulate("e0_alloc_wait_same", e0_fire && e0_canAlloc && e0_allocWaitSame)
  XSPerfAccumulate("q0_acquire", q0_canSent)
  XSPerfAccumulate("q0_acquire_store", q0_canSent && q0_isStore)
  XSPerfAccumulate("q0_acquire_load", q0_canSent && !q0_isStore)
  XSPerfAccumulate("uncache_memBackTypeMM", io.lsq.req.fire && io.lsq.req.bits.memBackTypeMM)
  XSPerfAccumulate("uncache_mmio_store", io.lsq.req.fire && isStore(io.lsq.req.bits.cmd) && !io.lsq.req.bits.nc)
  XSPerfAccumulate("uncache_mmio_load", io.lsq.req.fire && !isStore(io.lsq.req.bits.cmd) && !io.lsq.req.bits.nc)
  XSPerfAccumulate("uncache_nc_store", io.lsq.req.fire && isStore(io.lsq.req.bits.cmd) && io.lsq.req.bits.nc)
  XSPerfAccumulate("uncache_nc_load", io.lsq.req.fire && !isStore(io.lsq.req.bits.cmd) && io.lsq.req.bits.nc)
  XSPerfAccumulate("uncache_outstanding", uState =/= s_idle && mem_acquire.fire)
  XSPerfAccumulate("forward_count", PopCount(io.forward.map(_.forwardMask.asUInt.orR)))
  XSPerfAccumulate("forward_vaddr_match_failed", PopCount(f1_tagMismatchVec))

  val perfEvents = Seq(
    ("uncache_mmio_store", io.lsq.req.fire && isStore(io.lsq.req.bits.cmd) && !io.lsq.req.bits.nc),
    ("uncache_mmio_load", io.lsq.req.fire && !isStore(io.lsq.req.bits.cmd) && !io.lsq.req.bits.nc),
    ("uncache_nc_store", io.lsq.req.fire && isStore(io.lsq.req.bits.cmd) && io.lsq.req.bits.nc),
    ("uncache_nc_load", io.lsq.req.fire && !isStore(io.lsq.req.bits.cmd) && io.lsq.req.bits.nc),
    ("uncache_outstanding", uState =/= s_idle && mem_acquire.fire),
    ("forward_count", PopCount(io.forward.map(_.forwardMask.asUInt.orR))),
    ("forward_vaddr_match_failed", PopCount(f1_tagMismatchVec))
  )

  generatePerfEvent()
  //  End
}
616