xref: /XiangShan/src/main/scala/xiangshan/cache/dcache/Uncache.scala (revision dd3d70bad8def5b15d6c20ec5888b367b0575198)
11f0e2dc7SJiawei Lin/***************************************************************************************
21f0e2dc7SJiawei Lin* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
31f0e2dc7SJiawei Lin* Copyright (c) 2020-2021 Peng Cheng Laboratory
41f0e2dc7SJiawei Lin*
51f0e2dc7SJiawei Lin* XiangShan is licensed under Mulan PSL v2.
61f0e2dc7SJiawei Lin* You can use this software according to the terms and conditions of the Mulan PSL v2.
71f0e2dc7SJiawei Lin* You may obtain a copy of Mulan PSL v2 at:
81f0e2dc7SJiawei Lin*          http://license.coscl.org.cn/MulanPSL2
91f0e2dc7SJiawei Lin*
101f0e2dc7SJiawei Lin* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
111f0e2dc7SJiawei Lin* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
121f0e2dc7SJiawei Lin* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
131f0e2dc7SJiawei Lin*
141f0e2dc7SJiawei Lin* See the Mulan PSL v2 for more details.
151f0e2dc7SJiawei Lin***************************************************************************************/
161f0e2dc7SJiawei Lin
171f0e2dc7SJiawei Linpackage xiangshan.cache
181f0e2dc7SJiawei Lin
199e12e8edScz4eimport org.chipsalliance.cde.config.Parameters
201f0e2dc7SJiawei Linimport chisel3._
211f0e2dc7SJiawei Linimport chisel3.util._
2237225120Ssfencevmaimport utils._
233c02ee8fSwakafaimport utility._
241f0e2dc7SJiawei Linimport freechips.rocketchip.diplomacy.{IdRange, LazyModule, LazyModuleImp, TransferSizes}
251f0e2dc7SJiawei Linimport freechips.rocketchip.tilelink.{TLArbiter, TLBundleA, TLBundleD, TLClientNode, TLEdgeOut, TLMasterParameters, TLMasterPortParameters}
269e12e8edScz4eimport xiangshan._
279e12e8edScz4eimport xiangshan.mem._
289e12e8edScz4eimport xiangshan.mem.Bundles._
299e12e8edScz4eimport coupledL2.{MemBackTypeMM, MemBackTypeMMField, MemPageTypeNC, MemPageTypeNCField}
30*dd3d70baSYanqin Liimport difftest._
3137225120Ssfencevma
/** Shared constants and helpers for the uncache buffer.
  *
  * Mixed into the bundles and modules below so they agree on the entry index
  * width, the block granularity used for address comparison, and the
  * byte-wise merge rule.
  */
trait HasUncacheBufferParameters extends HasXSParameter with HasDCacheParameters {

  /** Number of bits needed to index one entry of the buffer. */
  def INDEX_WIDTH = log2Up(UncacheBufferSize)

  /** Number of low address bits that select a byte inside one XLEN-wide block. */
  def BLOCK_OFFSET = log2Up(XLEN / 8)

  /** Drops the in-block byte offset so that addresses can be compared per block. */
  def getBlockAddr(x: UInt) = x >> BLOCK_OFFSET

  /** Byte-wise merge of two (data, mask) pairs.
    *
    * For every byte lane the new byte is taken when its bit in `newMask` is
    * set, otherwise the old byte is kept. The resulting mask is the union of
    * both masks.
    *
    * NOTE(review): assumes all four operands are at least `DataBytes` lanes
    * wide -- confirm against callers.
    *
    * @return (merged data, merged mask)
    */
  def doMerge(oldData: UInt, oldMask: UInt, newData:UInt, newMask: UInt):(UInt, UInt) = {
    val mergedBytes = Seq.tabulate(DataBytes) { lane =>
      val lo = 8 * lane
      val hi = lo + 7
      Mux(newMask(lane), newData(hi, lo), oldData(hi, lo))
    }
    (VecInit(mergedBytes).asUInt, newMask | oldMask)
  }
}
4674050fc0SYanqin Li
/** Base class for bundles that need the uncache buffer parameters in scope. */
abstract class UncacheBundle(implicit p: Parameters)
  extends XSBundle
  with HasUncacheBufferParameters
4874050fc0SYanqin Li
/** Base class for modules that need the uncache buffer parameters in scope. */
abstract class UncacheModule(implicit p: Parameters)
  extends XSModule
  with HasUncacheBufferParameters
5074050fc0SYanqin Li
5174050fc0SYanqin Li
/** Flush handshake, declared from the requester's point of view.
  *
  * The uncache buffer uses this bundle flipped: it receives `valid` and
  * drives `empty`.
  */
class UncacheFlushBundle extends Bundle {
  /** Request to drain the buffer. */
  val valid = Output(Bool())
  /** Reports that the buffer holds no valid entry. */
  val empty = Input(Bool())
}
561f0e2dc7SJiawei Lin
/** Payload of one uncache buffer entry: the request as received from the LSQ
  * plus the error flag captured from the bus response.
  */
class UncacheEntry(implicit p: Parameters) extends UncacheBundle {
  // Request description
  val cmd = UInt(M_SZ.W)
  val addr = UInt(PAddrBits.W)
  val vaddr = UInt(VAddrBits.W)
  val data = UInt(XLEN.W)
  val mask = UInt(DataBytes.W)
  val nc = Bool()
  val atomic = Bool()
  val memBackTypeMM = Bool()

  // Set when the bus response was denied or corrupt
  val resp_nderr = Bool()

  /* NOTE: the fields below are reserved for an internal forward path and can
   * be re-enabled once that logic is supported. */
  // val fwd_data = UInt(XLEN.W)
  // val fwd_mask = UInt(DataBytes.W)

  /** Loads a fresh request into this entry and clears the error flag. */
  def set(x: UncacheWordReq): Unit = {
    // address
    addr := x.addr
    vaddr := x.vaddr
    // payload
    cmd := x.cmd
    data := x.data
    mask := x.mask
    // attributes
    nc := x.nc
    atomic := x.atomic
    memBackTypeMM := x.memBackTypeMM
    // status
    resp_nderr := false.B
    // fwd_data := 0.U
    // fwd_mask := 0.U
  }

  /** Merges another request into this entry.
    *
    * Data and mask are combined byte-wise, with the incoming request taking
    * priority. The byte offset of both addresses is then replaced by the
    * position of the lowest set bit of the merged mask.
    */
  def update(x: UncacheWordReq): Unit = {
    val merged = doMerge(data, mask, x.data, x.mask)
    val mergedData = merged._1
    val mergedMask = merged._2
    // lowest set mask bit -> byte offset used to re-align the addresses
    val lowestByte = PriorityEncoderWithFlag(mergedMask)

    data := mergedData
    mask := mergedMask
    when(lowestByte._2){
      addr := (getBlockAddr(addr) << BLOCK_OFFSET) | lowestByte._1
      vaddr := (getBlockAddr(vaddr) << BLOCK_OFFSET) | lowestByte._1
    }
  }

  /** Records a bus response: always captures the error status, and captures
    * the returned data only for read commands.
    */
  def update(x: TLBundleD): Unit = {
    resp_nderr := x.denied || x.corrupt
    when(cmd === MemoryOpConstants.M_XRD) {
      data := x.data
    }
  }

  // def update(forwardData: UInt, forwardMask: UInt): Unit = {
  //   fwd_data := forwardData
  //   fwd_mask := forwardMask
  // }

  /** Builds the response for the LSQ from this entry.
    *
    * @param eid index of this entry, returned as the response id
    */
  def toUncacheWordResp(eid: UInt): UncacheWordResp = {
    // With the internal forward path enabled the data would instead be:
    // VecInit((0 until DataBytes).map(j =>
    //   Mux(fwd_mask(j), fwd_data(8*(j+1)-1, 8*j), data(8*(j+1)-1, 8*j))
    // )).asUInt
    val wordResp = Wire(new UncacheWordResp)
    // every field not assigned below is left as don't-care
    wordResp := DontCare
    wordResp.id := eid
    wordResp.data := data
    wordResp.nc := nc
    wordResp.nderr := resp_nderr
    wordResp.is2lq := cmd === MemoryOpConstants.M_XRD
    wordResp.miss := false.B
    wordResp.replay := false.B
    wordResp.tag_error := false.B
    wordResp.error := false.B
    wordResp
  }
}
1301f0e2dc7SJiawei Lin
/** Control state of one uncache buffer entry.
  *
  * Life cycle: valid (-> waitSame) -> inflight -> waitReturn
  */
class UncacheEntryState(implicit p: Parameters) extends DCacheBundle {
  val valid = Bool()
  val inflight = Bool() // uncache -> L2
  val waitSame = Bool()
  val waitReturn = Bool() // uncache -> LSQ

  /** Clears every flag. */
  def init: Unit = {
    Seq(valid, inflight, waitSame, waitReturn).foreach(_ := false.B)
  }

  // ---- queries ----
  def isValid(): Bool = valid
  def isInflight(): Bool = valid && inflight
  def isWaitReturn(): Bool = valid && waitReturn
  def isWaitSame(): Bool = valid && waitSame
  /** Valid and none of the inflight / waitSame / waitReturn flags is set. */
  def can2Bus(): Bool = valid && !(inflight || waitSame || waitReturn)
  /** Valid and waiting to be returned to the LSQ. */
  def can2Lsq(): Bool = isWaitReturn()
  def canMerge(): Bool = valid && !inflight
  /** Valid and either inflight or waiting to be returned. */
  def isFwdOld(): Bool = valid && (inflight || waitReturn)
  /** Valid and neither inflight nor waiting to be returned. */
  def isFwdNew(): Bool = valid && !(inflight || waitReturn)

  // ---- single-flag setters ----
  def setValid(x: Bool): Unit = { valid := x }
  def setInflight(x: Bool): Unit = { inflight := x }
  def setWaitReturn(x: Bool): Unit = { waitReturn := x }
  def setWaitSame(x: Bool): Unit = { waitSame := x }

  /** Transition taken when the bus response arrives: inflight -> waitReturn. */
  def updateUncacheResp(): Unit = {
    assert(inflight, "The request was not sent and a response was received")
    inflight := false.B
    waitReturn := true.B
  }

  /** Transition taken when the response is handed to the LSQ: clears every flag. */
  def updateReturn(): Unit = {
    init
  }
}
1721f0e2dc7SJiawei Lin
/** Top-level IO of the uncache buffer. */
class UncacheIO(implicit p: Parameters) extends DCacheBundle {
  /** Hart id; in the code shown here it is only forwarded to difftest. */
  val hartId = Input(UInt())
  /** When set, a new bus request may be issued while another is outstanding. */
  val enableOutstanding = Input(Bool())
  /** Drain handshake (`valid` in, `empty` out). */
  val flush = Flipped(new UncacheFlushBundle)
  /** Request / id-response / response channels towards the LSQ. */
  val lsq = Flipped(new UncacheWordIO)
  /** One store-to-load forward query port per load pipeline. */
  val forward = Vec(LoadPipelineWidth, Flipped(new LoadForwardQueryIO))
}
1801f0e2dc7SJiawei Lin
// Convert DCacheIO to TileLink.
// For now, we only deal with TL-UL.
1831f0e2dc7SJiawei Lin
/** Diplomatic wrapper of the uncache buffer.
  *
  * Declares a single TileLink master named "uncache" that owns one source id
  * per buffer entry and carries the MemBackTypeMM / MemPageTypeNC request
  * fields.
  */
class Uncache()(implicit p: Parameters) extends LazyModule with HasXSParameter {
  override def shouldBeInlined: Boolean = false

  /** Number of TileLink source ids, one per buffer entry. */
  def idRange: Int = UncacheBufferSize

  val clientParameters = {
    val uncacheMaster = TLMasterParameters.v1(
      name = "uncache",
      sourceId = IdRange(0, idRange)
    )
    TLMasterPortParameters.v1(
      clients = Seq(uncacheMaster),
      requestFields = Seq(MemBackTypeMMField(), MemPageTypeNCField())
    )
  }
  val clientNode = TLClientNode(Seq(clientParameters))

  lazy val module = new UncacheImp(this)
}
1991f0e2dc7SJiawei Lin
200cfdd605fSYanqin Li/* Uncache Buffer */
/** Implementation of the uncache buffer.
  *
  * Requests from the LSQ are stored in (or merged into) a small buffer,
  * issued on the TileLink A channel, completed by the D channel, and finally
  * returned to the LSQ. Valid store entries can additionally be forwarded to
  * the load pipelines.
  *
  * Signal name prefixes used below:
  *  - e0/e1: enter buffer (merge or allocate, then id response)
  *  - q0:    issue a request on the bus
  *  - r0:    return a response to the LSQ
  *  - f0/f1: store-to-load forwarding
  */
class UncacheImp(outer: Uncache)extends LazyModuleImp(outer)
  with HasTLDump
  with HasXSParameter
  with HasUncacheBufferParameters
  with HasPerfEvents
{
  println(s"Uncache Buffer Size: $UncacheBufferSize entries")
  val io = IO(new UncacheIO)

  val (bus, edge) = outer.clientNode.out.head

  val req  = io.lsq.req
  val resp = io.lsq.resp
  val mem_acquire = bus.a
  val mem_grant   = bus.d
  val req_ready = WireInit(false.B)

  // assign default values to output signals
  // (several of them are overridden further down; the last connection wins)
  bus.b.ready := false.B
  bus.c.valid := false.B
  bus.c.bits  := DontCare
  bus.d.ready := false.B
  bus.e.valid := false.B
  bus.e.bits  := DontCare
  io.lsq.req.ready := req_ready
  io.lsq.resp.valid := false.B
  io.lsq.resp.bits := DontCare


  /******************************************************************
   * Data Structure
   ******************************************************************/

  // payload is not reset; only the control state is
  val entries = Reg(Vec(UncacheBufferSize, new UncacheEntry))
  val states = RegInit(VecInit(Seq.fill(UncacheBufferSize)(0.U.asTypeOf(new UncacheEntryState))))
  val s_idle :: s_inflight :: s_wait_return :: Nil = Enum(3)
  val uState = RegInit(s_idle)

  // drain buffer
  val empty = Wire(Bool())
  val f1_needDrain = Wire(Bool())
  // Registered drain request: high in the cycle after a drain condition is
  // seen while the buffer is not empty, low otherwise.
  // NOTE(review): the original code had separate `elsewhen(empty)` and
  // `otherwise` branches that both cleared the register, so the flag is not
  // sticky. Behaviour is kept as is -- confirm this is intended.
  val do_uarch_drain = RegInit(false.B)
  do_uarch_drain := (f1_needDrain || io.flush.valid) && !empty

  val q0_entry = Wire(new UncacheEntry)
  val q0_canSentIdx = Wire(UInt(INDEX_WIDTH.W))
  val q0_canSent = Wire(Bool())


  /******************************************************************
   * Functions
   ******************************************************************/
  /** Builds a Vec with one element per buffer entry. */
  def sizeMap[T <: Data](f: Int => T) = VecInit((0 until UncacheBufferSize).map(f))
  /** Runs `f` once per buffer entry, for its side effects only. */
  def sizeForeach[T <: Data](f: Int => Unit): Unit = (0 until UncacheBufferSize).foreach(f)
  def isStore(e: UncacheEntry): Bool = e.cmd === MemoryOpConstants.M_XWR
  def isStore(x: UInt): Bool = x === MemoryOpConstants.M_XWR
  // all addrMatch variants compare at block granularity (byte offset ignored)
  def addrMatch(x: UncacheEntry, y: UncacheWordReq) : Bool = getBlockAddr(x.addr) === getBlockAddr(y.addr)
  def addrMatch(x: UncacheWordReq, y: UncacheEntry) : Bool = getBlockAddr(x.addr) === getBlockAddr(y.addr)
  def addrMatch(x: UncacheEntry, y: UncacheEntry) : Bool = getBlockAddr(x.addr) === getBlockAddr(y.addr)
  def addrMatch(x: UInt, y: UInt) : Bool = getBlockAddr(x) === getBlockAddr(y)

  /** True when `mask` selects exactly one byte, or one of the listed aligned
    * runs of 2, 4 or 8 bytes.
    *
    * NOTE(review): the literals are 8 bits wide, i.e. this assumes
    * DataBytes == 8 -- confirm.
    */
  def continueAndAlign(mask: UInt): Bool = {
    val res =
      PopCount(mask) === 1.U ||
      mask === 0b00000011.U ||
      mask === 0b00001100.U ||
      mask === 0b00110000.U ||
      mask === 0b11000000.U ||
      mask === 0b00001111.U ||
      mask === 0b11110000.U ||
      mask === 0b11111111.U
    res
  }

  /** Primary merge condition between an incoming request and entry `eid`.
    * When it fails for an address-matching entry, the request is rejected.
    */
  def canMergePrimary(x: UncacheWordReq, e: UncacheEntry, eid: UInt): Bool = {
    // vaddr same, properties same
    getBlockAddr(x.vaddr) === getBlockAddr(e.vaddr) &&
      x.cmd === e.cmd && x.nc && e.nc &&
      x.memBackTypeMM === e.memBackTypeMM && !x.atomic && !e.atomic &&
      continueAndAlign(x.mask | e.mask) &&
    // not receiving uncache response, not waitReturn -> no wake-up signal in these cases
      !(mem_grant.fire && mem_grant.bits.source === eid || states(eid).isWaitReturn())
  }

  /** Secondary merge condition. When it fails (while the primary one holds),
    * the request is allocated to a new entry that waits for the old one.
    */
  def canMergeSecondary(eid: UInt): Bool = {
    // old entry is neither inflight nor being sent in this cycle
    states(eid).canMerge() && !(q0_canSent && q0_canSentIdx === eid)
  }

  /******************************************************************
   * uState for non-outstanding
   ******************************************************************/

  // idle -> (A fire) -> inflight -> (D fire) -> wait_return -> (resp fire) -> idle
  switch(uState){
    is(s_idle){
      when(mem_acquire.fire){
        uState := s_inflight
      }
    }
    is(s_inflight){
      when(mem_grant.fire){
        uState := s_wait_return
      }
    }
    is(s_wait_return){
      when(resp.fire){
        uState := s_idle
      }
    }
  }


  /******************************************************************
   * Enter Buffer
   *  Version 0 (better timing)
   *    e0 judge: alloc/merge write vec
   *    e1 alloc
   *
   *  Version 1 (better performance)
   *    e0: solved in one cycle for achieving the original performance.
   *    e1: return idResp to set sid for handshake
   ******************************************************************/

  /* e0: merge/alloc */
  val e0_fire = req.fire
  val e0_req_valid = req.valid
  val e0_req = req.bits

  // Per entry with a matching block address:
  //   reject        : primary merge condition fails
  //   merge         : both merge conditions hold
  //   allocWaitSame : primary holds, secondary fails
  val e0_rejectVec = Wire(Vec(UncacheBufferSize, Bool()))
  val e0_mergeVec = Wire(Vec(UncacheBufferSize, Bool()))
  val e0_allocWaitSameVec = Wire(Vec(UncacheBufferSize, Bool()))
  sizeForeach(i => {
    val valid = e0_req_valid && states(i).isValid()
    val isAddrMatch = addrMatch(e0_req, entries(i))
    val canMerge1 = canMergePrimary(e0_req, entries(i), i.U)
    val canMerge2 = canMergeSecondary(i.U)
    e0_rejectVec(i) := valid && isAddrMatch && !canMerge1
    e0_mergeVec(i) := valid && isAddrMatch && canMerge1 && canMerge2
    e0_allocWaitSameVec(i) := valid && isAddrMatch && canMerge1 && !canMerge2
  })
  assert(PopCount(e0_mergeVec) <= 1.U, "Uncache buffer should not merge multiple entries")

  val e0_invalidVec = sizeMap(i => !states(i).isValid())
  val (e0_mergeIdx, e0_canMerge) = PriorityEncoderWithFlag(e0_mergeVec)
  val (e0_allocIdx, e0_canAlloc) = PriorityEncoderWithFlag(e0_invalidVec)
  val e0_allocWaitSame = e0_allocWaitSameVec.reduce(_ || _)
  val e0_sid = Mux(e0_canMerge, e0_mergeIdx, e0_allocIdx)
  // reject while draining, when there is nothing to merge into and no free
  // entry, or when any matching entry fails the primary merge condition
  val e0_reject = do_uarch_drain || (!e0_canMerge && !e0_invalidVec.asUInt.orR) || e0_rejectVec.reduce(_ || _)

  // e0_fire is used to guarantee that it will not be rejected
  // NOTE(review): the merge branch is gated by e0_req_valid rather than
  // e0_fire, so a merge can be written even if the request is rejected in
  // the same cycle -- confirm this is intended.
  when(e0_canMerge && e0_req_valid){
    entries(e0_mergeIdx).update(e0_req)
  }.elsewhen(e0_canAlloc && e0_fire){
    entries(e0_allocIdx).set(e0_req)
    states(e0_allocIdx).setValid(true.B)
    when(e0_allocWaitSame){
      states(e0_allocIdx).setWaitSame(true.B)
    }
  }

  req_ready := !e0_reject

  /* e1: return accept */
  io.lsq.idResp.valid := RegNext(e0_fire)
  io.lsq.idResp.bits.mid := RegEnable(e0_req.id, e0_fire)
  io.lsq.idResp.bits.sid := RegEnable(e0_sid, e0_fire)
  io.lsq.idResp.bits.is2lq := RegEnable(!isStore(e0_req.cmd), e0_fire)
  io.lsq.idResp.bits.nc := RegEnable(e0_req.nc, e0_fire)

  /******************************************************************
   * Uncache Req
   *  Version 0 (better timing)
   *    q0: choose which one is sent
   *    q0: sent
   *
   *  Version 1 (better performance)
   *    solved in one cycle for achieving the original performance.
   *    NOTE: "Enter Buffer" & "Uncache Req" not a continuous pipeline,
   *          because there is no guarantee that mem_acquire will be always ready.
   ******************************************************************/

  // without enableOutstanding, a request is only issued while uState is idle
  val q0_canSentVec = sizeMap(i =>
    (io.enableOutstanding || uState === s_idle) &&
    states(i).can2Bus()
  )
  val q0_res = PriorityEncoderWithFlag(q0_canSentVec)
  q0_canSentIdx := q0_res._1
  q0_canSent := q0_res._2
  q0_entry := entries(q0_canSentIdx)

  // transfer size = number of bytes selected by the mask; must be 1, 2, 4 or 8
  val size = PopCount(q0_entry.mask)
  val (lgSize, legal) = PriorityMuxWithFlag(Seq(
    1.U -> 0.U,
    2.U -> 1.U,
    4.U -> 2.U,
    8.U -> 3.U
  ).map(m => (size===m._1) -> m._2))
  assert(!(q0_canSent && !legal))

  val q0_load = edge.Get(
    fromSource      = q0_canSentIdx,
    toAddress       = q0_entry.addr,
    lgSize          = lgSize
  )._2

  val q0_store = edge.Put(
    fromSource      = q0_canSentIdx,
    toAddress       = q0_entry.addr,
    lgSize          = lgSize,
    data            = q0_entry.data,
    mask            = q0_entry.mask
  )._2

  val q0_isStore = q0_entry.cmd === MemoryOpConstants.M_XWR

  mem_acquire.valid := q0_canSent
  mem_acquire.bits := Mux(q0_isStore, q0_store, q0_load)
  mem_acquire.bits.user.lift(MemBackTypeMM).foreach(_ := q0_entry.memBackTypeMM)
  mem_acquire.bits.user.lift(MemPageTypeNC).foreach(_ := q0_entry.nc)
  when(mem_acquire.fire){
    states(q0_canSentIdx).setInflight(true.B)

    // q0 should judge whether wait same block: every other valid entry of the
    // same block that is not yet waiting for return has to wait for this one
    (0 until UncacheBufferSize).foreach(j =>
      when(q0_canSentIdx =/= j.U && states(j).isValid() && !states(j).isWaitReturn() && addrMatch(q0_entry, entries(j))){
        states(j).setWaitSame(true.B)
      }
    )
  }

  // uncache store but memBackTypeMM should update the golden memory
  if (env.EnableDifftest) {
    val difftest = DifftestModule(new DiffUncacheMMStoreEvent, delay = 1)
    difftest.coreid := io.hartId
    difftest.index  := 0.U
    difftest.valid  := mem_acquire.fire && isStore(entries(q0_canSentIdx)) && entries(q0_canSentIdx).memBackTypeMM
    difftest.addr   := entries(q0_canSentIdx).addr
    difftest.data   := entries(q0_canSentIdx).data.asTypeOf(Vec(DataBytes, UInt(8.W)))
    difftest.mask   := entries(q0_canSentIdx).mask
  }

  /******************************************************************
   * Uncache Resp
   ******************************************************************/

  val (_, _, refill_done, _) = edge.addr_inc(mem_grant)

  // responses are always accepted; the source id selects the entry
  mem_grant.ready := true.B
  when (mem_grant.fire) {
    val id = mem_grant.bits.source
    entries(id).update(mem_grant.bits)
    states(id).updateUncacheResp()
    assert(refill_done, "Uncache response should be one beat only!")

    // remove state of wait same block
    (0 until UncacheBufferSize).foreach(j =>
      when(id =/= j.U && states(j).isValid() && states(j).isWaitSame() && addrMatch(entries(id), entries(j))){
        states(j).setWaitSame(false.B)
      }
    )
  }

  /******************************************************************
   * Return to LSQ
   ******************************************************************/

  val r0_canSentVec = sizeMap(i => states(i).can2Lsq())
  val (r0_canSentIdx, r0_canSent) = PriorityEncoderWithFlag(r0_canSentVec)
  resp.valid := r0_canSent
  resp.bits := entries(r0_canSentIdx).toUncacheWordResp(r0_canSentIdx)
  when(resp.fire){
    // frees the entry
    states(r0_canSentIdx).updateReturn()
  }


  /******************************************************************
   * Buffer Flush
   * 1. when io.flush.valid is true: drain store queue and ubuffer
   * 2. when io.lsq.req.bits.atomic is true: not support temporarily
   ******************************************************************/
  empty := !VecInit(states.map(_.isValid())).asUInt.orR
  io.flush.empty := empty


  /******************************************************************
   * Load Data Forward to loadunit
   *  f0: vaddr match, fast resp
   *  f1: mask & data select, merge; paddr match; resp
   *      NOTE: forward.paddr from dtlb, which is far from uncache f0
   ******************************************************************/

  // only valid store entries are forwarding candidates
  val f0_validMask = sizeMap(i => isStore(entries(i)) && states(i).isValid())
  val f0_fwdMaskCandidates = VecInit(entries.map(e => e.mask))
  val f0_fwdDataCandidates = VecInit(entries.map(e => e.data))
  val f1_fwdMaskCandidates = sizeMap(i => RegEnable(entries(i).mask, f0_validMask(i)))
  val f1_fwdDataCandidates = sizeMap(i => RegEnable(entries(i).data, f0_validMask(i)))
  val f1_tagMismatchVec = Wire(Vec(LoadPipelineWidth, Bool()))
  // a vaddr/paddr match disagreement on any port requests a drain
  f1_needDrain := f1_tagMismatchVec.asUInt.orR && !empty

  for ((forward, i) <- io.forward.zipWithIndex) {
    val f0_fwdValid = forward.valid
    val f1_fwdValid = RegNext(f0_fwdValid)

    /* f0 */
    // vaddr match
    val f0_vtagMatches = sizeMap(w => addrMatch(entries(w).vaddr, forward.vaddr))
    // "fly": entry is inflight or waiting for return; "idle": entry not yet sent
    val f0_flyTagMatches = sizeMap(w => f0_vtagMatches(w) && f0_validMask(w) && f0_fwdValid && states(w).isFwdOld())
    val f0_idleTagMatches = sizeMap(w => f0_vtagMatches(w) && f0_validMask(w) && f0_fwdValid && states(w).isFwdNew())
    // ONLY for fast use to get better timing
    val f0_flyMaskFast = shiftMaskToHigh(
      forward.vaddr,
      Mux1H(f0_flyTagMatches, f0_fwdMaskCandidates)
    ).asTypeOf(Vec(VDataBytes, Bool()))
    val f0_idleMaskFast = shiftMaskToHigh(
      forward.vaddr,
      Mux1H(f0_idleTagMatches, f0_fwdMaskCandidates)
    ).asTypeOf(Vec(VDataBytes, Bool()))

    /* f1 */
    val f1_flyTagMatches = RegEnable(f0_flyTagMatches, f0_fwdValid)
    val f1_idleTagMatches = RegEnable(f0_idleTagMatches, f0_fwdValid)
    val f1_fwdPAddr = RegEnable(forward.paddr, f0_fwdValid)
    // select
    val f1_flyMask = Mux1H(f1_flyTagMatches, f1_fwdMaskCandidates)
    val f1_flyData = Mux1H(f1_flyTagMatches, f1_fwdDataCandidates)
    val f1_idleMask = Mux1H(f1_idleTagMatches, f1_fwdMaskCandidates)
    val f1_idleData = Mux1H(f1_idleTagMatches, f1_fwdDataCandidates)
    // merge old(inflight) and new(idle); the idle (newer) bytes take priority
    val (f1_fwdDataTmp, f1_fwdMaskTmp) = doMerge(f1_flyData, f1_flyMask, f1_idleData, f1_idleMask)
    val f1_fwdMask = shiftMaskToHigh(f1_fwdPAddr, f1_fwdMaskTmp).asTypeOf(Vec(VDataBytes, Bool()))
    val f1_fwdData = shiftDataToHigh(f1_fwdPAddr, f1_fwdDataTmp).asTypeOf(Vec(VDataBytes, UInt(8.W)))
    // paddr match and mismatch judge
    val f1_ptagMatches = sizeMap(w => addrMatch(RegEnable(entries(w).addr, f0_fwdValid), f1_fwdPAddr))
    f1_tagMismatchVec(i) := sizeMap(w =>
      RegEnable(f0_vtagMatches(w), f0_fwdValid) =/= f1_ptagMatches(w) && RegEnable(f0_validMask(w), f0_fwdValid) && f1_fwdValid
    ).asUInt.orR
    XSDebug(
      f1_tagMismatchVec(i),
      "forward tag mismatch: pmatch %x vmatch %x vaddr %x paddr %x\n",
      f1_ptagMatches.asUInt,
      RegEnable(f0_vtagMatches.asUInt, f0_fwdValid),
      RegEnable(forward.vaddr, f0_fwdValid),
      RegEnable(forward.paddr, f0_fwdValid)
    )
    // response
    forward.addrInvalid := false.B // addr in ubuffer is always ready
    forward.dataInvalid := false.B // data in ubuffer is always ready
    forward.matchInvalid := f1_tagMismatchVec(i) // paddr / vaddr cam result does not match
    for (j <- 0 until VDataBytes) {
      forward.forwardMaskFast(j) := f0_flyMaskFast(j) || f0_idleMaskFast(j)

      forward.forwardData(j) := f1_fwdData(j)
      forward.forwardMask(j) := false.B
      when(f1_fwdMask(j) && f1_fwdValid) {
        forward.forwardMask(j) := true.B
      }
    }

  }


  /******************************************************************
   * Debug / Performance
   ******************************************************************/

  /* Debug Counters */
  // print all input/output requests for debug purpose
  // print req/resp
  XSDebug(req.fire, "req cmd: %x addr: %x data: %x mask: %x\n",
    req.bits.cmd, req.bits.addr, req.bits.data, req.bits.mask)
  // print the data of the response being returned (not of the current request)
  XSDebug(resp.fire, "data: %x\n", resp.bits.data)
  // print tilelink messages
  XSDebug(mem_acquire.valid, "mem_acquire valid, ready=%d ", mem_acquire.ready)
  mem_acquire.bits.dump(mem_acquire.valid)

  XSDebug(mem_grant.fire, "mem_grant fire ")
  mem_grant.bits.dump(mem_grant.fire)

  /* Performance Counters */
  XSPerfAccumulate("e0_reject", e0_reject && e0_req_valid)
  XSPerfAccumulate("e0_total_enter", e0_fire)
  XSPerfAccumulate("e0_merge", e0_fire && e0_canMerge)
  XSPerfAccumulate("e0_alloc_simple", e0_fire && e0_canAlloc && !e0_allocWaitSame)
  XSPerfAccumulate("e0_alloc_wait_same", e0_fire && e0_canAlloc && e0_allocWaitSame)
  // NOTE(review): the q0_* counters use q0_canSent (A-channel valid) rather
  // than mem_acquire.fire -- confirm that counting stalled cycles is intended.
  XSPerfAccumulate("q0_acquire", q0_canSent)
  XSPerfAccumulate("q0_acquire_store", q0_canSent && q0_isStore)
  XSPerfAccumulate("q0_acquire_load", q0_canSent && !q0_isStore)
  XSPerfAccumulate("uncache_memBackTypeMM", io.lsq.req.fire && io.lsq.req.bits.memBackTypeMM)
  XSPerfAccumulate("uncache_mmio_store", io.lsq.req.fire && isStore(io.lsq.req.bits.cmd) && !io.lsq.req.bits.nc)
  XSPerfAccumulate("uncache_mmio_load", io.lsq.req.fire && !isStore(io.lsq.req.bits.cmd) && !io.lsq.req.bits.nc)
  XSPerfAccumulate("uncache_nc_store", io.lsq.req.fire && isStore(io.lsq.req.bits.cmd) && io.lsq.req.bits.nc)
  XSPerfAccumulate("uncache_nc_load", io.lsq.req.fire && !isStore(io.lsq.req.bits.cmd) && io.lsq.req.bits.nc)
  XSPerfAccumulate("uncache_outstanding", uState =/= s_idle && mem_acquire.fire)
  XSPerfAccumulate("forward_count", PopCount(io.forward.map(_.forwardMask.asUInt.orR)))
  XSPerfAccumulate("forward_vaddr_match_failed", PopCount(f1_tagMismatchVec))

  val perfEvents = Seq(
    ("uncache_mmio_store", io.lsq.req.fire && isStore(io.lsq.req.bits.cmd) && !io.lsq.req.bits.nc),
    ("uncache_mmio_load", io.lsq.req.fire && !isStore(io.lsq.req.bits.cmd) && !io.lsq.req.bits.nc),
    ("uncache_nc_store", io.lsq.req.fire && isStore(io.lsq.req.bits.cmd) && io.lsq.req.bits.nc),
    ("uncache_nc_load", io.lsq.req.fire && !isStore(io.lsq.req.bits.cmd) && io.lsq.req.bits.nc),
    ("uncache_outstanding", uState =/= s_idle && mem_acquire.fire),
    ("forward_count", PopCount(io.forward.map(_.forwardMask.asUInt.orR))),
    ("forward_vaddr_match_failed", PopCount(f1_tagMismatchVec))
  )

  generatePerfEvent()
  //  End
}
616