/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.cache

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import utils._
import utility._
import freechips.rocketchip.diplomacy.{IdRange, LazyModule, LazyModuleImp, TransferSizes}
import freechips.rocketchip.tilelink.{TLArbiter, TLBundleA, TLBundleD, TLClientNode, TLEdgeOut, TLMasterParameters, TLMasterPortParameters}
import xiangshan._
import xiangshan.mem._
import xiangshan.mem.Bundles._
import coupledL2.{MemBackTypeMM, MemBackTypeMMField, MemPageTypeNC, MemPageTypeNCField}
import difftest._

/**
  * Constants and helpers shared by every bundle/module of the uncache buffer.
  */
trait HasUncacheBufferParameters extends HasXSParameter with HasDCacheParameters {

  /**
    * Byte-granular merge of two (data, mask) pairs.
    *
    * For each of the `DataBytes` byte lanes the new byte wins when its bit in
    * `newMask` is set, otherwise the old byte is kept.
    *
    * @return (merged data, `newMask | oldMask`)
    */
  def doMerge(oldData: UInt, oldMask: UInt, newData:UInt, newMask: UInt):(UInt, UInt) = {
    val resData = VecInit((0 until DataBytes).map(j =>
      Mux(newMask(j), newData(8*(j+1)-1, 8*j), oldData(8*(j+1)-1, 8*j))
    )).asUInt
    val resMask = newMask | oldMask
    (resData, resMask)
  }

  /** Number of bits needed to index an entry of the uncache buffer. */
  def INDEX_WIDTH = log2Up(UncacheBufferSize)
  /** Number of low address bits inside one XLEN-wide block. */
  def BLOCK_OFFSET = log2Up(XLEN / 8)
  /** Drops the in-block offset bits of an address. */
  def getBlockAddr(x: UInt) = x >> BLOCK_OFFSET
}

abstract class UncacheBundle(implicit p: Parameters) extends XSBundle with HasUncacheBufferParameters

abstract class UncacheModule(implicit p: Parameters) extends XSModule with HasUncacheBufferParameters


/** Flush handshake: `valid` is the request, `empty` reports back the buffer status. */
class UncacheFlushBundle extends Bundle {
  val valid = Output(Bool())
  val empty = Input(Bool())
}

/**
  * Payload of one uncache buffer entry: the buffered request plus the error
  * flag captured from the bus response.
  */
class UncacheEntry(implicit p: Parameters) extends UncacheBundle {
  val cmd = UInt(M_SZ.W)
  val addr = UInt(PAddrBits.W)
  val vaddr = UInt(VAddrBits.W)
  val data = UInt(XLEN.W)
  val mask = UInt(DataBytes.W)
  val nc = Bool()
  val atomic = Bool()
  val memBackTypeMM = Bool()

  val resp_nderr = Bool()

  /* NOTE: if it support the internal forward logic, here can uncomment */
  // val fwd_data = UInt(XLEN.W)
  // val fwd_mask = UInt(DataBytes.W)

  /** Initialises the entry from a freshly allocated request and clears the error flag. */
  def set(x: UncacheWordReq): Unit = {
    cmd := x.cmd
    addr := x.addr
    vaddr := x.vaddr
    data := x.data
    mask := x.mask
    nc := x.nc
    memBackTypeMM := x.memBackTypeMM
    atomic := x.atomic
    resp_nderr := false.B
    // fwd_data := 0.U
    // fwd_mask := 0.U
  }

  /**
    * Merges request `x` into this entry (byte-wise, see `doMerge`) and rewrites
    * the in-block offset of `addr`/`vaddr` from the merged mask.
    */
  def update(x: UncacheWordReq): Unit = {
    val (resData, resMask) = doMerge(data, mask, x.data, x.mask)
    // mask -> get the first position as 1 -> for address align
    // NOTE(review): presumably resOffset is the index of the lowest set mask bit
    // and resFlag tells whether any bit is set -- confirm against PriorityEncoderWithFlag.
    val (resOffset, resFlag) = PriorityEncoderWithFlag(resMask)
    data := resData
    mask := resMask
    when(resFlag){
      addr := (getBlockAddr(addr) << BLOCK_OFFSET) | resOffset
      vaddr := (getBlockAddr(vaddr) << BLOCK_OFFSET) | resOffset
    }
  }

  /**
    * Records a D-channel response: read data is captured only for `M_XRD`
    * entries; `denied` or `corrupt` sets the error flag.
    */
  def update(x: TLBundleD): Unit = {
    when(cmd === MemoryOpConstants.M_XRD) {
      data := x.data
    }
    resp_nderr := x.denied || x.corrupt
  }

  // def update(forwardData: UInt, forwardMask: UInt): Unit = {
  //   fwd_data := forwardData
  //   fwd_mask := forwardMask
  // }

  /**
    * Builds the response sent back to the LSQ for this entry.
    *
    * @param eid index of this entry in the buffer, returned as `id`
    */
  def toUncacheWordResp(eid: UInt): UncacheWordResp = {
    // val resp_fwd_data = VecInit((0 until DataBytes).map(j =>
    //   Mux(fwd_mask(j), fwd_data(8*(j+1)-1, 8*j), data(8*(j+1)-1, 8*j))
    // )).asUInt
    val resp_fwd_data = data
    val r = Wire(new UncacheWordResp)
    // fields that are not assigned below stay DontCare
    r := DontCare
    r.data := resp_fwd_data
    r.id := eid
    r.nderr := resp_nderr
    r.nc := nc
    r.is2lq := cmd === MemoryOpConstants.M_XRD
    r.miss := false.B
    r.replay := false.B
    r.tag_error := false.B
    r.error := false.B
    r
  }
}

/**
  * Control state of one uncache buffer entry.
  */
class UncacheEntryState(implicit p: Parameters) extends DCacheBundle {
  // valid (-> waitSame) -> inflight -> waitReturn
  val valid = Bool()
  val inflight = Bool() // uncache -> L2
  val waitSame = Bool()
  val waitReturn = Bool() // uncache -> LSQ

  def init: Unit = {
    valid := false.B
    inflight := false.B
    waitSame := false.B
    waitReturn := false.B
  }

  def isValid(): Bool = valid
  def isInflight(): Bool = valid && inflight
  def isWaitReturn(): Bool = valid && waitReturn
  def isWaitSame(): Bool = valid && waitSame
  // may be issued on the bus: allocated and in none of the later/blocked states
  def can2Bus(): Bool = valid && !inflight && !waitSame && !waitReturn
  def can2Lsq(): Bool = valid && waitReturn
  def canMerge(): Bool = valid && !inflight
  // "old" = already sent or answered, "new" = still waiting to be sent
  def isFwdOld(): Bool = valid && (inflight || waitReturn)
  def isFwdNew(): Bool = valid && !inflight && !waitReturn

  def setValid(x: Bool): Unit = { valid := x }
  def setInflight(x: Bool): Unit = { inflight := x }
  def setWaitReturn(x: Bool): Unit = { waitReturn := x }
  def setWaitSame(x: Bool): Unit = { waitSame := x }

  /** Bus response received: inflight -> waitReturn. */
  def updateUncacheResp(): Unit = {
    assert(inflight, "The request was not sent and a response was received")
    inflight := false.B
    waitReturn := true.B
  }
  /** Response handed to the LSQ: release the entry. */
  def updateReturn(): Unit = {
    valid := false.B
    inflight := false.B
    waitSame := false.B
    waitReturn := false.B
  }
}

class UncacheIO(implicit p: Parameters) extends DCacheBundle {
  val hartId = Input(UInt())
  // when low, a new bus request is only issued while the non-outstanding FSM is idle
  val enableOutstanding = Input(Bool())
  val flush = Flipped(new UncacheFlushBundle)
  val lsq = Flipped(new UncacheWordIO)
  val forward = Vec(LoadPipelineWidth, Flipped(new LoadForwardQueryIO))
}

// convert DCacheIO to TileLink
// for Now, we only deal with TL-UL

class Uncache()(implicit p: Parameters) extends LazyModule with HasXSParameter {
  override def shouldBeInlined: Boolean = false
  // one TileLink source id per buffer entry
  def idRange: Int = UncacheBufferSize

  val clientParameters = TLMasterPortParameters.v1(
    clients = Seq(TLMasterParameters.v1(
      "uncache",
      sourceId = IdRange(0, idRange)
    )),
    requestFields = Seq(MemBackTypeMMField(), MemPageTypeNCField())
  )
  val clientNode = TLClientNode(Seq(clientParameters))

  lazy val module = new UncacheImp(this)
}

/* Uncache Buffer */
class UncacheImp(outer: Uncache)extends LazyModuleImp(outer)
  with HasTLDump
  with HasXSParameter
  with HasUncacheBufferParameters
  with HasPerfEvents
{
  println(s"Uncahe Buffer Size: $UncacheBufferSize entries")
  val io = IO(new UncacheIO)

  val (bus, edge) = outer.clientNode.out.head

  val req  = io.lsq.req
  val resp = io.lsq.resp
  val mem_acquire = bus.a
  val mem_grant   = bus.d
  val req_ready = WireInit(false.B)

  // assign default values to output signals
  bus.b.ready := false.B
  bus.c.valid := false.B
  bus.c.bits  := DontCare
  bus.d.ready := false.B
  bus.e.valid := false.B
  bus.e.bits  := DontCare
  io.lsq.req.ready := req_ready
  io.lsq.resp.valid := false.B
  io.lsq.resp.bits := DontCare


  /******************************************************************
   * Data Structure
   ******************************************************************/

  val entries = Reg(Vec(UncacheBufferSize, new UncacheEntry))
  val states = RegInit(VecInit(Seq.fill(UncacheBufferSize)(0.U.asTypeOf(new UncacheEntryState))))
  val s_idle :: s_inflight :: s_wait_return :: Nil = Enum(3)
  val uState = RegInit(s_idle)

  // drain buffer
  val empty = Wire(Bool())
  val f1_needDrain = Wire(Bool())
  val do_uarch_drain = RegInit(false.B)
  // The flag is a one-cycle-delayed copy of the drain condition: it is set only
  // while a drain is requested and the buffer is non-empty, cleared otherwise.
  // NOTE(review): the flag is not sticky; confirm that requesters keep the
  // condition asserted for as long as new requests must be rejected.
  when((f1_needDrain || io.flush.valid) && !empty){
    do_uarch_drain := true.B
  }.otherwise{
    do_uarch_drain := false.B
  }

  val q0_entry = Wire(new UncacheEntry)
  val q0_canSentIdx = Wire(UInt(INDEX_WIDTH.W))
  val q0_canSent = Wire(Bool())


  /******************************************************************
   * Functions
   ******************************************************************/
  def sizeMap[T <: Data](f: Int => T) = VecInit((0 until UncacheBufferSize).map(f))
  def sizeForeach[T <: Data](f: Int => Unit) = (0 until UncacheBufferSize).map(f)
  def isStore(e: UncacheEntry): Bool = e.cmd === MemoryOpConstants.M_XWR
  def isStore(x: UInt): Bool = x === MemoryOpConstants.M_XWR
  // all addrMatch variants compare block addresses (offset bits dropped)
  def addrMatch(x: UncacheEntry, y: UncacheWordReq) : Bool = getBlockAddr(x.addr) === getBlockAddr(y.addr)
  def addrMatch(x: UncacheWordReq, y: UncacheEntry) : Bool = getBlockAddr(x.addr) === getBlockAddr(y.addr)
  def addrMatch(x: UncacheEntry, y: UncacheEntry) : Bool = getBlockAddr(x.addr) === getBlockAddr(y.addr)
  def addrMatch(x: UInt, y: UInt) : Bool = getBlockAddr(x) === getBlockAddr(y)

  /**
    * True when `mask` selects a single byte, or a naturally aligned run of
    * 2, 4 or 8 bytes (the patterns listed below are for an 8-bit mask).
    */
  def continueAndAlign(mask: UInt): Bool = {
    val res =
      PopCount(mask) === 1.U ||
      mask === 0b00000011.U ||
      mask === 0b00001100.U ||
      mask === 0b00110000.U ||
      mask === 0b11000000.U ||
      mask === 0b00001111.U ||
      mask === 0b11110000.U ||
      mask === 0b11111111.U
    res
  }

  /**
    * Primary merge condition between an incoming request and entry `eid`:
    * same virtual block, same command, both non-cacheable (`nc`), same
    * memBackTypeMM, neither atomic, and a merged mask that is still legal.
    */
  def canMergePrimary(x: UncacheWordReq, e: UncacheEntry, eid: UInt): Bool = {
    // vaddr same, properties same
    getBlockAddr(x.vaddr) === getBlockAddr(e.vaddr) &&
      x.cmd === e.cmd && x.nc && e.nc &&
      x.memBackTypeMM === e.memBackTypeMM && !x.atomic && !e.atomic &&
      continueAndAlign(x.mask | e.mask) &&
      // not receiving uncache response, not waitReturn -> no wake-up signal in these cases
      !((mem_grant.fire && mem_grant.bits.source === eid) || states(eid).isWaitReturn())
  }

  def canMergeSecondary(eid: UInt): Bool = {
    // old entry is not inflight and not being sent in this cycle
    states(eid).canMerge() && !(q0_canSent && q0_canSentIdx === eid)
  }

  /******************************************************************
   * uState for non-outstanding
   ******************************************************************/

  switch(uState){
    is(s_idle){
      when(mem_acquire.fire){
        uState := s_inflight
      }
    }
    is(s_inflight){
      when(mem_grant.fire){
        uState := s_wait_return
      }
    }
    is(s_wait_return){
      when(resp.fire){
        uState := s_idle
      }
    }
  }


  /******************************************************************
   * Enter Buffer
   *  Version 0 (better timing)
   *    e0 judge: alloc/merge write vec
   *    e1 alloc
   *
   *  Version 1 (better performance)
   *    e0: solved in one cycle for achieving the original performance.
   *    e1: return idResp to set sid for handshake
   ******************************************************************/

  /* e0: merge/alloc */
  val e0_fire = req.fire
  val e0_req_valid = req.valid
  val e0_req = req.bits

  val e0_rejectVec = Wire(Vec(UncacheBufferSize, Bool()))
  val e0_mergeVec = Wire(Vec(UncacheBufferSize, Bool()))
  val e0_allocWaitSameVec = Wire(Vec(UncacheBufferSize, Bool()))
  sizeForeach(i => {
    val valid = e0_req_valid && states(i).isValid()
    val isAddrMatch = addrMatch(e0_req, entries(i))
    val canMerge1 = canMergePrimary(e0_req, entries(i), i.U)
    val canMerge2 = canMergeSecondary(i.U)
    // same block, but properties forbid merging -> the request must wait
    e0_rejectVec(i) := valid && isAddrMatch && !canMerge1
    // same block and mergeable into an entry that has not been sent yet
    e0_mergeVec(i) := valid && isAddrMatch && canMerge1 && canMerge2
    // same block, mergeable in principle, but the entry is already (being) sent:
    // allocate a new entry that waits for the older one
    e0_allocWaitSameVec(i) := valid && isAddrMatch && canMerge1 && !canMerge2
  })
  assert(PopCount(e0_mergeVec) <= 1.U, "Uncache buffer should not merge multiple entries")

  val e0_invalidVec = sizeMap(i => !states(i).isValid())
  val (e0_mergeIdx, e0_canMerge) = PriorityEncoderWithFlag(e0_mergeVec)
  val (e0_allocIdx, e0_canAlloc) = PriorityEncoderWithFlag(e0_invalidVec)
  val e0_allocWaitSame = e0_allocWaitSameVec.reduce(_ || _)
  val e0_sid = Mux(e0_canMerge, e0_mergeIdx, e0_allocIdx)
  val e0_reject = do_uarch_drain || (!e0_canMerge && !e0_invalidVec.asUInt.orR) || e0_rejectVec.reduce(_ || _)

  // e0_fire is used to guarantee that it will not be rejected
  // (the merge path is gated by req.valid only, the allocation path by req.fire)
  when(e0_canMerge && e0_req_valid){
    entries(e0_mergeIdx).update(e0_req)
  }.elsewhen(e0_canAlloc && e0_fire){
    entries(e0_allocIdx).set(e0_req)
    states(e0_allocIdx).setValid(true.B)
    when(e0_allocWaitSame){
      states(e0_allocIdx).setWaitSame(true.B)
    }
  }

  req_ready := !e0_reject

  /* e1: return accept */
  io.lsq.idResp.valid := RegNext(e0_fire)
  io.lsq.idResp.bits.mid := RegEnable(e0_req.id, e0_fire)
  io.lsq.idResp.bits.sid := RegEnable(e0_sid, e0_fire)
  io.lsq.idResp.bits.is2lq := RegEnable(!isStore(e0_req.cmd), e0_fire)
  io.lsq.idResp.bits.nc := RegEnable(e0_req.nc, e0_fire)

  /******************************************************************
   * Uncache Req
   *  Version 0 (better timing)
   *    q0: choose which one is sent
   *    q0: sent
   *
   *  Version 1 (better performance)
   *    solved in one cycle for achieving the original performance.
   *    NOTE: "Enter Buffer" & "Uncache Req" not a continuous pipeline,
   *          because there is no guarantee that mem_acquire will be always ready.
   ******************************************************************/

  val q0_canSentVec = sizeMap(i =>
    (io.enableOutstanding || uState === s_idle) &&
    states(i).can2Bus()
  )
  val q0_res = PriorityEncoderWithFlag(q0_canSentVec)
  q0_canSentIdx := q0_res._1
  q0_canSent := q0_res._2
  q0_entry := entries(q0_canSentIdx)

  // transfer size in bytes = number of enabled byte lanes; only 1/2/4/8 are legal
  val size = PopCount(q0_entry.mask)
  val (lgSize, legal) = PriorityMuxWithFlag(Seq(
    1.U -> 0.U,
    2.U -> 1.U,
    4.U -> 2.U,
    8.U -> 3.U
  ).map(m => (size===m._1) -> m._2))
  assert(!(q0_canSent && !legal))

  val q0_load = edge.Get(
    fromSource      = q0_canSentIdx,
    toAddress       = q0_entry.addr,
    lgSize          = lgSize
  )._2

  val q0_store = edge.Put(
    fromSource      = q0_canSentIdx,
    toAddress       = q0_entry.addr,
    lgSize          = lgSize,
    data            = q0_entry.data,
    mask            = q0_entry.mask
  )._2

  val q0_isStore = q0_entry.cmd === MemoryOpConstants.M_XWR

  mem_acquire.valid := q0_canSent
  mem_acquire.bits := Mux(q0_isStore, q0_store, q0_load)
  mem_acquire.bits.user.lift(MemBackTypeMM).foreach(_ := q0_entry.memBackTypeMM)
  mem_acquire.bits.user.lift(MemPageTypeNC).foreach(_ := q0_entry.nc)
  when(mem_acquire.fire){
    states(q0_canSentIdx).setInflight(true.B)

    // q0 should judge whether wait same block
    (0 until UncacheBufferSize).map(j =>
      when(q0_canSentIdx =/= j.U && states(j).isValid() && !states(j).isWaitReturn() && addrMatch(q0_entry, entries(j))){
        states(j).setWaitSame(true.B)
      }
    )
  }

  // uncache store but memBackTypeMM should update the golden memory
  if (env.EnableDifftest) {
    val difftest = DifftestModule(new DiffUncacheMMStoreEvent, delay = 1)
    difftest.coreid := io.hartId
    difftest.index  := 0.U
    difftest.valid  := mem_acquire.fire && isStore(entries(q0_canSentIdx)) && entries(q0_canSentIdx).memBackTypeMM
    difftest.addr   := entries(q0_canSentIdx).addr
    difftest.data   := entries(q0_canSentIdx).data.asTypeOf(Vec(DataBytes, UInt(8.W)))
    difftest.mask   := entries(q0_canSentIdx).mask
  }

  /******************************************************************
   * Uncache Resp
   ******************************************************************/

  val (_, _, refill_done, _) = edge.addr_inc(mem_grant)

  mem_grant.ready := true.B
  when (mem_grant.fire) {
    // the TileLink source id is the buffer entry index (see fromSource above)
    val id = mem_grant.bits.source
    entries(id).update(mem_grant.bits)
    states(id).updateUncacheResp()
    assert(refill_done, "Uncache response should be one beat only!")

    // remove state of wait same block
    (0 until UncacheBufferSize).map(j =>
      when(id =/= j.U && states(j).isValid() && states(j).isWaitSame() && addrMatch(entries(id), entries(j))){
        states(j).setWaitSame(false.B)
      }
    )
  }

  /******************************************************************
   * Return to LSQ
   ******************************************************************/

  val r0_canSentVec = sizeMap(i => states(i).can2Lsq())
  val (r0_canSentIdx, r0_canSent) = PriorityEncoderWithFlag(r0_canSentVec)
  resp.valid := r0_canSent
  resp.bits := entries(r0_canSentIdx).toUncacheWordResp(r0_canSentIdx)
  when(resp.fire){
    states(r0_canSentIdx).updateReturn()
  }


  /******************************************************************
   * Buffer Flush
   * 1. when io.flush.valid is true: drain store queue and ubuffer
   * 2. when io.lsq.req.bits.atomic is true: not support temporarily
   ******************************************************************/
  empty := !VecInit(states.map(_.isValid())).asUInt.orR
  io.flush.empty := empty


  /******************************************************************
   * Load Data Forward to loadunit
   *  f0: vaddr match, fast resp
   *  f1: mask & data select, merge; paddr match; resp
   *  NOTE: forward.paddr from dtlb, which is far from uncache f0
   ******************************************************************/

  // only valid store entries can forward
  val f0_validMask = sizeMap(i => isStore(entries(i)) && states(i).isValid())
  val f0_fwdMaskCandidates = VecInit(entries.map(e => e.mask))
  val f0_fwdDataCandidates = VecInit(entries.map(e => e.data))
  val f1_fwdMaskCandidates = sizeMap(i => RegEnable(entries(i).mask, f0_validMask(i)))
  val f1_fwdDataCandidates = sizeMap(i => RegEnable(entries(i).data, f0_validMask(i)))
  val f1_tagMismatchVec = Wire(Vec(LoadPipelineWidth, Bool()))
  f1_needDrain := f1_tagMismatchVec.asUInt.orR && !empty

  for ((forward, i) <- io.forward.zipWithIndex) {
    val f0_fwdValid = forward.valid
    val f1_fwdValid = RegNext(f0_fwdValid)

    /* f0 */
    // vaddr match
    val f0_vtagMatches = sizeMap(w => addrMatch(entries(w).vaddr, forward.vaddr))
    val f0_flyTagMatches = sizeMap(w => f0_vtagMatches(w) && f0_validMask(w) && f0_fwdValid && states(w).isFwdOld())
    val f0_idleTagMatches = sizeMap(w => f0_vtagMatches(w) && f0_validMask(w) && f0_fwdValid && states(w).isFwdNew())
    // ONLY for fast use to get better timing
    val f0_flyMaskFast = shiftMaskToHigh(
      forward.vaddr,
      Mux1H(f0_flyTagMatches, f0_fwdMaskCandidates)
    ).asTypeOf(Vec(VDataBytes, Bool()))
    val f0_idleMaskFast = shiftMaskToHigh(
      forward.vaddr,
      Mux1H(f0_idleTagMatches, f0_fwdMaskCandidates)
    ).asTypeOf(Vec(VDataBytes, Bool()))

    /* f1 */
    val f1_flyTagMatches = RegEnable(f0_flyTagMatches, f0_fwdValid)
    val f1_idleTagMatches = RegEnable(f0_idleTagMatches, f0_fwdValid)
    val f1_fwdPAddr = RegEnable(forward.paddr, f0_fwdValid)
    // select
    val f1_flyMask = Mux1H(f1_flyTagMatches, f1_fwdMaskCandidates)
    val f1_flyData = Mux1H(f1_flyTagMatches, f1_fwdDataCandidates)
    val f1_idleMask = Mux1H(f1_idleTagMatches, f1_fwdMaskCandidates)
    val f1_idleData = Mux1H(f1_idleTagMatches, f1_fwdDataCandidates)
    // merge old(inflight) and new(idle); the new (idle) entry wins per byte
    val (f1_fwdDataTmp, f1_fwdMaskTmp) = doMerge(f1_flyData, f1_flyMask, f1_idleData, f1_idleMask)
    val f1_fwdMask = shiftMaskToHigh(f1_fwdPAddr, f1_fwdMaskTmp).asTypeOf(Vec(VDataBytes, Bool()))
    val f1_fwdData = shiftDataToHigh(f1_fwdPAddr, f1_fwdDataTmp).asTypeOf(Vec(VDataBytes, UInt(8.W)))
    // paddr match and mismatch judge
    val f1_ptagMatches = sizeMap(w => addrMatch(RegEnable(entries(w).addr, f0_fwdValid), f1_fwdPAddr))
    f1_tagMismatchVec(i) := sizeMap(w =>
      RegEnable(f0_vtagMatches(w), f0_fwdValid) =/= f1_ptagMatches(w) && RegEnable(f0_validMask(w), f0_fwdValid) && f1_fwdValid
    ).asUInt.orR
    XSDebug(
      f1_tagMismatchVec(i),
      "forward tag mismatch: pmatch %x vmatch %x vaddr %x paddr %x\n",
      f1_ptagMatches.asUInt,
      RegEnable(f0_vtagMatches.asUInt, f0_fwdValid),
      RegEnable(forward.vaddr, f0_fwdValid),
      RegEnable(forward.paddr, f0_fwdValid)
    )
    // response
    forward.addrInvalid := false.B // addr in ubuffer is always ready
    forward.dataInvalid := false.B // data in ubuffer is always ready
    forward.matchInvalid := f1_tagMismatchVec(i) // paddr / vaddr cam result does not match
    for (j <- 0 until VDataBytes) {
      forward.forwardMaskFast(j) := f0_flyMaskFast(j) || f0_idleMaskFast(j)

      forward.forwardData(j) := f1_fwdData(j)
      forward.forwardMask(j) := false.B
      when(f1_fwdMask(j) && f1_fwdValid) {
        forward.forwardMask(j) := true.B
      }
    }

  }


  /******************************************************************
   * Debug / Performance
   ******************************************************************/

  /* Debug Counters */
  // print all input/output requests for debug purpose
  // print req/resp
  XSDebug(req.fire, "req cmd: %x addr: %x data: %x mask: %x\n",
    req.bits.cmd, req.bits.addr, req.bits.data, req.bits.mask)
  // log the payload of the response that is firing (not the request payload)
  XSDebug(resp.fire, "data: %x\n", resp.bits.data)
  // print tilelink messages
  XSDebug(mem_acquire.valid, "mem_acquire valid, ready=%d ", mem_acquire.ready)
  mem_acquire.bits.dump(mem_acquire.valid)

  XSDebug(mem_grant.fire, "mem_grant fire ")
  mem_grant.bits.dump(mem_grant.fire)

  /* Performance Counters */
  XSPerfAccumulate("e0_reject", e0_reject && e0_req_valid)
  XSPerfAccumulate("e0_total_enter", e0_fire)
  XSPerfAccumulate("e0_merge", e0_fire && e0_canMerge)
  XSPerfAccumulate("e0_alloc_simple", e0_fire && e0_canAlloc && !e0_allocWaitSame)
  XSPerfAccumulate("e0_alloc_wait_same", e0_fire && e0_canAlloc && e0_allocWaitSame)
  // NOTE(review): the q0_* counters are driven by q0_canSent (mem_acquire.valid),
  // i.e. they count offered cycles, not accepted beats -- confirm this is intended.
  XSPerfAccumulate("q0_acquire", q0_canSent)
  XSPerfAccumulate("q0_acquire_store", q0_canSent && q0_isStore)
  XSPerfAccumulate("q0_acquire_load", q0_canSent && !q0_isStore)
  XSPerfAccumulate("uncache_memBackTypeMM", io.lsq.req.fire && io.lsq.req.bits.memBackTypeMM)
  XSPerfAccumulate("uncache_mmio_store", io.lsq.req.fire && isStore(io.lsq.req.bits.cmd) && !io.lsq.req.bits.nc)
  XSPerfAccumulate("uncache_mmio_load", io.lsq.req.fire && !isStore(io.lsq.req.bits.cmd) && !io.lsq.req.bits.nc)
  XSPerfAccumulate("uncache_nc_store", io.lsq.req.fire && isStore(io.lsq.req.bits.cmd) && io.lsq.req.bits.nc)
  XSPerfAccumulate("uncache_nc_load", io.lsq.req.fire && !isStore(io.lsq.req.bits.cmd) && io.lsq.req.bits.nc)
  XSPerfAccumulate("uncache_outstanding", uState =/= s_idle && mem_acquire.fire)
  XSPerfAccumulate("forward_count", PopCount(io.forward.map(_.forwardMask.asUInt.orR)))
  XSPerfAccumulate("forward_vaddr_match_failed", PopCount(f1_tagMismatchVec))

  val perfEvents = Seq(
    ("uncache_mmio_store", io.lsq.req.fire && isStore(io.lsq.req.bits.cmd) && !io.lsq.req.bits.nc),
    ("uncache_mmio_load", io.lsq.req.fire && !isStore(io.lsq.req.bits.cmd) && !io.lsq.req.bits.nc),
    ("uncache_nc_store", io.lsq.req.fire && isStore(io.lsq.req.bits.cmd) && io.lsq.req.bits.nc),
    ("uncache_nc_load", io.lsq.req.fire && !isStore(io.lsq.req.bits.cmd) && io.lsq.req.bits.nc),
    ("uncache_outstanding", uState =/= s_idle && mem_acquire.fire),
    ("forward_count", PopCount(io.forward.map(_.forwardMask.asUInt.orR))),
    ("forward_vaddr_match_failed", PopCount(f1_tagMismatchVec))
  )

  generatePerfEvent()
  //  End
}