/***************************************************************************************
* Copyright (c) 2024 Beijing Institute of Open Source Chip (BOSC)
* Copyright (c) 2020-2024 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/
package xiangshan.mem

import org.chipsalliance.cde.config._
import chisel3._
import chisel3.util._
import utils._
import utility._
import xiangshan._
import xiangshan.ExceptionNO._
import xiangshan.backend.rob.{RobLsqIO, RobPtr}
import xiangshan.backend.Bundles.{DynInst, MemExuOutput, UopIdx}
import xiangshan.backend.fu.FuConfig.LduCfg
import xiangshan.backend.decode.isa.bitfield.{InstVType, XSInstBitFields}
import xiangshan.backend.fu.FuType
import xiangshan.mem.Bundles._
import xiangshan.cache._

/**
 * VirtualLoadQueue: bookkeeping queue for all in-flight load micro-ops.
 *
 * Each entry tracks whether it is allocated, which ROB entry / uop it belongs
 * to, whether it is a vector load, and whether it has "committed" (finished in
 * the load pipeline and is safe to dequeue). The module maintains a circular
 * enqueue/dequeue pointer pair (`LqPtr`), recovers the enqueue pointer after a
 * redirect, and reports dequeue / cancel counts back to dispatch.
 *
 * IO summary (all widths come from project parameters):
 *  - redirect     : flush request; entries whose robIdx needs flushing are freed
 *  - vecCommit    : per-pipeline vector-load commit feedback
 *  - enq          : dispatch enqueue port (LqEnqIO)
 *  - ldin         : writeback from load-unit stage 3
 *  - ldWbPtr      : current dequeue pointer, consumed by LoadQueueReplay/RAR
 *  - lqDeq/lqCancelCnt : dequeue / redirect-cancel counts, consumed by dispatch
 */
class VirtualLoadQueue(implicit p: Parameters) extends XSModule
  with HasDCacheParameters
  with HasCircularQueuePtrHelper
  with HasLoadHelper
  with HasPerfEvents
  with HasVLSUParameters {
  val io = IO(new Bundle() {
    // control
    val redirect = Flipped(Valid(new Redirect))
    val vecCommit = Vec(VecLoadPipelineWidth, Flipped(ValidIO(new FeedbackToLsqIO)))
    // from dispatch
    val enq = new LqEnqIO
    // from ldu s3
    val ldin = Vec(LoadPipelineWidth, Flipped(DecoupledIO(new LqWriteBundle)))
    // to LoadQueueReplay and LoadQueueRAR
    val ldWbPtr = Output(new LqPtr)
    // global
    val lqFull = Output(Bool())
    val lqEmpty = Output(Bool())
    // to dispatch
    val lqDeq = Output(UInt(log2Up(CommitWidth + 1).W))
    val lqCancelCnt = Output(UInt(log2Up(VirtualLoadQueueSize+1).W))
    // for topdown
    val noUopsIssued = Input(Bool())
  })

  println("VirtualLoadQueue: size: " + VirtualLoadQueueSize)
  //  VirtualLoadQueue field
  //  +-----------+---------+-------+
  //  | Allocated | MicroOp | Flags |
  //  +-----------+---------+-------+
  //  Allocated   : entry has been allocated already
  //  MicroOp     : inst's microOp
  //  Flags       : load flags
  val allocated = RegInit(VecInit(List.fill(VirtualLoadQueueSize)(false.B))) // The control signals need to explicitly indicate the initial value
  val robIdx = Reg(Vec(VirtualLoadQueueSize, new RobPtr))        // owning ROB entry, used for redirect flush matching
  val uopIdx = Reg(Vec(VirtualLoadQueueSize, UopIdx()))          // uop index inside a vector instruction
  val isvec = RegInit(VecInit(List.fill(VirtualLoadQueueSize)(false.B))) // vector load flow
  val committed = Reg(Vec(VirtualLoadQueueSize, Bool()))         // entry finished; eligible for dequeue

  /**
   * used for debug
   */
  val debug_mmio = Reg(Vec(VirtualLoadQueueSize, Bool())) // mmio: inst is an mmio inst
  val debug_paddr = Reg(Vec(VirtualLoadQueueSize, UInt(PAddrBits.W))) // mmio: inst's paddr

  // maintain pointers
  val enqPtrExt = RegInit(VecInit((0 until io.enq.req.length).map(_.U.asTypeOf(new LqPtr))))
  val enqPtr = enqPtrExt(0).value
  val deqPtr = Wire(new LqPtr)
  val deqPtrNext = Wire(new LqPtr)

  /**
   * update pointer
   */
  // Redirect is pipelined two cycles so cancel counts (computed one cycle after
  // the redirect) are available when the enqueue pointer is rolled back.
  val lastCycleRedirect = RegNext(io.redirect)
  val lastLastCycleRedirect = RegNext(lastCycleRedirect)

  val validCount = distanceBetween(enqPtrExt(0), deqPtr)
  // Conservative: keep LSQLdEnqWidth slots free so a full dispatch group always fits.
  val allowEnqueue = validCount <= (VirtualLoadQueueSize - LSQLdEnqWidth).U
  val canEnqueue = io.enq.req.map(_.valid)
  // Number of LSQ entries each enqueue request needs (numLsElem; >1 for vector loads).
  val vLoadFlow = io.enq.req.map(_.bits.numLsElem.asTypeOf(UInt(elemIdxBits.W)))
  val needCancel = WireInit(VecInit((0 until VirtualLoadQueueSize).map(i => {
    robIdx(i).needFlush(io.redirect) && allocated(i)
  })))
  val lastNeedCancel = GatedValidRegNext(needCancel)
  // Requests enqueuing in the same cycle as a redirect that flushes them.
  val enqCancel = canEnqueue.zip(io.enq.req).map{case (v , x) =>
    v && x.bits.robIdx.needFlush(io.redirect)
  }
  val enqCancelNum = enqCancel.zip(vLoadFlow).map{case (v, flow) =>
    Mux(v, flow, 0.U)
  }
  val lastEnqCancel = GatedRegNext(enqCancelNum.reduce(_ + _))
  val lastCycleCancelCount = PopCount(lastNeedCancel)
  // Total entries to roll back: flushed resident entries + cancelled same-cycle enqueues.
  // Only sampled in the cycle right after a valid redirect.
  val redirectCancelCount = RegEnable(lastCycleCancelCount + lastEnqCancel, 0.U, lastCycleRedirect.valid)

  // update enqueue pointer
  val validVLoadFlow = vLoadFlow.zipWithIndex.map{case (vLoadFlowNumItem, index) => Mux(canEnqueue(index), vLoadFlowNumItem, 0.U)}
  val validVLoadOffset = vLoadFlow.zip(io.enq.needAlloc).map{case (flow, needAllocItem) => Mux(needAllocItem, flow, 0.U)}
  // Exclusive prefix of per-port element counts: offset of each port's first entry.
  val validVLoadOffsetRShift = 0.U +: validVLoadOffset.take(validVLoadFlow.length - 1)

  val enqNumber = validVLoadFlow.reduce(_ + _)
  val enqPtrExtNextVec = Wire(Vec(io.enq.req.length, new LqPtr))
  val enqPtrExtNext = Wire(Vec(io.enq.req.length, new LqPtr))
  when (lastLastCycleRedirect.valid) {
    // we recover the pointers in the next cycle after redirect
    enqPtrExtNextVec := VecInit(enqPtrExt.map(_ - redirectCancelCount))
  } .otherwise {
    enqPtrExtNextVec := VecInit(enqPtrExt.map(_ + enqNumber))
  }
  // Dispatch must not enqueue in the shadow of a redirect.
  assert(!(lastCycleRedirect.valid && enqNumber =/= 0.U))

  // After rollback the enqueue pointer must never fall behind the dequeue pointer;
  // clamp it to deqPtrNext if it would.
  when (isAfter(enqPtrExtNextVec(0), deqPtrNext)) {
    enqPtrExtNext := enqPtrExtNextVec
  } .otherwise {
    enqPtrExtNext := VecInit((0 until io.enq.req.length).map(i => deqPtrNext + i.U))
  }
  enqPtrExt := enqPtrExtNext

  // update dequeue pointer
  val DeqPtrMoveStride = CommitWidth
  require(DeqPtrMoveStride == CommitWidth, "DeqPtrMoveStride must be equal to CommitWidth!")
  val deqLookupVec = VecInit((0 until DeqPtrMoveStride).map(deqPtr + _.U))
  // An entry can dequeue when it is allocated, committed, and not the enqueue pointer itself.
  val deqLookup = VecInit(deqLookupVec.map(ptr => allocated(ptr.value) && committed(ptr.value) && ptr =/= enqPtrExt(0)))
  val deqInSameRedirectCycle = VecInit(deqLookupVec.map(ptr => needCancel(ptr.value)))
  // make chisel happy
  val deqCountMask = Wire(UInt(DeqPtrMoveStride.W))
  deqCountMask := deqLookup.asUInt & (~deqInSameRedirectCycle.asUInt).asUInt
  // Count the contiguous run of dequeue-able entries starting at deqPtr
  // (number of trailing ones of deqCountMask).
  val commitCount = PopCount(PriorityEncoderOH(~deqCountMask) - 1.U)
  val lastCommitCount = GatedRegNext(commitCount)

  // update deqPtr
  // cycle 1: generate deqPtrNext
  // cycle 2: update deqPtr
  val deqPtrUpdateEna = lastCommitCount =/= 0.U
  deqPtrNext := deqPtr + lastCommitCount
  deqPtr := RegEnable(deqPtrNext, 0.U.asTypeOf(new LqPtr), deqPtrUpdateEna)

  io.lqDeq := GatedRegNext(lastCommitCount)
  io.lqCancelCnt := redirectCancelCount
  io.ldWbPtr := deqPtr
  io.lqEmpty := RegNext(validCount === 0.U)

  /**
   * Enqueue at dispatch
   *
   * Currently, VirtualLoadQueue only allows enqueue when #emptyEntries > EnqWidth
   * Dynamic enq based on numLsElem number
   */
  io.enq.canAccept := allowEnqueue
  // Each request claims entries [lqIdx, lqIdx + numLsElem); the range may wrap
  // around the circular queue, detected by a flag mismatch between the bounds.
  val enqLowBound = io.enq.req.map(_.bits.lqIdx)
  val enqUpBound = io.enq.req.map(x => x.bits.lqIdx + x.bits.numLsElem)
  val enqCrossLoop = enqLowBound.zip(enqUpBound).map{case (low, up) => low.flag =/= up.flag}

  for(i <- 0 until VirtualLoadQueueSize) {
    val entryCanEnqSeq = (0 until io.enq.req.length).map { j =>
      // Wrapped range: entry hits if it is above the low bound OR below the high bound.
      val entryHitBound = Mux(
        enqCrossLoop(j),
        enqLowBound(j).value <= i.U || i.U < enqUpBound(j).value,
        enqLowBound(j).value <= i.U && i.U < enqUpBound(j).value
      )
      canEnqueue(j) && !enqCancel(j) && entryHitBound
    }
    val entryCanEnq = entryCanEnqSeq.reduce(_ || _)
    val selectBits = ParallelPriorityMux(entryCanEnqSeq, io.enq.req.map(_.bits))
    when (entryCanEnq) {
      allocated(i) := true.B
      robIdx(i) := selectBits.robIdx
      uopIdx(i) := selectBits.uopIdx
      isvec(i) := FuType.isVLoad(selectBits.fuType)
      committed(i) := false.B

      debug_mmio(i) := false.B
      debug_paddr(i) := 0.U
    }
  }

  for (i <- 0 until io.enq.req.length) {
    // Expected lqIdx for port i: enqueue pointer plus the element counts of all lower ports.
    val lqIdx = enqPtrExt(0) + validVLoadOffsetRShift.take(i + 1).reduce(_ + _)
    val index = io.enq.req(i).bits.lqIdx
    XSError(canEnqueue(i) && !enqCancel(i) && (!io.enq.canAccept || !io.enq.sqCanAccept), s"must accept $i\n")
    XSError(canEnqueue(i) && !enqCancel(i) && index.value =/= lqIdx.value, s"must be the same entry $i\n")
    io.enq.resp(i) := lqIdx
  }

  /**
   * Load commits
   *
   * When load commited, mark it as !allocated and move deqPtr forward.
   */
  (0 until DeqPtrMoveStride).map(i => {
    when (commitCount > i.U) {
      allocated((deqPtr+i.U).value) := false.B
    }
    XSError(commitCount > i.U && !allocated((deqPtr+i.U).value), s"why commit invalid entry $i?\n")
  })

  // vector commit or replay
  val vecLdCommittmp = Wire(Vec(VirtualLoadQueueSize, Vec(VecLoadPipelineWidth, Bool())))
  val vecLdCommit = Wire(Vec(VirtualLoadQueueSize, Bool()))
  for (i <- 0 until VirtualLoadQueueSize) {
    val cmt = io.vecCommit
    for (j <- 0 until VecLoadPipelineWidth) {
      // Match commit feedback against an entry by (robIdx, uopIdx).
      vecLdCommittmp(i)(j) := allocated(i) && cmt(j).valid && robIdx(i) === cmt(j).bits.robidx && uopIdx(i) === cmt(j).bits.uopidx
    }
    vecLdCommit(i) := vecLdCommittmp(i).reduce(_ || _)

    when (vecLdCommit(i) && isvec(i)) {
      committed(i) := true.B
    }
  }

  // misprediction recovery / exception redirect
  // invalidate lq term using robIdx
  for (i <- 0 until VirtualLoadQueueSize) {
    when (needCancel(i)) {
      allocated(i) := false.B
    }
  }

  XSDebug(p"(ready, valid): ${io.enq.canAccept}, ${Binary(Cat(io.enq.req.map(_.valid)))}\n")

  /**
   * Writeback load from load units
   *
   * Most load instructions writeback to regfile at the same time.
   * However,
   *   (1) For ready load instruction (no need replay), it writes back to ROB immediately.
   */
  for(i <- 0 until LoadPipelineWidth) {
    // most lq status need to be updated immediately after load writeback to lq
    // flag bits in lq needs to be updated accurately
    io.ldin(i).ready := true.B
    val loadWbIndex = io.ldin(i).bits.uop.lqIdx.value

    val need_rep = io.ldin(i).bits.rep_info.need_rep
    val need_valid = io.ldin(i).bits.updateAddrValid
    when (io.ldin(i).valid) {
      // Scalar loads commit here; vector loads commit via the vecCommit path above.
      when (!need_rep && need_valid && !io.ldin(i).bits.isvec) {
        committed(loadWbIndex) := true.B
        // Debug info
        debug_mmio(loadWbIndex) := io.ldin(i).bits.mmio
        debug_paddr(loadWbIndex) := io.ldin(i).bits.paddr
      }
    }
    XSInfo(io.ldin(i).valid && !need_rep && need_valid,
      "load hit write to lq idx %d pc 0x%x vaddr %x paddr %x mask %x forwardData %x forwardMask: %x mmio %x isvec %x\n",
      io.ldin(i).bits.uop.lqIdx.asUInt,
      io.ldin(i).bits.uop.pc,
      io.ldin(i).bits.vaddr,
      io.ldin(i).bits.paddr,
      io.ldin(i).bits.mask,
      io.ldin(i).bits.forwardData.asUInt,
      io.ldin(i).bits.forwardMask.asUInt,
      io.ldin(i).bits.mmio,
      io.ldin(i).bits.isvec
    )
  }

  // perf counter
  QueuePerf(VirtualLoadQueueSize, validCount, !allowEnqueue)
  val vecValidVec = WireInit(VecInit((0 until VirtualLoadQueueSize).map(i => allocated(i) && isvec(i))))
  QueuePerf(VirtualLoadQueueSize, PopCount(vecValidVec), !allowEnqueue)
  io.lqFull := !allowEnqueue

  // Topdown: a load stall is flagged when no uops issued and at least
  // NLoadNotCompleted loads are still outstanding in the queue.
  def NLoadNotCompleted = 1
  val validCountReg = RegNext(validCount)
  val stallLoad = io.noUopsIssued && (validCountReg >= NLoadNotCompleted.U)
  val memStallAnyLoad = RegNext(stallLoad)

  XSPerfAccumulate("mem_stall_anyload", memStallAnyLoad)

  val perfEvents: Seq[(String, UInt)] = Seq(
    ("MEMSTALL_ANY_LOAD", memStallAnyLoad),
  )
  generatePerfEvent()

  // debug info
  XSDebug("enqPtrExt %d:%d deqPtrExt %d:%d\n", enqPtrExt(0).flag, enqPtr, deqPtr.flag, deqPtr.value)

  // Print a single flag character when `flag` is set, a space otherwise.
  def PrintFlag(flag: Bool, name: String): Unit = {
    XSDebug(false, flag, name) // when(flag)
    XSDebug(false, !flag, " ") // otherwise
  }

  // Per-entry state dump: a = allocated, c = committed, v = vector.
  for (i <- 0 until VirtualLoadQueueSize) {
    PrintFlag(allocated(i), "a")
    PrintFlag(allocated(i) && committed(i), "c")
    PrintFlag(allocated(i) && isvec(i), "v")
    XSDebug(false, true.B, "\n")
  }
  // end
}