/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/
package xiangshan.mem

import org.chipsalliance.cde.config._
import chisel3._
import chisel3.util._
import utils._
import utility._
import xiangshan._
import xiangshan.backend.rob.RobPtr
import xiangshan.backend.Bundles.DynInst
import xiangshan.mem.Bundles._
import xiangshan.cache._

/** Load-to-load (read-after-read) violation queue.
  *
  * Each entry holds a valid bit (`allocated`), the load's micro-op (`uop`), a folded
  * partial physical address (stored in `paddrModule`) and a `released` flag.
  *
  *  - Enqueue: a query request is recorded when it is valid, its `lqIdx` is after
  *    `io.ldWbPtr`, and it is not flushed by `io.redirect`.
  *  - Release: when `io.release` is valid, its folded address is looked up through the last
  *    release CAM port and matching allocated entries get `released` set one cycle later.
  *  - Query: for each request, `resp.bits.rep_frm_fetch` is raised when some allocated,
  *    released entry with a younger `robIdx` matches the request's folded address.
  *  - Dequeue: an entry is freed when `io.ldWbPtr` is no longer before its `lqIdx`, when it is
  *    flushed by `io.redirect`, or when the pipeline revokes the previous cycle's allocation.
  */
class LoadQueueRAR(implicit p: Parameters) extends XSModule
  with HasDCacheParameters
  with HasCircularQueuePtrHelper
  with HasLoadHelper
  with HasPerfEvents
{
  val io = IO(new Bundle() {
    // control
    val redirect = Flipped(Valid(new Redirect))

    // violation query
    val query = Vec(LoadPipelineWidth, Flipped(new LoadNukeQueryIO))

    // release cacheline
    val release = Flipped(Valid(new Release))

    // from VirtualLoadQueue
    val ldWbPtr = Input(new LqPtr)

    // global
    val lqFull = Output(Bool())

    val validCount = Output(UInt())
  })

  // Partial physical address folding parameters.
  // The folded address is PartialPAddrBits wide and only uses paddr bits at or above
  // PartialPAddrStride.
  private val PartialPAddrStride: Int = 6
  private val PartialPAddrBits: Int = 16
  private val PartialPAddrLowBits: Int = (PartialPAddrBits - PartialPAddrStride) / 2 // avoid overlap
  private val PartialPAddrHighBits: Int = PartialPAddrBits - PartialPAddrLowBits

  // Keeps a bit index only when it lies below the exclusive upper bound `h`.
  private def boundary(x: Int, h: Int) = if (x < h) Some(x) else None

  // Source bit indices XOR-ed together for every bit of the low part of the folded address.
  private def lowMapping = (0 until PartialPAddrLowBits).map(i => Seq(
      boundary(PartialPAddrStride + i, PartialPAddrBits),
      boundary(PartialPAddrBits - i - 1, PartialPAddrBits)
    )
  )

  // Source bit indices XOR-ed together for every bit of the high part of the folded address.
  // The 11/22/33 strides are multiples of PartialPAddrHighBits (= 11 with the constants above);
  // indices at or beyond PAddrBits are dropped by `boundary`.
  private def highMapping = (0 until PartialPAddrHighBits).map(i => Seq(
      boundary(i + PartialPAddrStride, PAddrBits),
      boundary(i + PartialPAddrStride + 11, PAddrBits),
      boundary(i + PartialPAddrStride + 22, PAddrBits),
      boundary(i + PartialPAddrStride + 33, PAddrBits)
    )
  )

  /** Folds a physical address into a `PartialPAddrBits`-wide value by XOR-ing selected bits.
    *
    * Different physical addresses can fold to the same value, so an equality match on the
    * folded value does not imply that the full addresses are equal.
    *
    * @param paddr full physical address
    * @return `Cat(high part, low part)` of the folded address
    */
  private def genPartialPAddr(paddr: UInt) = {
    val ppaddr_low = Wire(Vec(PartialPAddrLowBits, Bool()))
    ppaddr_low.zip(lowMapping).foreach {
      case (bit, mapping) =>
        // `flatten` drops the out-of-range (None) indices.
        bit := mapping.flatten.map(idx => paddr(idx)).reduce(_ ^ _)
    }

    val ppaddr_high = Wire(Vec(PartialPAddrHighBits, Bool()))
    ppaddr_high.zip(highMapping).foreach {
      case (bit, mapping) =>
        bit := mapping.flatten.map(idx => paddr(idx)).reduce(_ ^ _)
    }
    Cat(ppaddr_high.asUInt, ppaddr_low.asUInt)
  }

  println("LoadQueueRAR: size: " + LoadQueueRARSize)
  //  LoadQueueRAR field
  //  +-------+-------+-------+----------+
  //  | Valid |  Uop  | PAddr | Released |
  //  +-------+-------+-------+----------+
  //
  //  Field descriptions:
  //  Allocated   : entry is valid.
  //  MicroOp     : Micro-op
  //  PAddr       : folded partial physical address (see genPartialPAddr).
  //  Released    : DCache released.
  val allocated = RegInit(VecInit(List.fill(LoadQueueRARSize)(false.B))) // The control signals need to explicitly indicate the initial value
  val uop = Reg(Vec(LoadQueueRARSize, new DynInst))
  val paddrModule = Module(new LqPAddrModule(
    gen = UInt(PartialPAddrBits.W),
    numEntries = LoadQueueRARSize,
    numRead = LoadPipelineWidth,
    numWrite = LoadPipelineWidth,
    numWBank = LoadQueueNWriteBanks,
    numWDelay = 2,
    numCamPort = LoadPipelineWidth,
    enableCacheLineCheck = false, // Now `RARQueue` has no need to check cacheline.
    paddrOffset = 0 // If you need to check cacheline, set the offset relative to the original paddr correctly.
  ))
  // Default every port; the ports actually used are driven below (later connections win).
  paddrModule.io := DontCare
  val released = RegInit(VecInit(List.fill(LoadQueueRARSize)(false.B)))

  // freelist: store valid entries index.
  // +---+---+--------------+-----+-----+
  // | 0 | 1 |      ......  | n-2 | n-1 |
  // +---+---+--------------+-----+-----+
  val freeList = Module(new FreeList(
    size = LoadQueueRARSize,
    allocWidth = LoadPipelineWidth,
    freeWidth = 4,
    enablePreAlloc = false,
    moduleName = "LoadQueueRAR freelist"
  ))
  freeList.io := DontCare

  // Real-allocation: load_s2
  // PAddr write needs 2 cycles, release signal should delay 1 cycle so that
  // load enqueue can catch release.
  val release1Cycle = io.release
  // val release2Cycle = RegNext(io.release)
  // val release2Cycle_dup_lsu = RegNext(io.release)
  val release2Cycle = RegEnable(io.release, io.release.valid)
  // NOTE(review): release2Cycle is itself a register, so this connection drives the next-state
  // of its `valid` field from another register. `valid` therefore appears to lag
  // io.release.valid by two register stages, while `bits` is captured one cycle after a valid
  // release and then held. Confirm this timing is intended.
  release2Cycle.valid := RegNext(io.release.valid)
  //val release2Cycle_dup_lsu = RegEnable(io.release, io.release.valid)

  // LoadQueueRAR enqueue condition:
  // There are still not completed load instructions before the current load instruction.
  // (e.g. "not completed" means that load instruction get the data or exception).
  val canEnqueue = io.query.map(_.req.valid)
  val cancelEnqueue = io.query.map(_.req.bits.uop.robIdx.needFlush(io.redirect))
  val hasNotWritebackedLoad = io.query.map(_.req.bits.uop.lqIdx).map(lqIdx => isAfter(lqIdx, io.ldWbPtr))
  val needEnqueue = canEnqueue.zip(hasNotWritebackedLoad).zip(cancelEnqueue).map { case ((v, r), c) => v && r && !c }

  // Allocate logic
  val acceptedVec = Wire(Vec(LoadPipelineWidth, Bool()))
  val enqIndexVec = Wire(Vec(LoadPipelineWidth, UInt(log2Up(LoadQueueRARSize).W)))

  for ((enq, w) <- io.query.map(_.req).zipWithIndex) {
    // Defaults, overridden inside the `when` below if the request is accepted.
    acceptedVec(w) := false.B
    paddrModule.io.wen(w) := false.B
    freeList.io.doAllocate(w) := false.B

    freeList.io.allocateReq(w) := true.B

    //  Allocate ready
    //  Port w takes the free-list slot indexed by the number of lower-numbered ports that
    //  also need to enqueue this cycle.
    val offset = PopCount(needEnqueue.take(w))
    val canAccept = freeList.io.canAllocate(offset)
    val enqIndex = freeList.io.allocateSlot(offset)
    //  A request that does not need an entry is always ready.
    enq.ready := Mux(needEnqueue(w), canAccept, true.B)

    enqIndexVec(w) := enqIndex
    when (needEnqueue(w) && enq.ready) {
      acceptedVec(w) := true.B

      freeList.io.doAllocate(w) := true.B
      //  Allocate new entry
      allocated(enqIndex) := true.B

      //  Write paddr
      paddrModule.io.wen(w) := true.B
      paddrModule.io.waddr(w) := enqIndex
      paddrModule.io.wdata(w) := genPartialPAddr(enq.bits.paddr)

      //  Fill info
      uop(enqIndex) := enq.bits.uop
      //  NC is uncachable and will not be explicitly released.
      //  So NC requests are not allowed to have RAR
      //  Otherwise the entry starts as released only if its data is valid and its cacheline
      //  address (full paddr above DCacheLineOffset) equals that of a current or delayed release.
      released(enqIndex) := enq.bits.is_nc || (
        enq.bits.data_valid &&
        (release2Cycle.valid &&
        enq.bits.paddr(PAddrBits-1, DCacheLineOffset) === release2Cycle.bits.paddr(PAddrBits-1, DCacheLineOffset) ||
        release1Cycle.valid &&
        enq.bits.paddr(PAddrBits-1, DCacheLineOffset) === release1Cycle.bits.paddr(PAddrBits-1, DCacheLineOffset))
      )
    }
    val debug_robIdx = enq.bits.uop.robIdx.asUInt
    XSError(
      needEnqueue(w) && enq.ready && allocated(enqIndex),
      p"LoadQueueRAR: You can not write an valid entry! check: ldu $w, robIdx $debug_robIdx")
  }

  // LoadQueueRAR deallocate
  val freeMaskVec = Wire(Vec(LoadQueueRARSize, Bool()))

  // init
  freeMaskVec.foreach(e => e := false.B)

  // when the loads that "older than" current load were writebacked,
  // current load will be released.
  for (i <- 0 until LoadQueueRARSize) {
    val deqNotBlock = !isBefore(io.ldWbPtr, uop(i).lqIdx)
    val needFlush = uop(i).robIdx.needFlush(io.redirect)

    when (allocated(i) && (deqNotBlock || needFlush)) {
      allocated(i) := false.B
      freeMaskVec(i) := true.B
    }
  }

  // if need replay revoke entry
  // Registered copies of the accept flags and allocated indices of the previous request
  // (presumably delayed by one cycle; GatedRegNext is defined outside this file).
  val lastCanAccept = GatedRegNext(acceptedVec)
  val lastAllocIndex = GatedRegNext(enqIndexVec)

  for ((revoke, w) <- io.query.map(_.revoke).zipWithIndex) {
    val revokeValid = revoke && lastCanAccept(w)
    val revokeIndex = lastAllocIndex(w)

    when (allocated(revokeIndex) && revokeValid) {
      allocated(revokeIndex) := false.B
      freeMaskVec(revokeIndex) := true.B
    }
  }

  freeList.io.free := freeMaskVec.asUInt

  // LoadQueueRAR Query
  // Load-to-Load violation check condition:
  // 1. Physical address match by CAM port (on the folded partial address).
  // 2. released flag is set (set at enqueue for NC loads, or by a cacheline release).
  // 3. Younger than current load instruction.
  // NOTE(review): ldLdViolation is only assigned false.B and never read in this file.
  val ldLdViolation = Wire(Vec(LoadPipelineWidth, Bool()))
  //val allocatedUInt = RegNext(allocated.asUInt)
  for ((query, w) <- io.query.zipWithIndex) {
    ldLdViolation(w) := false.B
    paddrModule.io.releaseViolationMdata(w) := genPartialPAddr(query.req.bits.paddr)

    query.resp.valid := RegNext(query.req.valid)
    // Generate real violation mask
    val robIdxMask = VecInit(uop.map(_.robIdx).map(isAfter(_, query.req.bits.uop.robIdx)))
    val matchMaskReg = Wire(Vec(LoadQueueRARSize, Bool()))
    for (i <- 0 until LoadQueueRARSize) {
      matchMaskReg(i) := (allocated(i) &
                         paddrModule.io.releaseViolationMmask(w)(i) &
                         robIdxMask(i) &&
                         released(i))
    }
    val matchMask = GatedValidRegNext(matchMaskReg)
    //  Load-to-Load violation check result
    val ldLdViolationMask = matchMask
    ldLdViolationMask.suggestName("ldLdViolationMask_" + w)
    query.resp.bits.rep_frm_fetch := ParallelORR(ldLdViolationMask)
  }


  // When io.release.valid (release1cycle.valid), it uses the last ld-ld paddr cam port to
  // update release flag in 1 cycle
  // NOTE(review): releaseVioMask is declared but never read or written in this file.
  val releaseVioMask = Reg(Vec(LoadQueueRARSize, Bool()))
  when (release1Cycle.valid) {
    paddrModule.io.releaseMdata.takeRight(1)(0) := genPartialPAddr(release1Cycle.bits.paddr)
  }

  (0 until LoadQueueRARSize).foreach(i => {
    when (RegNext((paddrModule.io.releaseMmask.takeRight(1)(0)(i)) && allocated(i) && release1Cycle.valid)) {
      // Note: if a load has missed in dcache and is waiting for refill in load queue,
      // its released flag still needs to be set as true if addr matches.
      released(i) := true.B
    }
  })

  io.lqFull := freeList.io.empty
  io.validCount := freeList.io.validCount

  // perf cnt
  val canEnqCount = PopCount(io.query.map(_.req.fire))
  val validCount = freeList.io.validCount
  val allowEnqueue = validCount <= (LoadQueueRARSize - LoadPipelineWidth).U
  val ldLdViolationCount = PopCount(io.query.map(_.resp).map(resp => resp.valid && resp.bits.rep_frm_fetch))

  QueuePerf(LoadQueueRARSize, validCount, !allowEnqueue)
  XSPerfAccumulate("enq", canEnqCount)
  XSPerfAccumulate("ld_ld_violation", ldLdViolationCount)
  val perfEvents: Seq[(String, UInt)] = Seq(
    ("enq", canEnqCount),
    ("ld_ld_violation", ldLdViolationCount)
  )
  generatePerfEvent()
  // End
}