/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/
package xiangshan.mem

import org.chipsalliance.cde.config._
import chisel3._
import chisel3.util._
import utils._
import utility._
import xiangshan._
import xiangshan.backend.rob.RobPtr
import xiangshan.backend.Bundles.DynInst
import xiangshan.mem.Bundles._
import xiangshan.cache._

class LoadQueueRAR(implicit p: Parameters) extends XSModule
  with HasDCacheParameters
  with HasCircularQueuePtrHelper
  with HasLoadHelper
  with HasPerfEvents
{
  val io = IO(new Bundle() {
    // control
    val redirect = Flipped(Valid(new Redirect))

    // violation query
    val query = Vec(LoadPipelineWidth, Flipped(new LoadNukeQueryIO))

    // release cacheline
    val release = Flipped(Valid(new Release))

    // from VirtualLoadQueue
    val ldWbPtr = Input(new LqPtr)

    // global
    val lqFull = Output(Bool())

    val validCount = Output(UInt())
  })

  private val PartialPAddrStride: Int = 6
  private val PartialPAddrBits: Int = 16
  private val PartialPAddrLowBits: Int = (PartialPAddrBits - PartialPAddrStride) / 2 // avoid overlap
  private val PartialPAddrHighBits: Int = PartialPAddrBits - PartialPAddrLowBits
  private def boundary(x: Int, h: Int) = if (x < h) Some(x) else None
  private def lowMapping = (0 until PartialPAddrLowBits).map(i => Seq(
      boundary(PartialPAddrStride + i, PartialPAddrBits),
      boundary(PartialPAddrBits - i - 1, PartialPAddrBits)
    )
  )
  private def highMapping = (0 until PartialPAddrHighBits).map(i => Seq(
      boundary(i + PartialPAddrStride,      PAddrBits),
      boundary(i + PartialPAddrStride + 11, PAddrBits),
      boundary(i + PartialPAddrStride + 22, PAddrBits),
      boundary(i + PartialPAddrStride + 33, PAddrBits)
    )
  )
  // Fold the physical address (above the cacheline offset) into a PartialPAddrBits-wide
  // hash by XOR-ing selected bits. This narrows the CAM; aliasing can only produce
  // extra (conservative) matches.
  private def genPartialPAddr(paddr: UInt) = {
    val ppaddr_low = Wire(Vec(PartialPAddrLowBits, Bool()))
    ppaddr_low.zip(lowMapping).foreach {
      case (bit, mapping) =>
        bit := mapping.filter(_.isDefined).map(x => paddr(x.get)).reduce(_^_)
    }

    val ppaddr_high = Wire(Vec(PartialPAddrHighBits, Bool()))
    ppaddr_high.zip(highMapping).foreach {
      case (bit, mapping) =>
        bit := mapping.filter(_.isDefined).map(x => paddr(x.get)).reduce(_^_)
    }
    Cat(ppaddr_high.asUInt, ppaddr_low.asUInt)
  }

  println("LoadQueueRAR: size: " + LoadQueueRARSize)
  //  LoadQueueRAR field
  //  +-------+-------+-------+----------+
  //  | Valid |  Uop  | PAddr | Released |
  //  +-------+-------+-------+----------+
  //
  //  Field descriptions:
  //  Allocated : the entry is valid (in use).
  //  Uop       : the load micro-op.
  //  PAddr     : the partial (folded) physical address.
  //  Released  : the cacheline has been released by DCache.
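  //
  //  Entry lifecycle (a descriptive summary of the logic below, not part of the original
  //  comments):
  //  - Allocate   : at load_s2, when the querying load still has older, not-yet-writebacked
  //                 loads in front of it (its lqIdx is after ldWbPtr) and the freelist has a
  //                 free slot.
  //  - Released   : set at allocation if the line is being released right now (or the load is
  //                 NC with data), and later whenever DCache releases a matching cacheline.
  //  - Deallocate : once all older loads have been written back (ldWbPtr passes this lqIdx),
  //                 on a redirect flush, or when the load pipeline revokes last cycle's
  //                 allocation.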
  val allocated = RegInit(VecInit(List.fill(LoadQueueRARSize)(false.B))) // The control signals need to explicitly indicate the initial value
  val uop = Reg(Vec(LoadQueueRARSize, new DynInst))
  val paddrModule = Module(new LqPAddrModule(
    gen = UInt(PartialPAddrBits.W),
    numEntries = LoadQueueRARSize,
    numRead = LoadPipelineWidth,
    numWrite = LoadPipelineWidth,
    numWBank = LoadQueueNWriteBanks,
    numWDelay = 2,
    numCamPort = LoadPipelineWidth,
    enableCacheLineCheck = false, // The RAR queue currently has no need to check cachelines.
    paddrOffset = 0 // If cacheline checking is needed, set the offset relative to the original paddr correctly.
  ))
  paddrModule.io := DontCare
  val released = RegInit(VecInit(List.fill(LoadQueueRARSize)(false.B)))

  // freelist: stores the indexes of free (unallocated) entries.
  // +---+---+--------------+-----+-----+
  // | 0 | 1 |    ......    | n-2 | n-1 |
  // +---+---+--------------+-----+-----+
  val freeList = Module(new FreeList(
    size = LoadQueueRARSize,
    allocWidth = LoadPipelineWidth,
    freeWidth = 4,
    enablePreAlloc = false,
    moduleName = "LoadQueueRAR freelist"
  ))
  freeList.io := DontCare

  // Real allocation: load_s2
  // The paddr write takes 2 cycles, so the release signal is delayed by 1 cycle to make
  // sure an enqueuing load can still catch the release.
  val release1Cycle = io.release
  // val release2Cycle = RegNext(io.release)
  // val release2Cycle_dup_lsu = RegNext(io.release)
  val release2Cycle = RegEnable(io.release, io.release.valid)
  release2Cycle.valid := RegNext(io.release.valid)
  //val release2Cycle_dup_lsu = RegEnable(io.release, io.release.valid)

  // LoadQueueRAR enqueue condition:
  // There are still uncompleted load instructions before the current load instruction.
  // (A load counts as completed once it has got its data or taken an exception.)
  val canEnqueue = io.query.map(_.req.valid)
  val cancelEnqueue = io.query.map(_.req.bits.uop.robIdx.needFlush(io.redirect))
  val hasNotWritebackedLoad = io.query.map(_.req.bits.uop.lqIdx).map(lqIdx => isAfter(lqIdx, io.ldWbPtr))
  val needEnqueue = canEnqueue.zip(hasNotWritebackedLoad).zip(cancelEnqueue).map { case ((v, r), c) => v && r && !c }

  // Allocate logic
  val acceptedVec = Wire(Vec(LoadPipelineWidth, Bool()))
  val enqIndexVec = Wire(Vec(LoadPipelineWidth, UInt(log2Up(LoadQueueRARSize).W)))

  for ((enq, w) <- io.query.map(_.req).zipWithIndex) {
    acceptedVec(w) := false.B
    paddrModule.io.wen(w) := false.B
    freeList.io.doAllocate(w) := false.B

    freeList.io.allocateReq(w) := true.B

    // Allocate ready
    val offset = PopCount(needEnqueue.take(w))
    val canAccept = freeList.io.canAllocate(offset)
    val enqIndex = freeList.io.allocateSlot(offset)
    enq.ready := Mux(needEnqueue(w), canAccept, true.B)

    enqIndexVec(w) := enqIndex
    when (needEnqueue(w) && enq.ready) {
      acceptedVec(w) := true.B

      freeList.io.doAllocate(w) := true.B
      // Allocate a new entry
      allocated(enqIndex) := true.B

      // Write paddr
      paddrModule.io.wen(w) := true.B
      paddrModule.io.waddr(w) := enqIndex
      paddrModule.io.wdata(w) := genPartialPAddr(enq.bits.paddr)

      // Fill info
      uop(enqIndex) := enq.bits.uop
      // NC accesses are uncacheable and will never be explicitly released by DCache,
      // so NC entries are marked as released right at allocation (no RAR relaxation for NC).
      released(enqIndex) := enq.bits.is_nc || (
        enq.bits.data_valid &&
        (release2Cycle.valid &&
         enq.bits.paddr(PAddrBits-1, DCacheLineOffset) === release2Cycle.bits.paddr(PAddrBits-1, DCacheLineOffset) ||
         release1Cycle.valid &&
         enq.bits.paddr(PAddrBits-1, DCacheLineOffset) === release1Cycle.bits.paddr(PAddrBits-1, DCacheLineOffset))
      )
    }
    val debug_robIdx = enq.bits.uop.robIdx.asUInt
    XSError(
      needEnqueue(w) && enq.ready && allocated(enqIndex),
      p"LoadQueueRAR: You can not write a valid entry! check: ldu $w, robIdx $debug_robIdx")
  }

  // LoadQueueRAR deallocate
  val freeMaskVec = Wire(Vec(LoadQueueRARSize, Bool()))

  // init
  freeMaskVec.map(e => e := false.B)

  // When all loads older than the current load have been written back,
  // the current load's entry can be released (freed).
  for (i <- 0 until LoadQueueRARSize) {
    val deqNotBlock = !isBefore(io.ldWbPtr, uop(i).lqIdx)
    val needFlush = uop(i).robIdx.needFlush(io.redirect)

    when (allocated(i) && (deqNotBlock || needFlush)) {
      allocated(i) := false.B
      freeMaskVec(i) := true.B
    }
  }

  // If a load needs to be replayed, revoke the entry it allocated last cycle.
  val lastCanAccept = GatedRegNext(acceptedVec)
  val lastAllocIndex = GatedRegNext(enqIndexVec)

  for ((revoke, w) <- io.query.map(_.revoke).zipWithIndex) {
    val revokeValid = revoke && lastCanAccept(w)
    val revokeIndex = lastAllocIndex(w)

    when (allocated(revokeIndex) && revokeValid) {
      allocated(revokeIndex) := false.B
      freeMaskVec(revokeIndex) := true.B
    }
  }

  freeList.io.free := freeMaskVec.asUInt

  // LoadQueueRAR query
  // Load-to-load violation check conditions:
  // 1. The physical address matches (CAM port).
  // 2. The released flag is set (cacheline released, or NC with data).
  // 3. The entry is younger than the querying load instruction.
  val ldLdViolation = Wire(Vec(LoadPipelineWidth, Bool()))
  //val allocatedUInt = RegNext(allocated.asUInt)
  for ((query, w) <- io.query.zipWithIndex) {
    ldLdViolation(w) := false.B
    paddrModule.io.releaseViolationMdata(w) := genPartialPAddr(query.req.bits.paddr)

    query.resp.valid := RegNext(query.req.valid)
    // Generate the real violation mask
    val robIdxMask = VecInit(uop.map(_.robIdx).map(isAfter(_, query.req.bits.uop.robIdx)))
    val matchMaskReg = Wire(Vec(LoadQueueRARSize, Bool()))
    for (i <- 0 until LoadQueueRARSize) {
      matchMaskReg(i) := (allocated(i) &&
                          paddrModule.io.releaseViolationMmask(w)(i) &&
                          robIdxMask(i) &&
                          released(i))
    }
    val matchMask = GatedValidRegNext(matchMaskReg)
    // Load-to-load violation check result
    val ldLdViolationMask = matchMask
    ldLdViolationMask.suggestName("ldLdViolationMask_" + w)
    query.resp.bits.rep_frm_fetch := ParallelORR(ldLdViolationMask)
  }


  // When io.release.valid (release1Cycle.valid) is high, use the last ld-ld paddr CAM port
  // to update the released flags within 1 cycle.
  val releaseVioMask = Reg(Vec(LoadQueueRARSize, Bool()))
  when (release1Cycle.valid) {
    paddrModule.io.releaseMdata.takeRight(1)(0) := genPartialPAddr(release1Cycle.bits.paddr)
  }

  (0 until LoadQueueRARSize).map(i => {
    when (RegNext((paddrModule.io.releaseMmask.takeRight(1)(0)(i)) && allocated(i) && release1Cycle.valid)) {
      // Note: if a load has missed in dcache and is waiting for refill in the load queue,
      // its released flag still needs to be set as true if the address matches.
      released(i) := true.B
    }
  })

  io.lqFull := freeList.io.empty
  io.validCount := freeList.io.validCount

  // perf cnt
  val canEnqCount = PopCount(io.query.map(_.req.fire))
  val validCount = freeList.io.validCount
  val allowEnqueue = validCount <= (LoadQueueRARSize - LoadPipelineWidth).U
  val ldLdViolationCount = PopCount(io.query.map(_.resp).map(resp => resp.valid && resp.bits.rep_frm_fetch))

  QueuePerf(LoadQueueRARSize, validCount, !allowEnqueue)
  XSPerfAccumulate("enq", canEnqCount)
  XSPerfAccumulate("ld_ld_violation", ldLdViolationCount)
  val perfEvents: Seq[(String, UInt)] = Seq(
    ("enq", canEnqCount),
    ("ld_ld_violation", ldLdViolationCount)
  )
  generatePerfEvent()
  // End
}
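// -----------------------------------------------------------------------------------------
// Illustrative sketch only (not part of the original XiangShan sources): a pure-Scala,
// non-synthesizable model of the XOR bit-folding performed by genPartialPAddr above.
// The object name and the assumed PAddrBits = 48 are hypothetical; the RTL derives its
// widths from the XiangShan configuration. A model like this can help when reasoning about
// which full physical addresses alias to the same partial paddr in the CAM, e.g.
// PartialPAddrSoftwareModel.fold(BigInt("80001000", 16)).
// -----------------------------------------------------------------------------------------
object PartialPAddrSoftwareModel {
  private val PAddrBits            = 48 // assumed width, for illustration only
  private val PartialPAddrStride   = 6
  private val PartialPAddrBits     = 16
  private val PartialPAddrLowBits  = (PartialPAddrBits - PartialPAddrStride) / 2
  private val PartialPAddrHighBits = PartialPAddrBits - PartialPAddrLowBits

  // Extract a single bit of the physical address.
  private def bit(paddr: BigInt, idx: Int): Int = ((paddr >> idx) & 1).toInt

  /** Mirror of genPartialPAddr: XOR-fold selected paddr bits into PartialPAddrBits bits. */
  def fold(paddr: BigInt): Int = {
    // Low part: paddr(6+i) ^ paddr(15-i), for i = 0 .. PartialPAddrLowBits-1
    val low = (0 until PartialPAddrLowBits).map { i =>
      bit(paddr, PartialPAddrStride + i) ^ bit(paddr, PartialPAddrBits - i - 1)
    }
    // High part: XOR of up to four bits spaced 11 apart, bounded by PAddrBits
    val high = (0 until PartialPAddrHighBits).map { i =>
      Seq(i + PartialPAddrStride,
          i + PartialPAddrStride + 11,
          i + PartialPAddrStride + 22,
          i + PartialPAddrStride + 33)
        .filter(_ < PAddrBits)
        .map(bit(paddr, _))
        .reduce(_ ^ _)
    }
    // Matches Cat(high, low) in the RTL: low bits are the LSBs, high bits the MSBs.
    (low ++ high).zipWithIndex.foldLeft(0) { case (acc, (b, i)) => acc | (b << i) }
  }
}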