/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.mem

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import utils._
import utility._
import xiangshan._
import xiangshan.ExceptionNO._
import xiangshan.frontend.FtqPtr
import xiangshan.backend._
import xiangshan.backend.fu.fpu._
import xiangshan.backend.rob.RobLsqIO
import xiangshan.backend.Bundles.{DynInst, MemExuOutput, MemMicroOpRbExt}
import xiangshan.backend.rob.RobPtr
import xiangshan.mem.mdp._
import xiangshan.mem.Bundles._
import xiangshan.cache._
import xiangshan.cache.mmu._

class LqPtr(implicit p: Parameters) extends CircularQueuePtr[LqPtr](
  p => p(XSCoreParamsKey).VirtualLoadQueueSize
){
}

object LqPtr {
  def apply(f: Bool, v: UInt)(implicit p: Parameters): LqPtr = {
    val ptr = Wire(new LqPtr)
    ptr.flag := f
    ptr.value := v
    ptr
  }
}
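
// A minimal illustrative sketch (not part of the design): CircularQueuePtr
// carries a wrap `flag` alongside the index `value`, which is how enqueue and
// dequeue pointers at the same index are told apart. Assuming
// VirtualLoadQueueSize = 16 for the example:
//
//   val enqPtr = LqPtr(f = true.B,  v = 3.U) // enqueue side, wrapped once
//   val deqPtr = LqPtr(f = false.B, v = 3.U) // dequeue side, not yet wrapped
//
// Equal value with equal flags means the queue is empty; equal value with
// different flags means it holds exactly VirtualLoadQueueSize entries (full).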

trait HasLoadHelper { this: XSModule =>
  def rdataHelper(uop: DynInst, rdata: UInt): UInt = {
    val fpWen = uop.fpWen
    LookupTree(uop.fuOpType, List(
      LSUOpType.lb  -> SignExt(rdata(7, 0), XLEN),
      LSUOpType.lh  -> SignExt(rdata(15, 0), XLEN),
      /*
          riscv-spec-20191213: 12.2 NaN Boxing of Narrower Values
          Any operation that writes a narrower result to an f register must write
          all 1s to the uppermost FLEN−n bits to yield a legal NaN-boxed value.
      */
      LSUOpType.lw  -> Mux(fpWen, FPU.box(rdata, FPU.S), SignExt(rdata(31, 0), XLEN)),
      LSUOpType.ld  -> Mux(fpWen, FPU.box(rdata, FPU.D), SignExt(rdata(63, 0), XLEN)),
      LSUOpType.lbu -> ZeroExt(rdata(7, 0), XLEN),
      LSUOpType.lhu -> ZeroExt(rdata(15, 0), XLEN),
      LSUOpType.lwu -> ZeroExt(rdata(31, 0), XLEN),

      // hypervisor
      LSUOpType.hlvb   -> SignExt(rdata(7, 0), XLEN),
      LSUOpType.hlvh   -> SignExt(rdata(15, 0), XLEN),
      LSUOpType.hlvw   -> SignExt(rdata(31, 0), XLEN),
      LSUOpType.hlvd   -> SignExt(rdata(63, 0), XLEN),
      LSUOpType.hlvbu  -> ZeroExt(rdata(7, 0), XLEN),
      LSUOpType.hlvhu  -> ZeroExt(rdata(15, 0), XLEN),
      LSUOpType.hlvwu  -> ZeroExt(rdata(31, 0), XLEN),
      LSUOpType.hlvxhu -> ZeroExt(rdata(15, 0), XLEN),
      LSUOpType.hlvxwu -> ZeroExt(rdata(31, 0), XLEN),
    ))
  }
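
  // Worked example (illustrative): an FLW returning the 32-bit pattern
  // 0x3F80_0000 (1.0f) takes the fpWen arm above, and FPU.box(rdata, FPU.S)
  // NaN-boxes it to 0xFFFF_FFFF_3F80_0000 in the 64-bit f register. A plain LW
  // of the same data sign-extends bit 31 instead, giving 0x0000_0000_3F80_0000.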

  def genRdataOH(uop: DynInst): UInt = {
    val fuOpType = uop.fuOpType
    val fpWen    = uop.fpWen
    val result = Cat(
      (fuOpType === LSUOpType.lw && fpWen),
      (fuOpType === LSUOpType.lh && fpWen),
      (fuOpType === LSUOpType.lw && !fpWen) || (fuOpType === LSUOpType.hlvw),
      (fuOpType === LSUOpType.lh && !fpWen) || (fuOpType === LSUOpType.hlvh),
      (fuOpType === LSUOpType.lb) || (fuOpType === LSUOpType.hlvb),
      (fuOpType === LSUOpType.ld) || (fuOpType === LSUOpType.hlvd),
      (fuOpType === LSUOpType.lwu) || (fuOpType === LSUOpType.hlvwu) || (fuOpType === LSUOpType.hlvxwu),
      (fuOpType === LSUOpType.lhu) || (fuOpType === LSUOpType.hlvhu) || (fuOpType === LSUOpType.hlvxhu),
      (fuOpType === LSUOpType.lbu) || (fuOpType === LSUOpType.hlvbu),
    )
    result
  }

  def newRdataHelper(select: UInt, rdata: UInt): UInt = {
    XSError(PopCount(select) > 1.U, "data selector must be One-Hot!\n")
    val selData = Seq(
      ZeroExt(rdata(7, 0), XLEN),
      ZeroExt(rdata(15, 0), XLEN),
      ZeroExt(rdata(31, 0), XLEN),
      rdata(63, 0),
      SignExt(rdata(7, 0), XLEN),
      SignExt(rdata(15, 0), XLEN),
      SignExt(rdata(31, 0), XLEN),
      FPU.box(rdata, FPU.H),
      FPU.box(rdata, FPU.S)
    )
    Mux1H(select, selData)
  }
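
  // How the one-hot encoding above lines up (a reading aid, not new logic):
  // Cat puts its first argument at the MSB, and Mux1H pairs select bit i
  // (LSB first) with selData(i). So bit 0 of genRdataOH (lbu/hlvbu) picks the
  // zero-extended byte, bit 4 (lb/hlvb) the sign-extended byte, and the two
  // MSBs (fp lh / fp lw) pick the NaN-boxed FPU.H / FPU.S results.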

  def genDataSelectByOffset(addrOffset: UInt): Vec[Bool] = {
    require(addrOffset.getWidth == 4)
    VecInit((0 until 16).map { case i =>
      addrOffset === i.U
    })
  }

  def rdataVecHelper(alignedType: UInt, rdata: UInt): UInt = {
    LookupTree(alignedType, List(
      "b00".U -> ZeroExt(rdata(7, 0), VLEN),
      "b01".U -> ZeroExt(rdata(15, 0), VLEN),
      "b10".U -> ZeroExt(rdata(31, 0), VLEN),
      "b11".U -> ZeroExt(rdata(63, 0), VLEN)
    ))
  }
}

class LqEnqIO(implicit p: Parameters) extends MemBlockBundle {
  val canAccept = Output(Bool())
  val sqCanAccept = Input(Bool())
  val needAlloc = Vec(LSQEnqWidth, Input(Bool()))
  val req = Vec(LSQEnqWidth, Flipped(ValidIO(new DynInst)))
  val resp = Vec(LSQEnqWidth, Output(new LqPtr))
}

class LqTriggerIO(implicit p: Parameters) extends XSBundle {
  val hitLoadAddrTriggerHitVec = Input(Vec(TriggerNum, Bool()))
  val lqLoadAddrTriggerHitVec = Output(Vec(TriggerNum, Bool()))
}

class LoadQueueTopDownIO(implicit p: Parameters) extends XSBundle {
  val robHeadVaddr = Flipped(Valid(UInt(VAddrBits.W)))
  val robHeadTlbReplay = Output(Bool())
  val robHeadTlbMiss = Output(Bool())
  val robHeadLoadVio = Output(Bool())
  val robHeadLoadMSHR = Output(Bool())
  val robHeadMissInDTlb = Input(Bool())
  val robHeadOtherReplay = Output(Bool())
}

class LoadQueue(implicit p: Parameters) extends XSModule
  with HasDCacheParameters
  with HasCircularQueuePtrHelper
  with HasLoadHelper
  with HasPerfEvents
{
  val io = IO(new Bundle() {
    val redirect = Flipped(Valid(new Redirect))
    val vecFeedback = Vec(VecLoadPipelineWidth, Flipped(ValidIO(new FeedbackToLsqIO)))
    val enq = new LqEnqIO
    val ldu = new Bundle() {
      val stld_nuke_query = Vec(LoadPipelineWidth, Flipped(new LoadNukeQueryIO)) // from load_s2
      val ldld_nuke_query = Vec(LoadPipelineWidth, Flipped(new LoadNukeQueryIO)) // from load_s2
      val ldin = Vec(LoadPipelineWidth, Flipped(Decoupled(new LqWriteBundle))) // from load_s3
    }
    val sta = new Bundle() {
      val storeAddrIn = Vec(StorePipelineWidth, Flipped(Valid(new LsPipelineBundle))) // from store_s1
    }
    val std = new Bundle() {
      val storeDataIn = Vec(StorePipelineWidth, Flipped(Valid(new MemExuOutput(isVector = true)))) // store data from store_s0, sent to sq from rs
    }
    val sq = new Bundle() {
      val stAddrReadySqPtr = Input(new SqPtr)
      val stAddrReadyVec = Input(Vec(StoreQueueSize, Bool()))
      val stDataReadySqPtr = Input(new SqPtr)
      val stDataReadyVec = Input(Vec(StoreQueueSize, Bool()))
      val stIssuePtr = Input(new SqPtr)
      val sqEmpty = Input(Bool())
    }
    val ldout = Vec(LoadPipelineWidth, DecoupledIO(new MemExuOutput))
    val ld_raw_data = Vec(LoadPipelineWidth, Output(new LoadDataFromLQBundle))
    val ncOut = Vec(LoadPipelineWidth, DecoupledIO(new LsPipelineBundle))
    val replay = Vec(LoadPipelineWidth, Decoupled(new LsPipelineBundle))
    // val refill = Flipped(ValidIO(new Refill))
    val tl_d_channel = Input(new DcacheToLduForwardIO)
    val release = Flipped(Valid(new Release))
    val nuke_rollback = Vec(StorePipelineWidth, Output(Valid(new Redirect)))
    val nack_rollback = Vec(1, Output(Valid(new Redirect))) // uncache buffer
    val rob = Flipped(new RobLsqIO)
    val uncache = new UncacheWordIO
    val exceptionAddr = new ExceptionAddrIO
    val loadMisalignFull = Input(Bool())
    val misalignAllowSpec = Input(Bool())
    val lqFull = Output(Bool())
    val lqDeq = Output(UInt(log2Up(CommitWidth + 1).W))
    val lqCancelCnt = Output(UInt(log2Up(VirtualLoadQueueSize + 1).W))
    val lq_rep_full = Output(Bool())
    val tlbReplayDelayCycleCtrl = Vec(4, Input(UInt(ReSelectLen.W)))
    val l2_hint = Input(Valid(new L2ToL1Hint()))
    val tlb_hint = Flipped(new TlbHintIO)
    val lqEmpty = Output(Bool())

    val lqDeqPtr = Output(new LqPtr)

    val rarValidCount = Output(UInt())

    val debugTopDown = new LoadQueueTopDownIO
    val noUopsIssed = Input(Bool())
  })

  val loadQueueRAR = Module(new LoadQueueRAR) // read-after-read violation
  val loadQueueRAW = Module(new LoadQueueRAW) // read-after-write violation
  val loadQueueReplay = Module(new LoadQueueReplay) // enqueues loads that need replay
  val virtualLoadQueue = Module(new VirtualLoadQueue) // control state
  val exceptionBuffer = Module(new LqExceptionBuffer) // exception buffer
  val uncacheBuffer = Module(new LoadQueueUncache) // uncache

  /**
   * LoadQueueRAR
   */
  loadQueueRAR.io.redirect <> io.redirect
  loadQueueRAR.io.release <> io.release
  loadQueueRAR.io.ldWbPtr <> virtualLoadQueue.io.ldWbPtr
  loadQueueRAR.io.validCount <> io.rarValidCount
  for (w <- 0 until LoadPipelineWidth) {
    loadQueueRAR.io.query(w).req <> io.ldu.ldld_nuke_query(w).req // from load_s1
    loadQueueRAR.io.query(w).resp <> io.ldu.ldld_nuke_query(w).resp // to load_s2
    loadQueueRAR.io.query(w).revoke := io.ldu.ldld_nuke_query(w).revoke // from load_s3
  }
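
  // Query handshake timing, as a sketch (stage names follow the comments in
  // this file; the exact response fields depend on LoadNukeQueryIO):
  //
  //   loadQueueRAR.io.query(w).req    // load_s1: query and allocate an entry
  //   loadQueueRAR.io.query(w).resp   // load_s2: nuke / allocation result
  //   loadQueueRAR.io.query(w).revoke // load_s3: withdraw the entry on replay
  //
  // The RAW queue below follows the same three-step pattern for st-ld checks.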

  /**
   * LoadQueueRAW
   */
  loadQueueRAW.io.redirect <> io.redirect
  loadQueueRAW.io.storeIn <> io.sta.storeAddrIn
  loadQueueRAW.io.stAddrReadySqPtr <> io.sq.stAddrReadySqPtr
  loadQueueRAW.io.stIssuePtr <> io.sq.stIssuePtr
  for (w <- 0 until LoadPipelineWidth) {
    loadQueueRAW.io.query(w).req <> io.ldu.stld_nuke_query(w).req // from load_s1
    loadQueueRAW.io.query(w).resp <> io.ldu.stld_nuke_query(w).resp // to load_s2
    loadQueueRAW.io.query(w).revoke := io.ldu.stld_nuke_query(w).revoke // from load_s3
  }

  /**
   * VirtualLoadQueue
   */
  virtualLoadQueue.io.redirect <> io.redirect
  virtualLoadQueue.io.vecCommit <> io.vecFeedback
  virtualLoadQueue.io.enq <> io.enq
  virtualLoadQueue.io.ldin <> io.ldu.ldin // from load_s3
  virtualLoadQueue.io.lqFull <> io.lqFull
  virtualLoadQueue.io.lqDeq <> io.lqDeq
  virtualLoadQueue.io.lqCancelCnt <> io.lqCancelCnt
  virtualLoadQueue.io.lqEmpty <> io.lqEmpty
  virtualLoadQueue.io.ldWbPtr <> io.lqDeqPtr

  /**
   * Load queue exception buffer
   */
  exceptionBuffer.io.redirect <> io.redirect
  for (i <- 0 until LoadPipelineWidth) {
    exceptionBuffer.io.req(i).valid := io.ldu.ldin(i).valid && !io.ldu.ldin(i).bits.isvec // from load_s3
    exceptionBuffer.io.req(i).bits := io.ldu.ldin(i).bits
  }
  // vector (VLSU) exceptions
  for (i <- 0 until VecLoadPipelineWidth) {
    exceptionBuffer.io.req(LoadPipelineWidth + i).valid := io.vecFeedback(i).valid && io.vecFeedback(i).bits.feedback(VecFeedbacks.FLUSH) // have exception
    exceptionBuffer.io.req(LoadPipelineWidth + i).bits := DontCare
    exceptionBuffer.io.req(LoadPipelineWidth + i).bits.vaddr := io.vecFeedback(i).bits.vaddr
    exceptionBuffer.io.req(LoadPipelineWidth + i).bits.fullva := io.vecFeedback(i).bits.vaddr
    exceptionBuffer.io.req(LoadPipelineWidth + i).bits.vaNeedExt := io.vecFeedback(i).bits.vaNeedExt
    exceptionBuffer.io.req(LoadPipelineWidth + i).bits.gpaddr := io.vecFeedback(i).bits.gpaddr
    exceptionBuffer.io.req(LoadPipelineWidth + i).bits.uop.uopIdx := io.vecFeedback(i).bits.uopidx
    exceptionBuffer.io.req(LoadPipelineWidth + i).bits.uop.robIdx := io.vecFeedback(i).bits.robidx
    exceptionBuffer.io.req(LoadPipelineWidth + i).bits.uop.vpu.vstart := io.vecFeedback(i).bits.vstart
    exceptionBuffer.io.req(LoadPipelineWidth + i).bits.uop.vpu.vl := io.vecFeedback(i).bits.vl
    exceptionBuffer.io.req(LoadPipelineWidth + i).bits.uop.exceptionVec := io.vecFeedback(i).bits.exceptionVec
  }
  // mmio non-data error exception
  exceptionBuffer.io.req(LoadPipelineWidth + VecLoadPipelineWidth) := uncacheBuffer.io.exception
  exceptionBuffer.io.req(LoadPipelineWidth + VecLoadPipelineWidth).bits.vaNeedExt := true.B

  loadQueueReplay.io.loadMisalignFull := io.loadMisalignFull
  loadQueueReplay.io.misalignAllowSpec := io.misalignAllowSpec

  io.exceptionAddr <> exceptionBuffer.io.exceptionAddr

  /**
   * Load uncache buffer
   */
  uncacheBuffer.io.redirect <> io.redirect
  uncacheBuffer.io.mmioOut <> io.ldout
  uncacheBuffer.io.ncOut <> io.ncOut
  uncacheBuffer.io.mmioRawData <> io.ld_raw_data
  uncacheBuffer.io.rob <> io.rob
  uncacheBuffer.io.uncache <> io.uncache

  for ((buff, w) <- uncacheBuffer.io.req.zipWithIndex) {
    // from load_s3
    val ldinBits = io.ldu.ldin(w).bits
    buff.valid := io.ldu.ldin(w).valid && !ldinBits.nc_with_data
    buff.bits := ldinBits
  }

  io.uncache.resp.ready := true.B

  io.nuke_rollback := loadQueueRAW.io.rollback
  io.nack_rollback(0) := uncacheBuffer.io.rollback

  /* <------- DANGEROUS: Don't change sequence here ! -------> */

  /**
   * LoadQueueReplay
   */
  loadQueueReplay.io.redirect <> io.redirect
  loadQueueReplay.io.enq <> io.ldu.ldin // from load_s3
  loadQueueReplay.io.storeAddrIn <> io.sta.storeAddrIn // from store_s1
  loadQueueReplay.io.storeDataIn <> io.std.storeDataIn // from store_s0
  loadQueueReplay.io.replay <> io.replay
  // loadQueueReplay.io.refill <> io.refill
  loadQueueReplay.io.tl_d_channel <> io.tl_d_channel
  loadQueueReplay.io.stAddrReadySqPtr <> io.sq.stAddrReadySqPtr
  loadQueueReplay.io.stAddrReadyVec <> io.sq.stAddrReadyVec
  loadQueueReplay.io.stDataReadySqPtr <> io.sq.stDataReadySqPtr
  loadQueueReplay.io.stDataReadyVec <> io.sq.stDataReadyVec
  loadQueueReplay.io.sqEmpty <> io.sq.sqEmpty
  loadQueueReplay.io.lqFull <> io.lq_rep_full
  loadQueueReplay.io.ldWbPtr <> virtualLoadQueue.io.ldWbPtr
  loadQueueReplay.io.rarFull <> loadQueueRAR.io.lqFull
  loadQueueReplay.io.rawFull <> loadQueueRAW.io.lqFull
  loadQueueReplay.io.l2_hint <> io.l2_hint
  loadQueueReplay.io.tlb_hint <> io.tlb_hint
  loadQueueReplay.io.tlbReplayDelayCycleCtrl <> io.tlbReplayDelayCycleCtrl

  // TODO: implement it!
  loadQueueReplay.io.vecFeedback := io.vecFeedback

  loadQueueReplay.io.debugTopDown <> io.debugTopDown

  virtualLoadQueue.io.noUopsIssued := io.noUopsIssed

  // full_mask is Cat(RAR full, RAW full, Replay full); e.g. full_mask_101
  // counts cycles in which RAR and Replay are full while RAW is not.
  val full_mask = Cat(loadQueueRAR.io.lqFull, loadQueueRAW.io.lqFull, loadQueueReplay.io.lqFull)
  XSPerfAccumulate("full_mask_000", full_mask === 0.U)
  XSPerfAccumulate("full_mask_001", full_mask === 1.U)
  XSPerfAccumulate("full_mask_010", full_mask === 2.U)
  XSPerfAccumulate("full_mask_011", full_mask === 3.U)
  XSPerfAccumulate("full_mask_100", full_mask === 4.U)
  XSPerfAccumulate("full_mask_101", full_mask === 5.U)
  XSPerfAccumulate("full_mask_110", full_mask === 6.U)
  XSPerfAccumulate("full_mask_111", full_mask === 7.U)
  XSPerfAccumulate("nuke_rollback", io.nuke_rollback.map(_.valid).reduce(_ || _).asUInt)
  XSPerfAccumulate("nack_rollback", io.nack_rollback.map(_.valid).reduce(_ || _).asUInt)

  // perf cnt
  val perfEvents = Seq(virtualLoadQueue, loadQueueRAR, loadQueueRAW, loadQueueReplay).flatMap(_.getPerfEvents) ++
  Seq(
    ("full_mask_000", full_mask === 0.U),
    ("full_mask_001", full_mask === 1.U),
    ("full_mask_010", full_mask === 2.U),
    ("full_mask_011", full_mask === 3.U),
    ("full_mask_100", full_mask === 4.U),
    ("full_mask_101", full_mask === 5.U),
    ("full_mask_110", full_mask === 6.U),
    ("full_mask_111", full_mask === 7.U),
    ("nuke_rollback", io.nuke_rollback.map(_.valid).reduce(_ || _).asUInt),
    ("nack_rollback", io.nack_rollback.map(_.valid).reduce(_ || _).asUInt)
  )
  generatePerfEvent()
  // end
}