1c590fb32Scz4e/*************************************************************************************** 2c590fb32Scz4e* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences 3c590fb32Scz4e* Copyright (c) 2020-2021 Peng Cheng Laboratory 4c590fb32Scz4e* 5c590fb32Scz4e* XiangShan is licensed under Mulan PSL v2. 6c590fb32Scz4e* You can use this software according to the terms and conditions of the Mulan PSL v2. 7c590fb32Scz4e* You may obtain a copy of Mulan PSL v2 at: 8c590fb32Scz4e* http://license.coscl.org.cn/MulanPSL2 9c590fb32Scz4e* 10c590fb32Scz4e* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, 11c590fb32Scz4e* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, 12c590fb32Scz4e* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. 13c590fb32Scz4e* 14c590fb32Scz4e* See the Mulan PSL v2 for more details. 15c590fb32Scz4e***************************************************************************************/ 16c590fb32Scz4e 17c590fb32Scz4epackage xiangshan.mem 18c590fb32Scz4e 19c590fb32Scz4eimport org.chipsalliance.cde.config.Parameters 20c590fb32Scz4eimport chisel3._ 21c590fb32Scz4eimport chisel3.util._ 22c590fb32Scz4eimport freechips.rocketchip.diplomacy._ 23c590fb32Scz4eimport freechips.rocketchip.diplomacy.{BundleBridgeSource, LazyModule, LazyModuleImp} 24c590fb32Scz4eimport freechips.rocketchip.interrupts.{IntSinkNode, IntSinkPortSimple} 25c590fb32Scz4eimport freechips.rocketchip.tile.HasFPUParameters 26c590fb32Scz4eimport freechips.rocketchip.tilelink._ 27c590fb32Scz4eimport utils._ 28c590fb32Scz4eimport utility._ 29602aa9f1Scz4eimport utility.mbist.{MbistInterface, MbistPipeline} 3030f35717Scz4eimport utility.sram.{SramBroadcastBundle, SramHelper} 318cfc24b2STang Haojinimport system.{HasSoCParameter, SoCParamsKey} 32c590fb32Scz4eimport xiangshan._ 33c590fb32Scz4eimport xiangshan.ExceptionNO._ 34c590fb32Scz4eimport xiangshan.frontend.HasInstrMMIOConst 35c590fb32Scz4eimport 
xiangshan.backend.Bundles.{DynInst, MemExuInput, MemExuOutput}
import xiangshan.backend.ctrlblock.{DebugLSIO, LsTopdownInfo}
import xiangshan.backend.exu.MemExeUnit
import xiangshan.backend.fu._
import xiangshan.backend.fu.FuType._
import xiangshan.backend.fu.NewCSR.{CsrTriggerBundle, TriggerUtil, PFEvent}
import xiangshan.backend.fu.util.{CSRConst, SdtrigExt}
import xiangshan.backend.{BackendToTopBundle, TopToBackendBundle}
import xiangshan.backend.rob.{RobDebugRollingIO, RobPtr, RobLsqIO}
import xiangshan.backend.datapath.NewPipelineConnect
import xiangshan.backend.trace.{Itype, TraceCoreInterface}
import xiangshan.backend.Bundles._
import xiangshan.mem._
import xiangshan.mem.mdp._
import xiangshan.mem.Bundles._
import xiangshan.mem.prefetch.{BasePrefecher, L1Prefetcher, SMSParams, SMSPrefetcher}
import xiangshan.cache._
import xiangshan.cache.mmu._
import coupledL2.PrefetchRecv
// NOTE(review): the two imports below duplicate the utility.mbist / utility.sram
// imports that already appear earlier in this file; they are harmless but could
// be deduplicated.
import utility.mbist.{MbistInterface, MbistPipeline}
import utility.sram.{SramBroadcastBundle, SramHelper}

/**
 * Shorthand unit counts for the memory block, all derived from `backendParams`.
 * Mixed in by MemBlock bundles and the MemBlock implementation.
 */
trait HasMemBlockParameters extends HasXSParameter {
  // number of memory units
  val LduCnt = backendParams.LduCnt   // scalar load units
  val StaCnt = backendParams.StaCnt   // store-address units
  val StdCnt = backendParams.StdCnt   // store-data units
  val HyuCnt = backendParams.HyuCnt   // hybrid (load + store-address) units
  val VlduCnt = backendParams.VlduCnt // vector load units
  val VstuCnt = backendParams.VstuCnt // vector store units

  val LdExuCnt = LduCnt + HyuCnt      // every hybrid unit can execute loads
  val StAddrCnt = StaCnt + HyuCnt     // every hybrid unit can compute store addresses
  val StDataCnt = StdCnt
  val MemExuCnt = LduCnt + HyuCnt + StaCnt + StdCnt
  val MemAddrExtCnt = LdExuCnt + StaCnt
  val MemVExuCnt = VlduCnt + VstuCnt

  // Fixed port indices on the load writeback path: port 0 is shared with the
  // atomics unit, port 1 with the load-misalign buffer, port 2 with uncache
  // loads (see the ldaExeWbReqs wiring further down this file).
  val AtomicWBPort = 0
  val MisalignWBPort = 1
  val UncacheWBPort = 2
  // NOTE(review): presumably the writeback ports that may carry non-cacheable
  // (NC) results — confirm against the load queue's usage.
  val NCWBPorts = Seq(1, 2)
}

// Base class for MemBlock IO bundles; carries the unit counts above.
abstract class MemBlockBundle(implicit val p: Parameters) extends Bundle with HasMemBlockParameters

/**
 * Store-data functional unit: a purely combinational pass-through that
 * forwards source operand 0 as the store data, tagged with the uop's robIdx.
 * All other output fields are zeroed.
 */
class Std(cfg: FuConfig)(implicit p: Parameters) extends FuncUnit(cfg) {
  io.in.ready := io.out.ready
  io.out.valid := io.in.valid
  io.out.bits := 0.U.asTypeOf(io.out.bits)
  io.out.bits.res.data := io.in.bits.data.src(0)
  io.out.bits.ctrl.robIdx := io.in.bits.ctrl.robIdx
}

/**
 * Signals flowing from the out-of-order backend into MemBlock:
 * issue ports for all memory unit kinds, CSR/TLB control, and the
 * ROB-side commit / pending state needed by the LSQ.
 */
class ooo_to_mem(implicit p: Parameters) extends MemBlockBundle {
  val backendToTopBypass = Flipped(new BackendToTopBundle)

  val loadFastMatch = Vec(LdExuCnt, Input(UInt(LdExuCnt.W)))
  val loadFastFuOpType = Vec(LdExuCnt, Input(FuOpType()))
  val loadFastImm = Vec(LdExuCnt, Input(UInt(12.W)))
  val sfence = Input(new SfenceBundle)
  val tlbCsr = Input(new TlbCsrBundle)
  // ROB-side commit counts and pending-instruction state for the LSQ.
  val lsqio = new Bundle {
    val lcommit = Input(UInt(log2Up(CommitWidth + 1).W))
    val scommit = Input(UInt(log2Up(CommitWidth + 1).W))
    val pendingMMIOld = Input(Bool())
    val pendingld = Input(Bool())
    val pendingst = Input(Bool())
    val pendingVst = Input(Bool())
    val commit = Input(Bool())
    val pendingPtr = Input(new RobPtr)
    val pendingPtrNext = Input(new RobPtr)
  }

  val isStoreException = Input(Bool())
  val isVlsException = Input(Bool())
  val csrCtrl = Flipped(new CustomCSRCtrlIO)
  val enqLsq = new LsqEnqIO
  val flushSb = Input(Bool())

  val storePc = Vec(StaCnt, Input(UInt(VAddrBits.W))) // for hw prefetch
  val hybridPc = Vec(HyuCnt, Input(UInt(VAddrBits.W))) // for hw prefetch

  // Issue ports, one group per memory unit kind.
  val issueLda = MixedVec(Seq.fill(LduCnt)(Flipped(DecoupledIO(new MemExuInput))))
  val issueSta = MixedVec(Seq.fill(StaCnt)(Flipped(DecoupledIO(new MemExuInput))))
  val issueStd = MixedVec(Seq.fill(StdCnt)(Flipped(DecoupledIO(new MemExuInput))))
  val issueHya = MixedVec(Seq.fill(HyuCnt)(Flipped(DecoupledIO(new MemExuInput))))
  val issueVldu = MixedVec(Seq.fill(VlduCnt)(Flipped(DecoupledIO(new MemExuInput(isVector=true)))))

  // All issue ports flattened, in (lda, sta, std, hya, vldu) order.
  def issueUops = issueLda ++ issueSta ++ issueStd ++ issueHya ++ issueVldu
}

/**
 * Signals flowing from MemBlock back to the out-of-order backend:
 * writeback ports, queue pointers/counters, feedback, and wakeups.
 */
class mem_to_ooo(implicit p: Parameters) extends MemBlockBundle {
  val topToBackendBypass = new TopToBackendBundle

  val otherFastWakeup = Vec(LdExuCnt, ValidIO(new DynInst))
  val lqCancelCnt = Output(UInt(log2Up(VirtualLoadQueueSize + 1).W))
  val sqCancelCnt = Output(UInt(log2Up(StoreQueueSize + 1).W))
  val sqDeq = Output(UInt(log2Ceil(EnsbufferWidth + 1).W))
  val lqDeq = Output(UInt(log2Up(CommitWidth + 1).W))
  // used by VLSU issue queue, the vector store would wait all store before it, and the vector load would wait all load
  val sqDeqPtr = Output(new SqPtr)
  val lqDeqPtr = Output(new LqPtr)
  val stIn = Vec(StAddrCnt, ValidIO(new MemExuInput))
  val stIssuePtr = Output(new SqPtr())

  val memoryViolation = ValidIO(new Redirect)
  val sbIsEmpty = Output(Bool())

  val lsTopdownInfo = Vec(LdExuCnt, Output(new LsTopdownInfo))

  // LSQ state exported toward the backend (exception info, mmio flags, etc.).
  val lsqio = new Bundle {
    val vaddr = Output(UInt(XLEN.W))
    val vstart = Output(UInt((log2Up(VLEN) + 1).W))
    val vl = Output(UInt((log2Up(VLEN) + 1).W))
    val gpaddr = Output(UInt(XLEN.W))
    val isForVSnonLeafPTE = Output(Bool())
    val mmio = Output(Vec(LoadPipelineWidth, Bool()))
    val uop = Output(Vec(LoadPipelineWidth, new DynInst))
    val lqCanAccept = Output(Bool())
    val sqCanAccept = Output(Bool())
  }

  // Debug-only: per sbuffer-enqueue slot, export robidx and receive the pc.
  val storeDebugInfo = Vec(EnsbufferWidth, new Bundle {
    val robidx = Output(new RobPtr)
    val pc = Input(UInt(VAddrBits.W))
  })

  val writebackLda = Vec(LduCnt, DecoupledIO(new MemExuOutput))
  val writebackSta = Vec(StaCnt, DecoupledIO(new MemExuOutput))
  val writebackStd = Vec(StdCnt, DecoupledIO(new MemExuOutput))
  val writebackHyuLda = Vec(HyuCnt, DecoupledIO(new MemExuOutput))
  val writebackHyuSta = Vec(HyuCnt, DecoupledIO(new MemExuOutput))
  val writebackVldu = Vec(VlduCnt, DecoupledIO(new MemExuOutput(isVector = true)))
  // NOTE(review): the concatenation order (sta, hyu-lda, hyu-sta, lda, vldu,
  // std) presumably matches the backend's writeback port order — do not
  // reorder without checking the backend side.
  def writeBack: Seq[DecoupledIO[MemExuOutput]] = {
    writebackSta ++
      writebackHyuLda ++ writebackHyuSta ++
      writebackLda ++
      writebackVldu ++
      writebackStd
  }

  val ldaIqFeedback = Vec(LduCnt, new MemRSFeedbackIO)
  val staIqFeedback = Vec(StaCnt, new MemRSFeedbackIO)
  val hyuIqFeedback = Vec(HyuCnt, new MemRSFeedbackIO)
  val vstuIqFeedback= Vec(VstuCnt, new MemRSFeedbackIO(isVector = true))
  val vlduIqFeedback= Vec(VlduCnt, new MemRSFeedbackIO(isVector = true))
  val ldCancel = Vec(backendParams.LdExuCnt, new LoadCancelIO)
  val wakeup = Vec(backendParams.LdExuCnt, Valid(new DynInst))

  val s3_delayed_load_error = Vec(LdExuCnt, Output(Bool()))
}

/** Top-down performance-analysis signals exported toward the core. */
class MemCoreTopDownIO extends Bundle {
  val robHeadMissInDCache = Output(Bool())
  val robHeadTlbReplay = Output(Bool())
  val robHeadTlbMiss = Output(Bool())
  val robHeadLoadVio = Output(Bool())
  val robHeadLoadMSHR = Output(Bool())
}

/** Frontend ITLB's page-table-walk port, routed through MemBlock. */
class fetch_to_mem(implicit p: Parameters) extends XSBundle{
  val itlb = Flipped(new TlbPtwIO())
}

// triple buffer applied in i-mmio path (two at MemBlock, one at L2Top)
class InstrUncacheBuffer()(implicit p: Parameters) extends LazyModule with HasInstrMMIOConst {
  val node = new TLBufferNode(BufferParams.default, BufferParams.default, BufferParams.default, BufferParams.default, BufferParams.default)
  lazy val module = new InstrUncacheBufferImpl

  class InstrUncacheBufferImpl extends LazyModuleImp(this) {
    (node.in zip node.out) foreach { case ((in, edgeIn), (out, edgeOut)) =>
      // Two back-to-back default buffer stages in each direction (A and D).
      out.a <> BufferParams.default(BufferParams.default(in.a))
      in.d <> BufferParams.default(BufferParams.default(out.d))

      // only a.valid, a.ready, a.address can change
      // hoping that the rest would be optimized to keep MemBlock port unchanged after adding buffer
      out.a.bits.data := 0.U
      out.a.bits.mask :=
Fill(mmioBusBytes, 1.U(1.W))
      out.a.bits.opcode := 4.U // Get
      out.a.bits.size := log2Ceil(mmioBusBytes).U
      out.a.bits.source := 0.U
    }
  }
}

// triple buffer applied in L1I$-L2 path (two at MemBlock, one at L2Top)
class ICacheBuffer()(implicit p: Parameters) extends LazyModule {
  val node = new TLBufferNode(BufferParams.default, BufferParams.default, BufferParams.default, BufferParams.default, BufferParams.default)
  lazy val module = new ICacheBufferImpl

  class ICacheBufferImpl extends LazyModuleImp(this) {
    (node.in zip node.out) foreach { case ((in, edgeIn), (out, edgeOut)) =>
      // Two back-to-back default buffer stages in each direction (A and D).
      out.a <> BufferParams.default(BufferParams.default(in.a))
      in.d <> BufferParams.default(BufferParams.default(out.d))
    }
  }
}

// Buffer for the I$ control/ECC channel; same double-buffer structure as above.
class ICacheCtrlBuffer()(implicit p: Parameters) extends LazyModule {
  val node = new TLBufferNode(BufferParams.default, BufferParams.default, BufferParams.default, BufferParams.default, BufferParams.default)
  lazy val module = new ICacheCtrlBufferImpl

  class ICacheCtrlBufferImpl extends LazyModuleImp(this) {
    (node.in zip node.out) foreach { case ((in, edgeIn), (out, edgeOut)) =>
      out.a <> BufferParams.default(BufferParams.default(in.a))
      in.d <> BufferParams.default(BufferParams.default(out.d))
    }
  }
}

// Frontend bus goes through MemBlock
class FrontendBridge()(implicit p: Parameters) extends LazyModule {
  val icache_node = LazyModule(new ICacheBuffer()).suggestName("icache").node // to keep IO port name
  val icachectrl_node = LazyModule(new ICacheCtrlBuffer()).suggestName("icachectrl").node
  val instr_uncache_node = LazyModule(new InstrUncacheBuffer()).suggestName("instr_uncache").node
  // No logic of its own — purely a diplomatic pass-through.
  lazy val module = new LazyModuleImp(this) {
  }
}

/**
 * Diplomatic (LazyModule) shell of MemBlock: instantiates the DCache, uncache,
 * L2 TLB (PTW) and their TileLink buffers, the prefetch sender nodes toward
 * L2/L3, the frontend bridge, and the interrupt sinks.
 */
class MemBlockInlined()(implicit p: Parameters) extends LazyModule
  with HasXSParameter {
  override def shouldBeInlined: Boolean = true

  val dcache = LazyModule(new DCacheWrapper())
  val uncache = LazyModule(new Uncache())
  val uncache_port = TLTempNode()
  val uncache_xbar = TLXbar()
  val ptw = LazyModule(new L2TLBWrapper())
  // Buffers toward L2; null when the corresponding client does not exist.
  val ptw_to_l2_buffer = if (!coreParams.softPTW) LazyModule(new TLBuffer) else null
  val l1d_to_l2_buffer = if (coreParams.dcacheParametersOpt.nonEmpty) LazyModule(new TLBuffer) else null
  val dcache_port = TLNameNode("dcache_client") // to keep dcache-L2 port name
  // Prefetch request senders: to L2 always (when a prefetcher is configured),
  // to L3 only when an L3 cache exists in the SoC parameters.
  val l2_pf_sender_opt = coreParams.prefetcher.map(_ =>
    BundleBridgeSource(() => new PrefetchRecv)
  )
  val l3_pf_sender_opt = if (p(SoCParamsKey).L3CacheParamsOpt.nonEmpty) coreParams.prefetcher.map(_ =>
    BundleBridgeSource(() => new huancun.PrefetchRecv)
  ) else None
  val frontendBridge = LazyModule(new FrontendBridge)
  // interrupt sinks
  val clint_int_sink = IntSinkNode(IntSinkPortSimple(1, 2))
  val debug_int_sink = IntSinkNode(IntSinkPortSimple(1, 1))
  val plic_int_sink = IntSinkNode(IntSinkPortSimple(2, 1))
  val nmi_int_sink = IntSinkNode(IntSinkPortSimple(1, (new NonmaskableInterruptIO).elements.size))
  val beu_local_int_sink =
IntSinkNode(IntSinkPortSimple(1, 1)) 280c590fb32Scz4e 281c590fb32Scz4e if (!coreParams.softPTW) { 282c590fb32Scz4e ptw_to_l2_buffer.node := ptw.node 283c590fb32Scz4e } 284c590fb32Scz4e uncache_xbar := TLBuffer() := uncache.clientNode 285c590fb32Scz4e if (dcache.uncacheNode.isDefined) { 286c590fb32Scz4e dcache.uncacheNode.get := TLBuffer.chainNode(2) := uncache_xbar 287c590fb32Scz4e } 288c590fb32Scz4e uncache_port := TLBuffer.chainNode(2) := uncache_xbar 289c590fb32Scz4e 290c590fb32Scz4e lazy val module = new MemBlockInlinedImp(this) 291c590fb32Scz4e} 292c590fb32Scz4e 293c590fb32Scz4eclass MemBlockInlinedImp(outer: MemBlockInlined) extends LazyModuleImp(outer) 294c590fb32Scz4e with HasXSParameter 295c590fb32Scz4e with HasFPUParameters 296c590fb32Scz4e with HasPerfEvents 2978cfc24b2STang Haojin with HasSoCParameter 298c590fb32Scz4e with HasL1PrefetchSourceParameter 299c590fb32Scz4e with HasCircularQueuePtrHelper 300c590fb32Scz4e with HasMemBlockParameters 301c590fb32Scz4e with HasTlbConst 302c590fb32Scz4e with SdtrigExt 303c590fb32Scz4e{ 304c590fb32Scz4e val io = IO(new Bundle { 305c590fb32Scz4e val hartId = Input(UInt(hartIdLen.W)) 306c590fb32Scz4e val redirect = Flipped(ValidIO(new Redirect)) 307c590fb32Scz4e 308c590fb32Scz4e val ooo_to_mem = new ooo_to_mem 309c590fb32Scz4e val mem_to_ooo = new mem_to_ooo 310c590fb32Scz4e val fetch_to_mem = new fetch_to_mem 311c590fb32Scz4e 312c590fb32Scz4e val ifetchPrefetch = Vec(LduCnt, ValidIO(new SoftIfetchPrefetchBundle)) 313c590fb32Scz4e 314c590fb32Scz4e // misc 315c590fb32Scz4e val error = ValidIO(new L1CacheErrorInfo) 316c590fb32Scz4e val memInfo = new Bundle { 317c590fb32Scz4e val sqFull = Output(Bool()) 318c590fb32Scz4e val lqFull = Output(Bool()) 319c590fb32Scz4e val dcacheMSHRFull = Output(Bool()) 320c590fb32Scz4e } 321c590fb32Scz4e val debug_ls = new DebugLSIO 322c590fb32Scz4e val l2_hint = Input(Valid(new L2ToL1Hint())) 323c590fb32Scz4e val l2PfqBusy = Input(Bool()) 324c590fb32Scz4e val l2_tlb_req = Flipped(new 
TlbRequestIO(nRespDups = 2)) 325c590fb32Scz4e val l2_pmp_resp = new PMPRespBundle 326c590fb32Scz4e val l2_flush_done = Input(Bool()) 327c590fb32Scz4e 328c590fb32Scz4e val debugTopDown = new Bundle { 329c590fb32Scz4e val robHeadVaddr = Flipped(Valid(UInt(VAddrBits.W))) 330c590fb32Scz4e val toCore = new MemCoreTopDownIO 331c590fb32Scz4e } 332c590fb32Scz4e val debugRolling = Flipped(new RobDebugRollingIO) 333c590fb32Scz4e 334c590fb32Scz4e // All the signals from/to frontend/backend to/from bus will go through MemBlock 335c590fb32Scz4e val fromTopToBackend = Input(new Bundle { 3368cfc24b2STang Haojin val msiInfo = ValidIO(UInt(soc.IMSICParams.MSI_INFO_WIDTH.W)) 337c590fb32Scz4e val clintTime = ValidIO(UInt(64.W)) 338c590fb32Scz4e }) 339c590fb32Scz4e val inner_hartId = Output(UInt(hartIdLen.W)) 340c590fb32Scz4e val inner_reset_vector = Output(UInt(PAddrBits.W)) 341c590fb32Scz4e val outer_reset_vector = Input(UInt(PAddrBits.W)) 342c590fb32Scz4e val outer_cpu_halt = Output(Bool()) 343c590fb32Scz4e val outer_l2_flush_en = Output(Bool()) 344c590fb32Scz4e val outer_power_down_en = Output(Bool()) 345c590fb32Scz4e val outer_cpu_critical_error = Output(Bool()) 3468cfc24b2STang Haojin val outer_msi_ack = Output(Bool()) 347c590fb32Scz4e val inner_beu_errors_icache = Input(new L1BusErrorUnitInfo) 348c590fb32Scz4e val outer_beu_errors_icache = Output(new L1BusErrorUnitInfo) 349c590fb32Scz4e val inner_hc_perfEvents = Output(Vec(numPCntHc * coreParams.L2NBanks + 1, new PerfEvent)) 350c590fb32Scz4e val outer_hc_perfEvents = Input(Vec(numPCntHc * coreParams.L2NBanks + 1, new PerfEvent)) 351c590fb32Scz4e 352c590fb32Scz4e // reset signals of frontend & backend are generated in memblock 353c590fb32Scz4e val reset_backend = Output(Reset()) 354c590fb32Scz4e // Reset singal from frontend. 
355c590fb32Scz4e val resetInFrontendBypass = new Bundle{ 356c590fb32Scz4e val fromFrontend = Input(Bool()) 357c590fb32Scz4e val toL2Top = Output(Bool()) 358c590fb32Scz4e } 359c590fb32Scz4e val traceCoreInterfaceBypass = new Bundle{ 360c590fb32Scz4e val fromBackend = Flipped(new TraceCoreInterface(hasOffset = true)) 361c590fb32Scz4e val toL2Top = new TraceCoreInterface 362c590fb32Scz4e } 363c590fb32Scz4e 364c590fb32Scz4e val topDownInfo = new Bundle { 365c590fb32Scz4e val fromL2Top = Input(new TopDownFromL2Top) 366c590fb32Scz4e val toBackend = Flipped(new TopDownInfo) 367c590fb32Scz4e } 36830f35717Scz4e val dft = Option.when(hasDFT)(Input(new SramBroadcastBundle)) 36930f35717Scz4e val dft_reset = Option.when(hasMbist)(Input(new DFTResetSignals())) 37030f35717Scz4e val dft_frnt = Option.when(hasDFT)(Output(new SramBroadcastBundle)) 37130f35717Scz4e val dft_reset_frnt = Option.when(hasMbist)(Output(new DFTResetSignals())) 37230f35717Scz4e val dft_bcknd = Option.when(hasDFT)(Output(new SramBroadcastBundle)) 37330f35717Scz4e val dft_reset_bcknd = Option.when(hasMbist)(Output(new DFTResetSignals())) 374c590fb32Scz4e }) 375c590fb32Scz4e 3761592abd1SYan Xu io.mem_to_ooo.writeBack.zipWithIndex.foreach{ case (wb, i) => 3771592abd1SYan Xu PerfCCT.updateInstPos(wb.bits.uop.debug_seqNum, PerfCCT.InstPos.AtBypassVal.id.U, wb.valid, clock, reset) 3781592abd1SYan Xu } 3791592abd1SYan Xu 380c590fb32Scz4e dontTouch(io.inner_hartId) 381c590fb32Scz4e dontTouch(io.inner_reset_vector) 382c590fb32Scz4e dontTouch(io.outer_reset_vector) 383c590fb32Scz4e dontTouch(io.outer_cpu_halt) 384c590fb32Scz4e dontTouch(io.outer_l2_flush_en) 385c590fb32Scz4e dontTouch(io.outer_power_down_en) 386c590fb32Scz4e dontTouch(io.outer_cpu_critical_error) 387c590fb32Scz4e dontTouch(io.inner_beu_errors_icache) 388c590fb32Scz4e dontTouch(io.outer_beu_errors_icache) 389c590fb32Scz4e dontTouch(io.inner_hc_perfEvents) 390c590fb32Scz4e dontTouch(io.outer_hc_perfEvents) 391c590fb32Scz4e 392c590fb32Scz4e val redirect 
= RegNextWithEnable(io.redirect) 393c590fb32Scz4e 394c590fb32Scz4e private val dcache = outer.dcache.module 395c590fb32Scz4e val uncache = outer.uncache.module 396c590fb32Scz4e 397c590fb32Scz4e //val delayedDcacheRefill = RegNext(dcache.io.lsu.lsq) 398c590fb32Scz4e 399c590fb32Scz4e val csrCtrl = DelayN(io.ooo_to_mem.csrCtrl, 2) 400c590fb32Scz4e dcache.io.l2_pf_store_only := RegNext(io.ooo_to_mem.csrCtrl.pf_ctrl.l2_pf_store_only, false.B) 401c590fb32Scz4e io.error <> DelayNWithValid(dcache.io.error, 2) 402c590fb32Scz4e when(!csrCtrl.cache_error_enable){ 403c590fb32Scz4e io.error.bits.report_to_beu := false.B 404c590fb32Scz4e io.error.valid := false.B 405c590fb32Scz4e } 406c590fb32Scz4e 407c590fb32Scz4e val loadUnits = Seq.fill(LduCnt)(Module(new LoadUnit)) 408c590fb32Scz4e val storeUnits = Seq.fill(StaCnt)(Module(new StoreUnit)) 409c590fb32Scz4e val stdExeUnits = Seq.fill(StdCnt)(Module(new MemExeUnit(backendParams.memSchdParams.get.issueBlockParams.find(_.StdCnt != 0).get.exuBlockParams.head))) 410c590fb32Scz4e val hybridUnits = Seq.fill(HyuCnt)(Module(new HybridUnit)) // Todo: replace it with HybridUnit 411c590fb32Scz4e val stData = stdExeUnits.map(_.io.out) 412c590fb32Scz4e val exeUnits = loadUnits ++ storeUnits 413c590fb32Scz4e 414c590fb32Scz4e // The number of vector load/store units is decoupled with the number of load/store units 415c590fb32Scz4e val vlSplit = Seq.fill(VlduCnt)(Module(new VLSplitImp)) 416c590fb32Scz4e val vsSplit = Seq.fill(VstuCnt)(Module(new VSSplitImp)) 417c590fb32Scz4e val vlMergeBuffer = Module(new VLMergeBufferImp) 418c590fb32Scz4e val vsMergeBuffer = Seq.fill(VstuCnt)(Module(new VSMergeBufferImp)) 419c590fb32Scz4e val vSegmentUnit = Module(new VSegmentUnit) 420c590fb32Scz4e val vfofBuffer = Module(new VfofBuffer) 421c590fb32Scz4e 422c590fb32Scz4e // misalign Buffer 423c590fb32Scz4e val loadMisalignBuffer = Module(new LoadMisalignBuffer) 424c590fb32Scz4e val storeMisalignBuffer = Module(new StoreMisalignBuffer) 425c590fb32Scz4e 
426c590fb32Scz4e val l1_pf_req = Wire(Decoupled(new L1PrefetchReq())) 427c590fb32Scz4e dcache.io.sms_agt_evict_req.ready := false.B 42805cc6da9SYanqin Li val l1D_pf_enable = GatedRegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_enable, 2, Some(false.B)) 429c590fb32Scz4e val prefetcherOpt: Option[BasePrefecher] = coreParams.prefetcher.map { 430c590fb32Scz4e case _: SMSParams => 431c590fb32Scz4e val sms = Module(new SMSPrefetcher()) 43205cc6da9SYanqin Li val enableSMS = Constantin.createRecord(s"enableSMS$hartId", initValue = true) 43305cc6da9SYanqin Li // constantinCtrl && master switch csrCtrl && single switch csrCtrl 43405cc6da9SYanqin Li sms.io.enable := enableSMS && l1D_pf_enable && 43505cc6da9SYanqin Li GatedRegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l2_pf_recv_enable, 2, Some(false.B)) 436c590fb32Scz4e sms.io_agt_en := GatedRegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_enable_agt, 2, Some(false.B)) 437c590fb32Scz4e sms.io_pht_en := GatedRegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_enable_pht, 2, Some(false.B)) 438c590fb32Scz4e sms.io_act_threshold := GatedRegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_active_threshold, 2, Some(12.U)) 439c590fb32Scz4e sms.io_act_stride := GatedRegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_active_stride, 2, Some(30.U)) 440c590fb32Scz4e sms.io_stride_en := false.B 441c590fb32Scz4e sms.io_dcache_evict <> dcache.io.sms_agt_evict_req 4424b2c87baS梁森 Liang Sen val mbistSmsPl = MbistPipeline.PlaceMbistPipeline(1, "MbistPipeSms", hasMbist) 443c590fb32Scz4e sms 444c590fb32Scz4e } 445c590fb32Scz4e prefetcherOpt.foreach{ pf => pf.io.l1_req.ready := false.B } 446c590fb32Scz4e val hartId = p(XSCoreParamsKey).HartId 447c590fb32Scz4e val l1PrefetcherOpt: Option[BasePrefecher] = coreParams.prefetcher.map { 448c590fb32Scz4e case _ => 449c590fb32Scz4e val l1Prefetcher = Module(new L1Prefetcher()) 4509db05eaeScz4e val enableL1StreamPrefetcher = Constantin.createRecord(s"enableL1StreamPrefetcher$hartId", initValue = true) 45105cc6da9SYanqin Li // 
constantinCtrl && master switch csrCtrl && single switch csrCtrl 45205cc6da9SYanqin Li l1Prefetcher.io.enable := enableL1StreamPrefetcher && l1D_pf_enable && 45305cc6da9SYanqin Li GatedRegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_enable_stride, 2, Some(false.B)) 454c590fb32Scz4e l1Prefetcher.pf_ctrl <> dcache.io.pf_ctrl 455c590fb32Scz4e l1Prefetcher.l2PfqBusy := io.l2PfqBusy 456c590fb32Scz4e 457c590fb32Scz4e // stride will train on miss or prefetch hit 458c590fb32Scz4e for (i <- 0 until LduCnt) { 459c590fb32Scz4e val source = loadUnits(i).io.prefetch_train_l1 460c590fb32Scz4e l1Prefetcher.stride_train(i).valid := source.valid && source.bits.isFirstIssue && ( 461c590fb32Scz4e source.bits.miss || isFromStride(source.bits.meta_prefetch) 462c590fb32Scz4e ) 463c590fb32Scz4e l1Prefetcher.stride_train(i).bits := source.bits 464c590fb32Scz4e val loadPc = RegNext(io.ooo_to_mem.issueLda(i).bits.uop.pc) // for s1 465c590fb32Scz4e l1Prefetcher.stride_train(i).bits.uop.pc := Mux( 466c590fb32Scz4e loadUnits(i).io.s2_ptr_chasing, 467c590fb32Scz4e RegEnable(loadPc, loadUnits(i).io.s2_prefetch_spec), 468c590fb32Scz4e RegEnable(RegEnable(loadPc, loadUnits(i).io.s1_prefetch_spec), loadUnits(i).io.s2_prefetch_spec) 469c590fb32Scz4e ) 470c590fb32Scz4e } 471c590fb32Scz4e for (i <- 0 until HyuCnt) { 472c590fb32Scz4e val source = hybridUnits(i).io.prefetch_train_l1 473c590fb32Scz4e l1Prefetcher.stride_train.drop(LduCnt)(i).valid := source.valid && source.bits.isFirstIssue && ( 474c590fb32Scz4e source.bits.miss || isFromStride(source.bits.meta_prefetch) 475c590fb32Scz4e ) 476c590fb32Scz4e l1Prefetcher.stride_train.drop(LduCnt)(i).bits := source.bits 477c590fb32Scz4e l1Prefetcher.stride_train.drop(LduCnt)(i).bits.uop.pc := Mux( 478c590fb32Scz4e hybridUnits(i).io.ldu_io.s2_ptr_chasing, 479c590fb32Scz4e RegNext(io.ooo_to_mem.hybridPc(i)), 480c590fb32Scz4e RegNext(RegNext(io.ooo_to_mem.hybridPc(i))) 481c590fb32Scz4e ) 482c590fb32Scz4e } 483c590fb32Scz4e l1Prefetcher 484c590fb32Scz4e } 
485c590fb32Scz4e // load prefetch to l1 Dcache 486c590fb32Scz4e l1PrefetcherOpt match { 487c590fb32Scz4e case Some(pf) => l1_pf_req <> Pipeline(in = pf.io.l1_req, depth = 1, pipe = false, name = Some("pf_queue_to_ldu_reg")) 488c590fb32Scz4e case None => 489c590fb32Scz4e l1_pf_req.valid := false.B 490c590fb32Scz4e l1_pf_req.bits := DontCare 491c590fb32Scz4e } 492c590fb32Scz4e val pf_train_on_hit = RegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_train_on_hit, 2, Some(true.B)) 493c590fb32Scz4e 494c590fb32Scz4e loadUnits.zipWithIndex.map(x => x._1.suggestName("LoadUnit_"+x._2)) 495c590fb32Scz4e storeUnits.zipWithIndex.map(x => x._1.suggestName("StoreUnit_"+x._2)) 496c590fb32Scz4e hybridUnits.zipWithIndex.map(x => x._1.suggestName("HybridUnit_"+x._2)) 497c590fb32Scz4e val atomicsUnit = Module(new AtomicsUnit) 498c590fb32Scz4e 499c590fb32Scz4e 500c590fb32Scz4e val ldaExeWbReqs = Wire(Vec(LduCnt, Decoupled(new MemExuOutput))) 501c590fb32Scz4e // atomicsUnit will overwrite the source from ldu if it is about to writeback 502c590fb32Scz4e val atomicWritebackOverride = Mux( 503c590fb32Scz4e atomicsUnit.io.out.valid, 504c590fb32Scz4e atomicsUnit.io.out.bits, 505c590fb32Scz4e loadUnits(AtomicWBPort).io.ldout.bits 506c590fb32Scz4e ) 507c590fb32Scz4e ldaExeWbReqs(AtomicWBPort).valid := atomicsUnit.io.out.valid || loadUnits(AtomicWBPort).io.ldout.valid 508c590fb32Scz4e ldaExeWbReqs(AtomicWBPort).bits := atomicWritebackOverride 509c590fb32Scz4e atomicsUnit.io.out.ready := ldaExeWbReqs(AtomicWBPort).ready 510c590fb32Scz4e loadUnits(AtomicWBPort).io.ldout.ready := ldaExeWbReqs(AtomicWBPort).ready 511c590fb32Scz4e 512c590fb32Scz4e val st_data_atomics = Seq.tabulate(StdCnt)(i => 513c590fb32Scz4e stData(i).valid && FuType.storeIsAMO(stData(i).bits.uop.fuType) 514c590fb32Scz4e ) 515c590fb32Scz4e 516c590fb32Scz4e // misalignBuffer will overwrite the source from ldu if it is about to writeback 517c590fb32Scz4e val misalignWritebackOverride = Mux( 518c590fb32Scz4e 
loadUnits(MisalignWBPort).io.ldout.valid, 519c590fb32Scz4e loadUnits(MisalignWBPort).io.ldout.bits, 520c590fb32Scz4e loadMisalignBuffer.io.writeBack.bits 521c590fb32Scz4e ) 522c590fb32Scz4e ldaExeWbReqs(MisalignWBPort).valid := loadMisalignBuffer.io.writeBack.valid || loadUnits(MisalignWBPort).io.ldout.valid 523c590fb32Scz4e ldaExeWbReqs(MisalignWBPort).bits := misalignWritebackOverride 524c590fb32Scz4e loadMisalignBuffer.io.writeBack.ready := ldaExeWbReqs(MisalignWBPort).ready && !loadUnits(MisalignWBPort).io.ldout.valid 525c590fb32Scz4e loadMisalignBuffer.io.loadOutValid := loadUnits(MisalignWBPort).io.ldout.valid 526c590fb32Scz4e loadMisalignBuffer.io.loadVecOutValid := loadUnits(MisalignWBPort).io.vecldout.valid 527c590fb32Scz4e loadUnits(MisalignWBPort).io.ldout.ready := ldaExeWbReqs(MisalignWBPort).ready 528c590fb32Scz4e ldaExeWbReqs(MisalignWBPort).bits.isFromLoadUnit := loadUnits(MisalignWBPort).io.ldout.bits.isFromLoadUnit || loadMisalignBuffer.io.writeBack.valid 529c590fb32Scz4e 530c590fb32Scz4e // loadUnit will overwrite the source from uncache if it is about to writeback 531c590fb32Scz4e ldaExeWbReqs(UncacheWBPort) <> loadUnits(UncacheWBPort).io.ldout 532c590fb32Scz4e io.mem_to_ooo.writebackLda <> ldaExeWbReqs 533c590fb32Scz4e io.mem_to_ooo.writebackSta <> storeUnits.map(_.io.stout) 534c590fb32Scz4e io.mem_to_ooo.writebackStd.zip(stdExeUnits).foreach {x => 535c590fb32Scz4e x._1.bits := x._2.io.out.bits 536c590fb32Scz4e // AMOs do not need to write back std now. 
537c590fb32Scz4e x._1.valid := x._2.io.out.fire && !FuType.storeIsAMO(x._2.io.out.bits.uop.fuType) 538c590fb32Scz4e } 539c590fb32Scz4e io.mem_to_ooo.writebackHyuLda <> hybridUnits.map(_.io.ldout) 540c590fb32Scz4e io.mem_to_ooo.writebackHyuSta <> hybridUnits.map(_.io.stout) 541c590fb32Scz4e io.mem_to_ooo.otherFastWakeup := DontCare 542c590fb32Scz4e io.mem_to_ooo.otherFastWakeup.drop(HyuCnt).take(LduCnt).zip(loadUnits.map(_.io.fast_uop)).foreach{case(a,b)=> a := b} 543c590fb32Scz4e io.mem_to_ooo.otherFastWakeup.take(HyuCnt).zip(hybridUnits.map(_.io.ldu_io.fast_uop)).foreach{case(a,b)=> a:=b} 544c590fb32Scz4e val stOut = io.mem_to_ooo.writebackSta ++ io.mem_to_ooo.writebackHyuSta 545c590fb32Scz4e 546c590fb32Scz4e // prefetch to l1 req 547c590fb32Scz4e // Stream's confidence is always 1 548c590fb32Scz4e // (LduCnt + HyuCnt) l1_pf_reqs ? 549c590fb32Scz4e loadUnits.foreach(load_unit => { 550c590fb32Scz4e load_unit.io.prefetch_req.valid <> l1_pf_req.valid 551c590fb32Scz4e load_unit.io.prefetch_req.bits <> l1_pf_req.bits 552c590fb32Scz4e }) 553c590fb32Scz4e 554c590fb32Scz4e hybridUnits.foreach(hybrid_unit => { 555c590fb32Scz4e hybrid_unit.io.ldu_io.prefetch_req.valid <> l1_pf_req.valid 556c590fb32Scz4e hybrid_unit.io.ldu_io.prefetch_req.bits <> l1_pf_req.bits 557c590fb32Scz4e }) 558c590fb32Scz4e 559c590fb32Scz4e // NOTE: loadUnits(0) has higher bank conflict and miss queue arb priority than loadUnits(1) and loadUnits(2) 560c590fb32Scz4e // when loadUnits(1)/loadUnits(2) stage 0 is busy, hw prefetch will never use that pipeline 561c590fb32Scz4e val LowConfPorts = if (LduCnt == 2) Seq(1) else if (LduCnt == 3) Seq(1, 2) else Seq(0) 562c590fb32Scz4e LowConfPorts.map{case i => loadUnits(i).io.prefetch_req.bits.confidence := 0.U} 563c590fb32Scz4e hybridUnits.foreach(hybrid_unit => { hybrid_unit.io.ldu_io.prefetch_req.bits.confidence := 0.U }) 564c590fb32Scz4e 565c590fb32Scz4e val canAcceptHighConfPrefetch = loadUnits.map(_.io.canAcceptHighConfPrefetch) ++ 566c590fb32Scz4e 
hybridUnits.map(_.io.canAcceptLowConfPrefetch) 567c590fb32Scz4e val canAcceptLowConfPrefetch = loadUnits.map(_.io.canAcceptLowConfPrefetch) ++ 568c590fb32Scz4e hybridUnits.map(_.io.canAcceptLowConfPrefetch) 569c590fb32Scz4e l1_pf_req.ready := (0 until LduCnt + HyuCnt).map{ 570c590fb32Scz4e case i => { 571c590fb32Scz4e if (LowConfPorts.contains(i)) { 572c590fb32Scz4e loadUnits(i).io.canAcceptLowConfPrefetch 573c590fb32Scz4e } else { 574c590fb32Scz4e Mux(l1_pf_req.bits.confidence === 1.U, canAcceptHighConfPrefetch(i), canAcceptLowConfPrefetch(i)) 575c590fb32Scz4e } 576c590fb32Scz4e } 577c590fb32Scz4e }.reduce(_ || _) 578c590fb32Scz4e 579c590fb32Scz4e // l1 pf fuzzer interface 580c590fb32Scz4e val DebugEnableL1PFFuzzer = false 581c590fb32Scz4e if (DebugEnableL1PFFuzzer) { 582c590fb32Scz4e // l1 pf req fuzzer 583c590fb32Scz4e val fuzzer = Module(new L1PrefetchFuzzer()) 584c590fb32Scz4e fuzzer.io.vaddr := DontCare 585c590fb32Scz4e fuzzer.io.paddr := DontCare 586c590fb32Scz4e 587c590fb32Scz4e // override load_unit prefetch_req 588c590fb32Scz4e loadUnits.foreach(load_unit => { 589c590fb32Scz4e load_unit.io.prefetch_req.valid <> fuzzer.io.req.valid 590c590fb32Scz4e load_unit.io.prefetch_req.bits <> fuzzer.io.req.bits 591c590fb32Scz4e }) 592c590fb32Scz4e 593c590fb32Scz4e // override hybrid_unit prefetch_req 594c590fb32Scz4e hybridUnits.foreach(hybrid_unit => { 595c590fb32Scz4e hybrid_unit.io.ldu_io.prefetch_req.valid <> fuzzer.io.req.valid 596c590fb32Scz4e hybrid_unit.io.ldu_io.prefetch_req.bits <> fuzzer.io.req.bits 597c590fb32Scz4e }) 598c590fb32Scz4e 599c590fb32Scz4e fuzzer.io.req.ready := l1_pf_req.ready 600c590fb32Scz4e } 601c590fb32Scz4e 602c590fb32Scz4e // TODO: fast load wakeup 603c590fb32Scz4e val lsq = Module(new LsqWrapper) 604c590fb32Scz4e val sbuffer = Module(new Sbuffer) 605c590fb32Scz4e // if you wants to stress test dcache store, use FakeSbuffer 606c590fb32Scz4e // val sbuffer = Module(new FakeSbuffer) // out of date now 607c590fb32Scz4e 
io.mem_to_ooo.stIssuePtr := lsq.io.issuePtrExt

// hartId fanout to the major memory-side modules.
dcache.io.hartId := io.hartId
lsq.io.hartId := io.hartId
sbuffer.io.hartId := io.hartId
atomicsUnit.io.hartId := io.hartId

dcache.io.lqEmpty := lsq.io.lqEmpty

// load/store prefetch to l2 cache
// SMS and the L1 stream prefetcher share one L2 sender; the L1 prefetcher's
// request wins when both are valid in the same cycle (see the Mux selects).
prefetcherOpt.foreach(sms_pf => {
  l1PrefetcherOpt.foreach(l1_pf => {
    // Two-cycle delay to ease timing on the cross-module path to L2.
    val sms_pf_to_l2 = DelayNWithValid(sms_pf.io.l2_req, 2)
    val l1_pf_to_l2 = DelayNWithValid(l1_pf.io.l2_req, 2)

    outer.l2_pf_sender_opt.get.out.head._1.addr_valid := sms_pf_to_l2.valid || l1_pf_to_l2.valid
    outer.l2_pf_sender_opt.get.out.head._1.addr := Mux(l1_pf_to_l2.valid, l1_pf_to_l2.bits.addr, sms_pf_to_l2.bits.addr)
    outer.l2_pf_sender_opt.get.out.head._1.pf_source := Mux(l1_pf_to_l2.valid, l1_pf_to_l2.bits.source, sms_pf_to_l2.bits.source)
    outer.l2_pf_sender_opt.get.out.head._1.l2_pf_en := RegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l2_pf_enable, 2, Some(true.B))

    // ChiselDB trace of every prefetch sent to L2, tagged by its origin.
    val l2_trace = Wire(new LoadPfDbBundle)
    l2_trace.paddr := outer.l2_pf_sender_opt.get.out.head._1.addr
    val table = ChiselDB.createTable(s"L2PrefetchTrace$hartId", new LoadPfDbBundle, basicDB = false)
    table.log(l2_trace, l1_pf_to_l2.valid, "StreamPrefetchTrace", clock, reset)
    table.log(l2_trace, !l1_pf_to_l2.valid && sms_pf_to_l2.valid, "L2PrefetchTrace", clock, reset)

    // L3 prefetch path: only fed by the L1 prefetcher, with a deeper (4-cycle) delay.
    val l1_pf_to_l3 = ValidIODelay(l1_pf.io.l3_req, 4)
    outer.l3_pf_sender_opt.foreach(_.out.head._1.addr_valid := l1_pf_to_l3.valid)
    outer.l3_pf_sender_opt.foreach(_.out.head._1.addr := l1_pf_to_l3.bits)
    outer.l3_pf_sender_opt.foreach(_.out.head._1.l2_pf_en := RegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l2_pf_enable, 4, Some(true.B)))

    val l3_trace = Wire(new LoadPfDbBundle)
    l3_trace.paddr := outer.l3_pf_sender_opt.map(_.out.head._1.addr).getOrElse(0.U)
    val l3_table = ChiselDB.createTable(s"L3PrefetchTrace$hartId", new LoadPfDbBundle, basicDB = false)
    l3_table.log(l3_trace, l1_pf_to_l3.valid, "StreamPrefetchTrace", clock, reset)

    XSPerfAccumulate("prefetch_fire_l2", outer.l2_pf_sender_opt.get.out.head._1.addr_valid)
    XSPerfAccumulate("prefetch_fire_l3", outer.l3_pf_sender_opt.map(_.out.head._1.addr_valid).getOrElse(false.B))
    XSPerfAccumulate("l1pf_fire_l2", l1_pf_to_l2.valid)
    XSPerfAccumulate("sms_fire_l2", !l1_pf_to_l2.valid && sms_pf_to_l2.valid)
    XSPerfAccumulate("sms_block_by_l1pf", l1_pf_to_l2.valid && sms_pf_to_l2.valid)
  })
})

// ptw
// sfence/tlbcsr are double-registered copies of the backend's values; all
// DTLB/PTW consumers below see this 2-cycle-delayed view.
val sfence = RegNext(RegNext(io.ooo_to_mem.sfence))
val tlbcsr = RegNext(RegNext(io.ooo_to_mem.tlbCsr))
private val ptw = outer.ptw.module
private val ptw_to_l2_buffer = outer.ptw_to_l2_buffer.module
private val l1d_to_l2_buffer = outer.l1d_to_l2_buffer.module
ptw.io.hartId := io.hartId
ptw.io.sfence <> sfence
ptw.io.csr.tlb <> tlbcsr
ptw.io.csr.distribute_csr <> csrCtrl.distribute_csr

// PTW perf events only exist for the real (hardware) PTW.
val perfEventsPTW = if (!coreParams.softPTW) {
  ptw.getPerfEvents
} else {
  Seq()
}

// dtlb
// One shared non-blocking TLB for loads+hybrids+stream-pf, one for stores.
val dtlb_ld_tlb_ld = Module(new TLBNonBlock(LduCnt + HyuCnt + 1, 2, ldtlbParams))
val dtlb_st_tlb_st = Module(new TLBNonBlock(StaCnt, 1, sttlbParams))
// Two-requestor TLB for the prefetchers (SMS + L2 BOP).
val dtlb_prefetch_tlb_prefetch = Module(new TLBNonBlock(2, 2, pftlbParams))
val dtlb_ld = Seq(dtlb_ld_tlb_ld.io)
val dtlb_st = Seq(dtlb_st_tlb_st.io)
val dtlb_prefetch = Seq(dtlb_prefetch_tlb_prefetch.io)
// TLB vector and index constants: dtlb is ordered [load, store, prefetch]
// and the Start/End vectors give each group's requestor-index range.
val dtlb = dtlb_ld ++ dtlb_st ++ dtlb_prefetch
val (dtlb_ld_idx, dtlb_st_idx, dtlb_pf_idx) = (0, 1, 2)
val TlbSubSizeVec = Seq(LduCnt + HyuCnt + 1, StaCnt, 2) // (load + hyu + stream pf, store, sms+l2bop)
val DTlbSize = TlbSubSizeVec.sum
val TlbStartVec = TlbSubSizeVec.scanLeft(0)(_ + _).dropRight(1)
val TlbEndVec = TlbSubSizeVec.scanLeft(0)(_ + _).drop(1)

val ptwio = Wire(new VectorTlbPtwIO(DTlbSize))
val dtlb_reqs = dtlb.map(_.requestor).flatten
val dtlb_pmps = dtlb.map(_.pmp).flatten
// Common control fanout to all three TLBs.
dtlb.map(_.hartId := io.hartId)
dtlb.map(_.sfence := sfence)
dtlb.map(_.csr := tlbcsr)
dtlb.map(_.flushPipe.map(a => a := false.B)) // non-block doesn't need
dtlb.map(_.redirect := redirect)
if (refillBothTlb) {
  // A single shared replacement unit requires every TLB to agree on outReplace.
  require(ldtlbParams.outReplace == sttlbParams.outReplace)
  require(ldtlbParams.outReplace == hytlbParams.outReplace)
  require(ldtlbParams.outReplace == pftlbParams.outReplace)
  require(ldtlbParams.outReplace)

  val replace = Module(new TlbReplace(DTlbSize, ldtlbParams))
  replace.io.apply_sep(dtlb_ld.map(_.replace) ++ dtlb_st.map(_.replace) ++ dtlb_prefetch.map(_.replace), ptwio.resp.bits.data.s1.entry.tag)
} else {
  // TODO: there will be bugs in TlbReplace when outReplace enable, since the order of Hyu is not right.
  if (ldtlbParams.outReplace) {
    val replace_ld = Module(new TlbReplace(LduCnt + 1, ldtlbParams))
    replace_ld.io.apply_sep(dtlb_ld.map(_.replace), ptwio.resp.bits.data.s1.entry.tag)
  }
  if (hytlbParams.outReplace) {
    // NOTE(review): this drives dtlb_ld's replace ports again (same as
    // replace_ld above) rather than a hybrid-specific set — likely the bug
    // referenced by the TODO above; confirm before enabling outReplace.
    val replace_hy = Module(new TlbReplace(HyuCnt, hytlbParams))
    replace_hy.io.apply_sep(dtlb_ld.map(_.replace), ptwio.resp.bits.data.s1.entry.tag)
  }
  if (sttlbParams.outReplace) {
    val replace_st = Module(new TlbReplace(StaCnt, sttlbParams))
    replace_st.io.apply_sep(dtlb_st.map(_.replace), ptwio.resp.bits.data.s1.entry.tag)
  }
  if (pftlbParams.outReplace) {
    val replace_pf = Module(new TlbReplace(2, pftlbParams))
    replace_pf.io.apply_sep(dtlb_prefetch.map(_.replace), ptwio.resp.bits.data.s1.entry.tag)
  }
}

// One-cycle-delayed snapshot of the PTW response, used both to bypass
// in-flight requests and to drive the TLB refill ports below.
val ptw_resp_next = RegEnable(ptwio.resp.bits, ptwio.resp.valid)
// NOTE(review): the response is invalidated only when sfence fires AND satp,
// vsatp and hgatp have ALL changed; if the intent is to drop the response on
// ANY of those changes, the inner condition should use `||` — confirm.
val ptw_resp_v = RegNext(ptwio.resp.valid && !(sfence.valid && tlbcsr.satp.changed && tlbcsr.vsatp.changed && tlbcsr.hgatp.changed), init = false.B)
ptwio.resp.ready := true.B

// tlbreplay(i): load port i must replay because its PTW request is being
// answered by the (bypassed) response this cycle instead of going to the PTW.
val tlbreplay = WireInit(VecInit(Seq.fill(LdExuCnt)(false.B)))
val tlbreplay_reg = GatedValidRegNext(tlbreplay)
val dtlb_ld0_tlbreplay_reg = GatedValidRegNext(dtlb_ld(0).tlbreplay)

if (backendParams.debugEn){ dontTouch(tlbreplay) }

for (i <- 0 until LdExuCnt) {
  tlbreplay(i) := dtlb_ld(0).ptw.req(i).valid && ptw_resp_next.vector(0) && ptw_resp_v &&
    ptw_resp_next.data.hit(dtlb_ld(0).ptw.req(i).bits.vpn, tlbcsr.satp.asid, tlbcsr.vsatp.asid, tlbcsr.hgatp.vmid, allType = true, ignoreAsid = true)
}
// Forward each TLB's PTW request, but squash it when the registered PTW
// response already covers the requested VPN (request filtering / bypass).
dtlb.flatMap(a => a.ptw.req)
  .zipWithIndex
  .foreach{ case (tlb, i) =>
    tlb.ready := ptwio.req(i).ready
    ptwio.req(i).bits := tlb.bits
    // vector_hit: was the registered response addressed to this TLB group?
    val vector_hit = if (refillBothTlb) Cat(ptw_resp_next.vector).orR
      else if (i < TlbEndVec(dtlb_ld_idx)) Cat(ptw_resp_next.vector.slice(TlbStartVec(dtlb_ld_idx), TlbEndVec(dtlb_ld_idx))).orR
      else if (i < TlbEndVec(dtlb_st_idx)) Cat(ptw_resp_next.vector.slice(TlbStartVec(dtlb_st_idx), TlbEndVec(dtlb_st_idx))).orR
      else Cat(ptw_resp_next.vector.slice(TlbStartVec(dtlb_pf_idx), TlbEndVec(dtlb_pf_idx))).orR
    ptwio.req(i).valid := tlb.valid && !(ptw_resp_v && vector_hit && ptw_resp_next.data.hit(tlb.bits.vpn, tlbcsr.satp.asid, tlbcsr.vsatp.asid, tlbcsr.hgatp.vmid, allType = true, ignoreAsid = true))
  }
// Broadcast the response data to every TLB; only the matching group(s) see valid.
dtlb.foreach(_.ptw.resp.bits := ptw_resp_next.data)
if (refillBothTlb) {
  dtlb.foreach(_.ptw.resp.valid := ptw_resp_v && Cat(ptw_resp_next.vector).orR)
} else {
  dtlb_ld.foreach(_.ptw.resp.valid := ptw_resp_v && Cat(ptw_resp_next.vector.slice(TlbStartVec(dtlb_ld_idx), TlbEndVec(dtlb_ld_idx))).orR)
  dtlb_st.foreach(_.ptw.resp.valid := ptw_resp_v && Cat(ptw_resp_next.vector.slice(TlbStartVec(dtlb_st_idx), TlbEndVec(dtlb_st_idx))).orR)
  dtlb_prefetch.foreach(_.ptw.resp.valid := ptw_resp_v && Cat(ptw_resp_next.vector.slice(TlbStartVec(dtlb_pf_idx), TlbEndVec(dtlb_pf_idx))).orR)
}
// getGpa is split by requestor range: [0, Ld+Hyu+1) loads, then stores, then prefetch.
dtlb_ld.foreach(_.ptw.resp.bits.getGpa := Cat(ptw_resp_next.getGpa.take(LduCnt + HyuCnt + 1)).orR)
dtlb_st.foreach(_.ptw.resp.bits.getGpa := Cat(ptw_resp_next.getGpa.slice(LduCnt + HyuCnt + 1, LduCnt + HyuCnt + 1 + StaCnt)).orR)
dtlb_prefetch.foreach(_.ptw.resp.bits.getGpa := Cat(ptw_resp_next.getGpa.drop(LduCnt + HyuCnt + 1 + StaCnt)).orR)

// Repeaters between the TLBs and the (shared) L2 TLB/PTW ports.
val dtlbRepeater = PTWNewFilter(ldtlbParams.fenceDelay, ptwio, ptw.io.tlb(1), sfence, tlbcsr, l2tlbParams.dfilterSize)
val itlbRepeater3 = PTWRepeaterNB(passReady = false, itlbParams.fenceDelay, io.fetch_to_mem.itlb, ptw.io.tlb(0), sfence, tlbcsr)

lsq.io.debugTopDown.robHeadMissInDTlb := dtlbRepeater.io.rob_head_miss_in_tlb

// pmp: one checker per DTLB requestor, all fed from the same PMP/PMA CSRs.
val pmp = Module(new PMP())
pmp.io.distribute_csr <> csrCtrl.distribute_csr

val pmp_checkers = Seq.fill(DTlbSize)(Module(new PMPChecker(4, leaveHitMux = true)))
val pmp_check = pmp_checkers.map(_.io)
for ((p,d) <- pmp_check zip dtlb_pmps) {
  if (HasBitmapCheck) {
    p.apply(tlbcsr.mbmc.CMODE.asBool, tlbcsr.priv.dmode, pmp.io.pmp, pmp.io.pma, d)
  } else {
    p.apply(tlbcsr.priv.dmode, pmp.io.pmp, pmp.io.pma, d)
  }
  require(p.req.bits.size.getWidth == d.bits.size.getWidth)
}

// debug_ls layout: [loads | hybrid-ld | stores | hybrid-st].
for (i <- 0 until LduCnt) {
  io.debug_ls.debugLsInfo(i) := loadUnits(i).io.debug_ls
}
for (i <- 0 until HyuCnt) {
  io.debug_ls.debugLsInfo.drop(LduCnt)(i) := hybridUnits(i).io.ldu_io.debug_ls
}
for (i <- 0 until StaCnt) {
  io.debug_ls.debugLsInfo.drop(LduCnt + HyuCnt)(i) := storeUnits(i).io.debug_ls
}
for (i <- 0 until HyuCnt) {
  io.debug_ls.debugLsInfo.drop(LduCnt + HyuCnt + StaCnt)(i) := hybridUnits(i).io.stu_io.debug_ls
}

io.mem_to_ooo.lsTopdownInfo := loadUnits.map(_.io.lsTopdownInfo) ++ hybridUnits.map(_.io.ldu_io.lsTopdownInfo)

// trigger: shadow copies of the memory breakpoint/trigger CSRs, updated on
// backend writes and fanned out to the load/store/hybrid units below.
val tdata = RegInit(VecInit(Seq.fill(TriggerNum)(0.U.asTypeOf(new MatchTriggerIO))))
val tEnable = RegInit(VecInit(Seq.fill(TriggerNum)(false.B)))
tEnable := csrCtrl.mem_trigger.tEnableVec
when(csrCtrl.mem_trigger.tUpdate.valid) {
  tdata(csrCtrl.mem_trigger.tUpdate.bits.addr) := csrCtrl.mem_trigger.tUpdate.bits.tdata
}
val triggerCanRaiseBpExp = csrCtrl.mem_trigger.triggerCanRaiseBpExp
val debugMode = csrCtrl.mem_trigger.debugMode

val backendTriggerTimingVec = VecInit(tdata.map(_.timing))
val backendTriggerChainVec = VecInit(tdata.map(_.chain))

XSDebug(tEnable.asUInt.orR, "Debug Mode: At least one store trigger is enabled\n")
for (j <- 0 until TriggerNum)
  PrintTriggerInfo(tEnable(j), tdata(j))

// The segment instruction is executed atomically.
// After the segment instruction directive starts executing, no other instructions should be executed.
// Set while a vector segment instruction occupies the memory pipeline:
// raised one cycle after it enters vSegmentUnit, cleared one cycle after
// its writeback. Used below to preempt load port 0's dcache/TLB access.
val vSegmentFlag = RegInit(false.B)

when(GatedValidRegNext(vSegmentUnit.io.in.fire)) {
  vSegmentFlag := true.B
}.elsewhen(GatedValidRegNext(vSegmentUnit.io.uopwriteback.valid)) {
  vSegmentFlag := false.B
}

// Throttle speculative misaligned loads: once a misalign-buffer load is
// rolled back due to a RAR-queue nack, stop speculating until the RAR queue
// drains below (LoadQueueRARSize - 4) entries.
val misalign_allow_spec = RegInit(true.B)
val ldu_rollback_with_misalign_nack = loadUnits.map(ldu =>
  ldu.io.lsq.ldin.bits.isFrmMisAlignBuf && ldu.io.lsq.ldin.bits.rep_info.rar_nack && ldu.io.rollback.valid
).reduce(_ || _)
when (ldu_rollback_with_misalign_nack) {
  misalign_allow_spec := false.B
} .elsewhen(lsq.io.rarValidCount < (LoadQueueRARSize - 4).U) {
  misalign_allow_spec := true.B
}

// LoadUnit: per-pipeline wiring.
val correctMissTrain = Constantin.createRecord(s"CorrectMissTrain$hartId", initValue = false)

for (i <- 0 until LduCnt) {
  loadUnits(i).io.redirect <> redirect
  loadUnits(i).io.misalign_allow_spec := misalign_allow_spec

  // get input from dispatch
  loadUnits(i).io.ldin <> io.ooo_to_mem.issueLda(i)
  loadUnits(i).io.feedback_slow <> io.mem_to_ooo.ldaIqFeedback(i).feedbackSlow
  io.mem_to_ooo.ldaIqFeedback(i).feedbackFast := DontCare
  loadUnits(i).io.correctMissTrain := correctMissTrain
  // ldCancel/wakeup layout: hybrid units occupy the first HyuCnt slots.
  io.mem_to_ooo.ldCancel.drop(HyuCnt)(i) := loadUnits(i).io.ldCancel
  io.mem_to_ooo.wakeup.drop(HyuCnt)(i) := loadUnits(i).io.wakeup

  // vector: only the first VlduCnt load pipelines carry vector loads.
  if (i < VlduCnt) {
    loadUnits(i).io.vecldout.ready := false.B
  } else {
    loadUnits(i).io.vecldin.valid := false.B
    loadUnits(i).io.vecldin.bits := DontCare
    loadUnits(i).io.vecldout.ready := false.B
  }

  // fast replay: each unit replays into itself.
  loadUnits(i).io.fast_rep_in <> loadUnits(i).io.fast_rep_out

  // SoftPrefetch to frontend (prefetch.i)
  loadUnits(i).io.ifetchPrefetch <> io.ifetchPrefetch(i)

  // dcache access; on port 0 the vector segment unit muxes into the request path.
  loadUnits(i).io.dcache <> dcache.io.lsu.load(i)
  if(i == 0){
    vSegmentUnit.io.rdcache := DontCare
    dcache.io.lsu.load(i).req.valid := loadUnits(i).io.dcache.req.valid || vSegmentUnit.io.rdcache.req.valid
    dcache.io.lsu.load(i).req.bits := Mux1H(Seq(
      vSegmentUnit.io.rdcache.req.valid -> vSegmentUnit.io.rdcache.req.bits,
      loadUnits(i).io.dcache.req.valid -> loadUnits(i).io.dcache.req.bits
    ))
    vSegmentUnit.io.rdcache.req.ready := dcache.io.lsu.load(i).req.ready
  }

  // Dcache requests must also be preempted by the segment.
  when(vSegmentFlag){
    loadUnits(i).io.dcache.req.ready := false.B // Dcache is preempted.

    // While a segment instruction is active, port 0's sideband signals come
    // from the segment unit instead of loadUnits(0).
    dcache.io.lsu.load(0).pf_source := vSegmentUnit.io.rdcache.pf_source
    dcache.io.lsu.load(0).s1_paddr_dup_lsu := vSegmentUnit.io.rdcache.s1_paddr_dup_lsu
    dcache.io.lsu.load(0).s1_paddr_dup_dcache := vSegmentUnit.io.rdcache.s1_paddr_dup_dcache
    dcache.io.lsu.load(0).s1_kill := vSegmentUnit.io.rdcache.s1_kill
    dcache.io.lsu.load(0).s2_kill := vSegmentUnit.io.rdcache.s2_kill
    dcache.io.lsu.load(0).s0_pc := vSegmentUnit.io.rdcache.s0_pc
    dcache.io.lsu.load(0).s1_pc := vSegmentUnit.io.rdcache.s1_pc
    dcache.io.lsu.load(0).s2_pc := vSegmentUnit.io.rdcache.s2_pc
    dcache.io.lsu.load(0).is128Req := vSegmentUnit.io.rdcache.is128Req
  }.otherwise {
    loadUnits(i).io.dcache.req.ready := dcache.io.lsu.load(i).req.ready

    dcache.io.lsu.load(0).pf_source := loadUnits(0).io.dcache.pf_source
    dcache.io.lsu.load(0).s1_paddr_dup_lsu := loadUnits(0).io.dcache.s1_paddr_dup_lsu
    dcache.io.lsu.load(0).s1_paddr_dup_dcache := loadUnits(0).io.dcache.s1_paddr_dup_dcache
    dcache.io.lsu.load(0).s1_kill := loadUnits(0).io.dcache.s1_kill
    dcache.io.lsu.load(0).s2_kill := loadUnits(0).io.dcache.s2_kill
    dcache.io.lsu.load(0).s0_pc := loadUnits(0).io.dcache.s0_pc
    dcache.io.lsu.load(0).s1_pc := loadUnits(0).io.dcache.s1_pc
    dcache.io.lsu.load(0).s2_pc := loadUnits(0).io.dcache.s2_pc
    dcache.io.lsu.load(0).is128Req := loadUnits(0).io.dcache.is128Req
  }

  // forward sources: store queue, sbuffer, uncache buffer, TileLink D, MSHR.
  loadUnits(i).io.lsq.forward <> lsq.io.forward(i)
  loadUnits(i).io.sbuffer <> sbuffer.io.forward(i)
  loadUnits(i).io.ubuffer <> uncache.io.forward(i)
  loadUnits(i).io.tl_d_channel := dcache.io.lsu.forward_D(i)
  loadUnits(i).io.forward_mshr <> dcache.io.lsu.forward_mshr(i)
  // ld-ld violation check
  loadUnits(i).io.lsq.ldld_nuke_query <> lsq.io.ldu.ldld_nuke_query(i)
  loadUnits(i).io.lsq.stld_nuke_query <> lsq.io.ldu.stld_nuke_query(i)
  // loadqueue old ptr
  loadUnits(i).io.lsq.lqDeqPtr := lsq.io.lqDeqPtr
  loadUnits(i).io.csrCtrl <> csrCtrl
  // dcache refill req
  // loadUnits(i).io.refill <> delayedDcacheRefill
  // dtlb
  loadUnits(i).io.tlb <> dtlb_reqs.take(LduCnt)(i)
  if(i == 0 ){ // port 0 assign to vsegmentUnit
    val vsegmentDtlbReqValid = vSegmentUnit.io.dtlb.req.valid // segment TLB request needs a 1-cycle delay
    dtlb_reqs.take(LduCnt)(i).req.valid := loadUnits(i).io.tlb.req.valid || RegNext(vsegmentDtlbReqValid)
    vSegmentUnit.io.dtlb.req.ready := dtlb_reqs.take(LduCnt)(i).req.ready
    dtlb_reqs.take(LduCnt)(i).req.bits := ParallelPriorityMux(Seq(
      RegNext(vsegmentDtlbReqValid) -> RegEnable(vSegmentUnit.io.dtlb.req.bits, vsegmentDtlbReqValid),
      loadUnits(i).io.tlb.req.valid -> loadUnits(i).io.tlb.req.bits
    ))
  }
  // pmp
  loadUnits(i).io.pmp <> pmp_check(i).resp
  // st-ld violation query
  val stld_nuke_query = storeUnits.map(_.io.stld_nuke_query) ++ hybridUnits.map(_.io.stu_io.stld_nuke_query)
  for (s <- 0 until StorePipelineWidth) {
    loadUnits(i).io.stld_nuke_query(s) := stld_nuke_query(s)
  }
  loadUnits(i).io.lq_rep_full <> lsq.io.lq_rep_full
  // load prefetch train
  prefetcherOpt.foreach(pf => {
    // sms will train on all miss load sources
    val source = loadUnits(i).io.prefetch_train
    pf.io.ld_in(i).valid := Mux(pf_train_on_hit,
      source.valid,
      source.valid && source.bits.isFirstIssue && source.bits.miss
    )
    pf.io.ld_in(i).bits := source.bits
    // Recover the training pc: one stage of delay when pointer-chasing
    // shortcuts s1, two stages otherwise.
    val loadPc = RegNext(io.ooo_to_mem.issueLda(i).bits.uop.pc) // for s1
    pf.io.ld_in(i).bits.uop.pc := Mux(
      loadUnits(i).io.s2_ptr_chasing,
      RegEnable(loadPc, loadUnits(i).io.s2_prefetch_spec),
      RegEnable(RegEnable(loadPc, loadUnits(i).io.s1_prefetch_spec), loadUnits(i).io.s2_prefetch_spec)
    )
  })
  l1PrefetcherOpt.foreach(pf => {
    // stream will train on all load sources
    val source = loadUnits(i).io.prefetch_train_l1
    pf.io.ld_in(i).valid := source.valid && source.bits.isFirstIssue
    pf.io.ld_in(i).bits := source.bits
  })

  // load to load fast forward: load(i) prefers data(i)
  val l2l_fwd_out = loadUnits.map(_.io.l2l_fwd_out) ++ hybridUnits.map(_.io.ldu_io.l2l_fwd_out)
  val fastPriority = (i until LduCnt + HyuCnt) ++ (0 until i)
  val fastValidVec = fastPriority.map(j => l2l_fwd_out(j).valid)
  val fastDataVec = fastPriority.map(j => l2l_fwd_out(j).data)
  val fastErrorVec = fastPriority.map(j => l2l_fwd_out(j).dly_ld_err)
  val fastMatchVec = fastPriority.map(j => io.ooo_to_mem.loadFastMatch(i)(j))
  loadUnits(i).io.l2l_fwd_in.valid := VecInit(fastValidVec).asUInt.orR
  loadUnits(i).io.l2l_fwd_in.data := ParallelPriorityMux(fastValidVec, fastDataVec)
  loadUnits(i).io.l2l_fwd_in.dly_ld_err := ParallelPriorityMux(fastValidVec, fastErrorVec)
  val fastMatch = ParallelPriorityMux(fastValidVec, fastMatchVec)
  loadUnits(i).io.ld_fast_match := fastMatch
  loadUnits(i).io.ld_fast_imm := io.ooo_to_mem.loadFastImm(i)
  loadUnits(i).io.ld_fast_fuOpType := io.ooo_to_mem.loadFastFuOpType(i)
  loadUnits(i).io.replay <> lsq.io.replay(i)

  // NOTE(review): declared inside the per-unit loop, so a separate RegNext of
  // io.l2_hint is elaborated per iteration and dcache.io.l2_hint / the lsq
  // fields below are re-driven each time (last connect wins) — hoisting this
  // out of the loop would be equivalent and cheaper; confirm.
  val l2_hint = RegNext(io.l2_hint)

  // L2 Hint for DCache
  dcache.io.l2_hint <> l2_hint

  loadUnits(i).io.l2_hint <> l2_hint
  loadUnits(i).io.tlb_hint.id := dtlbRepeater.io.hint.get.req(i).id
  loadUnits(i).io.tlb_hint.full := dtlbRepeater.io.hint.get.req(i).full ||
    tlbreplay_reg(i) || dtlb_ld0_tlbreplay_reg(i)

  // passdown to lsq (load s2); only the uncache WB port talks to lsq.ldout.
  lsq.io.ldu.ldin(i) <> loadUnits(i).io.lsq.ldin
  if (i == UncacheWBPort) {
    lsq.io.ldout(i) <> loadUnits(i).io.lsq.uncache
  } else {
    lsq.io.ldout(i).ready := true.B
    loadUnits(i).io.lsq.uncache.valid := false.B
    loadUnits(i).io.lsq.uncache.bits := DontCare
  }
  lsq.io.ld_raw_data(i) <> loadUnits(i).io.lsq.ld_raw_data
  lsq.io.ncOut(i) <> loadUnits(i).io.lsq.nc_ldin
  lsq.io.l2_hint.valid := l2_hint.valid
  lsq.io.l2_hint.bits.sourceId := l2_hint.bits.sourceId
  lsq.io.l2_hint.bits.isKeyword := l2_hint.bits.isKeyword

  lsq.io.tlb_hint <> dtlbRepeater.io.hint.get

  // connect misalignBuffer; only MisalignWBPort carries the split req/resp.
  loadMisalignBuffer.io.enq(i) <> loadUnits(i).io.misalign_enq

  if (i == MisalignWBPort) {
    loadUnits(i).io.misalign_ldin <> loadMisalignBuffer.io.splitLoadReq
    loadUnits(i).io.misalign_ldout <> loadMisalignBuffer.io.splitLoadResp
  } else {
    loadUnits(i).io.misalign_ldin.valid := false.B
    loadUnits(i).io.misalign_ldin.bits := DontCare
  }

  // alter writeback exception info
  io.mem_to_ooo.s3_delayed_load_error(i) := loadUnits(i).io.s3_dly_ld_err

  // update mem dependency predictor
  // io.memPredUpdate(i) := DontCare

  // --------------------------------
  // Load Triggers
  // --------------------------------
  loadUnits(i).io.fromCsrTrigger.tdataVec := tdata
  loadUnits(i).io.fromCsrTrigger.tEnableVec := tEnable
  loadUnits(i).io.fromCsrTrigger.triggerCanRaiseBpExp := triggerCanRaiseBpExp
  loadUnits(i).io.fromCsrTrigger.debugMode := debugMode
}

// HybridUnit: per-pipeline wiring (load side first, then store side).
for (i <- 0 until HyuCnt) {
  hybridUnits(i).io.redirect <> redirect

  // get input from dispatch
  hybridUnits(i).io.lsin <> io.ooo_to_mem.issueHya(i)
  hybridUnits(i).io.feedback_slow <> io.mem_to_ooo.hyuIqFeedback(i).feedbackSlow
  hybridUnits(i).io.feedback_fast <> io.mem_to_ooo.hyuIqFeedback(i).feedbackFast
  hybridUnits(i).io.correctMissTrain := correctMissTrain
  io.mem_to_ooo.ldCancel.take(HyuCnt)(i) := hybridUnits(i).io.ldu_io.ldCancel
  io.mem_to_ooo.wakeup.take(HyuCnt)(i) := hybridUnits(i).io.ldu_io.wakeup

  // ------------------------------------
  // Load Port
  // ------------------------------------
  // fast replay: each unit replays into itself.
  hybridUnits(i).io.ldu_io.fast_rep_in <>
hybridUnits(i).io.ldu_io.fast_rep_out

  // dcache access: hybrid load/store ports sit after the dedicated units'.
  hybridUnits(i).io.ldu_io.dcache <> dcache.io.lsu.load(LduCnt + i)
  hybridUnits(i).io.stu_io.dcache <> dcache.io.lsu.sta(StaCnt + i)

  // forward sources: store queue, sbuffer, uncache buffer, TileLink D, MSHR.
  hybridUnits(i).io.ldu_io.lsq.forward <> lsq.io.forward(LduCnt + i)
  hybridUnits(i).io.ldu_io.sbuffer <> sbuffer.io.forward(LduCnt + i)
  hybridUnits(i).io.ldu_io.ubuffer <> uncache.io.forward(LduCnt + i)
  // hybridUnits(i).io.ldu_io.vec_forward <> vsFlowQueue.io.forward(LduCnt + i)
  hybridUnits(i).io.ldu_io.vec_forward := DontCare
  hybridUnits(i).io.ldu_io.tl_d_channel := dcache.io.lsu.forward_D(LduCnt + i)
  hybridUnits(i).io.ldu_io.forward_mshr <> dcache.io.lsu.forward_mshr(LduCnt + i)
  // ld-ld violation check
  hybridUnits(i).io.ldu_io.lsq.ldld_nuke_query <> lsq.io.ldu.ldld_nuke_query(LduCnt + i)
  hybridUnits(i).io.ldu_io.lsq.stld_nuke_query <> lsq.io.ldu.stld_nuke_query(LduCnt + i)
  hybridUnits(i).io.csrCtrl <> csrCtrl
  // dcache refill req
  hybridUnits(i).io.ldu_io.tlb_hint.id := dtlbRepeater.io.hint.get.req(LduCnt + i).id
  hybridUnits(i).io.ldu_io.tlb_hint.full := dtlbRepeater.io.hint.get.req(LduCnt + i).full ||
    tlbreplay_reg(LduCnt + i) || dtlb_ld0_tlbreplay_reg(LduCnt + i)

  // dtlb: hybrid units share the load TLB's requestors after the LduCnt ports.
  hybridUnits(i).io.tlb <> dtlb_ld.head.requestor(LduCnt + i)
  // pmp
  hybridUnits(i).io.pmp <> pmp_check.drop(LduCnt)(i).resp
  // st-ld violation query
  val stld_nuke_query = VecInit(storeUnits.map(_.io.stld_nuke_query) ++ hybridUnits.map(_.io.stu_io.stld_nuke_query))
  hybridUnits(i).io.ldu_io.stld_nuke_query := stld_nuke_query
  hybridUnits(i).io.ldu_io.lq_rep_full <> lsq.io.lq_rep_full
  // load prefetch train: SMS trains on hybrid loads (ld_in beyond LduCnt).
  prefetcherOpt.foreach(pf => {
    val source = hybridUnits(i).io.prefetch_train
    pf.io.ld_in(LduCnt + i).valid := Mux(pf_train_on_hit,
      source.valid,
      source.valid && source.bits.isFirstIssue && source.bits.miss
    )
    pf.io.ld_in(LduCnt + i).bits := source.bits
    pf.io.ld_in(LduCnt + i).bits.uop.pc := Mux(hybridUnits(i).io.ldu_io.s2_ptr_chasing, io.ooo_to_mem.hybridPc(i), RegNext(io.ooo_to_mem.hybridPc(i)))
  })
  l1PrefetcherOpt.foreach(pf => {
    // stream will train on all load sources
    val source = hybridUnits(i).io.prefetch_train_l1
    pf.io.ld_in(LduCnt + i).valid := source.valid && source.bits.isFirstIssue &&
      FuType.isLoad(source.bits.uop.fuType)
    pf.io.ld_in(LduCnt + i).bits := source.bits
    // Stream prefetcher takes no store training from hybrid units.
    pf.io.st_in(StaCnt + i).valid := false.B
    pf.io.st_in(StaCnt + i).bits := DontCare
  })
  // SMS store-side training from the hybrid unit's store uops.
  prefetcherOpt.foreach(pf => {
    val source = hybridUnits(i).io.prefetch_train
    pf.io.st_in(StaCnt + i).valid := Mux(pf_train_on_hit,
      source.valid,
      source.valid && source.bits.isFirstIssue && source.bits.miss
    ) && FuType.isStore(source.bits.uop.fuType)
    pf.io.st_in(StaCnt + i).bits := source.bits
    pf.io.st_in(StaCnt + i).bits.uop.pc := RegNext(io.ooo_to_mem.hybridPc(i))
  })

  // load to load fast forward: load(i) prefers data(i)
  val l2l_fwd_out = loadUnits.map(_.io.l2l_fwd_out) ++ hybridUnits.map(_.io.ldu_io.l2l_fwd_out)
  val fastPriority = (LduCnt + i until LduCnt + HyuCnt) ++ (0 until LduCnt + i)
  val fastValidVec = fastPriority.map(j => l2l_fwd_out(j).valid)
  val fastDataVec = fastPriority.map(j => l2l_fwd_out(j).data)
  val fastErrorVec = fastPriority.map(j => l2l_fwd_out(j).dly_ld_err)
  val fastMatchVec = fastPriority.map(j => io.ooo_to_mem.loadFastMatch(LduCnt + i)(j))
  hybridUnits(i).io.ldu_io.l2l_fwd_in.valid := VecInit(fastValidVec).asUInt.orR
  hybridUnits(i).io.ldu_io.l2l_fwd_in.data := ParallelPriorityMux(fastValidVec, fastDataVec)
  hybridUnits(i).io.ldu_io.l2l_fwd_in.dly_ld_err := ParallelPriorityMux(fastValidVec, fastErrorVec)
  val fastMatch = ParallelPriorityMux(fastValidVec, fastMatchVec)
  hybridUnits(i).io.ldu_io.ld_fast_match := fastMatch
  hybridUnits(i).io.ldu_io.ld_fast_imm := io.ooo_to_mem.loadFastImm(LduCnt + i)
  hybridUnits(i).io.ldu_io.ld_fast_fuOpType := io.ooo_to_mem.loadFastFuOpType(LduCnt + i)
  hybridUnits(i).io.ldu_io.replay <> lsq.io.replay(LduCnt + i)
  hybridUnits(i).io.ldu_io.l2_hint <> io.l2_hint

  // uncache
  lsq.io.ldout.drop(LduCnt)(i) <> hybridUnits(i).io.ldu_io.lsq.uncache
  lsq.io.ld_raw_data.drop(LduCnt)(i) <> hybridUnits(i).io.ldu_io.lsq.ld_raw_data

  // passdown to lsq (load s2); hybrid units carry no non-cacheable loads.
  hybridUnits(i).io.ldu_io.lsq.nc_ldin.valid := false.B
  hybridUnits(i).io.ldu_io.lsq.nc_ldin.bits := DontCare
  lsq.io.ldu.ldin(LduCnt + i) <> hybridUnits(i).io.ldu_io.lsq.ldin
  // Lsq to sta unit
  lsq.io.sta.storeMaskIn(StaCnt + i) <> hybridUnits(i).io.stu_io.st_mask_out

  // Lsq to std unit's rs; atomics data is filtered out (handled separately).
  lsq.io.std.storeDataIn(StaCnt + i) := stData(StaCnt + i)
  lsq.io.std.storeDataIn(StaCnt + i).valid := stData(StaCnt + i).valid && !st_data_atomics(StaCnt + i)
  // prefetch
  hybridUnits(i).io.stu_io.prefetch_req <> sbuffer.io.store_prefetch(StaCnt + i)

  io.mem_to_ooo.s3_delayed_load_error(LduCnt + i) := hybridUnits(i).io.ldu_io.s3_dly_ld_err

  // ------------------------------------
  // Store Port
  // ------------------------------------
  // Hybrid store-address ports occupy the last HyuCnt slots of the lsq arrays.
  hybridUnits(i).io.stu_io.lsq <> lsq.io.sta.storeAddrIn.takeRight(HyuCnt)(i)
  hybridUnits(i).io.stu_io.lsq_replenish <> lsq.io.sta.storeAddrInRe.takeRight(HyuCnt)(i)

  lsq.io.sta.storeMaskIn.takeRight(HyuCnt)(i) <> hybridUnits(i).io.stu_io.st_mask_out
  io.mem_to_ooo.stIn.takeRight(HyuCnt)(i).valid := hybridUnits(i).io.stu_io.issue.valid
  io.mem_to_ooo.stIn.takeRight(HyuCnt)(i).bits := hybridUnits(i).io.stu_io.issue.bits

  // ------------------------------------
  // Vector Store Port
  // ------------------------------------
  hybridUnits(i).io.vec_stu_io.isFirstIssue := true.B

  // -------------------------
  // Store Triggers
  // -------------------------
  hybridUnits(i).io.fromCsrTrigger.tdataVec := tdata
  hybridUnits(i).io.fromCsrTrigger.tEnableVec := tEnable
  hybridUnits(i).io.fromCsrTrigger.triggerCanRaiseBpExp := triggerCanRaiseBpExp
  hybridUnits(i).io.fromCsrTrigger.debugMode := debugMode
}

// misalignBuffer
  // loadMisalignBuffer tracks ROB commit/pending state so it can decide when a
  // split misaligned load is allowed to complete (it mirrors lsq.io.rob below)
  loadMisalignBuffer.io.redirect <> redirect
  loadMisalignBuffer.io.rob.lcommit := io.ooo_to_mem.lsqio.lcommit
  loadMisalignBuffer.io.rob.scommit := io.ooo_to_mem.lsqio.scommit
  loadMisalignBuffer.io.rob.pendingMMIOld := io.ooo_to_mem.lsqio.pendingMMIOld
  loadMisalignBuffer.io.rob.pendingld := io.ooo_to_mem.lsqio.pendingld
  loadMisalignBuffer.io.rob.pendingst := io.ooo_to_mem.lsqio.pendingst
  loadMisalignBuffer.io.rob.pendingVst := io.ooo_to_mem.lsqio.pendingVst
  loadMisalignBuffer.io.rob.commit := io.ooo_to_mem.lsqio.commit
  loadMisalignBuffer.io.rob.pendingPtr := io.ooo_to_mem.lsqio.pendingPtr
  loadMisalignBuffer.io.rob.pendingPtrNext := io.ooo_to_mem.lsqio.pendingPtrNext

  lsq.io.loadMisalignFull := loadMisalignBuffer.io.loadMisalignFull
  lsq.io.misalignAllowSpec := misalign_allow_spec

  // storeMisalignBuffer gets the same ROB view as the load-side buffer above
  storeMisalignBuffer.io.redirect <> redirect
  storeMisalignBuffer.io.rob.lcommit := io.ooo_to_mem.lsqio.lcommit
  storeMisalignBuffer.io.rob.scommit := io.ooo_to_mem.lsqio.scommit
  storeMisalignBuffer.io.rob.pendingMMIOld := io.ooo_to_mem.lsqio.pendingMMIOld
  storeMisalignBuffer.io.rob.pendingld := io.ooo_to_mem.lsqio.pendingld
  storeMisalignBuffer.io.rob.pendingst := io.ooo_to_mem.lsqio.pendingst
  storeMisalignBuffer.io.rob.pendingVst := io.ooo_to_mem.lsqio.pendingVst
  storeMisalignBuffer.io.rob.commit := io.ooo_to_mem.lsqio.commit
  storeMisalignBuffer.io.rob.pendingPtr := io.ooo_to_mem.lsqio.pendingPtr
  storeMisalignBuffer.io.rob.pendingPtrNext := io.ooo_to_mem.lsqio.pendingPtrNext

  lsq.io.maControl <> storeMisalignBuffer.io.sqControl

  // cache-maintenance (CMO) request/response path between SQ and dcache
  lsq.io.cmoOpReq <> dcache.io.cmoOpReq
  lsq.io.cmoOpResp <> dcache.io.cmoOpResp

  // Prefetcher DTLB port layout:
  //   stream (L1) prefetcher  -> load-TLB region, after all load + hybrid ports
  //   SMS prefetcher          -> first prefetch-TLB port
  //   L2-to-L1 TLB requests   -> second prefetch-TLB port
  // NOTE(review): "L2toL1DLBPortIndex" looks like a typo for "...DTLBPortIndex";
  // not renamed here since later parts of the file may reference it.
  val StreamDTLBPortIndex = TlbStartVec(dtlb_ld_idx) + LduCnt + HyuCnt
  val PrefetcherDTLBPortIndex = TlbStartVec(dtlb_pf_idx)
  val L2toL1DLBPortIndex = TlbStartVec(dtlb_pf_idx) + 1
  prefetcherOpt match {
    case Some(pf) =>
      dtlb_reqs(PrefetcherDTLBPortIndex) <> pf.io.tlb_req
      pf.io.pmp_resp := pmp_check(PrefetcherDTLBPortIndex).resp
    case None =>
      // no SMS prefetcher configured: tie the port off (never request, always drain)
      dtlb_reqs(PrefetcherDTLBPortIndex) := DontCare
      dtlb_reqs(PrefetcherDTLBPortIndex).req.valid := false.B
      dtlb_reqs(PrefetcherDTLBPortIndex).resp.ready := true.B
  }
  l1PrefetcherOpt match {
    case Some(pf) =>
      dtlb_reqs(StreamDTLBPortIndex) <> pf.io.tlb_req
      pf.io.pmp_resp := pmp_check(StreamDTLBPortIndex).resp
    case None =>
      // no L1 stream prefetcher configured: tie the port off
      dtlb_reqs(StreamDTLBPortIndex) := DontCare
      dtlb_reqs(StreamDTLBPortIndex).req.valid := false.B
      dtlb_reqs(StreamDTLBPortIndex).resp.ready := true.B
  }
  // L2 prefetch TLB requests: always ready to accept responses
  dtlb_reqs(L2toL1DLBPortIndex) <> io.l2_tlb_req
  dtlb_reqs(L2toL1DLBPortIndex).resp.ready := true.B
  io.l2_pmp_resp := pmp_check(L2toL1DLBPortIndex).resp

  // StoreUnit: store-data pipes are plain pass-through exe units
  for (i <- 0 until StdCnt) {
    stdExeUnits(i).io.flush <> redirect
    stdExeUnits(i).io.in.valid := io.ooo_to_mem.issueStd(i).valid
    io.ooo_to_mem.issueStd(i).ready := stdExeUnits(i).io.in.ready
    stdExeUnits(i).io.in.bits := io.ooo_to_mem.issueStd(i).bits
  }

  for
(i <- 0 until StaCnt) {
    val stu = storeUnits(i)

    stu.io.redirect <> redirect
    stu.io.csrCtrl <> csrCtrl
    stu.io.dcache <> dcache.io.lsu.sta(i)
    stu.io.feedback_slow <> io.mem_to_ooo.staIqFeedback(i).feedbackSlow
    stu.io.stin <> io.ooo_to_mem.issueSta(i)
    stu.io.lsq <> lsq.io.sta.storeAddrIn(i)
    stu.io.lsq_replenish <> lsq.io.sta.storeAddrInRe(i)
    // dtlb: store units use the store DTLB
    stu.io.tlb <> dtlb_st.head.requestor(i)
    // pmp ports: [loads][hybrids][atomics? +1][stores] — hence the offset
    stu.io.pmp <> pmp_check(LduCnt + HyuCnt + 1 + i).resp

    // -------------------------
    // Store Triggers
    // -------------------------
    stu.io.fromCsrTrigger.tdataVec := tdata
    stu.io.fromCsrTrigger.tEnableVec := tEnable
    stu.io.fromCsrTrigger.triggerCanRaiseBpExp := triggerCanRaiseBpExp
    stu.io.fromCsrTrigger.debugMode := debugMode

    // prefetch
    stu.io.prefetch_req <> sbuffer.io.store_prefetch(i)

    // store unit does not need fast feedback
    io.mem_to_ooo.staIqFeedback(i).feedbackFast := DontCare

    // Lsq to sta unit
    lsq.io.sta.storeMaskIn(i) <> stu.io.st_mask_out

    // connect misalignBuffer
    storeMisalignBuffer.io.enq(i) <> stu.io.misalign_enq

    // only store pipe 0 replays split (misaligned) stores
    if (i == 0) {
      stu.io.misalign_stin <> storeMisalignBuffer.io.splitStoreReq
      stu.io.misalign_stout <> storeMisalignBuffer.io.splitStoreResp
    } else {
      stu.io.misalign_stin.valid := false.B
      stu.io.misalign_stin.bits := DontCare
    }

    // Lsq to std unit's rs: vector store data (from vsSplit) takes priority
    // over scalar store data on the shared storeDataIn port
    if (i < VstuCnt){
      when (vsSplit(i).io.vstd.get.valid) {
        lsq.io.std.storeDataIn(i).valid := true.B
        lsq.io.std.storeDataIn(i).bits := vsSplit(i).io.vstd.get.bits
        // stall the scalar data this cycle; it stays in its queue
        stData(i).ready := false.B
      }.otherwise {
        lsq.io.std.storeDataIn(i).valid := stData(i).valid && !st_data_atomics(i)
        lsq.io.std.storeDataIn(i).bits.uop := stData(i).bits.uop
        lsq.io.std.storeDataIn(i).bits.data := stData(i).bits.data
        // scalar stores carry no vector metadata
        lsq.io.std.storeDataIn(i).bits.mask.map(_ := 0.U)
        lsq.io.std.storeDataIn(i).bits.vdIdx.map(_ := 0.U)
        lsq.io.std.storeDataIn(i).bits.vdIdxInField.map(_ := 0.U)
        stData(i).ready := true.B
      }
    } else {
      // ports beyond VstuCnt only ever see scalar store data
      lsq.io.std.storeDataIn(i).valid := stData(i).valid && !st_data_atomics(i)
      lsq.io.std.storeDataIn(i).bits.uop := stData(i).bits.uop
      lsq.io.std.storeDataIn(i).bits.data := stData(i).bits.data
      lsq.io.std.storeDataIn(i).bits.mask.map(_ := 0.U)
      lsq.io.std.storeDataIn(i).bits.vdIdx.map(_ := 0.U)
      lsq.io.std.storeDataIn(i).bits.vdIdxInField.map(_ := 0.U)
      stData(i).ready := true.B
    }
    // NOTE(review): these two lines touch every storeDataIn port and are
    // re-elaborated on each loop iteration; harmless but redundant.
    lsq.io.std.storeDataIn.map(_.bits.debug := 0.U.asTypeOf(new DebugBundle))
    lsq.io.std.storeDataIn.foreach(_.bits.isFromLoadUnit := DontCare)

    // store prefetch train
    l1PrefetcherOpt.foreach(pf => {
      // the stream prefetcher only trains on loads, never on stores
      pf.io.st_in(i).valid := false.B
      pf.io.st_in(i).bits := DontCare
    })

    prefetcherOpt.foreach(pf => {
      // when pf_train_on_hit: train on every store; otherwise only first-issue misses
      pf.io.st_in(i).valid := Mux(pf_train_on_hit,
        stu.io.prefetch_train.valid,
        stu.io.prefetch_train.valid && stu.io.prefetch_train.bits.isFirstIssue && (
          stu.io.prefetch_train.bits.miss
        )
      )
      pf.io.st_in(i).bits := stu.io.prefetch_train.bits
      // PC is staged twice to line up with the s1/s2 prefetch pipeline stages
      pf.io.st_in(i).bits.uop.pc := RegEnable(RegEnable(io.ooo_to_mem.storePc(i), stu.io.s1_prefetch_spec), stu.io.s2_prefetch_spec)
    })

    // 1. sync issue info to store set LFST
    // 2. when store issue, broadcast issued sqPtr to wake up the following insts
    // io.stIn(i).valid := io.issue(exuParameters.LduCnt + i).valid
    // io.stIn(i).bits := io.issue(exuParameters.LduCnt + i).bits
    io.mem_to_ooo.stIn(i).valid := stu.io.issue.valid
    io.mem_to_ooo.stIn(i).bits := stu.io.issue.bits

    stu.io.stout.ready := true.B

    // vector: only the first VstuCnt store pipes accept vector store flows
    if (i < VstuCnt) {
      stu.io.vecstin <> vsSplit(i).io.out
      // vsFlowQueue.io.pipeFeedback(i) <> stu.io.vec_feedback_slow // need connect
    } else {
      stu.io.vecstin.valid := false.B
      stu.io.vecstin.bits := DontCare
      stu.io.vecstout.ready := false.B
    }
    stu.io.vec_isFirstIssue := true.B // TODO
  }

  // Merge the two store-queue writeback sources (mmio, cbo.zero) onto one port.
  // The assert below guarantees they are mutually exclusive; cbo.zero wins the mux.
  val sqOtherStout = WireInit(0.U.asTypeOf(DecoupledIO(new MemExuOutput)))
  sqOtherStout.valid := lsq.io.mmioStout.valid || lsq.io.cboZeroStout.valid
  sqOtherStout.bits := Mux(lsq.io.cboZeroStout.valid, lsq.io.cboZeroStout.bits, lsq.io.mmioStout.bits)
  assert(!(lsq.io.mmioStout.valid && lsq.io.cboZeroStout.valid), "Cannot writeback to mmio and cboZero at the same time.")

  // Store writeback by StoreQueue:
  // 1. cbo Zero
  // 2. mmio
  // Currently, the two should not be present at the same time, so simply make cbo zero a higher priority.
  val otherStout = WireInit(0.U.asTypeOf(lsq.io.mmioStout))
  NewPipelineConnect(
    sqOtherStout, otherStout, otherStout.fire,
    false.B,
    Option("otherStoutConnect")
  )
  // SQ writeback steals stOut(0) only on cycles store pipe 0 has no result
  // (relies on Chisel last-connect: this `when` overrides the earlier stOut(0) wiring)
  otherStout.ready := false.B
  when (otherStout.valid && !storeUnits(0).io.stout.valid) {
    stOut(0).valid := true.B
    stOut(0).bits := otherStout.bits
    otherStout.ready := true.B
  }
  lsq.io.mmioStout.ready := sqOtherStout.ready
  lsq.io.cboZeroStout.ready := sqOtherStout.ready

  // vec mmio writeback
  lsq.io.vecmmioStout.ready := false.B

  // miss align buffer will overwrite stOut(0): lowest priority writer, only
  // when neither the SQ nor store pipe 0 (scalar or vector) writes back
  val storeMisalignCanWriteBack = !otherStout.valid && !storeUnits(0).io.stout.valid && !storeUnits(0).io.vecstout.valid
  storeMisalignBuffer.io.writeBack.ready := storeMisalignCanWriteBack
  storeMisalignBuffer.io.storeOutValid := storeUnits(0).io.stout.valid
  storeMisalignBuffer.io.storeVecOutValid := storeUnits(0).io.vecstout.valid
  when (storeMisalignBuffer.io.writeBack.valid && storeMisalignCanWriteBack) {
    stOut(0).valid := true.B
    stOut(0).bits := storeMisalignBuffer.io.writeBack.bits
  }

  // Uncache
  uncache.io.enableOutstanding := io.ooo_to_mem.csrCtrl.uncache_write_outstanding_enable
  uncache.io.hartId := io.hartId
  lsq.io.uncacheOutstanding := io.ooo_to_mem.csrCtrl.uncache_write_outstanding_enable

  // Lsq: mirror ROB commit/pending state into the LSQ
  io.mem_to_ooo.lsqio.mmio := lsq.io.rob.mmio
  io.mem_to_ooo.lsqio.uop := lsq.io.rob.uop
  lsq.io.rob.lcommit := io.ooo_to_mem.lsqio.lcommit
  lsq.io.rob.scommit := io.ooo_to_mem.lsqio.scommit
  lsq.io.rob.pendingMMIOld := io.ooo_to_mem.lsqio.pendingMMIOld
  lsq.io.rob.pendingld := io.ooo_to_mem.lsqio.pendingld
  lsq.io.rob.pendingst := io.ooo_to_mem.lsqio.pendingst
  lsq.io.rob.pendingVst := io.ooo_to_mem.lsqio.pendingVst
  lsq.io.rob.commit := io.ooo_to_mem.lsqio.commit
  lsq.io.rob.pendingPtr := io.ooo_to_mem.lsqio.pendingPtr
  lsq.io.rob.pendingPtrNext := io.ooo_to_mem.lsqio.pendingPtrNext

  // lsq.io.rob <> io.lsqio.rob
  lsq.io.enq <> io.ooo_to_mem.enqLsq
  lsq.io.brqRedirect <> redirect

  // violation rollback
  /**
   * One-hot selection of the oldest (by robIdx) valid redirect in `xs`.
   *
   * resultOnehot(i) is asserted iff xs(i) is valid and, for every other entry j,
   * xs(j) is invalid or strictly younger than xs(i). compareVec(i)(j) (defined
   * only for j < i) caches isAfter(xs(j).robIdx, xs(i).robIdx), i.e. "j is
   * younger than i", so each pair is compared once; the j > i case reuses the
   * cached result negated. On a robIdx tie the lower index wins.
   * (Cat(...).andR reduces with AND, so the bit order Cat imposes is irrelevant.)
   */
  def selectOldestRedirect(xs: Seq[Valid[Redirect]]): Vec[Bool] = {
    val compareVec = (0 until xs.length).map(i => (0 until i).map(j => isAfter(xs(j).bits.robIdx, xs(i).bits.robIdx)))
    val resultOnehot = VecInit((0 until xs.length).map(i => Cat((0 until xs.length).map(j =>
      (if (j < i) !xs(j).valid || compareVec(i)(j)
      else if (j == i) xs(i).valid
      else !xs(j).valid || !compareVec(j)(i))
    )).andR))
    resultOnehot
  }
  // candidates: per-pipe load rollbacks plus LSQ nack/nuke rollbacks
  val allRedirect = loadUnits.map(_.io.rollback) ++ hybridUnits.map(_.io.ldu_io.rollback) ++ lsq.io.nack_rollback ++ lsq.io.nuke_rollback
  val oldestOneHot = selectOldestRedirect(allRedirect)
  val oldestRedirect = WireDefault(Mux1H(oldestOneHot, allRedirect))
  // memory replay would not cause IAF/IPF/IGPF
  oldestRedirect.bits.cfiUpdate.backendIAF := false.B
  oldestRedirect.bits.cfiUpdate.backendIPF := false.B
  oldestRedirect.bits.cfiUpdate.backendIGPF := false.B
  io.mem_to_ooo.memoryViolation := oldestRedirect
  io.mem_to_ooo.lsqio.lqCanAccept := lsq.io.lqCanAccept
  io.mem_to_ooo.lsqio.sqCanAccept := lsq.io.sqCanAccept

  // lsq.io.uncache <> uncache.io.lsq
  // Uncache arbitration FSM: tracks whether an in-flight uncache transaction
  // belongs to the scalar (LSQ) path or the vector path; stays in s_idle when
  // outstanding NC writes are enabled (no need to serialize then).
  val s_idle :: s_scalar_uncache :: s_vector_uncache :: Nil = Enum(3)
  val uncacheState = RegInit(s_idle)
  val uncacheReq = Wire(Decoupled(new UncacheWordReq))
  val uncacheIdResp = uncache.io.lsq.idResp
  val uncacheResp = Wire(Decoupled(new UncacheWordResp))

  // default tie-offs; the `when` blocks below override per Chisel last-connect
  uncacheReq.bits := DontCare
  uncacheReq.valid := false.B
  uncacheReq.ready := false.B
  uncacheResp.bits := DontCare
  uncacheResp.valid := false.B
  uncacheResp.ready := false.B
  lsq.io.uncache.req.ready := false.B
  lsq.io.uncache.idResp.valid := false.B
  lsq.io.uncache.idResp.bits := DontCare
  lsq.io.uncache.resp.valid := false.B
  lsq.io.uncache.resp.bits := DontCare

  switch (uncacheState) {
    is (s_idle) {
      when (uncacheReq.fire) {
        when (lsq.io.uncache.req.valid) {
          // scalar request: serialize unless it is NC with outstanding enabled
          when (!lsq.io.uncache.req.bits.nc || !io.ooo_to_mem.csrCtrl.uncache_write_outstanding_enable) {
            uncacheState := s_scalar_uncache
          }
        }.otherwise {
          // val isStore = vsFlowQueue.io.uncache.req.bits.cmd === MemoryOpConstants.M_XWR
          when (!io.ooo_to_mem.csrCtrl.uncache_write_outstanding_enable) {
            uncacheState := s_vector_uncache
          }
        }
      }
    }

    is (s_scalar_uncache) {
      when (uncacheResp.fire) {
        uncacheState := s_idle
      }
    }

    is (s_vector_uncache) {
      when (uncacheResp.fire) {
        uncacheState := s_idle
      }
    }
  }

  // request routing: scalar LSQ requests drive the shared uncache request wire
  when (lsq.io.uncache.req.valid) {
    uncacheReq <> lsq.io.uncache.req
  }
  // response routing: with outstanding writes enabled, responses always go to the
  // LSQ; otherwise only while the FSM says the transaction is scalar
  when (io.ooo_to_mem.csrCtrl.uncache_write_outstanding_enable) {
    lsq.io.uncache.resp <> uncacheResp
    lsq.io.uncache.idResp <> uncacheIdResp
  }.otherwise {
    when (uncacheState === s_scalar_uncache) {
      lsq.io.uncache.resp <> uncacheResp
      lsq.io.uncache.idResp <> uncacheIdResp
    }
  }
  // delay dcache refill for 1 cycle for better timing
  AddPipelineReg(uncacheReq, uncache.io.lsq.req, false.B)
  AddPipelineReg(uncache.io.lsq.resp, uncacheResp, false.B)

  //lsq.io.refill := delayedDcacheRefill
  lsq.io.release := dcache.io.lsu.release
  lsq.io.lqCancelCnt <> io.mem_to_ooo.lqCancelCnt
  lsq.io.sqCancelCnt <> io.mem_to_ooo.sqCancelCnt
  lsq.io.lqDeq <> io.mem_to_ooo.lqDeq
  lsq.io.sqDeq <> io.mem_to_ooo.sqDeq
  // Todo: assign these
  io.mem_to_ooo.sqDeqPtr := lsq.io.sqDeqPtr
  io.mem_to_ooo.lqDeqPtr := lsq.io.lqDeqPtr
  lsq.io.tl_d_channel <>
dcache.io.lsu.tl_d_channel

  // LSQ to store buffer; sbuffer port 0 is shared with the vector segment unit
  lsq.io.sbuffer <> sbuffer.io.in
  // NOTE(review): Mux1H assumes vSegmentUnit and lsq port 0 are never valid
  // simultaneously — presumably guaranteed elsewhere; verify against vSegmentUnit.
  sbuffer.io.in(0).valid := lsq.io.sbuffer(0).valid || vSegmentUnit.io.sbuffer.valid
  sbuffer.io.in(0).bits := Mux1H(Seq(
    vSegmentUnit.io.sbuffer.valid -> vSegmentUnit.io.sbuffer.bits,
    lsq.io.sbuffer(0).valid -> lsq.io.sbuffer(0).bits
  ))
  vSegmentUnit.io.sbuffer.ready := sbuffer.io.in(0).ready
  lsq.io.sqEmpty <> sbuffer.io.sqempty
  dcache.io.force_write := lsq.io.force_write

  // Initialize when difftest is disabled.
  sbuffer.io.vecDifftestInfo := DontCare
  lsq.io.sbufferVecDifftestInfo := DontCare
  vSegmentUnit.io.vecDifftestInfo := DontCare
  if (env.EnableDifftest) {
    sbuffer.io.vecDifftestInfo .zipWithIndex.map{ case (sbufferPort, index) =>
      if (index == 0) {
        // port 0 mirrors the data-path sharing above: segment unit wins over LSQ
        val vSegmentDifftestValid = vSegmentUnit.io.vecDifftestInfo.valid
        sbufferPort.valid := Mux(vSegmentDifftestValid, vSegmentUnit.io.vecDifftestInfo.valid, lsq.io.sbufferVecDifftestInfo(0).valid)
        sbufferPort.bits := Mux(vSegmentDifftestValid, vSegmentUnit.io.vecDifftestInfo.bits, lsq.io.sbufferVecDifftestInfo(0).bits)

        vSegmentUnit.io.vecDifftestInfo.ready := sbufferPort.ready
        lsq.io.sbufferVecDifftestInfo(0).ready := sbufferPort.ready
      } else {
        sbufferPort <> lsq.io.sbufferVecDifftestInfo(index)
      }
    }
  }

  // lsq.io.vecStoreRetire <> vsFlowQueue.io.sqRelease
  // lsq.io.vecWriteback.valid := vlWrapper.io.uopWriteback.fire &&
  //   vlWrapper.io.uopWriteback.bits.uop.vpu.lastUop
  // lsq.io.vecWriteback.bits := vlWrapper.io.uopWriteback.bits

  // vector: an issueVldu port is accepted only when the matching splitter is
  // ready AND the uop is actually of that kind (vector load vs vector store)
  val vLoadCanAccept = (0 until VlduCnt).map(i =>
    vlSplit(i).io.in.ready && VlduType.isVecLd(io.ooo_to_mem.issueVldu(i).bits.uop.fuOpType)
  )
  val vStoreCanAccept = (0 until VstuCnt).map(i =>
    vsSplit(i).io.in.ready && VstuType.isVecSt(io.ooo_to_mem.issueVldu(i).bits.uop.fuOpType)
  )
  // segment instructions are handled by vSegmentUnit, bypassing the splitters
  val isSegment = io.ooo_to_mem.issueVldu.head.valid && isVsegls(io.ooo_to_mem.issueVldu.head.bits.uop.fuType)
  // vleff tail uop is fixed up by vfofBuffer rather than split again
  val isFixVlUop = io.ooo_to_mem.issueVldu.map{x =>
    x.bits.uop.vpu.isVleff && x.bits.uop.vpu.lastUop && x.valid
  }

  // init port
  /**
   * TODO: split vsMergebuffer maybe removable, if one RS can accept two feedbacks, or RS replay uop is not needed
   * for now:
   *   RS0 -> VsSplit0 -> stu0 -> vsMergebuffer0 -> feedback -> RS0
   *   RS1 -> VsSplit1 -> stu1 -> vsMergebuffer1 -> feedback -> RS1
   *
   * vector load doesn't need feedback
   *
   *   RS0 -> VlSplit0  -> ldu0 -> |
   *   RS1 -> VlSplit1  -> ldu1 -> |  -> vlMergebuffer
   *        replayIO   -> ldu3 -> |
   * */
  (0 until VstuCnt).foreach{i =>
    // defaults; real connections are made in the loops that follow
    vsMergeBuffer(i).io.fromPipeline := DontCare
    vsMergeBuffer(i).io.fromSplit := DontCare

    vsMergeBuffer(i).io.fromMisalignBuffer.get.flush := storeMisalignBuffer.io.toVecStoreMergeBuffer(i).flush
    vsMergeBuffer(i).io.fromMisalignBuffer.get.mbIndex := storeMisalignBuffer.io.toVecStoreMergeBuffer(i).mbIndex
  }
  // vector store path: RS -> vsSplit -> store unit -> vsMergeBuffer
  (0 until VstuCnt).foreach{i =>
    vsSplit(i).io.redirect <> redirect
    vsSplit(i).io.in <> io.ooo_to_mem.issueVldu(i)
    // only accept genuine vector stores; segment ops go to vSegmentUnit
    vsSplit(i).io.in.valid := io.ooo_to_mem.issueVldu(i).valid &&
                              vStoreCanAccept(i) && !isSegment
    vsSplit(i).io.toMergeBuffer <> vsMergeBuffer(i).io.fromSplit.head
    // pipeline stage into the store unit, flushed on redirect at either end
    NewPipelineConnect(
      vsSplit(i).io.out, storeUnits(i).io.vecstin, storeUnits(i).io.vecstin.fire,
      Mux(vsSplit(i).io.out.fire, vsSplit(i).io.out.bits.uop.robIdx.needFlush(io.redirect), storeUnits(i).io.vecstin.bits.uop.robIdx.needFlush(io.redirect)),
      Option("VsSplitConnectStu")
    )
    vsSplit(i).io.vstd.get := DontCare // Todo: Discuss how to pass vector store data

    vsSplit(i).io.vstdMisalign.get.storeMisalignBufferEmpty := !storeMisalignBuffer.io.full
    vsSplit(i).io.vstdMisalign.get.storePipeEmpty := !storeUnits(i).io.s0_s1_valid

  }
  // vector load path: RS -> vlSplit -> load unit -> vlMergeBuffer
  (0 until VlduCnt).foreach{i =>
    vlSplit(i).io.redirect <> redirect
    vlSplit(i).io.in <> io.ooo_to_mem.issueVldu(i)
    vlSplit(i).io.in.valid := io.ooo_to_mem.issueVldu(i).valid &&
                              vLoadCanAccept(i) && !isSegment && !isFixVlUop(i)
    vlSplit(i).io.toMergeBuffer <> vlMergeBuffer.io.fromSplit(i)
    // merge-buffer back-pressure threshold, keyed by the LQ dequeue pointer
    vlSplit(i).io.threshold.get.valid := vlMergeBuffer.io.toSplit.get.threshold
    vlSplit(i).io.threshold.get.bits := lsq.io.lqDeqPtr
    NewPipelineConnect(
      vlSplit(i).io.out, loadUnits(i).io.vecldin, loadUnits(i).io.vecldin.fire,
      Mux(vlSplit(i).io.out.fire, vlSplit(i).io.out.bits.uop.robIdx.needFlush(io.redirect), loadUnits(i).io.vecldin.bits.uop.robIdx.needFlush(io.redirect)),
      Option("VlSplitConnectLdu")
    )

    // Subsequent instructions will be blocked
    vfofBuffer.io.in(i).valid := io.ooo_to_mem.issueVldu(i).valid
    vfofBuffer.io.in(i).bits := io.ooo_to_mem.issueVldu(i).bits
  }
  // vector load writeback into the merge buffer; on the designated port the
  // loadMisalignBuffer result is injected on cycles the pipe has no output
  (0 until LduCnt).foreach{i=>
    loadUnits(i).io.vecldout.ready := vlMergeBuffer.io.fromPipeline(i).ready
    loadMisalignBuffer.io.vecWriteBack.ready := true.B

    if (i == MisalignWBPort) {
      when(loadUnits(i).io.vecldout.valid) {
        vlMergeBuffer.io.fromPipeline(i).valid := loadUnits(i).io.vecldout.valid
        vlMergeBuffer.io.fromPipeline(i).bits := loadUnits(i).io.vecldout.bits
      } .otherwise {
        vlMergeBuffer.io.fromPipeline(i).valid := loadMisalignBuffer.io.vecWriteBack.valid
        vlMergeBuffer.io.fromPipeline(i).bits := loadMisalignBuffer.io.vecWriteBack.bits
      }
    } else {
      vlMergeBuffer.io.fromPipeline(i).valid := loadUnits(i).io.vecldout.valid
      vlMergeBuffer.io.fromPipeline(i).bits := loadUnits(i).io.vecldout.bits
    }
  }

  // vector store writeback: same injection scheme, per store pipe
  (0 until StaCnt).foreach{i=>
    if(i < VstuCnt){
      storeUnits(i).io.vecstout.ready := true.B
      storeMisalignBuffer.io.vecWriteBack(i).ready := vsMergeBuffer(i).io.fromPipeline.head.ready

      when(storeUnits(i).io.vecstout.valid) {
        vsMergeBuffer(i).io.fromPipeline.head.valid := storeUnits(i).io.vecstout.valid
        vsMergeBuffer(i).io.fromPipeline.head.bits := storeUnits(i).io.vecstout.bits
      } .otherwise {
        vsMergeBuffer(i).io.fromPipeline.head.valid := storeMisalignBuffer.io.vecWriteBack(i).valid
        vsMergeBuffer(i).io.fromPipeline.head.bits := storeMisalignBuffer.io.vecWriteBack(i).bits
      }
    }
  }

  // an issueVldu port is consumed if either the load or the store splitter took it
  (0 until VlduCnt).foreach{i=>
    io.ooo_to_mem.issueVldu(i).ready := vLoadCanAccept(i) || vStoreCanAccept(i)
  }

  vlMergeBuffer.io.redirect <> redirect
  vsMergeBuffer.map(_.io.redirect <> redirect)
  (0 until VlduCnt).foreach{i=>
    vlMergeBuffer.io.toLsq(i) <> lsq.io.ldvecFeedback(i)
  }
  (0 until VstuCnt).foreach{i=>
    vsMergeBuffer(i).io.toLsq.head <> lsq.io.stvecFeedback(i)
  }

  (0 until VlduCnt).foreach{i=>
    // send to RS
    vlMergeBuffer.io.feedback(i) <> io.mem_to_ooo.vlduIqFeedback(i).feedbackSlow
    io.mem_to_ooo.vlduIqFeedback(i).feedbackFast := DontCare
  }
  (0 until VstuCnt).foreach{i =>
    // send to RS; port 0 also carries the segment unit's feedback (it wins the mux)
    if (i == 0){
      io.mem_to_ooo.vstuIqFeedback(i).feedbackSlow.valid := vsMergeBuffer(i).io.feedback.head.valid || vSegmentUnit.io.feedback.valid
      io.mem_to_ooo.vstuIqFeedback(i).feedbackSlow.bits := Mux1H(Seq(
        vSegmentUnit.io.feedback.valid -> vSegmentUnit.io.feedback.bits,
        vsMergeBuffer(i).io.feedback.head.valid -> vsMergeBuffer(i).io.feedback.head.bits
      ))
      io.mem_to_ooo.vstuIqFeedback(i).feedbackFast := DontCare
    } else {
      vsMergeBuffer(i).io.feedback.head <> io.mem_to_ooo.vstuIqFeedback(i).feedbackSlow
      io.mem_to_ooo.vstuIqFeedback(i).feedbackFast := DontCare
    }
  }

  // vector writeback arbitration per port:
  //   port 0: segment unit > vlMergeBuffer > vsMergeBuffer
  //   port 1: vfofBuffer   > vlMergeBuffer > vsMergeBuffer
  //   others: vlMergeBuffer > vsMergeBuffer
  (0 until VlduCnt).foreach{i=>
    if (i == 0){ // for segmentUnit, segmentUnit uses port0 writeback
      io.mem_to_ooo.writebackVldu(i).valid := vlMergeBuffer.io.uopWriteback(i).valid || vsMergeBuffer(i).io.uopWriteback.head.valid || vSegmentUnit.io.uopwriteback.valid
      io.mem_to_ooo.writebackVldu(i).bits := PriorityMux(Seq(
        vSegmentUnit.io.uopwriteback.valid          -> vSegmentUnit.io.uopwriteback.bits,
        vlMergeBuffer.io.uopWriteback(i).valid      -> vlMergeBuffer.io.uopWriteback(i).bits,
        vsMergeBuffer(i).io.uopWriteback.head.valid -> vsMergeBuffer(i).io.uopWriteback.head.bits,
      ))
      // ready propagation mirrors the priority order above
      vlMergeBuffer.io.uopWriteback(i).ready := io.mem_to_ooo.writebackVldu(i).ready && !vSegmentUnit.io.uopwriteback.valid
      vsMergeBuffer(i).io.uopWriteback.head.ready := io.mem_to_ooo.writebackVldu(i).ready && !vlMergeBuffer.io.uopWriteback(i).valid && !vSegmentUnit.io.uopwriteback.valid
      vSegmentUnit.io.uopwriteback.ready := io.mem_to_ooo.writebackVldu(i).ready
    } else if (i == 1) {
      io.mem_to_ooo.writebackVldu(i).valid := vlMergeBuffer.io.uopWriteback(i).valid || vsMergeBuffer(i).io.uopWriteback.head.valid || vfofBuffer.io.uopWriteback.valid
      io.mem_to_ooo.writebackVldu(i).bits := PriorityMux(Seq(
        vfofBuffer.io.uopWriteback.valid            -> vfofBuffer.io.uopWriteback.bits,
        vlMergeBuffer.io.uopWriteback(i).valid      -> vlMergeBuffer.io.uopWriteback(i).bits,
        vsMergeBuffer(i).io.uopWriteback.head.valid -> vsMergeBuffer(i).io.uopWriteback.head.bits,
      ))
      vlMergeBuffer.io.uopWriteback(i).ready := io.mem_to_ooo.writebackVldu(i).ready && !vfofBuffer.io.uopWriteback.valid
      vsMergeBuffer(i).io.uopWriteback.head.ready := io.mem_to_ooo.writebackVldu(i).ready && !vlMergeBuffer.io.uopWriteback(i).valid && !vfofBuffer.io.uopWriteback.valid
      vfofBuffer.io.uopWriteback.ready := io.mem_to_ooo.writebackVldu(i).ready
    } else {
      io.mem_to_ooo.writebackVldu(i).valid := vlMergeBuffer.io.uopWriteback(i).valid || vsMergeBuffer(i).io.uopWriteback.head.valid
      io.mem_to_ooo.writebackVldu(i).bits := PriorityMux(Seq(
        vlMergeBuffer.io.uopWriteback(i).valid      -> vlMergeBuffer.io.uopWriteback(i).bits,
        vsMergeBuffer(i).io.uopWriteback.head.valid -> vsMergeBuffer(i).io.uopWriteback.head.bits,
      ))
      vlMergeBuffer.io.uopWriteback(i).ready := io.mem_to_ooo.writebackVldu(i).ready
      vsMergeBuffer(i).io.uopWriteback.head.ready := io.mem_to_ooo.writebackVldu(i).ready && !vlMergeBuffer.io.uopWriteback(i).valid
    }

    // vfofBuffer observes every vector-load writeback to track the vleff result
    vfofBuffer.io.mergeUopWriteback(i).valid := vlMergeBuffer.io.uopWriteback(i).valid
    vfofBuffer.io.mergeUopWriteback(i).bits := vlMergeBuffer.io.uopWriteback(i).bits
  }


  vfofBuffer.io.redirect <> redirect

  // Sbuffer
  sbuffer.io.csrCtrl <> csrCtrl
  sbuffer.io.dcache <> dcache.io.lsu.store
  sbuffer.io.memSetPattenDetected := dcache.io.memSetPattenDetected
  sbuffer.io.force_write <> lsq.io.force_write
  // flush sbuffer: requested by CMO, fence, atomics, or the vector segment unit
  val cmoFlush = lsq.io.flushSbuffer.valid
  val fenceFlush = io.ooo_to_mem.flushSb
  val atomicsFlush = atomicsUnit.io.flush_sbuffer.valid || vSegmentUnit.io.flush_sbuffer.valid
  val stIsEmpty = sbuffer.io.flush.empty && uncache.io.flush.empty
  io.mem_to_ooo.sbIsEmpty := RegNext(stIsEmpty)

  // if both of them try to flush sbuffer at the same time
  // something must have
  // NOTE(review): this assert only fires when ALL THREE flush sources coincide; it does not
  // catch two simultaneous requesters (e.g. fence + atomics). If pairwise exclusivity is
  // intended, a PopCount(...) <= 1.U check would be stricter — TODO confirm intent.
  assert(!(fenceFlush && atomicsFlush && cmoFlush))
  sbuffer.io.flush.valid := RegNext(fenceFlush || atomicsFlush || cmoFlush)
  uncache.io.flush.valid := sbuffer.io.flush.valid

  // AtomicsUnit: AtomicsUnit will override other control signals,
  // as atomics insts (LR/SC/AMO) will block the pipeline.
  // One state per sta/hybrid issue port plus the normal state.
  val s_normal +: s_atomics = Enum(StaCnt + HyuCnt + 1)
  val state = RegInit(s_normal)

  // Per-port "an AMO is being issued here" flags: StaCnt store-address ports followed by
  // HyuCnt hybrid ports.
  val st_atomics = Seq.tabulate(StaCnt)(i =>
    io.ooo_to_mem.issueSta(i).valid && FuType.storeIsAMO((io.ooo_to_mem.issueSta(i).bits.uop.fuType))
  ) ++ Seq.tabulate(HyuCnt)(i =>
    io.ooo_to_mem.issueHya(i).valid && FuType.storeIsAMO((io.ooo_to_mem.issueHya(i).bits.uop.fuType))
  )

  // Divert an AMO from the normal store pipe into the AtomicsUnit: steal the port's ready,
  // squash the store unit's input, and remember which port we came from.
  for (i <- 0 until StaCnt) when(st_atomics(i)) {
    io.ooo_to_mem.issueSta(i).ready := atomicsUnit.io.in.ready
    storeUnits(i).io.stin.valid := false.B

    state := s_atomics(i)
  }
  for (i <- 0 until HyuCnt) when(st_atomics(StaCnt + i)) {
    io.ooo_to_mem.issueHya(i).ready := atomicsUnit.io.in.ready
    hybridUnits(i).io.lsin.valid := false.B

    state := s_atomics(StaCnt + i)
    // At most one port may carry an AMO in any cycle.
    assert(!st_atomics.zipWithIndex.filterNot(_._2 == StaCnt + i).unzip._1.reduce(_ || _))
  }
  when (atomicsUnit.io.out.valid) {
    state := s_normal
  }

  // The one-hot st_atomics vector (asserted exclusive above) selects the issuing port's payload.
  atomicsUnit.io.in.valid := st_atomics.reduce(_ || _)
  atomicsUnit.io.in.bits  := Mux1H(Seq.tabulate(StaCnt)(i =>
    st_atomics(i) -> io.ooo_to_mem.issueSta(i).bits) ++
    Seq.tabulate(HyuCnt)(i =>
      st_atomics(StaCnt+i) -> io.ooo_to_mem.issueHya(i).bits))
  // Store-data side of the AMO comes in through the std ports.
  atomicsUnit.io.storeDataIn.zipWithIndex.foreach { case (stdin, i) =>
    stdin.valid := st_data_atomics(i)
    stdin.bits := stData(i).bits
  }
  atomicsUnit.io.redirect <> redirect

  // TODO: complete amo's pmp support
  // AMO borrows load TLB port 0; by default the response side is tied off and only
  // req.ready is forwarded — the full <> hookup happens below while state =/= s_normal.
  val amoTlb = dtlb_ld(0).requestor(0)
  atomicsUnit.io.dtlb.resp.valid := false.B
  atomicsUnit.io.dtlb.resp.bits  := DontCare
  atomicsUnit.io.dtlb.req.ready  := amoTlb.req.ready
  atomicsUnit.io.pmpResp := pmp_check(0).resp

  atomicsUnit.io.dcache <> dcache.io.lsu.atomics
  atomicsUnit.io.flush_sbuffer.empty := stIsEmpty

  atomicsUnit.io.csrCtrl := csrCtrl

  // for atomicsUnit, it uses loadUnit(0)'s TLB port

  when (state =/= s_normal) {
    // use store wb port instead of load
    loadUnits(0).io.ldout.ready := false.B
    // use load_0's TLB (last-connect overrides the tie-off above while an AMO is in flight)
    atomicsUnit.io.dtlb <> amoTlb

    // hw prefetch should be disabled while executing atomic insts
    loadUnits.map(i => i.io.prefetch_req.valid := false.B)

    // make sure there's no in-flight uops in load unit
    assert(!loadUnits(0).io.ldout.valid)
  }

  lsq.io.flushSbuffer.empty := sbuffer.io.sbempty

  // While an AMO owns a port, its feedback channel is driven by the AtomicsUnit and the
  // original unit must stay silent.
  for (i <- 0 until StaCnt) {
    when (state === s_atomics(i)) {
      io.mem_to_ooo.staIqFeedback(i).feedbackSlow := atomicsUnit.io.feedbackSlow
      assert(!storeUnits(i).io.feedback_slow.valid)
    }
  }
  for (i <- 0 until HyuCnt) {
    when (state === s_atomics(StaCnt + i)) {
      io.mem_to_ooo.hyuIqFeedback(i).feedbackSlow := atomicsUnit.io.feedbackSlow
      assert(!hybridUnits(i).io.feedback_slow.valid)
    }
  }

  lsq.io.exceptionAddr.isStore := io.ooo_to_mem.isStoreException
  // Exception address is used several cycles after flush.
  // We delay it by 10 cycles to ensure its flush safety.
  // Sticky flag: set on an atomics exception, cleared 10 cycles after the redirect.
  val atomicsException = RegInit(false.B)
  when (DelayN(redirect.valid, 10) && atomicsException) {
    atomicsException := false.B
  }.elsewhen (atomicsUnit.io.exceptionInfo.valid) {
    atomicsException := true.B
  }

  // Misalign-buffer exception overrides: load buffer wins over store buffer when both valid.
  val misalignBufExceptionOverwrite = loadMisalignBuffer.io.overwriteExpBuf.valid || storeMisalignBuffer.io.overwriteExpBuf.valid
  val misalignBufExceptionVaddr = Mux(loadMisalignBuffer.io.overwriteExpBuf.valid,
    loadMisalignBuffer.io.overwriteExpBuf.vaddr,
    storeMisalignBuffer.io.overwriteExpBuf.vaddr
  )
  val misalignBufExceptionIsHyper = Mux(loadMisalignBuffer.io.overwriteExpBuf.valid,
    loadMisalignBuffer.io.overwriteExpBuf.isHyper,
    storeMisalignBuffer.io.overwriteExpBuf.isHyper
  )
  val misalignBufExceptionGpaddr = Mux(loadMisalignBuffer.io.overwriteExpBuf.valid,
    loadMisalignBuffer.io.overwriteExpBuf.gpaddr,
    storeMisalignBuffer.io.overwriteExpBuf.gpaddr
  )
  val misalignBufExceptionIsForVSnonLeafPTE = Mux(loadMisalignBuffer.io.overwriteExpBuf.valid,
    loadMisalignBuffer.io.overwriteExpBuf.isForVSnonLeafPTE,
    storeMisalignBuffer.io.overwriteExpBuf.isForVSnonLeafPTE
  )

  // Sticky flag for vector-segment exceptions, same set/clear protocol as atomicsException.
  val vSegmentException = RegInit(false.B)
  when (DelayN(redirect.valid, 10) && vSegmentException) {
    vSegmentException := false.B
  }.elsewhen (vSegmentUnit.io.exceptionInfo.valid) {
    vSegmentException := true.B
  }
  // Latch exception payloads at the moment the exception is reported.
  val atomicsExceptionAddress = RegEnable(atomicsUnit.io.exceptionInfo.bits.vaddr, atomicsUnit.io.exceptionInfo.valid)
  val vSegmentExceptionVstart = RegEnable(vSegmentUnit.io.exceptionInfo.bits.vstart, vSegmentUnit.io.exceptionInfo.valid)
  val vSegmentExceptionVl = RegEnable(vSegmentUnit.io.exceptionInfo.bits.vl, vSegmentUnit.io.exceptionInfo.valid)
  val vSegmentExceptionAddress = RegEnable(vSegmentUnit.io.exceptionInfo.bits.vaddr, vSegmentUnit.io.exceptionInfo.valid)
  val atomicsExceptionGPAddress = RegEnable(atomicsUnit.io.exceptionInfo.bits.gpaddr, atomicsUnit.io.exceptionInfo.valid)
  val vSegmentExceptionGPAddress = RegEnable(vSegmentUnit.io.exceptionInfo.bits.gpaddr, vSegmentUnit.io.exceptionInfo.valid)
  val atomicsExceptionIsForVSnonLeafPTE = RegEnable(atomicsUnit.io.exceptionInfo.bits.isForVSnonLeafPTE, atomicsUnit.io.exceptionInfo.valid)
  val vSegmentExceptionIsForVSnonLeafPTE = RegEnable(vSegmentUnit.io.exceptionInfo.bits.isForVSnonLeafPTE, vSegmentUnit.io.exceptionInfo.valid)

  // Source priority for the reported vaddr: atomics > misalign buffers > vsegment > LSQ.
  val exceptionVaddr = Mux(
    atomicsException,
    atomicsExceptionAddress,
    Mux(misalignBufExceptionOverwrite,
      misalignBufExceptionVaddr,
      Mux(vSegmentException,
        vSegmentExceptionAddress,
        lsq.io.exceptionAddr.vaddr
      )
    )
  )
  // whether vaddr need ext or is hyper inst:
  // VaNeedExt: atomicsException -> false; misalignBufExceptionOverwrite -> true; vSegmentException -> false
  // IsHyper: atomicsException -> false; vSegmentException -> false
  val exceptionVaNeedExt = !atomicsException &&
    (misalignBufExceptionOverwrite ||
      (!vSegmentException && lsq.io.exceptionAddr.vaNeedExt))
  val exceptionIsHyper = !atomicsException &&
    (misalignBufExceptionOverwrite && misalignBufExceptionIsHyper ||
      (!vSegmentException && lsq.io.exceptionAddr.isHyper && !misalignBufExceptionOverwrite))

  // Canonicalize an exception virtual address for xtval reporting.
  // When vaNeedExt: pick the active translation regime from satp/vsatp/hgatp modes
  // (0 = Bare, 8 = Sv39/Sv39x4, 9 = Sv48/Sv48x4) and zero-/sign-extend the truncated
  // pipeline vaddr accordingly; otherwise pass vaddr through unchanged.
  def GenExceptionVa(
    mode: UInt, isVirt: Bool, vaNeedExt: Bool,
    satp: TlbSatpBundle, vsatp: TlbSatpBundle, hgatp: TlbHgatpBundle,
    vaddr: UInt
  ) = {
    // vaddr.take(48 + 2) below needs at least 50 bits.
    require(VAddrBits >= 50)

    val satpNone = satp.mode === 0.U
    val satpSv39 = satp.mode === 8.U
    val satpSv48 = satp.mode === 9.U

    val vsatpNone = vsatp.mode === 0.U
    val vsatpSv39 = vsatp.mode === 8.U
    val vsatpSv48 = vsatp.mode === 9.U

    val hgatpNone = hgatp.mode === 0.U
    val hgatpSv39x4 = hgatp.mode === 8.U
    val hgatpSv48x4 = hgatp.mode === 9.U

    // For !isVirt, mode check is necessary, as we don't want virtual memory in M-mode.
    // For isVirt, mode check is unnecessary, as virt won't be 1 in M-mode.
    // Also, isVirt includes Hyper Insts, which don't care mode either.
    val useBareAddr =
      (isVirt && vsatpNone && hgatpNone) ||
      (!isVirt && (mode === CSRConst.ModeM)) ||
      (!isVirt && (mode =/= CSRConst.ModeM) && satpNone)
    val useSv39Addr =
      (isVirt && vsatpSv39) ||
      (!isVirt && (mode =/= CSRConst.ModeM) && satpSv39)
    val useSv48Addr =
      (isVirt && vsatpSv48) ||
      (!isVirt && (mode =/= CSRConst.ModeM) && satpSv48)
    // G-stage-only translation: guest-physical addresses are 2 bits wider (Sv39x4/Sv48x4).
    val useSv39x4Addr = isVirt && vsatpNone && hgatpSv39x4
    val useSv48x4Addr = isVirt && vsatpNone && hgatpSv48x4

    val bareAddr   = ZeroExt(vaddr(PAddrBits - 1, 0), XLEN)
    val sv39Addr   = SignExt(vaddr.take(39), XLEN)
    val sv39x4Addr = ZeroExt(vaddr.take(39 + 2), XLEN)
    val sv48Addr   = SignExt(vaddr.take(48), XLEN)
    val sv48x4Addr = ZeroExt(vaddr.take(48 + 2), XLEN)

    val ExceptionVa = Wire(UInt(XLEN.W))
    when (vaNeedExt) {
      // The use* selects are mutually exclusive by construction (one regime active at a time).
      ExceptionVa := Mux1H(Seq(
        (useBareAddr)   -> bareAddr,
        (useSv39Addr)   -> sv39Addr,
        (useSv48Addr)   -> sv48Addr,
        (useSv39x4Addr) -> sv39x4Addr,
        (useSv48x4Addr) -> sv48x4Addr,
      ))
    } .otherwise {
      ExceptionVa := vaddr
    }

    ExceptionVa
  }

  // Registered for timing; consumed by the backend well after the flush (see 10-cycle delay above).
  io.mem_to_ooo.lsqio.vaddr := RegNext(
    GenExceptionVa(tlbcsr.priv.dmode, tlbcsr.priv.virt || exceptionIsHyper, exceptionVaNeedExt,
      tlbcsr.satp, tlbcsr.vsatp, tlbcsr.hgatp, exceptionVaddr)
  )
  // vsegment instruction is executed atomically, which means atomicsException and
  // vSegmentException should not raise at the same time.
  XSError(atomicsException && vSegmentException, "atomicsException and vSegmentException raise at the same time!")
  // vstart/vl reporting: only the segment unit overrides the LSQ values.
  io.mem_to_ooo.lsqio.vstart := RegNext(Mux(vSegmentException,
    vSegmentExceptionVstart,
    lsq.io.exceptionAddr.vstart)
  )
  io.mem_to_ooo.lsqio.vl := RegNext(Mux(vSegmentException,
    vSegmentExceptionVl,
    lsq.io.exceptionAddr.vl)
  )

  XSError(atomicsException && atomicsUnit.io.in.valid, "new instruction before exception triggers\n")
  // Guest-physical address and VS-non-leaf-PTE flag follow the same source priority
  // as exceptionVaddr: atomics > misalign buffers > vsegment > LSQ.
  io.mem_to_ooo.lsqio.gpaddr := RegNext(Mux(
    atomicsException,
    atomicsExceptionGPAddress,
    Mux(misalignBufExceptionOverwrite,
      misalignBufExceptionGpaddr,
      Mux(vSegmentException,
        vSegmentExceptionGPAddress,
        lsq.io.exceptionAddr.gpaddr
      )
    )
  ))
  io.mem_to_ooo.lsqio.isForVSnonLeafPTE := RegNext(Mux(
    atomicsException,
    atomicsExceptionIsForVSnonLeafPTE,
    Mux(misalignBufExceptionOverwrite,
      misalignBufExceptionIsForVSnonLeafPTE,
      Mux(vSegmentException,
        vSegmentExceptionIsForVSnonLeafPTE,
        lsq.io.exceptionAddr.isForVSnonLeafPTE
      )
    )
  ))
  // Forward SoC-level signals (interrupts, MSI, CLINT time) through to the backend.
  io.mem_to_ooo.topToBackendBypass match { case x =>
    x.hartId := io.hartId
    x.l2FlushDone := RegNext(io.l2_flush_done)
    x.externalInterrupt.msip := outer.clint_int_sink.in.head._1(0)
    x.externalInterrupt.mtip := outer.clint_int_sink.in.head._1(1)
    x.externalInterrupt.meip := outer.plic_int_sink.in.head._1(0)
    x.externalInterrupt.seip := outer.plic_int_sink.in.last._1(0)
    x.externalInterrupt.debug := outer.debug_int_sink.in.head._1(0)
    // nmi_31 also folds in the local bus-error-unit interrupt.
    x.externalInterrupt.nmi.nmi_31 := outer.nmi_int_sink.in.head._1(0) | outer.beu_local_int_sink.in.head._1(0)
    x.externalInterrupt.nmi.nmi_43 := outer.nmi_int_sink.in.head._1(1)
    x.msiInfo := DelayNWithValid(io.fromTopToBackend.msiInfo, 1)
    x.clintTime := DelayNWithValid(io.fromTopToBackend.clintTime, 1)
  }

  io.memInfo.sqFull := RegNext(lsq.io.sqFull)
  io.memInfo.lqFull := RegNext(lsq.io.lqFull)
  io.memInfo.dcacheMSHRFull := RegNext(dcache.io.mshrFull)

  io.inner_hartId := io.hartId
  io.inner_reset_vector := RegNext(io.outer_reset_vector)
  io.outer_cpu_halt := io.ooo_to_mem.backendToTopBypass.cpuHalted
  io.outer_l2_flush_en := io.ooo_to_mem.csrCtrl.flush_l2_enable
  io.outer_power_down_en := io.ooo_to_mem.csrCtrl.power_down_enable
  io.outer_cpu_critical_error := io.ooo_to_mem.backendToTopBypass.cpuCriticalError
  io.outer_msi_ack := io.ooo_to_mem.backendToTopBypass.msiAck
  io.outer_beu_errors_icache := RegNext(io.inner_beu_errors_icache)
  io.inner_hc_perfEvents <> RegNext(io.outer_hc_perfEvents)

  // vector segmentUnit: shares vldu issue port 0, load TLB port 0, PMP checker 0,
  // and dcache load port 0 response.
  vSegmentUnit.io.in.bits <> io.ooo_to_mem.issueVldu.head.bits
  vSegmentUnit.io.in.valid := isSegment && io.ooo_to_mem.issueVldu.head.valid// is segment instruction
  vSegmentUnit.io.dtlb.resp.bits <> dtlb_reqs.take(LduCnt).head.resp.bits
  vSegmentUnit.io.dtlb.resp.valid <> dtlb_reqs.take(LduCnt).head.resp.valid
  vSegmentUnit.io.pmpResp <> pmp_check.head.resp
  vSegmentUnit.io.flush_sbuffer.empty := stIsEmpty
  vSegmentUnit.io.redirect <> redirect
  vSegmentUnit.io.rdcache.resp.bits := dcache.io.lsu.load(0).resp.bits
  vSegmentUnit.io.rdcache.resp.valid := dcache.io.lsu.load(0).resp.valid
  vSegmentUnit.io.rdcache.s2_bank_conflict := dcache.io.lsu.load(0).s2_bank_conflict
  // -------------------------
  // Vector Segment Triggers
  // -------------------------
  vSegmentUnit.io.fromCsrTrigger.tdataVec := tdata
  vSegmentUnit.io.fromCsrTrigger.tEnableVec := tEnable
  vSegmentUnit.io.fromCsrTrigger.triggerCanRaiseBpExp := triggerCanRaiseBpExp
  vSegmentUnit.io.fromCsrTrigger.debugMode := debugMode

  // reset tree of MemBlock: two physical subtrees matching floorplan sides.
  if (p(DebugOptionsKey).ResetGen) {
    val leftResetTree = ResetGenNode(
      Seq(
        ModuleNode(ptw),
        ModuleNode(ptw_to_l2_buffer),
        ModuleNode(lsq),
        ModuleNode(dtlb_st_tlb_st),
        ModuleNode(dtlb_prefetch_tlb_prefetch),
        ModuleNode(pmp)
      )
      ++ pmp_checkers.map(ModuleNode(_))
      ++ (if (prefetcherOpt.isDefined) Seq(ModuleNode(prefetcherOpt.get)) else Nil)
      ++ (if (l1PrefetcherOpt.isDefined) Seq(ModuleNode(l1PrefetcherOpt.get)) else Nil)
    )
    val rightResetTree = ResetGenNode(
      Seq(
        ModuleNode(sbuffer),
        ModuleNode(dtlb_ld_tlb_ld),
        ModuleNode(dcache),
        ModuleNode(l1d_to_l2_buffer),
        CellNode(io.reset_backend)
      )
    )
    ResetGen(leftResetTree, reset, sim = false, io.dft_reset)
    ResetGen(rightResetTree, reset, sim = false, io.dft_reset)
  } else {
    io.reset_backend := DontCare
  }
  io.resetInFrontendBypass.toL2Top := io.resetInFrontendBypass.fromFrontend
  // trace interface: register every field toward the encoder; value fields use RegEnable
  // gated by the group's valid so they hold their last meaningful value.
  val traceToL2Top = io.traceCoreInterfaceBypass.toL2Top
  val traceFromBackend = io.traceCoreInterfaceBypass.fromBackend
  traceFromBackend.fromEncoder := RegNext(traceToL2Top.fromEncoder)
  traceToL2Top.toEncoder.trap := RegEnable(
    traceFromBackend.toEncoder.trap,
    traceFromBackend.toEncoder.groups(0).valid && Itype.isTrap(traceFromBackend.toEncoder.groups(0).bits.itype)
  )
  traceToL2Top.toEncoder.priv := RegEnable(
    traceFromBackend.toEncoder.priv,
    traceFromBackend.toEncoder.groups(0).valid
  )
  (0 until TraceGroupNum).foreach { i =>
    traceToL2Top.toEncoder.groups(i).valid := RegNext(traceFromBackend.toEncoder.groups(i).valid)
    traceToL2Top.toEncoder.groups(i).bits.iretire := RegNext(traceFromBackend.toEncoder.groups(i).bits.iretire)
    traceToL2Top.toEncoder.groups(i).bits.itype := RegNext(traceFromBackend.toEncoder.groups(i).bits.itype)
    traceToL2Top.toEncoder.groups(i).bits.ilastsize := RegEnable(
      traceFromBackend.toEncoder.groups(i).bits.ilastsize,
      traceFromBackend.toEncoder.groups(i).valid
    )
    // iaddr = registered base address + registered ftqOffset scaled to bytes.
    traceToL2Top.toEncoder.groups(i).bits.iaddr := RegEnable(
      traceFromBackend.toEncoder.groups(i).bits.iaddr,
      traceFromBackend.toEncoder.groups(i).valid
    ) + (RegEnable(
      traceFromBackend.toEncoder.groups(i).bits.ftqOffset.getOrElse(0.U),
      traceFromBackend.toEncoder.groups(i).valid
    ) << instOffsetBits)
  }


  io.mem_to_ooo.storeDebugInfo := DontCare
  // store event difftest information
  if (env.EnableDifftest) {
    (0 until EnsbufferWidth).foreach{i =>
        io.mem_to_ooo.storeDebugInfo(i).robidx := sbuffer.io.vecDifftestInfo(i).bits.robIdx
        sbuffer.io.vecDifftestInfo(i).bits.pc := io.mem_to_ooo.storeDebugInfo(i).pc
    }
  }

  // top-down info: fan the ROB-head vaddr out to the analyzable units and collect
  // their stall attributions back toward the core.
  dcache.io.debugTopDown.robHeadVaddr := io.debugTopDown.robHeadVaddr
  dtlbRepeater.io.debugTopDown.robHeadVaddr := io.debugTopDown.robHeadVaddr
  lsq.io.debugTopDown.robHeadVaddr := io.debugTopDown.robHeadVaddr
  io.debugTopDown.toCore.robHeadMissInDCache := dcache.io.debugTopDown.robHeadMissInDCache
  io.debugTopDown.toCore.robHeadTlbReplay := lsq.io.debugTopDown.robHeadTlbReplay
  io.debugTopDown.toCore.robHeadTlbMiss := lsq.io.debugTopDown.robHeadTlbMiss
  io.debugTopDown.toCore.robHeadLoadVio := lsq.io.debugTopDown.robHeadLoadVio
  io.debugTopDown.toCore.robHeadLoadMSHR := lsq.io.debugTopDown.robHeadLoadMSHR
  dcache.io.debugTopDown.robHeadOtherReplay := lsq.io.debugTopDown.robHeadOtherReplay
  dcache.io.debugRolling := io.debugRolling

  lsq.io.noUopsIssued := io.topDownInfo.toBackend.noUopsIssued
  io.topDownInfo.toBackend.lqEmpty := lsq.io.lqEmpty
  io.topDownInfo.toBackend.sqEmpty := lsq.io.sqEmpty
  io.topDownInfo.toBackend.l1Miss := dcache.io.l1Miss
  io.topDownInfo.toBackend.l2TopMiss.l2Miss := RegNext(io.topDownInfo.fromL2Top.l2Miss)
  io.topDownInfo.toBackend.l2TopMiss.l3Miss := RegNext(io.topDownInfo.fromL2Top.l3Miss)

  // Issue-queue dequeue statistics; +& is the width-growing add so the counts cannot wrap.
  val hyLdDeqCount = PopCount(io.ooo_to_mem.issueHya.map(x => x.valid && FuType.isLoad(x.bits.uop.fuType)))
  val hyStDeqCount = PopCount(io.ooo_to_mem.issueHya.map(x => x.valid && FuType.isStore(x.bits.uop.fuType)))
  val ldDeqCount = PopCount(io.ooo_to_mem.issueLda.map(_.valid)) +& hyLdDeqCount
  val stDeqCount = PopCount(io.ooo_to_mem.issueSta.take(StaCnt).map(_.valid)) +& hyStDeqCount
  val iqDeqCount = ldDeqCount +& stDeqCount
  XSPerfAccumulate("load_iq_deq_count", ldDeqCount)
  XSPerfHistogram("load_iq_deq_count", ldDeqCount, true.B, 0, LdExuCnt + 1)
  XSPerfAccumulate("store_iq_deq_count", stDeqCount)
  XSPerfHistogram("store_iq_deq_count", stDeqCount, true.B, 0, StAddrCnt + 1)
  XSPerfAccumulate("ls_iq_deq_count", iqDeqCount)

  // HPM event selection: MemBlock owns hpmevent slots 16..23.
  val pfevent = Module(new PFEvent)
  pfevent.io.distribute_csr := csrCtrl.distribute_csr
  val csrevents = pfevent.io.hpmevent.slice(16,24)

  val perfFromUnits = (loadUnits ++ Seq(sbuffer, lsq, dcache)).flatMap(_.getPerfEvents)
  val perfFromPTW = perfEventsPTW.map(x => ("PTW_" + x._1, x._2))
  val perfBlock = Seq(("ldDeqCount", ldDeqCount),
                      ("stDeqCount", stDeqCount))
  // let index = 0 be no event
  val allPerfEvents = Seq(("noEvent", 0.U)) ++ perfFromUnits ++ perfFromPTW ++ perfBlock

  // Elaboration-time dump of the event-index encoding (println runs at Chisel elaboration).
  if (printEventCoding) {
    for (((name, inc), i) <- allPerfEvents.zipWithIndex) {
      println("MemBlock perfEvents Set", name, inc, i)
    }
  }

  val allPerfInc = allPerfEvents.map(_._2.asTypeOf(new PerfEvent))
  val perfEvents = HPerfMonitor(csrevents, allPerfInc).getPerfEvents
  generatePerfEvent()

  // MBIST: collect every pipeline node under MemBlock and expose one interface.
  private val mbistPl = MbistPipeline.PlaceMbistPipeline(Int.MaxValue, "MbistPipeMemBlk", hasMbist)
  private val mbistIntf = if(hasMbist) {
    val params = mbistPl.get.nodeParams
    val intf = Some(Module(new MbistInterface(
      params = Seq(params),
      ids = Seq(mbistPl.get.childrenIds),
      name = s"MbistIntfMemBlk",
      pipelineNum = 1
    )))
    intf.get.toPipeline.head <> mbistPl.get.mbist
    mbistPl.get.registerCSV(intf.get.info, "MbistMemBlk")
    intf.get.mbist := DontCare
    dontTouch(intf.get.mbist)
    //TODO: add mbist controller connections here
    intf
  } else {
    None
  }
  // SRAM DFT broadcast bundle (present only when DFT is enabled for this config).
  private val sigFromSrams = if (hasDFT) Some(SramHelper.genBroadCastBundleTop()) else None
  private val cg = ClockGate.genTeSrc
  dontTouch(cg)

  if (hasMbist) {
    cg.cgen := io.dft.get.cgen
  } else {
    cg.cgen := false.B
  }

  // sram debug: default-tie the broadcast bundle, then drive the DFT fields when enabled.
  sigFromSrams.foreach({ case sig => sig := DontCare })
  sigFromSrams.zip(io.dft).foreach {
    case (sig, dft) =>
      if (hasMbist) {
        sig.ram_hold := dft.ram_hold
        sig.ram_bypass := dft.ram_bypass
        sig.ram_bp_clken := dft.ram_bp_clken
        sig.ram_aux_clk := dft.ram_aux_clk
        sig.ram_aux_ckbp := dft.ram_aux_ckbp
        sig.ram_mcp_hold := dft.ram_mcp_hold
        sig.cgen := dft.cgen
      }
      if (hasSramCtl) {
        // ram_ctl is registered; presumably for timing on the broadcast net — TODO confirm.
        sig.ram_ctl := RegNext(dft.ram_ctl)
      }
  }
  // Re-broadcast DFT signals/reset toward frontend and backend.
  io.dft_frnt.zip(sigFromSrams).foreach({ case (a, b) => a := b })
  io.dft_reset_frnt.zip(io.dft_reset).foreach({ case (a, b) => a := b })
  io.dft_bcknd.zip(sigFromSrams).foreach({ case (a, b) => a := b })
  io.dft_reset_bcknd.zip(io.dft_reset).foreach({ case (a, b) => a := b })
}

// Thin LazyModule wrapper around MemBlockInlined; kept non-inlined so MemBlock is a
// distinct module boundary in the generated hierarchy.
class MemBlock()(implicit p: Parameters) extends LazyModule
  with HasXSParameter {
  override def shouldBeInlined: Boolean = false

  val inner = LazyModule(new MemBlockInlined())

  lazy val module = new MemBlockImp(this)
}

// Module implementation for MemBlock: mirrors the inner module's IO one-for-one and
// optionally inserts a reset generator around the inner instance.
class MemBlockImp(wrapper: MemBlock) extends LazyModuleImp(wrapper) {
  val io = IO(wrapper.inner.module.io.cloneType)
  val io_perf = IO(wrapper.inner.module.io_perf.cloneType)
  io <> wrapper.inner.module.io
  io_perf <> wrapper.inner.module.io_perf

  if (p(DebugOptionsKey).ResetGen) {
    ResetGen(
      ResetGenNode(Seq(ModuleNode(wrapper.inner.module))),
      reset, sim = false, io.dft_reset
    )
  }
}