xref: /XiangShan/src/main/scala/xiangshan/mem/MemBlock.scala (revision e532573087084bcf483605051028da7765553baf)
1c590fb32Scz4e/***************************************************************************************
2c590fb32Scz4e* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
3c590fb32Scz4e* Copyright (c) 2020-2021 Peng Cheng Laboratory
4c590fb32Scz4e*
5c590fb32Scz4e* XiangShan is licensed under Mulan PSL v2.
6c590fb32Scz4e* You can use this software according to the terms and conditions of the Mulan PSL v2.
7c590fb32Scz4e* You may obtain a copy of Mulan PSL v2 at:
8c590fb32Scz4e*          http://license.coscl.org.cn/MulanPSL2
9c590fb32Scz4e*
10c590fb32Scz4e* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11c590fb32Scz4e* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12c590fb32Scz4e* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13c590fb32Scz4e*
14c590fb32Scz4e* See the Mulan PSL v2 for more details.
15c590fb32Scz4e***************************************************************************************/
16c590fb32Scz4e
17c590fb32Scz4epackage xiangshan.mem
18c590fb32Scz4e
19c590fb32Scz4eimport org.chipsalliance.cde.config.Parameters
20c590fb32Scz4eimport chisel3._
21c590fb32Scz4eimport chisel3.util._
22c590fb32Scz4eimport freechips.rocketchip.diplomacy._
23c590fb32Scz4eimport freechips.rocketchip.diplomacy.{BundleBridgeSource, LazyModule, LazyModuleImp}
24c590fb32Scz4eimport freechips.rocketchip.interrupts.{IntSinkNode, IntSinkPortSimple}
25c590fb32Scz4eimport freechips.rocketchip.tile.HasFPUParameters
26c590fb32Scz4eimport freechips.rocketchip.tilelink._
27c590fb32Scz4eimport utils._
28c590fb32Scz4eimport utility._
29602aa9f1Scz4eimport utility.mbist.{MbistInterface, MbistPipeline}
3030f35717Scz4eimport utility.sram.{SramBroadcastBundle, SramHelper}
318cfc24b2STang Haojinimport system.{HasSoCParameter, SoCParamsKey}
32c590fb32Scz4eimport xiangshan._
33c590fb32Scz4eimport xiangshan.ExceptionNO._
34c590fb32Scz4eimport xiangshan.frontend.HasInstrMMIOConst
35c590fb32Scz4eimport xiangshan.backend.Bundles.{DynInst, MemExuInput, MemExuOutput}
36c590fb32Scz4eimport xiangshan.backend.ctrlblock.{DebugLSIO, LsTopdownInfo}
37c590fb32Scz4eimport xiangshan.backend.exu.MemExeUnit
38c590fb32Scz4eimport xiangshan.backend.fu._
39c590fb32Scz4eimport xiangshan.backend.fu.FuType._
40a67fd0f5SGuanghui Chengimport xiangshan.backend.fu.NewCSR.{CsrTriggerBundle, TriggerUtil, PFEvent}
41075d4937Sjunxiong-jiimport xiangshan.backend.fu.util.{CSRConst, SdtrigExt}
42c590fb32Scz4eimport xiangshan.backend.{BackendToTopBundle, TopToBackendBundle}
43c590fb32Scz4eimport xiangshan.backend.rob.{RobDebugRollingIO, RobPtr, RobLsqIO}
44c590fb32Scz4eimport xiangshan.backend.datapath.NewPipelineConnect
45c590fb32Scz4eimport xiangshan.backend.trace.{Itype, TraceCoreInterface}
46c590fb32Scz4eimport xiangshan.backend.Bundles._
47c590fb32Scz4eimport xiangshan.mem._
48c590fb32Scz4eimport xiangshan.mem.mdp._
499e12e8edScz4eimport xiangshan.mem.Bundles._
50c590fb32Scz4eimport xiangshan.mem.prefetch.{BasePrefecher, L1Prefetcher, SMSParams, SMSPrefetcher}
51c590fb32Scz4eimport xiangshan.cache._
52c590fb32Scz4eimport xiangshan.cache.mmu._
534b2c87baS梁森 Liang Senimport coupledL2.PrefetchRecv
548cfc24b2STang Haojinimport utility.mbist.{MbistInterface, MbistPipeline}
558cfc24b2STang Haojinimport utility.sram.{SramBroadcastBundle, SramHelper}
56602aa9f1Scz4e
trait HasMemBlockParameters extends HasXSParameter {
  // number of memory units, taken from the backend configuration
  val LduCnt  = backendParams.LduCnt   // pure load units
  val StaCnt  = backendParams.StaCnt   // store-address units
  val StdCnt  = backendParams.StdCnt   // store-data units
  val HyuCnt  = backendParams.HyuCnt   // hybrid (load + store-address) units
  val VlduCnt = backendParams.VlduCnt  // vector load units
  val VstuCnt = backendParams.VstuCnt  // vector store units

  // derived counts: hybrid units add both a load port and a store-address port
  val LdExuCnt  = LduCnt + HyuCnt
  val StAddrCnt = StaCnt + HyuCnt
  val StDataCnt = StdCnt
  val MemExuCnt = LduCnt + HyuCnt + StaCnt + StdCnt
  val MemAddrExtCnt = LdExuCnt + StaCnt
  val MemVExuCnt = VlduCnt + VstuCnt

  // fixed writeback-port assignment on the load side
  val AtomicWBPort   = 0
  val MisalignWBPort = 1
  val UncacheWBPort  = 2
  // writeback ports that may carry non-cacheable (NC) results; use the named
  // port constants instead of repeating the magic numbers 1 and 2
  val NCWBPorts = Seq(MisalignWBPort, UncacheWBPort)
}
78c590fb32Scz4e
// Base class for MemBlock IO bundles: a Bundle with the memory-unit counts
// from HasMemBlockParameters in scope.
abstract class MemBlockBundle(implicit val p: Parameters) extends Bundle with HasMemBlockParameters
80c590fb32Scz4e
// Store-data functional unit: combinational pass-through that forwards the
// source operand (the store data) and the robIdx to the writeback port.
class Std(cfg: FuConfig)(implicit p: Parameters) extends FuncUnit(cfg) {
  io.in.ready := io.out.ready
  io.out.valid := io.in.valid
  // default-initialize the whole output bundle, then override the fields that
  // matter (Chisel last-connect semantics: the later assignments win)
  io.out.bits := 0.U.asTypeOf(io.out.bits)
  io.out.bits.res.data := io.in.bits.data.src(0)
  io.out.bits.ctrl.robIdx := io.in.bits.ctrl.robIdx
}
88c590fb32Scz4e
// Signals flowing from the out-of-order backend into MemBlock.
class ooo_to_mem(implicit p: Parameters) extends MemBlockBundle {
  val backendToTopBypass = Flipped(new BackendToTopBundle)

  // load fast-path hints, one entry per load-capable pipeline
  val loadFastMatch = Vec(LdExuCnt, Input(UInt(LdExuCnt.W)))
  val loadFastFuOpType = Vec(LdExuCnt, Input(FuOpType()))
  val loadFastImm = Vec(LdExuCnt, Input(UInt(12.W)))
  val sfence = Input(new SfenceBundle)
  val tlbCsr = Input(new TlbCsrBundle)
  // LSQ-related state driven by the ROB side
  val lsqio = new Bundle {
    val lcommit = Input(UInt(log2Up(CommitWidth + 1).W)) // load commit count (<= CommitWidth)
    val scommit = Input(UInt(log2Up(CommitWidth + 1).W)) // store commit count (<= CommitWidth)
    val pendingMMIOld = Input(Bool())
    val pendingld = Input(Bool())
    val pendingst = Input(Bool())
    val pendingVst = Input(Bool())
    val commit = Input(Bool())
    val pendingPtr = Input(new RobPtr)
    val pendingPtrNext = Input(new RobPtr)
  }

  val isStoreException = Input(Bool())
  val isVlsException = Input(Bool())
  val csrCtrl = Flipped(new CustomCSRCtrlIO)
  val enqLsq = new LsqEnqIO
  val flushSb = Input(Bool()) // request to flush the store buffer

  val storePc = Vec(StaCnt, Input(UInt(VAddrBits.W))) // for hw prefetch
  val hybridPc = Vec(HyuCnt, Input(UInt(VAddrBits.W))) // for hw prefetch

  // issue ports, one MixedVec per unit kind
  val issueLda = MixedVec(Seq.fill(LduCnt)(Flipped(DecoupledIO(new MemExuInput))))
  val issueSta = MixedVec(Seq.fill(StaCnt)(Flipped(DecoupledIO(new MemExuInput))))
  val issueStd = MixedVec(Seq.fill(StdCnt)(Flipped(DecoupledIO(new MemExuInput))))
  val issueHya = MixedVec(Seq.fill(HyuCnt)(Flipped(DecoupledIO(new MemExuInput))))
  val issueVldu = MixedVec(Seq.fill(VlduCnt)(Flipped(DecoupledIO(new MemExuInput(isVector=true)))))

  // all issue ports flattened, in (lda, sta, std, hya, vldu) order
  def issueUops = issueLda ++ issueSta ++ issueStd ++ issueHya ++ issueVldu
}
126c590fb32Scz4e
// Signals flowing from MemBlock back to the out-of-order backend.
class mem_to_ooo(implicit p: Parameters) extends MemBlockBundle {
  val topToBackendBypass = new TopToBackendBundle

  val otherFastWakeup = Vec(LdExuCnt, ValidIO(new DynInst))
  // queue occupancy/cancel feedback for dispatch
  val lqCancelCnt = Output(UInt(log2Up(VirtualLoadQueueSize + 1).W))
  val sqCancelCnt = Output(UInt(log2Up(StoreQueueSize + 1).W))
  val sqDeq = Output(UInt(log2Ceil(EnsbufferWidth + 1).W))
  val lqDeq = Output(UInt(log2Up(CommitWidth + 1).W))
  // used by VLSU issue queue, the vector store would wait all store before it, and the vector load would wait all load
  val sqDeqPtr = Output(new SqPtr)
  val lqDeqPtr = Output(new LqPtr)
  val stIn = Vec(StAddrCnt, ValidIO(new MemExuInput))
  val stIssuePtr = Output(new SqPtr())

  // redirect raised on a detected memory-ordering violation
  val memoryViolation = ValidIO(new Redirect)
  val sbIsEmpty = Output(Bool()) // store buffer empty indication

  val lsTopdownInfo = Vec(LdExuCnt, Output(new LsTopdownInfo))

  // LSQ state exposed for exception/CSR reporting
  val lsqio = new Bundle {
    val vaddr = Output(UInt(XLEN.W))
    val vstart = Output(UInt((log2Up(VLEN) + 1).W))
    val vl = Output(UInt((log2Up(VLEN) + 1).W))
    val gpaddr = Output(UInt(XLEN.W))
    val isForVSnonLeafPTE = Output(Bool())
    val mmio = Output(Vec(LoadPipelineWidth, Bool()))
    val uop = Output(Vec(LoadPipelineWidth, new DynInst))
    val lqCanAccept = Output(Bool())
    val sqCanAccept = Output(Bool())
  }

  // per-ensbuffer-slot debug handshake: MemBlock reports robidx, receives pc
  val storeDebugInfo = Vec(EnsbufferWidth, new Bundle {
    val robidx = Output(new RobPtr)
    val pc     = Input(UInt(VAddrBits.W))
  })

  // writeback ports, one Vec per unit kind
  val writebackLda = Vec(LduCnt, DecoupledIO(new MemExuOutput))
  val writebackSta = Vec(StaCnt, DecoupledIO(new MemExuOutput))
  val writebackStd = Vec(StdCnt, DecoupledIO(new MemExuOutput))
  val writebackHyuLda = Vec(HyuCnt, DecoupledIO(new MemExuOutput))
  val writebackHyuSta = Vec(HyuCnt, DecoupledIO(new MemExuOutput))
  val writebackVldu = Vec(VlduCnt, DecoupledIO(new MemExuOutput(isVector = true)))
  // all writeback ports flattened; NOTE: order differs from ooo_to_mem.issueUops
  def writeBack: Seq[DecoupledIO[MemExuOutput]] = {
    writebackSta ++
      writebackHyuLda ++ writebackHyuSta ++
      writebackLda ++
      writebackVldu ++
      writebackStd
  }

  // issue-queue feedback per unit kind
  val ldaIqFeedback = Vec(LduCnt, new MemRSFeedbackIO)
  val staIqFeedback = Vec(StaCnt, new MemRSFeedbackIO)
  val hyuIqFeedback = Vec(HyuCnt, new MemRSFeedbackIO)
  val vstuIqFeedback= Vec(VstuCnt, new MemRSFeedbackIO(isVector = true))
  val vlduIqFeedback= Vec(VlduCnt, new MemRSFeedbackIO(isVector = true))
  val ldCancel = Vec(backendParams.LdExuCnt, new LoadCancelIO)
  val wakeup = Vec(backendParams.LdExuCnt, Valid(new DynInst))

  val s3_delayed_load_error = Vec(LdExuCnt, Output(Bool()))
}
187c590fb32Scz4e
// Top-down performance-analysis flags from MemBlock to the core, describing
// why the ROB-head instruction is stalled in the memory subsystem.
class MemCoreTopDownIO extends Bundle {
  val robHeadMissInDCache = Output(Bool())
  val robHeadTlbReplay = Output(Bool())
  val robHeadTlbMiss = Output(Bool())
  val robHeadLoadVio = Output(Bool())
  val robHeadLoadMSHR = Output(Bool())
}
195c590fb32Scz4e
// Frontend-to-MemBlock connection: the ITLB's page-table-walk requests are
// serviced by the PTW instantiated inside MemBlock.
class fetch_to_mem(implicit p: Parameters) extends XSBundle{
  val itlb = Flipped(new TlbPtwIO())
}
199c590fb32Scz4e
// triple buffer applied in i-mmio path (two at MemBlock, one at L2Top)
class InstrUncacheBuffer()(implicit p: Parameters) extends LazyModule with HasInstrMMIOConst {
  val node = new TLBufferNode(BufferParams.default, BufferParams.default, BufferParams.default, BufferParams.default, BufferParams.default)
  lazy val module = new InstrUncacheBufferImpl

  class InstrUncacheBufferImpl extends LazyModuleImp(this) {
    (node.in zip node.out) foreach { case ((in, edgeIn), (out, edgeOut)) =>
      // two buffer stages on the request (A) and response (D) channels
      out.a <> BufferParams.default(BufferParams.default(in.a))
      in.d <> BufferParams.default(BufferParams.default(out.d))

      // only a.valid, a.ready, a.address can change
      // hoping that the rest would be optimized to keep MemBlock port unchanged after adding buffer
      // (Chisel last-connect: the constant drives below override the <> above)
      out.a.bits.data := 0.U
      out.a.bits.mask := Fill(mmioBusBytes, 1.U(1.W))
      out.a.bits.opcode := 4.U // Get (TileLink channel-A opcode 4)
      out.a.bits.size := log2Ceil(mmioBusBytes).U
      out.a.bits.source := 0.U
    }
  }
}
220c590fb32Scz4e
// triple buffer applied in L1I$-L2 path (two at MemBlock, one at L2Top)
class ICacheBuffer()(implicit p: Parameters) extends LazyModule {
  val node = new TLBufferNode(BufferParams.default, BufferParams.default, BufferParams.default, BufferParams.default, BufferParams.default)
  lazy val module = new ICacheBufferImpl

  class ICacheBufferImpl extends LazyModuleImp(this) {
    // insert two default buffer stages on the request (A) and response (D) channels
    node.in.zip(node.out).foreach { case ((bundleIn, _), (bundleOut, _)) =>
      bundleOut.a <> BufferParams.default(BufferParams.default(bundleIn.a))
      bundleIn.d <> BufferParams.default(BufferParams.default(bundleOut.d))
    }
  }
}
233c590fb32Scz4e
// Pass-through buffer for the I-cache control port: same double-buffer
// structure as ICacheBuffer, applied to this separate TileLink path.
class ICacheCtrlBuffer()(implicit p: Parameters) extends LazyModule {
  val node = new TLBufferNode(BufferParams.default, BufferParams.default, BufferParams.default, BufferParams.default, BufferParams.default)
  lazy val module = new ICacheCtrlBufferImpl

  class ICacheCtrlBufferImpl extends LazyModuleImp(this) {
    // two default buffer stages per channel; edges are unused here
    for (((in, _), (out, _)) <- node.in zip node.out) {
      out.a <> BufferParams.default(BufferParams.default(in.a))
      in.d <> BufferParams.default(BufferParams.default(out.d))
    }
  }
}
245c590fb32Scz4e
// Frontend bus goes through MemBlock
class FrontendBridge()(implicit p: Parameters) extends LazyModule {
  // pass-through buffer nodes; suggestName keeps the generated IO port names stable
  val icache_node = LazyModule(new ICacheBuffer()).suggestName("icache").node// to keep IO port name
  val icachectrl_node = LazyModule(new ICacheCtrlBuffer()).suggestName("icachectrl").node
  val instr_uncache_node = LazyModule(new InstrUncacheBuffer()).suggestName("instr_uncache").node
  // no extra logic: this module only forwards the diplomacy nodes above
  lazy val module = new LazyModuleImp(this) {
  }
}
254c590fb32Scz4e
// Diplomacy-level container for MemBlock: instantiates the d-cache, uncache
// path, PTW, L2 buffers, prefetch senders, frontend bridge and interrupt
// sinks, and performs the static node wiring between them.
class MemBlockInlined()(implicit p: Parameters) extends LazyModule
  with HasXSParameter {
  // inline this LazyModule into its parent (no extra module hierarchy level)
  override def shouldBeInlined: Boolean = true

  val dcache = LazyModule(new DCacheWrapper())
  val uncache = LazyModule(new Uncache())
  val uncache_port = TLTempNode()
  val uncache_xbar = TLXbar()
  val ptw = LazyModule(new L2TLBWrapper())
  // NOTE(review): these two are null (not Option) when the feature is absent;
  // users must guard on the same configuration conditions
  val ptw_to_l2_buffer = if (!coreParams.softPTW) LazyModule(new TLBuffer) else null
  val l1d_to_l2_buffer = if (coreParams.dcacheParametersOpt.nonEmpty) LazyModule(new TLBuffer) else null
  val dcache_port = TLNameNode("dcache_client") // to keep dcache-L2 port name
  // prefetch hint senders toward L2/L3, present only when a prefetcher is
  // configured (L3 additionally requires an L3 cache in the SoC parameters)
  val l2_pf_sender_opt = coreParams.prefetcher.map(_ =>
    BundleBridgeSource(() => new PrefetchRecv)
  )
  val l3_pf_sender_opt = if (p(SoCParamsKey).L3CacheParamsOpt.nonEmpty) coreParams.prefetcher.map(_ =>
    BundleBridgeSource(() => new huancun.PrefetchRecv)
  ) else None
  val frontendBridge = LazyModule(new FrontendBridge)
  // interrupt sinks
  val clint_int_sink = IntSinkNode(IntSinkPortSimple(1, 2))
  val debug_int_sink = IntSinkNode(IntSinkPortSimple(1, 1))
  val plic_int_sink = IntSinkNode(IntSinkPortSimple(2, 1))
  val nmi_int_sink = IntSinkNode(IntSinkPortSimple(1, (new NonmaskableInterruptIO).elements.size))
  val beu_local_int_sink = IntSinkNode(IntSinkPortSimple(1, 1))

  // static diplomacy wiring
  if (!coreParams.softPTW) {
    ptw_to_l2_buffer.node := ptw.node
  }
  // uncache client -> xbar, then fan out to the d-cache uncache port (if any)
  // and the outward uncache_port, each behind a 2-deep buffer chain
  uncache_xbar := TLBuffer() := uncache.clientNode
  if (dcache.uncacheNode.isDefined) {
    dcache.uncacheNode.get := TLBuffer.chainNode(2) := uncache_xbar
  }
  uncache_port := TLBuffer.chainNode(2) := uncache_xbar

  lazy val module = new MemBlockInlinedImp(this)
}
292c590fb32Scz4e
293c590fb32Scz4eclass MemBlockInlinedImp(outer: MemBlockInlined) extends LazyModuleImp(outer)
294c590fb32Scz4e  with HasXSParameter
295c590fb32Scz4e  with HasFPUParameters
296c590fb32Scz4e  with HasPerfEvents
2978cfc24b2STang Haojin  with HasSoCParameter
298c590fb32Scz4e  with HasL1PrefetchSourceParameter
299c590fb32Scz4e  with HasCircularQueuePtrHelper
300c590fb32Scz4e  with HasMemBlockParameters
301c590fb32Scz4e  with HasTlbConst
302c590fb32Scz4e  with SdtrigExt
303c590fb32Scz4e{
304c590fb32Scz4e  val io = IO(new Bundle {
305c590fb32Scz4e    val hartId = Input(UInt(hartIdLen.W))
306c590fb32Scz4e    val redirect = Flipped(ValidIO(new Redirect))
307c590fb32Scz4e
308c590fb32Scz4e    val ooo_to_mem = new ooo_to_mem
309c590fb32Scz4e    val mem_to_ooo = new mem_to_ooo
310c590fb32Scz4e    val fetch_to_mem = new fetch_to_mem
311c590fb32Scz4e
312c590fb32Scz4e    val ifetchPrefetch = Vec(LduCnt, ValidIO(new SoftIfetchPrefetchBundle))
313c590fb32Scz4e
314c590fb32Scz4e    // misc
315c590fb32Scz4e    val error = ValidIO(new L1CacheErrorInfo)
316c590fb32Scz4e    val memInfo = new Bundle {
317c590fb32Scz4e      val sqFull = Output(Bool())
318c590fb32Scz4e      val lqFull = Output(Bool())
319c590fb32Scz4e      val dcacheMSHRFull = Output(Bool())
320c590fb32Scz4e    }
321c590fb32Scz4e    val debug_ls = new DebugLSIO
322c590fb32Scz4e    val l2_hint = Input(Valid(new L2ToL1Hint()))
323c590fb32Scz4e    val l2PfqBusy = Input(Bool())
324c590fb32Scz4e    val l2_tlb_req = Flipped(new TlbRequestIO(nRespDups = 2))
325c590fb32Scz4e    val l2_pmp_resp = new PMPRespBundle
326c590fb32Scz4e    val l2_flush_done = Input(Bool())
327c590fb32Scz4e
328c590fb32Scz4e    val debugTopDown = new Bundle {
329c590fb32Scz4e      val robHeadVaddr = Flipped(Valid(UInt(VAddrBits.W)))
330c590fb32Scz4e      val toCore = new MemCoreTopDownIO
331c590fb32Scz4e    }
332c590fb32Scz4e    val debugRolling = Flipped(new RobDebugRollingIO)
333c590fb32Scz4e
334c590fb32Scz4e    // All the signals from/to frontend/backend to/from bus will go through MemBlock
335c590fb32Scz4e    val fromTopToBackend = Input(new Bundle {
3368cfc24b2STang Haojin      val msiInfo   = ValidIO(UInt(soc.IMSICParams.MSI_INFO_WIDTH.W))
337c590fb32Scz4e      val clintTime = ValidIO(UInt(64.W))
338c590fb32Scz4e    })
339c590fb32Scz4e    val inner_hartId = Output(UInt(hartIdLen.W))
340c590fb32Scz4e    val inner_reset_vector = Output(UInt(PAddrBits.W))
341c590fb32Scz4e    val outer_reset_vector = Input(UInt(PAddrBits.W))
342c590fb32Scz4e    val outer_cpu_halt = Output(Bool())
343c590fb32Scz4e    val outer_l2_flush_en = Output(Bool())
344c590fb32Scz4e    val outer_power_down_en = Output(Bool())
345c590fb32Scz4e    val outer_cpu_critical_error = Output(Bool())
3468cfc24b2STang Haojin    val outer_msi_ack = Output(Bool())
347c590fb32Scz4e    val inner_beu_errors_icache = Input(new L1BusErrorUnitInfo)
348c590fb32Scz4e    val outer_beu_errors_icache = Output(new L1BusErrorUnitInfo)
349c590fb32Scz4e    val inner_hc_perfEvents = Output(Vec(numPCntHc * coreParams.L2NBanks + 1, new PerfEvent))
350c590fb32Scz4e    val outer_hc_perfEvents = Input(Vec(numPCntHc * coreParams.L2NBanks + 1, new PerfEvent))
351c590fb32Scz4e
352c590fb32Scz4e    // reset signals of frontend & backend are generated in memblock
353c590fb32Scz4e    val reset_backend = Output(Reset())
    // Reset signal from frontend.
355c590fb32Scz4e    val resetInFrontendBypass = new Bundle{
356c590fb32Scz4e      val fromFrontend = Input(Bool())
357c590fb32Scz4e      val toL2Top      = Output(Bool())
358c590fb32Scz4e    }
359c590fb32Scz4e    val traceCoreInterfaceBypass = new Bundle{
360c590fb32Scz4e      val fromBackend = Flipped(new TraceCoreInterface(hasOffset = true))
361c590fb32Scz4e      val toL2Top     = new TraceCoreInterface
362c590fb32Scz4e    }
363c590fb32Scz4e
364c590fb32Scz4e    val topDownInfo = new Bundle {
365c590fb32Scz4e      val fromL2Top = Input(new TopDownFromL2Top)
366c590fb32Scz4e      val toBackend = Flipped(new TopDownInfo)
367c590fb32Scz4e    }
36830f35717Scz4e    val dft = Option.when(hasDFT)(Input(new SramBroadcastBundle))
36930f35717Scz4e    val dft_reset = Option.when(hasMbist)(Input(new DFTResetSignals()))
37030f35717Scz4e    val dft_frnt = Option.when(hasDFT)(Output(new SramBroadcastBundle))
37130f35717Scz4e    val dft_reset_frnt = Option.when(hasMbist)(Output(new DFTResetSignals()))
37230f35717Scz4e    val dft_bcknd = Option.when(hasDFT)(Output(new SramBroadcastBundle))
37330f35717Scz4e    val dft_reset_bcknd = Option.when(hasMbist)(Output(new DFTResetSignals()))
374c590fb32Scz4e  })
375c590fb32Scz4e
3761592abd1SYan Xu  io.mem_to_ooo.writeBack.zipWithIndex.foreach{ case (wb, i) =>
3771592abd1SYan Xu    PerfCCT.updateInstPos(wb.bits.uop.debug_seqNum, PerfCCT.InstPos.AtBypassVal.id.U, wb.valid, clock, reset)
3781592abd1SYan Xu  }
3791592abd1SYan Xu
380c590fb32Scz4e  dontTouch(io.inner_hartId)
381c590fb32Scz4e  dontTouch(io.inner_reset_vector)
382c590fb32Scz4e  dontTouch(io.outer_reset_vector)
383c590fb32Scz4e  dontTouch(io.outer_cpu_halt)
384c590fb32Scz4e  dontTouch(io.outer_l2_flush_en)
385c590fb32Scz4e  dontTouch(io.outer_power_down_en)
386c590fb32Scz4e  dontTouch(io.outer_cpu_critical_error)
387c590fb32Scz4e  dontTouch(io.inner_beu_errors_icache)
388c590fb32Scz4e  dontTouch(io.outer_beu_errors_icache)
389c590fb32Scz4e  dontTouch(io.inner_hc_perfEvents)
390c590fb32Scz4e  dontTouch(io.outer_hc_perfEvents)
391c590fb32Scz4e
392c590fb32Scz4e  val redirect = RegNextWithEnable(io.redirect)
393c590fb32Scz4e
394c590fb32Scz4e  private val dcache = outer.dcache.module
395c590fb32Scz4e  val uncache = outer.uncache.module
396c590fb32Scz4e
397c590fb32Scz4e  //val delayedDcacheRefill = RegNext(dcache.io.lsu.lsq)
398c590fb32Scz4e
399c590fb32Scz4e  val csrCtrl = DelayN(io.ooo_to_mem.csrCtrl, 2)
400c590fb32Scz4e  dcache.io.l2_pf_store_only := RegNext(io.ooo_to_mem.csrCtrl.pf_ctrl.l2_pf_store_only, false.B)
401c590fb32Scz4e  io.error <> DelayNWithValid(dcache.io.error, 2)
402c590fb32Scz4e  when(!csrCtrl.cache_error_enable){
403c590fb32Scz4e    io.error.bits.report_to_beu := false.B
404c590fb32Scz4e    io.error.valid := false.B
405c590fb32Scz4e  }
406c590fb32Scz4e
407c590fb32Scz4e  val loadUnits = Seq.fill(LduCnt)(Module(new LoadUnit))
408c590fb32Scz4e  val storeUnits = Seq.fill(StaCnt)(Module(new StoreUnit))
409c590fb32Scz4e  val stdExeUnits = Seq.fill(StdCnt)(Module(new MemExeUnit(backendParams.memSchdParams.get.issueBlockParams.find(_.StdCnt != 0).get.exuBlockParams.head)))
410c590fb32Scz4e  val hybridUnits = Seq.fill(HyuCnt)(Module(new HybridUnit)) // Todo: replace it with HybridUnit
411c590fb32Scz4e  val stData = stdExeUnits.map(_.io.out)
412c590fb32Scz4e  val exeUnits = loadUnits ++ storeUnits
413c590fb32Scz4e
414c590fb32Scz4e  // The number of vector load/store units is decoupled with the number of load/store units
415c590fb32Scz4e  val vlSplit = Seq.fill(VlduCnt)(Module(new VLSplitImp))
416c590fb32Scz4e  val vsSplit = Seq.fill(VstuCnt)(Module(new VSSplitImp))
417c590fb32Scz4e  val vlMergeBuffer = Module(new VLMergeBufferImp)
418c590fb32Scz4e  val vsMergeBuffer = Seq.fill(VstuCnt)(Module(new VSMergeBufferImp))
419c590fb32Scz4e  val vSegmentUnit  = Module(new VSegmentUnit)
420c590fb32Scz4e  val vfofBuffer    = Module(new VfofBuffer)
421c590fb32Scz4e
422c590fb32Scz4e  // misalign Buffer
423c590fb32Scz4e  val loadMisalignBuffer = Module(new LoadMisalignBuffer)
424c590fb32Scz4e  val storeMisalignBuffer = Module(new StoreMisalignBuffer)
425c590fb32Scz4e
426c590fb32Scz4e  val l1_pf_req = Wire(Decoupled(new L1PrefetchReq()))
427c590fb32Scz4e  dcache.io.sms_agt_evict_req.ready := false.B
42805cc6da9SYanqin Li  val l1D_pf_enable = GatedRegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_enable, 2, Some(false.B))
429c590fb32Scz4e  val prefetcherOpt: Option[BasePrefecher] = coreParams.prefetcher.map {
430c590fb32Scz4e    case _: SMSParams =>
431c590fb32Scz4e      val sms = Module(new SMSPrefetcher())
43205cc6da9SYanqin Li      val enableSMS = Constantin.createRecord(s"enableSMS$hartId", initValue = true)
43305cc6da9SYanqin Li      // constantinCtrl && master switch csrCtrl && single switch csrCtrl
43405cc6da9SYanqin Li      sms.io.enable := enableSMS && l1D_pf_enable &&
43505cc6da9SYanqin Li        GatedRegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l2_pf_recv_enable, 2, Some(false.B))
436c590fb32Scz4e      sms.io_agt_en := GatedRegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_enable_agt, 2, Some(false.B))
437c590fb32Scz4e      sms.io_pht_en := GatedRegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_enable_pht, 2, Some(false.B))
438c590fb32Scz4e      sms.io_act_threshold := GatedRegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_active_threshold, 2, Some(12.U))
439c590fb32Scz4e      sms.io_act_stride := GatedRegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_active_stride, 2, Some(30.U))
440c590fb32Scz4e      sms.io_stride_en := false.B
441c590fb32Scz4e      sms.io_dcache_evict <> dcache.io.sms_agt_evict_req
4424b2c87baS梁森 Liang Sen      val mbistSmsPl = MbistPipeline.PlaceMbistPipeline(1, "MbistPipeSms", hasMbist)
443c590fb32Scz4e      sms
444c590fb32Scz4e  }
445c590fb32Scz4e  prefetcherOpt.foreach{ pf => pf.io.l1_req.ready := false.B }
446c590fb32Scz4e  val hartId = p(XSCoreParamsKey).HartId
447c590fb32Scz4e  val l1PrefetcherOpt: Option[BasePrefecher] = coreParams.prefetcher.map {
448c590fb32Scz4e    case _ =>
449c590fb32Scz4e      val l1Prefetcher = Module(new L1Prefetcher())
4509db05eaeScz4e      val enableL1StreamPrefetcher = Constantin.createRecord(s"enableL1StreamPrefetcher$hartId", initValue = true)
45105cc6da9SYanqin Li      // constantinCtrl && master switch csrCtrl && single switch csrCtrl
45205cc6da9SYanqin Li      l1Prefetcher.io.enable := enableL1StreamPrefetcher && l1D_pf_enable &&
45305cc6da9SYanqin Li        GatedRegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_enable_stride, 2, Some(false.B))
454c590fb32Scz4e      l1Prefetcher.pf_ctrl <> dcache.io.pf_ctrl
455c590fb32Scz4e      l1Prefetcher.l2PfqBusy := io.l2PfqBusy
456c590fb32Scz4e
457c590fb32Scz4e      // stride will train on miss or prefetch hit
458c590fb32Scz4e      for (i <- 0 until LduCnt) {
459c590fb32Scz4e        val source = loadUnits(i).io.prefetch_train_l1
460c590fb32Scz4e        l1Prefetcher.stride_train(i).valid := source.valid && source.bits.isFirstIssue && (
461c590fb32Scz4e          source.bits.miss || isFromStride(source.bits.meta_prefetch)
462c590fb32Scz4e        )
463c590fb32Scz4e        l1Prefetcher.stride_train(i).bits := source.bits
464c590fb32Scz4e        val loadPc = RegNext(io.ooo_to_mem.issueLda(i).bits.uop.pc) // for s1
465c590fb32Scz4e        l1Prefetcher.stride_train(i).bits.uop.pc := Mux(
466c590fb32Scz4e          loadUnits(i).io.s2_ptr_chasing,
467c590fb32Scz4e          RegEnable(loadPc, loadUnits(i).io.s2_prefetch_spec),
468c590fb32Scz4e          RegEnable(RegEnable(loadPc, loadUnits(i).io.s1_prefetch_spec), loadUnits(i).io.s2_prefetch_spec)
469c590fb32Scz4e        )
470c590fb32Scz4e      }
471c590fb32Scz4e      for (i <- 0 until HyuCnt) {
472c590fb32Scz4e        val source = hybridUnits(i).io.prefetch_train_l1
473c590fb32Scz4e        l1Prefetcher.stride_train.drop(LduCnt)(i).valid := source.valid && source.bits.isFirstIssue && (
474c590fb32Scz4e          source.bits.miss || isFromStride(source.bits.meta_prefetch)
475c590fb32Scz4e        )
476c590fb32Scz4e        l1Prefetcher.stride_train.drop(LduCnt)(i).bits := source.bits
477c590fb32Scz4e        l1Prefetcher.stride_train.drop(LduCnt)(i).bits.uop.pc := Mux(
478c590fb32Scz4e          hybridUnits(i).io.ldu_io.s2_ptr_chasing,
479c590fb32Scz4e          RegNext(io.ooo_to_mem.hybridPc(i)),
480c590fb32Scz4e          RegNext(RegNext(io.ooo_to_mem.hybridPc(i)))
481c590fb32Scz4e        )
482c590fb32Scz4e      }
483c590fb32Scz4e      l1Prefetcher
484c590fb32Scz4e  }
485c590fb32Scz4e  // load prefetch to l1 Dcache
486c590fb32Scz4e  l1PrefetcherOpt match {
487c590fb32Scz4e    case Some(pf) => l1_pf_req <> Pipeline(in = pf.io.l1_req, depth = 1, pipe = false, name = Some("pf_queue_to_ldu_reg"))
488c590fb32Scz4e    case None =>
489c590fb32Scz4e      l1_pf_req.valid := false.B
490c590fb32Scz4e      l1_pf_req.bits := DontCare
491c590fb32Scz4e  }
492c590fb32Scz4e  val pf_train_on_hit = RegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_train_on_hit, 2, Some(true.B))
493c590fb32Scz4e
494c590fb32Scz4e  loadUnits.zipWithIndex.map(x => x._1.suggestName("LoadUnit_"+x._2))
495c590fb32Scz4e  storeUnits.zipWithIndex.map(x => x._1.suggestName("StoreUnit_"+x._2))
496c590fb32Scz4e  hybridUnits.zipWithIndex.map(x => x._1.suggestName("HybridUnit_"+x._2))
497c590fb32Scz4e  val atomicsUnit = Module(new AtomicsUnit)
498c590fb32Scz4e
499c590fb32Scz4e
500c590fb32Scz4e  val ldaExeWbReqs = Wire(Vec(LduCnt, Decoupled(new MemExuOutput)))
501c590fb32Scz4e  // atomicsUnit will overwrite the source from ldu if it is about to writeback
502c590fb32Scz4e  val atomicWritebackOverride = Mux(
503c590fb32Scz4e    atomicsUnit.io.out.valid,
504c590fb32Scz4e    atomicsUnit.io.out.bits,
505c590fb32Scz4e    loadUnits(AtomicWBPort).io.ldout.bits
506c590fb32Scz4e  )
507c590fb32Scz4e  ldaExeWbReqs(AtomicWBPort).valid := atomicsUnit.io.out.valid || loadUnits(AtomicWBPort).io.ldout.valid
508c590fb32Scz4e  ldaExeWbReqs(AtomicWBPort).bits  := atomicWritebackOverride
509c590fb32Scz4e  atomicsUnit.io.out.ready := ldaExeWbReqs(AtomicWBPort).ready
510c590fb32Scz4e  loadUnits(AtomicWBPort).io.ldout.ready := ldaExeWbReqs(AtomicWBPort).ready
511c590fb32Scz4e
512c590fb32Scz4e  val st_data_atomics = Seq.tabulate(StdCnt)(i =>
513c590fb32Scz4e    stData(i).valid && FuType.storeIsAMO(stData(i).bits.uop.fuType)
514c590fb32Scz4e  )
515c590fb32Scz4e
516c590fb32Scz4e  // misalignBuffer will overwrite the source from ldu if it is about to writeback
517c590fb32Scz4e  val misalignWritebackOverride = Mux(
518c590fb32Scz4e    loadUnits(MisalignWBPort).io.ldout.valid,
519c590fb32Scz4e    loadUnits(MisalignWBPort).io.ldout.bits,
520c590fb32Scz4e    loadMisalignBuffer.io.writeBack.bits
521c590fb32Scz4e  )
522c590fb32Scz4e  ldaExeWbReqs(MisalignWBPort).valid    := loadMisalignBuffer.io.writeBack.valid || loadUnits(MisalignWBPort).io.ldout.valid
523c590fb32Scz4e  ldaExeWbReqs(MisalignWBPort).bits     := misalignWritebackOverride
524c590fb32Scz4e  loadMisalignBuffer.io.writeBack.ready := ldaExeWbReqs(MisalignWBPort).ready && !loadUnits(MisalignWBPort).io.ldout.valid
525c590fb32Scz4e  loadMisalignBuffer.io.loadOutValid    := loadUnits(MisalignWBPort).io.ldout.valid
526c590fb32Scz4e  loadMisalignBuffer.io.loadVecOutValid := loadUnits(MisalignWBPort).io.vecldout.valid
527c590fb32Scz4e  loadUnits(MisalignWBPort).io.ldout.ready := ldaExeWbReqs(MisalignWBPort).ready
528c590fb32Scz4e  ldaExeWbReqs(MisalignWBPort).bits.isFromLoadUnit := loadUnits(MisalignWBPort).io.ldout.bits.isFromLoadUnit || loadMisalignBuffer.io.writeBack.valid
529c590fb32Scz4e
530c590fb32Scz4e  // loadUnit will overwrite the source from uncache if it is about to writeback
531c590fb32Scz4e  ldaExeWbReqs(UncacheWBPort) <> loadUnits(UncacheWBPort).io.ldout
532c590fb32Scz4e  io.mem_to_ooo.writebackLda <> ldaExeWbReqs
533c590fb32Scz4e  io.mem_to_ooo.writebackSta <> storeUnits.map(_.io.stout)
534c590fb32Scz4e  io.mem_to_ooo.writebackStd.zip(stdExeUnits).foreach {x =>
535c590fb32Scz4e    x._1.bits  := x._2.io.out.bits
536c590fb32Scz4e    // AMOs do not need to write back std now.
537c590fb32Scz4e    x._1.valid := x._2.io.out.fire && !FuType.storeIsAMO(x._2.io.out.bits.uop.fuType)
538c590fb32Scz4e  }
539c590fb32Scz4e  io.mem_to_ooo.writebackHyuLda <> hybridUnits.map(_.io.ldout)
540c590fb32Scz4e  io.mem_to_ooo.writebackHyuSta <> hybridUnits.map(_.io.stout)
541c590fb32Scz4e  io.mem_to_ooo.otherFastWakeup := DontCare
542c590fb32Scz4e  io.mem_to_ooo.otherFastWakeup.drop(HyuCnt).take(LduCnt).zip(loadUnits.map(_.io.fast_uop)).foreach{case(a,b)=> a := b}
543c590fb32Scz4e  io.mem_to_ooo.otherFastWakeup.take(HyuCnt).zip(hybridUnits.map(_.io.ldu_io.fast_uop)).foreach{case(a,b)=> a:=b}
544c590fb32Scz4e  val stOut = io.mem_to_ooo.writebackSta ++ io.mem_to_ooo.writebackHyuSta
545c590fb32Scz4e
546c590fb32Scz4e  // prefetch to l1 req
547c590fb32Scz4e  // Stream's confidence is always 1
548c590fb32Scz4e  // (LduCnt + HyuCnt) l1_pf_reqs ?
549c590fb32Scz4e  loadUnits.foreach(load_unit => {
550c590fb32Scz4e    load_unit.io.prefetch_req.valid <> l1_pf_req.valid
551c590fb32Scz4e    load_unit.io.prefetch_req.bits <> l1_pf_req.bits
552c590fb32Scz4e  })
553c590fb32Scz4e
554c590fb32Scz4e  hybridUnits.foreach(hybrid_unit => {
555c590fb32Scz4e    hybrid_unit.io.ldu_io.prefetch_req.valid <> l1_pf_req.valid
556c590fb32Scz4e    hybrid_unit.io.ldu_io.prefetch_req.bits <> l1_pf_req.bits
557c590fb32Scz4e  })
558c590fb32Scz4e
559c590fb32Scz4e  // NOTE: loadUnits(0) has higher bank conflict and miss queue arb priority than loadUnits(1) and loadUnits(2)
560c590fb32Scz4e  // when loadUnits(1)/loadUnits(2) stage 0 is busy, hw prefetch will never use that pipeline
561c590fb32Scz4e  val LowConfPorts = if (LduCnt == 2) Seq(1) else if (LduCnt == 3) Seq(1, 2) else Seq(0)
562c590fb32Scz4e  LowConfPorts.map{case i => loadUnits(i).io.prefetch_req.bits.confidence := 0.U}
563c590fb32Scz4e  hybridUnits.foreach(hybrid_unit => { hybrid_unit.io.ldu_io.prefetch_req.bits.confidence := 0.U })
564c590fb32Scz4e
565c590fb32Scz4e  val canAcceptHighConfPrefetch = loadUnits.map(_.io.canAcceptHighConfPrefetch) ++
566c590fb32Scz4e                                  hybridUnits.map(_.io.canAcceptLowConfPrefetch)
567c590fb32Scz4e  val canAcceptLowConfPrefetch = loadUnits.map(_.io.canAcceptLowConfPrefetch) ++
568c590fb32Scz4e                                 hybridUnits.map(_.io.canAcceptLowConfPrefetch)
569c590fb32Scz4e  l1_pf_req.ready := (0 until LduCnt + HyuCnt).map{
570c590fb32Scz4e    case i => {
571c590fb32Scz4e      if (LowConfPorts.contains(i)) {
572c590fb32Scz4e        loadUnits(i).io.canAcceptLowConfPrefetch
573c590fb32Scz4e      } else {
574c590fb32Scz4e        Mux(l1_pf_req.bits.confidence === 1.U, canAcceptHighConfPrefetch(i), canAcceptLowConfPrefetch(i))
575c590fb32Scz4e      }
576c590fb32Scz4e    }
577c590fb32Scz4e  }.reduce(_ || _)
578c590fb32Scz4e
579c590fb32Scz4e  // l1 pf fuzzer interface
580c590fb32Scz4e  val DebugEnableL1PFFuzzer = false
581c590fb32Scz4e  if (DebugEnableL1PFFuzzer) {
582c590fb32Scz4e    // l1 pf req fuzzer
583c590fb32Scz4e    val fuzzer = Module(new L1PrefetchFuzzer())
584c590fb32Scz4e    fuzzer.io.vaddr := DontCare
585c590fb32Scz4e    fuzzer.io.paddr := DontCare
586c590fb32Scz4e
587c590fb32Scz4e    // override load_unit prefetch_req
588c590fb32Scz4e    loadUnits.foreach(load_unit => {
589c590fb32Scz4e      load_unit.io.prefetch_req.valid <> fuzzer.io.req.valid
590c590fb32Scz4e      load_unit.io.prefetch_req.bits <> fuzzer.io.req.bits
591c590fb32Scz4e    })
592c590fb32Scz4e
593c590fb32Scz4e    // override hybrid_unit prefetch_req
594c590fb32Scz4e    hybridUnits.foreach(hybrid_unit => {
595c590fb32Scz4e      hybrid_unit.io.ldu_io.prefetch_req.valid <> fuzzer.io.req.valid
596c590fb32Scz4e      hybrid_unit.io.ldu_io.prefetch_req.bits <> fuzzer.io.req.bits
597c590fb32Scz4e    })
598c590fb32Scz4e
599c590fb32Scz4e    fuzzer.io.req.ready := l1_pf_req.ready
600c590fb32Scz4e  }
601c590fb32Scz4e
602c590fb32Scz4e  // TODO: fast load wakeup
603c590fb32Scz4e  val lsq     = Module(new LsqWrapper)
604c590fb32Scz4e  val sbuffer = Module(new Sbuffer)
605c590fb32Scz4e  // if you wants to stress test dcache store, use FakeSbuffer
606c590fb32Scz4e  // val sbuffer = Module(new FakeSbuffer) // out of date now
607c590fb32Scz4e  io.mem_to_ooo.stIssuePtr := lsq.io.issuePtrExt
608c590fb32Scz4e
609c590fb32Scz4e  dcache.io.hartId := io.hartId
610c590fb32Scz4e  lsq.io.hartId := io.hartId
611c590fb32Scz4e  sbuffer.io.hartId := io.hartId
612c590fb32Scz4e  atomicsUnit.io.hartId := io.hartId
613c590fb32Scz4e
614c590fb32Scz4e  dcache.io.lqEmpty := lsq.io.lqEmpty
615c590fb32Scz4e
616c590fb32Scz4e  // load/store prefetch to l2 cache
617c590fb32Scz4e  prefetcherOpt.foreach(sms_pf => {
618c590fb32Scz4e    l1PrefetcherOpt.foreach(l1_pf => {
619c590fb32Scz4e      val sms_pf_to_l2 = DelayNWithValid(sms_pf.io.l2_req, 2)
620c590fb32Scz4e      val l1_pf_to_l2 = DelayNWithValid(l1_pf.io.l2_req, 2)
621c590fb32Scz4e
622c590fb32Scz4e      outer.l2_pf_sender_opt.get.out.head._1.addr_valid := sms_pf_to_l2.valid || l1_pf_to_l2.valid
623c590fb32Scz4e      outer.l2_pf_sender_opt.get.out.head._1.addr := Mux(l1_pf_to_l2.valid, l1_pf_to_l2.bits.addr, sms_pf_to_l2.bits.addr)
624c590fb32Scz4e      outer.l2_pf_sender_opt.get.out.head._1.pf_source := Mux(l1_pf_to_l2.valid, l1_pf_to_l2.bits.source, sms_pf_to_l2.bits.source)
625c590fb32Scz4e      outer.l2_pf_sender_opt.get.out.head._1.l2_pf_en := RegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l2_pf_enable, 2, Some(true.B))
626c590fb32Scz4e
627c590fb32Scz4e      val l2_trace = Wire(new LoadPfDbBundle)
628c590fb32Scz4e      l2_trace.paddr := outer.l2_pf_sender_opt.get.out.head._1.addr
629c590fb32Scz4e      val table = ChiselDB.createTable(s"L2PrefetchTrace$hartId", new LoadPfDbBundle, basicDB = false)
630c590fb32Scz4e      table.log(l2_trace, l1_pf_to_l2.valid, "StreamPrefetchTrace", clock, reset)
631c590fb32Scz4e      table.log(l2_trace, !l1_pf_to_l2.valid && sms_pf_to_l2.valid, "L2PrefetchTrace", clock, reset)
632c590fb32Scz4e
633c590fb32Scz4e      val l1_pf_to_l3 = ValidIODelay(l1_pf.io.l3_req, 4)
634c590fb32Scz4e      outer.l3_pf_sender_opt.foreach(_.out.head._1.addr_valid := l1_pf_to_l3.valid)
635c590fb32Scz4e      outer.l3_pf_sender_opt.foreach(_.out.head._1.addr := l1_pf_to_l3.bits)
636c590fb32Scz4e      outer.l3_pf_sender_opt.foreach(_.out.head._1.l2_pf_en := RegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l2_pf_enable, 4, Some(true.B)))
637c590fb32Scz4e
638c590fb32Scz4e      val l3_trace = Wire(new LoadPfDbBundle)
639c590fb32Scz4e      l3_trace.paddr := outer.l3_pf_sender_opt.map(_.out.head._1.addr).getOrElse(0.U)
640c590fb32Scz4e      val l3_table = ChiselDB.createTable(s"L3PrefetchTrace$hartId", new LoadPfDbBundle, basicDB = false)
641c590fb32Scz4e      l3_table.log(l3_trace, l1_pf_to_l3.valid, "StreamPrefetchTrace", clock, reset)
642c590fb32Scz4e
643c590fb32Scz4e      XSPerfAccumulate("prefetch_fire_l2", outer.l2_pf_sender_opt.get.out.head._1.addr_valid)
644c590fb32Scz4e      XSPerfAccumulate("prefetch_fire_l3", outer.l3_pf_sender_opt.map(_.out.head._1.addr_valid).getOrElse(false.B))
645c590fb32Scz4e      XSPerfAccumulate("l1pf_fire_l2", l1_pf_to_l2.valid)
646c590fb32Scz4e      XSPerfAccumulate("sms_fire_l2", !l1_pf_to_l2.valid && sms_pf_to_l2.valid)
647c590fb32Scz4e      XSPerfAccumulate("sms_block_by_l1pf", l1_pf_to_l2.valid && sms_pf_to_l2.valid)
648c590fb32Scz4e    })
649c590fb32Scz4e  })
650c590fb32Scz4e
651c590fb32Scz4e  // ptw
652c590fb32Scz4e  val sfence = RegNext(RegNext(io.ooo_to_mem.sfence))
653c590fb32Scz4e  val tlbcsr = RegNext(RegNext(io.ooo_to_mem.tlbCsr))
654c590fb32Scz4e  private val ptw = outer.ptw.module
655c590fb32Scz4e  private val ptw_to_l2_buffer = outer.ptw_to_l2_buffer.module
656c590fb32Scz4e  private val l1d_to_l2_buffer = outer.l1d_to_l2_buffer.module
657c590fb32Scz4e  ptw.io.hartId := io.hartId
658c590fb32Scz4e  ptw.io.sfence <> sfence
659c590fb32Scz4e  ptw.io.csr.tlb <> tlbcsr
660c590fb32Scz4e  ptw.io.csr.distribute_csr <> csrCtrl.distribute_csr
661c590fb32Scz4e
662c590fb32Scz4e  val perfEventsPTW = if (!coreParams.softPTW) {
663c590fb32Scz4e    ptw.getPerfEvents
664c590fb32Scz4e  } else {
665c590fb32Scz4e    Seq()
666c590fb32Scz4e  }
667c590fb32Scz4e
668c590fb32Scz4e  // dtlb
669c590fb32Scz4e  val dtlb_ld_tlb_ld = Module(new TLBNonBlock(LduCnt + HyuCnt + 1, 2, ldtlbParams))
670c590fb32Scz4e  val dtlb_st_tlb_st = Module(new TLBNonBlock(StaCnt, 1, sttlbParams))
671c590fb32Scz4e  val dtlb_prefetch_tlb_prefetch = Module(new TLBNonBlock(2, 2, pftlbParams))
672c590fb32Scz4e  val dtlb_ld = Seq(dtlb_ld_tlb_ld.io)
673c590fb32Scz4e  val dtlb_st = Seq(dtlb_st_tlb_st.io)
674c590fb32Scz4e  val dtlb_prefetch = Seq(dtlb_prefetch_tlb_prefetch.io)
675c590fb32Scz4e  /* tlb vec && constant variable */
676c590fb32Scz4e  val dtlb = dtlb_ld ++ dtlb_st ++ dtlb_prefetch
677c590fb32Scz4e  val (dtlb_ld_idx, dtlb_st_idx, dtlb_pf_idx) = (0, 1, 2)
678c590fb32Scz4e  val TlbSubSizeVec = Seq(LduCnt + HyuCnt + 1, StaCnt, 2) // (load + hyu + stream pf, store, sms+l2bop)
679c590fb32Scz4e  val DTlbSize = TlbSubSizeVec.sum
680c590fb32Scz4e  val TlbStartVec = TlbSubSizeVec.scanLeft(0)(_ + _).dropRight(1)
681c590fb32Scz4e  val TlbEndVec = TlbSubSizeVec.scanLeft(0)(_ + _).drop(1)
682c590fb32Scz4e
683c590fb32Scz4e  val ptwio = Wire(new VectorTlbPtwIO(DTlbSize))
684c590fb32Scz4e  val dtlb_reqs = dtlb.map(_.requestor).flatten
685c590fb32Scz4e  val dtlb_pmps = dtlb.map(_.pmp).flatten
686c590fb32Scz4e  dtlb.map(_.hartId := io.hartId)
687c590fb32Scz4e  dtlb.map(_.sfence := sfence)
688c590fb32Scz4e  dtlb.map(_.csr := tlbcsr)
689c590fb32Scz4e  dtlb.map(_.flushPipe.map(a => a := false.B)) // non-block doesn't need
690c590fb32Scz4e  dtlb.map(_.redirect := redirect)
691c590fb32Scz4e  if (refillBothTlb) {
692c590fb32Scz4e    require(ldtlbParams.outReplace == sttlbParams.outReplace)
693c590fb32Scz4e    require(ldtlbParams.outReplace == hytlbParams.outReplace)
694c590fb32Scz4e    require(ldtlbParams.outReplace == pftlbParams.outReplace)
695c590fb32Scz4e    require(ldtlbParams.outReplace)
696c590fb32Scz4e
697c590fb32Scz4e    val replace = Module(new TlbReplace(DTlbSize, ldtlbParams))
698c590fb32Scz4e    replace.io.apply_sep(dtlb_ld.map(_.replace) ++ dtlb_st.map(_.replace) ++ dtlb_prefetch.map(_.replace), ptwio.resp.bits.data.s1.entry.tag)
699c590fb32Scz4e  } else {
700c590fb32Scz4e    // TODO: there will be bugs in TlbReplace when outReplace enable, since the order of Hyu is not right.
701c590fb32Scz4e    if (ldtlbParams.outReplace) {
702c590fb32Scz4e      val replace_ld = Module(new TlbReplace(LduCnt + 1, ldtlbParams))
703c590fb32Scz4e      replace_ld.io.apply_sep(dtlb_ld.map(_.replace), ptwio.resp.bits.data.s1.entry.tag)
704c590fb32Scz4e    }
705c590fb32Scz4e    if (hytlbParams.outReplace) {
706c590fb32Scz4e      val replace_hy = Module(new TlbReplace(HyuCnt, hytlbParams))
707c590fb32Scz4e      replace_hy.io.apply_sep(dtlb_ld.map(_.replace), ptwio.resp.bits.data.s1.entry.tag)
708c590fb32Scz4e    }
709c590fb32Scz4e    if (sttlbParams.outReplace) {
710c590fb32Scz4e      val replace_st = Module(new TlbReplace(StaCnt, sttlbParams))
711c590fb32Scz4e      replace_st.io.apply_sep(dtlb_st.map(_.replace), ptwio.resp.bits.data.s1.entry.tag)
712c590fb32Scz4e    }
713c590fb32Scz4e    if (pftlbParams.outReplace) {
714c590fb32Scz4e      val replace_pf = Module(new TlbReplace(2, pftlbParams))
715c590fb32Scz4e      replace_pf.io.apply_sep(dtlb_prefetch.map(_.replace), ptwio.resp.bits.data.s1.entry.tag)
716c590fb32Scz4e    }
717c590fb32Scz4e  }
718c590fb32Scz4e
719c590fb32Scz4e  val ptw_resp_next = RegEnable(ptwio.resp.bits, ptwio.resp.valid)
720c590fb32Scz4e  val ptw_resp_v = RegNext(ptwio.resp.valid && !(sfence.valid && tlbcsr.satp.changed && tlbcsr.vsatp.changed && tlbcsr.hgatp.changed), init = false.B)
721c590fb32Scz4e  ptwio.resp.ready := true.B
722c590fb32Scz4e
723c590fb32Scz4e  val tlbreplay = WireInit(VecInit(Seq.fill(LdExuCnt)(false.B)))
724c590fb32Scz4e  val tlbreplay_reg = GatedValidRegNext(tlbreplay)
725c590fb32Scz4e  val dtlb_ld0_tlbreplay_reg = GatedValidRegNext(dtlb_ld(0).tlbreplay)
726c590fb32Scz4e
727c590fb32Scz4e  if (backendParams.debugEn){ dontTouch(tlbreplay) }
728c590fb32Scz4e
729c590fb32Scz4e  for (i <- 0 until LdExuCnt) {
730c590fb32Scz4e    tlbreplay(i) := dtlb_ld(0).ptw.req(i).valid && ptw_resp_next.vector(0) && ptw_resp_v &&
731c590fb32Scz4e      ptw_resp_next.data.hit(dtlb_ld(0).ptw.req(i).bits.vpn, tlbcsr.satp.asid, tlbcsr.vsatp.asid, tlbcsr.hgatp.vmid, allType = true, ignoreAsid = true)
732c590fb32Scz4e  }
733c590fb32Scz4e
734c590fb32Scz4e  dtlb.flatMap(a => a.ptw.req)
735c590fb32Scz4e    .zipWithIndex
736c590fb32Scz4e    .foreach{ case (tlb, i) =>
737c590fb32Scz4e      tlb.ready := ptwio.req(i).ready
738c590fb32Scz4e      ptwio.req(i).bits := tlb.bits
739c590fb32Scz4e    val vector_hit = if (refillBothTlb) Cat(ptw_resp_next.vector).orR
740c590fb32Scz4e      else if (i < TlbEndVec(dtlb_ld_idx)) Cat(ptw_resp_next.vector.slice(TlbStartVec(dtlb_ld_idx), TlbEndVec(dtlb_ld_idx))).orR
741c590fb32Scz4e      else if (i < TlbEndVec(dtlb_st_idx)) Cat(ptw_resp_next.vector.slice(TlbStartVec(dtlb_st_idx), TlbEndVec(dtlb_st_idx))).orR
742c590fb32Scz4e      else                                 Cat(ptw_resp_next.vector.slice(TlbStartVec(dtlb_pf_idx), TlbEndVec(dtlb_pf_idx))).orR
743c590fb32Scz4e    ptwio.req(i).valid := tlb.valid && !(ptw_resp_v && vector_hit && ptw_resp_next.data.hit(tlb.bits.vpn, tlbcsr.satp.asid, tlbcsr.vsatp.asid, tlbcsr.hgatp.vmid, allType = true, ignoreAsid = true))
744c590fb32Scz4e  }
745c590fb32Scz4e  dtlb.foreach(_.ptw.resp.bits := ptw_resp_next.data)
746c590fb32Scz4e  if (refillBothTlb) {
747c590fb32Scz4e    dtlb.foreach(_.ptw.resp.valid := ptw_resp_v && Cat(ptw_resp_next.vector).orR)
748c590fb32Scz4e  } else {
749c590fb32Scz4e    dtlb_ld.foreach(_.ptw.resp.valid := ptw_resp_v && Cat(ptw_resp_next.vector.slice(TlbStartVec(dtlb_ld_idx), TlbEndVec(dtlb_ld_idx))).orR)
750c590fb32Scz4e    dtlb_st.foreach(_.ptw.resp.valid := ptw_resp_v && Cat(ptw_resp_next.vector.slice(TlbStartVec(dtlb_st_idx), TlbEndVec(dtlb_st_idx))).orR)
751c590fb32Scz4e    dtlb_prefetch.foreach(_.ptw.resp.valid := ptw_resp_v && Cat(ptw_resp_next.vector.slice(TlbStartVec(dtlb_pf_idx), TlbEndVec(dtlb_pf_idx))).orR)
752c590fb32Scz4e  }
753c590fb32Scz4e  dtlb_ld.foreach(_.ptw.resp.bits.getGpa := Cat(ptw_resp_next.getGpa.take(LduCnt + HyuCnt + 1)).orR)
754c590fb32Scz4e  dtlb_st.foreach(_.ptw.resp.bits.getGpa := Cat(ptw_resp_next.getGpa.slice(LduCnt + HyuCnt + 1, LduCnt + HyuCnt + 1 + StaCnt)).orR)
755c590fb32Scz4e  dtlb_prefetch.foreach(_.ptw.resp.bits.getGpa := Cat(ptw_resp_next.getGpa.drop(LduCnt + HyuCnt + 1 + StaCnt)).orR)
756c590fb32Scz4e
757c590fb32Scz4e  val dtlbRepeater  = PTWNewFilter(ldtlbParams.fenceDelay, ptwio, ptw.io.tlb(1), sfence, tlbcsr, l2tlbParams.dfilterSize)
758c590fb32Scz4e  val itlbRepeater3 = PTWRepeaterNB(passReady = false, itlbParams.fenceDelay, io.fetch_to_mem.itlb, ptw.io.tlb(0), sfence, tlbcsr)
759c590fb32Scz4e
760c590fb32Scz4e  lsq.io.debugTopDown.robHeadMissInDTlb := dtlbRepeater.io.rob_head_miss_in_tlb
761c590fb32Scz4e
762c590fb32Scz4e  // pmp
763c590fb32Scz4e  val pmp = Module(new PMP())
764c590fb32Scz4e  pmp.io.distribute_csr <> csrCtrl.distribute_csr
765c590fb32Scz4e
766c590fb32Scz4e  val pmp_checkers = Seq.fill(DTlbSize)(Module(new PMPChecker(4, leaveHitMux = true)))
767c590fb32Scz4e  val pmp_check = pmp_checkers.map(_.io)
768c590fb32Scz4e  for ((p,d) <- pmp_check zip dtlb_pmps) {
7698882eb68SXin Tian    if (HasBitmapCheck) {
7708882eb68SXin Tian      p.apply(tlbcsr.mbmc.CMODE.asBool, tlbcsr.priv.dmode, pmp.io.pmp, pmp.io.pma, d)
7718882eb68SXin Tian    } else {
772c590fb32Scz4e      p.apply(tlbcsr.priv.dmode, pmp.io.pmp, pmp.io.pma, d)
7738882eb68SXin Tian    }
774c590fb32Scz4e    require(p.req.bits.size.getWidth == d.bits.size.getWidth)
775c590fb32Scz4e  }
776c590fb32Scz4e
777c590fb32Scz4e  for (i <- 0 until LduCnt) {
778c590fb32Scz4e    io.debug_ls.debugLsInfo(i) := loadUnits(i).io.debug_ls
779c590fb32Scz4e  }
780c590fb32Scz4e  for (i <- 0 until HyuCnt) {
781c590fb32Scz4e    io.debug_ls.debugLsInfo.drop(LduCnt)(i) := hybridUnits(i).io.ldu_io.debug_ls
782c590fb32Scz4e  }
783c590fb32Scz4e  for (i <- 0 until StaCnt) {
784c590fb32Scz4e    io.debug_ls.debugLsInfo.drop(LduCnt + HyuCnt)(i) := storeUnits(i).io.debug_ls
785c590fb32Scz4e  }
786c590fb32Scz4e  for (i <- 0 until HyuCnt) {
787c590fb32Scz4e    io.debug_ls.debugLsInfo.drop(LduCnt + HyuCnt + StaCnt)(i) := hybridUnits(i).io.stu_io.debug_ls
788c590fb32Scz4e  }
789c590fb32Scz4e
790c590fb32Scz4e  io.mem_to_ooo.lsTopdownInfo := loadUnits.map(_.io.lsTopdownInfo) ++ hybridUnits.map(_.io.ldu_io.lsTopdownInfo)
791c590fb32Scz4e
792c590fb32Scz4e  // trigger
793c590fb32Scz4e  val tdata = RegInit(VecInit(Seq.fill(TriggerNum)(0.U.asTypeOf(new MatchTriggerIO))))
794c590fb32Scz4e  val tEnable = RegInit(VecInit(Seq.fill(TriggerNum)(false.B)))
795c590fb32Scz4e  tEnable := csrCtrl.mem_trigger.tEnableVec
796c590fb32Scz4e  when(csrCtrl.mem_trigger.tUpdate.valid) {
797c590fb32Scz4e    tdata(csrCtrl.mem_trigger.tUpdate.bits.addr) := csrCtrl.mem_trigger.tUpdate.bits.tdata
798c590fb32Scz4e  }
799c590fb32Scz4e  val triggerCanRaiseBpExp = csrCtrl.mem_trigger.triggerCanRaiseBpExp
800c590fb32Scz4e  val debugMode = csrCtrl.mem_trigger.debugMode
801c590fb32Scz4e
802c590fb32Scz4e  val backendTriggerTimingVec = VecInit(tdata.map(_.timing))
803c590fb32Scz4e  val backendTriggerChainVec = VecInit(tdata.map(_.chain))
804c590fb32Scz4e
805c590fb32Scz4e  XSDebug(tEnable.asUInt.orR, "Debug Mode: At least one store trigger is enabled\n")
806c590fb32Scz4e  for (j <- 0 until TriggerNum)
807c590fb32Scz4e    PrintTriggerInfo(tEnable(j), tdata(j))
808c590fb32Scz4e
809c590fb32Scz4e  // The segment instruction is executed atomically.
810c590fb32Scz4e  // After the segment instruction directive starts executing, no other instructions should be executed.
811c590fb32Scz4e  val vSegmentFlag = RegInit(false.B)
812c590fb32Scz4e
813c590fb32Scz4e  when(GatedValidRegNext(vSegmentUnit.io.in.fire)) {
814c590fb32Scz4e    vSegmentFlag := true.B
815c590fb32Scz4e  }.elsewhen(GatedValidRegNext(vSegmentUnit.io.uopwriteback.valid)) {
816c590fb32Scz4e    vSegmentFlag := false.B
817c590fb32Scz4e  }
818c590fb32Scz4e
819522c7f99SAnzo  val misalign_allow_spec = RegInit(true.B)
820522c7f99SAnzo  val ldu_rollback_with_misalign_nack = loadUnits.map(ldu =>
821522c7f99SAnzo    ldu.io.lsq.ldin.bits.isFrmMisAlignBuf && ldu.io.lsq.ldin.bits.rep_info.rar_nack && ldu.io.rollback.valid
822522c7f99SAnzo  ).reduce(_ || _)
823522c7f99SAnzo  when (ldu_rollback_with_misalign_nack) {
824522c7f99SAnzo    misalign_allow_spec := false.B
825522c7f99SAnzo  } .elsewhen(lsq.io.rarValidCount < (LoadQueueRARSize - 4).U) {
826522c7f99SAnzo    misalign_allow_spec := true.B
827522c7f99SAnzo  }
828522c7f99SAnzo
829c590fb32Scz4e  // LoadUnit
830c590fb32Scz4e  val correctMissTrain = Constantin.createRecord(s"CorrectMissTrain$hartId", initValue = false)
831c590fb32Scz4e
832c590fb32Scz4e  for (i <- 0 until LduCnt) {
833c590fb32Scz4e    loadUnits(i).io.redirect <> redirect
834522c7f99SAnzo    loadUnits(i).io.misalign_allow_spec := misalign_allow_spec
835c590fb32Scz4e
836c590fb32Scz4e    // get input form dispatch
837c590fb32Scz4e    loadUnits(i).io.ldin <> io.ooo_to_mem.issueLda(i)
838c590fb32Scz4e    loadUnits(i).io.feedback_slow <> io.mem_to_ooo.ldaIqFeedback(i).feedbackSlow
839c590fb32Scz4e    io.mem_to_ooo.ldaIqFeedback(i).feedbackFast := DontCare
840c590fb32Scz4e    loadUnits(i).io.correctMissTrain := correctMissTrain
841c590fb32Scz4e    io.mem_to_ooo.ldCancel.drop(HyuCnt)(i) := loadUnits(i).io.ldCancel
842c590fb32Scz4e    io.mem_to_ooo.wakeup.drop(HyuCnt)(i) := loadUnits(i).io.wakeup
843c590fb32Scz4e
844c590fb32Scz4e    // vector
845c590fb32Scz4e    if (i < VlduCnt) {
846c590fb32Scz4e      loadUnits(i).io.vecldout.ready := false.B
847c590fb32Scz4e    } else {
848c590fb32Scz4e      loadUnits(i).io.vecldin.valid := false.B
849c590fb32Scz4e      loadUnits(i).io.vecldin.bits := DontCare
850c590fb32Scz4e      loadUnits(i).io.vecldout.ready := false.B
851c590fb32Scz4e    }
852c590fb32Scz4e
853c590fb32Scz4e    // fast replay
854c590fb32Scz4e    loadUnits(i).io.fast_rep_in <> loadUnits(i).io.fast_rep_out
855c590fb32Scz4e
856c590fb32Scz4e    // SoftPrefetch to frontend (prefetch.i)
857c590fb32Scz4e    loadUnits(i).io.ifetchPrefetch <> io.ifetchPrefetch(i)
858c590fb32Scz4e
859c590fb32Scz4e    // dcache access
860c590fb32Scz4e    loadUnits(i).io.dcache <> dcache.io.lsu.load(i)
861c590fb32Scz4e    if(i == 0){
862c590fb32Scz4e      vSegmentUnit.io.rdcache := DontCare
863c590fb32Scz4e      dcache.io.lsu.load(i).req.valid := loadUnits(i).io.dcache.req.valid || vSegmentUnit.io.rdcache.req.valid
864c590fb32Scz4e      dcache.io.lsu.load(i).req.bits  := Mux1H(Seq(
865c590fb32Scz4e        vSegmentUnit.io.rdcache.req.valid -> vSegmentUnit.io.rdcache.req.bits,
866c590fb32Scz4e        loadUnits(i).io.dcache.req.valid -> loadUnits(i).io.dcache.req.bits
867c590fb32Scz4e      ))
868c590fb32Scz4e      vSegmentUnit.io.rdcache.req.ready := dcache.io.lsu.load(i).req.ready
869c590fb32Scz4e    }
870c590fb32Scz4e
871c590fb32Scz4e    // Dcache requests must also be preempted by the segment.
872c590fb32Scz4e    when(vSegmentFlag){
873c590fb32Scz4e      loadUnits(i).io.dcache.req.ready             := false.B // Dcache is preempted.
874c590fb32Scz4e
875c590fb32Scz4e      dcache.io.lsu.load(0).pf_source              := vSegmentUnit.io.rdcache.pf_source
876c590fb32Scz4e      dcache.io.lsu.load(0).s1_paddr_dup_lsu       := vSegmentUnit.io.rdcache.s1_paddr_dup_lsu
877c590fb32Scz4e      dcache.io.lsu.load(0).s1_paddr_dup_dcache    := vSegmentUnit.io.rdcache.s1_paddr_dup_dcache
878c590fb32Scz4e      dcache.io.lsu.load(0).s1_kill                := vSegmentUnit.io.rdcache.s1_kill
879c590fb32Scz4e      dcache.io.lsu.load(0).s2_kill                := vSegmentUnit.io.rdcache.s2_kill
880c590fb32Scz4e      dcache.io.lsu.load(0).s0_pc                  := vSegmentUnit.io.rdcache.s0_pc
881c590fb32Scz4e      dcache.io.lsu.load(0).s1_pc                  := vSegmentUnit.io.rdcache.s1_pc
882c590fb32Scz4e      dcache.io.lsu.load(0).s2_pc                  := vSegmentUnit.io.rdcache.s2_pc
883c590fb32Scz4e      dcache.io.lsu.load(0).is128Req               := vSegmentUnit.io.rdcache.is128Req
884c590fb32Scz4e    }.otherwise {
885c590fb32Scz4e      loadUnits(i).io.dcache.req.ready             := dcache.io.lsu.load(i).req.ready
886c590fb32Scz4e
887c590fb32Scz4e      dcache.io.lsu.load(0).pf_source              := loadUnits(0).io.dcache.pf_source
888c590fb32Scz4e      dcache.io.lsu.load(0).s1_paddr_dup_lsu       := loadUnits(0).io.dcache.s1_paddr_dup_lsu
889c590fb32Scz4e      dcache.io.lsu.load(0).s1_paddr_dup_dcache    := loadUnits(0).io.dcache.s1_paddr_dup_dcache
890c590fb32Scz4e      dcache.io.lsu.load(0).s1_kill                := loadUnits(0).io.dcache.s1_kill
891c590fb32Scz4e      dcache.io.lsu.load(0).s2_kill                := loadUnits(0).io.dcache.s2_kill
892c590fb32Scz4e      dcache.io.lsu.load(0).s0_pc                  := loadUnits(0).io.dcache.s0_pc
893c590fb32Scz4e      dcache.io.lsu.load(0).s1_pc                  := loadUnits(0).io.dcache.s1_pc
894c590fb32Scz4e      dcache.io.lsu.load(0).s2_pc                  := loadUnits(0).io.dcache.s2_pc
895c590fb32Scz4e      dcache.io.lsu.load(0).is128Req               := loadUnits(0).io.dcache.is128Req
896c590fb32Scz4e    }
897c590fb32Scz4e
898c590fb32Scz4e    // forward
899c590fb32Scz4e    loadUnits(i).io.lsq.forward <> lsq.io.forward(i)
900c590fb32Scz4e    loadUnits(i).io.sbuffer <> sbuffer.io.forward(i)
901c590fb32Scz4e    loadUnits(i).io.ubuffer <> uncache.io.forward(i)
902c590fb32Scz4e    loadUnits(i).io.tl_d_channel := dcache.io.lsu.forward_D(i)
903c590fb32Scz4e    loadUnits(i).io.forward_mshr <> dcache.io.lsu.forward_mshr(i)
904c590fb32Scz4e    // ld-ld violation check
905c590fb32Scz4e    loadUnits(i).io.lsq.ldld_nuke_query <> lsq.io.ldu.ldld_nuke_query(i)
906c590fb32Scz4e    loadUnits(i).io.lsq.stld_nuke_query <> lsq.io.ldu.stld_nuke_query(i)
907522c7f99SAnzo    // loadqueue old ptr
908522c7f99SAnzo    loadUnits(i).io.lsq.lqDeqPtr := lsq.io.lqDeqPtr
909c590fb32Scz4e    loadUnits(i).io.csrCtrl       <> csrCtrl
910c590fb32Scz4e    // dcache refill req
911c590fb32Scz4e  // loadUnits(i).io.refill           <> delayedDcacheRefill
912c590fb32Scz4e    // dtlb
913c590fb32Scz4e    loadUnits(i).io.tlb <> dtlb_reqs.take(LduCnt)(i)
914c590fb32Scz4e    if(i == 0 ){ // port 0 assign to vsegmentUnit
915c590fb32Scz4e      val vsegmentDtlbReqValid = vSegmentUnit.io.dtlb.req.valid // segment tlb resquest need to delay 1 cycle
916c590fb32Scz4e      dtlb_reqs.take(LduCnt)(i).req.valid := loadUnits(i).io.tlb.req.valid || RegNext(vsegmentDtlbReqValid)
917c590fb32Scz4e      vSegmentUnit.io.dtlb.req.ready      := dtlb_reqs.take(LduCnt)(i).req.ready
918c590fb32Scz4e      dtlb_reqs.take(LduCnt)(i).req.bits  := ParallelPriorityMux(Seq(
919c590fb32Scz4e        RegNext(vsegmentDtlbReqValid)     -> RegEnable(vSegmentUnit.io.dtlb.req.bits, vsegmentDtlbReqValid),
920c590fb32Scz4e        loadUnits(i).io.tlb.req.valid     -> loadUnits(i).io.tlb.req.bits
921c590fb32Scz4e      ))
922c590fb32Scz4e    }
923c590fb32Scz4e    // pmp
924c590fb32Scz4e    loadUnits(i).io.pmp <> pmp_check(i).resp
925c590fb32Scz4e    // st-ld violation query
926c590fb32Scz4e    val stld_nuke_query = storeUnits.map(_.io.stld_nuke_query) ++ hybridUnits.map(_.io.stu_io.stld_nuke_query)
927c590fb32Scz4e    for (s <- 0 until StorePipelineWidth) {
928c590fb32Scz4e      loadUnits(i).io.stld_nuke_query(s) := stld_nuke_query(s)
929c590fb32Scz4e    }
930c590fb32Scz4e    loadUnits(i).io.lq_rep_full <> lsq.io.lq_rep_full
931c590fb32Scz4e    // load prefetch train
932c590fb32Scz4e    prefetcherOpt.foreach(pf => {
933c590fb32Scz4e      // sms will train on all miss load sources
934c590fb32Scz4e      val source = loadUnits(i).io.prefetch_train
935c590fb32Scz4e      pf.io.ld_in(i).valid := Mux(pf_train_on_hit,
936c590fb32Scz4e        source.valid,
937c590fb32Scz4e        source.valid && source.bits.isFirstIssue && source.bits.miss
938c590fb32Scz4e      )
939c590fb32Scz4e      pf.io.ld_in(i).bits := source.bits
940c590fb32Scz4e      val loadPc = RegNext(io.ooo_to_mem.issueLda(i).bits.uop.pc) // for s1
941c590fb32Scz4e      pf.io.ld_in(i).bits.uop.pc := Mux(
942c590fb32Scz4e        loadUnits(i).io.s2_ptr_chasing,
943c590fb32Scz4e        RegEnable(loadPc, loadUnits(i).io.s2_prefetch_spec),
944c590fb32Scz4e        RegEnable(RegEnable(loadPc, loadUnits(i).io.s1_prefetch_spec), loadUnits(i).io.s2_prefetch_spec)
945c590fb32Scz4e      )
946c590fb32Scz4e    })
947c590fb32Scz4e    l1PrefetcherOpt.foreach(pf => {
948c590fb32Scz4e      // stream will train on all load sources
949c590fb32Scz4e      val source = loadUnits(i).io.prefetch_train_l1
950c590fb32Scz4e      pf.io.ld_in(i).valid := source.valid && source.bits.isFirstIssue
951c590fb32Scz4e      pf.io.ld_in(i).bits := source.bits
952c590fb32Scz4e    })
953c590fb32Scz4e
954c590fb32Scz4e    // load to load fast forward: load(i) prefers data(i)
955c590fb32Scz4e    val l2l_fwd_out = loadUnits.map(_.io.l2l_fwd_out) ++ hybridUnits.map(_.io.ldu_io.l2l_fwd_out)
956c590fb32Scz4e    val fastPriority = (i until LduCnt + HyuCnt) ++ (0 until i)
957c590fb32Scz4e    val fastValidVec = fastPriority.map(j => l2l_fwd_out(j).valid)
958c590fb32Scz4e    val fastDataVec = fastPriority.map(j => l2l_fwd_out(j).data)
959c590fb32Scz4e    val fastErrorVec = fastPriority.map(j => l2l_fwd_out(j).dly_ld_err)
960c590fb32Scz4e    val fastMatchVec = fastPriority.map(j => io.ooo_to_mem.loadFastMatch(i)(j))
961c590fb32Scz4e    loadUnits(i).io.l2l_fwd_in.valid := VecInit(fastValidVec).asUInt.orR
962c590fb32Scz4e    loadUnits(i).io.l2l_fwd_in.data := ParallelPriorityMux(fastValidVec, fastDataVec)
963c590fb32Scz4e    loadUnits(i).io.l2l_fwd_in.dly_ld_err := ParallelPriorityMux(fastValidVec, fastErrorVec)
964c590fb32Scz4e    val fastMatch = ParallelPriorityMux(fastValidVec, fastMatchVec)
965c590fb32Scz4e    loadUnits(i).io.ld_fast_match := fastMatch
966c590fb32Scz4e    loadUnits(i).io.ld_fast_imm := io.ooo_to_mem.loadFastImm(i)
967c590fb32Scz4e    loadUnits(i).io.ld_fast_fuOpType := io.ooo_to_mem.loadFastFuOpType(i)
968c590fb32Scz4e    loadUnits(i).io.replay <> lsq.io.replay(i)
969c590fb32Scz4e
970c590fb32Scz4e    val l2_hint = RegNext(io.l2_hint)
971c590fb32Scz4e
972c590fb32Scz4e    // L2 Hint for DCache
973c590fb32Scz4e    dcache.io.l2_hint <> l2_hint
974c590fb32Scz4e
975c590fb32Scz4e    loadUnits(i).io.l2_hint <> l2_hint
976c590fb32Scz4e    loadUnits(i).io.tlb_hint.id := dtlbRepeater.io.hint.get.req(i).id
977c590fb32Scz4e    loadUnits(i).io.tlb_hint.full := dtlbRepeater.io.hint.get.req(i).full ||
978c590fb32Scz4e      tlbreplay_reg(i) || dtlb_ld0_tlbreplay_reg(i)
979c590fb32Scz4e
980c590fb32Scz4e    // passdown to lsq (load s2)
981c590fb32Scz4e    lsq.io.ldu.ldin(i) <> loadUnits(i).io.lsq.ldin
982c590fb32Scz4e    if (i == UncacheWBPort) {
983c590fb32Scz4e      lsq.io.ldout(i) <> loadUnits(i).io.lsq.uncache
984c590fb32Scz4e    } else {
985c590fb32Scz4e      lsq.io.ldout(i).ready := true.B
986c590fb32Scz4e      loadUnits(i).io.lsq.uncache.valid := false.B
987c590fb32Scz4e      loadUnits(i).io.lsq.uncache.bits := DontCare
988c590fb32Scz4e    }
989c590fb32Scz4e    lsq.io.ld_raw_data(i) <> loadUnits(i).io.lsq.ld_raw_data
990c590fb32Scz4e    lsq.io.ncOut(i) <> loadUnits(i).io.lsq.nc_ldin
991c590fb32Scz4e    lsq.io.l2_hint.valid := l2_hint.valid
992c590fb32Scz4e    lsq.io.l2_hint.bits.sourceId := l2_hint.bits.sourceId
993c590fb32Scz4e    lsq.io.l2_hint.bits.isKeyword := l2_hint.bits.isKeyword
994c590fb32Scz4e
995c590fb32Scz4e    lsq.io.tlb_hint <> dtlbRepeater.io.hint.get
996c590fb32Scz4e
997c590fb32Scz4e    // connect misalignBuffer
9984ec1f462Scz4e    loadMisalignBuffer.io.enq(i) <> loadUnits(i).io.misalign_enq
999c590fb32Scz4e
1000c590fb32Scz4e    if (i == MisalignWBPort) {
1001c590fb32Scz4e      loadUnits(i).io.misalign_ldin  <> loadMisalignBuffer.io.splitLoadReq
1002c590fb32Scz4e      loadUnits(i).io.misalign_ldout <> loadMisalignBuffer.io.splitLoadResp
1003c590fb32Scz4e    } else {
1004c590fb32Scz4e      loadUnits(i).io.misalign_ldin.valid := false.B
1005c590fb32Scz4e      loadUnits(i).io.misalign_ldin.bits := DontCare
1006c590fb32Scz4e    }
1007c590fb32Scz4e
1008c590fb32Scz4e    // alter writeback exception info
1009c590fb32Scz4e    io.mem_to_ooo.s3_delayed_load_error(i) := loadUnits(i).io.s3_dly_ld_err
1010c590fb32Scz4e
1011c590fb32Scz4e    // update mem dependency predictor
1012c590fb32Scz4e    // io.memPredUpdate(i) := DontCare
1013c590fb32Scz4e
1014c590fb32Scz4e    // --------------------------------
1015c590fb32Scz4e    // Load Triggers
1016c590fb32Scz4e    // --------------------------------
1017c590fb32Scz4e    loadUnits(i).io.fromCsrTrigger.tdataVec := tdata
1018c590fb32Scz4e    loadUnits(i).io.fromCsrTrigger.tEnableVec := tEnable
1019c590fb32Scz4e    loadUnits(i).io.fromCsrTrigger.triggerCanRaiseBpExp := triggerCanRaiseBpExp
1020c590fb32Scz4e    loadUnits(i).io.fromCsrTrigger.debugMode := debugMode
1021c590fb32Scz4e  }
1022c590fb32Scz4e
  // Hybrid units serve as either a load pipe or a store pipe. Their load-side
  // resources are indexed after the dedicated load units (LduCnt + i) and their
  // store-side resources after the dedicated store units (StaCnt + i).
  for (i <- 0 until HyuCnt) {
    hybridUnits(i).io.redirect <> redirect

    // get input from dispatch
    hybridUnits(i).io.lsin <> io.ooo_to_mem.issueHya(i)
    hybridUnits(i).io.feedback_slow <> io.mem_to_ooo.hyuIqFeedback(i).feedbackSlow
    hybridUnits(i).io.feedback_fast <> io.mem_to_ooo.hyuIqFeedback(i).feedbackFast
    hybridUnits(i).io.correctMissTrain := correctMissTrain
    // Hybrid units occupy the first HyuCnt cancel/wakeup slots toward the backend.
    io.mem_to_ooo.ldCancel.take(HyuCnt)(i) := hybridUnits(i).io.ldu_io.ldCancel
    io.mem_to_ooo.wakeup.take(HyuCnt)(i) := hybridUnits(i).io.ldu_io.wakeup

    // ------------------------------------
    //  Load Port
    // ------------------------------------
    // fast replay: the unit's own fast-replay output loops straight back to its input
    hybridUnits(i).io.ldu_io.fast_rep_in <> hybridUnits(i).io.ldu_io.fast_rep_out

    // get input from dispatch
    hybridUnits(i).io.ldu_io.dcache <> dcache.io.lsu.load(LduCnt + i)
    hybridUnits(i).io.stu_io.dcache <> dcache.io.lsu.sta(StaCnt + i)

    // dcache access
    hybridUnits(i).io.ldu_io.lsq.forward <> lsq.io.forward(LduCnt + i)
    // forward (store-to-load data from sbuffer / uncache buffer)
    hybridUnits(i).io.ldu_io.sbuffer <> sbuffer.io.forward(LduCnt + i)
    hybridUnits(i).io.ldu_io.ubuffer <> uncache.io.forward(LduCnt + i)
    // hybridUnits(i).io.ldu_io.vec_forward <> vsFlowQueue.io.forward(LduCnt + i)
    // Vector forwarding path is currently disconnected (see commented line above).
    hybridUnits(i).io.ldu_io.vec_forward := DontCare
    hybridUnits(i).io.ldu_io.tl_d_channel := dcache.io.lsu.forward_D(LduCnt + i)
    hybridUnits(i).io.ldu_io.forward_mshr <> dcache.io.lsu.forward_mshr(LduCnt + i)
    // ld-ld violation check
    hybridUnits(i).io.ldu_io.lsq.ldld_nuke_query <> lsq.io.ldu.ldld_nuke_query(LduCnt + i)
    hybridUnits(i).io.ldu_io.lsq.stld_nuke_query <> lsq.io.ldu.stld_nuke_query(LduCnt + i)
    hybridUnits(i).io.csrCtrl <> csrCtrl
    // dcache refill req / TLB-miss hint; "full" also raises on delayed replay flags
    hybridUnits(i).io.ldu_io.tlb_hint.id := dtlbRepeater.io.hint.get.req(LduCnt + i).id
    hybridUnits(i).io.ldu_io.tlb_hint.full := dtlbRepeater.io.hint.get.req(LduCnt + i).full ||
      tlbreplay_reg(LduCnt + i) || dtlb_ld0_tlbreplay_reg(LduCnt + i)

    // dtlb
    hybridUnits(i).io.tlb <> dtlb_ld.head.requestor(LduCnt + i)
    // pmp
    hybridUnits(i).io.pmp <> pmp_check.drop(LduCnt)(i).resp
    // st-ld violation query: snoop every store pipe (dedicated + hybrid)
    val stld_nuke_query = VecInit(storeUnits.map(_.io.stld_nuke_query) ++ hybridUnits.map(_.io.stu_io.stld_nuke_query))
    hybridUnits(i).io.ldu_io.stld_nuke_query := stld_nuke_query
    hybridUnits(i).io.ldu_io.lq_rep_full <> lsq.io.lq_rep_full
    // load prefetch train: optionally filter training to first-issue misses
    prefetcherOpt.foreach(pf => {
      val source = hybridUnits(i).io.prefetch_train
      pf.io.ld_in(LduCnt + i).valid := Mux(pf_train_on_hit,
        source.valid,
        source.valid && source.bits.isFirstIssue && source.bits.miss
      )
      pf.io.ld_in(LduCnt + i).bits := source.bits
      // On pointer chasing the PC arrives one cycle early, so bypass the RegNext.
      pf.io.ld_in(LduCnt + i).bits.uop.pc := Mux(hybridUnits(i).io.ldu_io.s2_ptr_chasing, io.ooo_to_mem.hybridPc(i), RegNext(io.ooo_to_mem.hybridPc(i)))
    })
    l1PrefetcherOpt.foreach(pf => {
      // stream will train on all load sources
      val source = hybridUnits(i).io.prefetch_train_l1
      pf.io.ld_in(LduCnt + i).valid := source.valid && source.bits.isFirstIssue &&
                                       FuType.isLoad(source.bits.uop.fuType)
      pf.io.ld_in(LduCnt + i).bits := source.bits
      // L1 prefetcher takes no store training from hybrid units.
      pf.io.st_in(StaCnt + i).valid := false.B
      pf.io.st_in(StaCnt + i).bits := DontCare
    })
    // Store-side prefetch training from the same train port, gated on store fuType.
    prefetcherOpt.foreach(pf => {
      val source = hybridUnits(i).io.prefetch_train
      pf.io.st_in(StaCnt + i).valid := Mux(pf_train_on_hit,
        source.valid,
        source.valid && source.bits.isFirstIssue && source.bits.miss
      ) && FuType.isStore(source.bits.uop.fuType)
      pf.io.st_in(StaCnt + i).bits := source.bits
      pf.io.st_in(StaCnt + i).bits.uop.pc := RegNext(io.ooo_to_mem.hybridPc(i))
    })

    // load to load fast forward: load(i) prefers data(i)
    // fastPriority rotates the source order so each consumer prefers its own index first.
    val l2l_fwd_out = loadUnits.map(_.io.l2l_fwd_out) ++ hybridUnits.map(_.io.ldu_io.l2l_fwd_out)
    val fastPriority = (LduCnt + i until LduCnt + HyuCnt) ++ (0 until LduCnt + i)
    val fastValidVec = fastPriority.map(j => l2l_fwd_out(j).valid)
    val fastDataVec = fastPriority.map(j => l2l_fwd_out(j).data)
    val fastErrorVec = fastPriority.map(j => l2l_fwd_out(j).dly_ld_err)
    val fastMatchVec = fastPriority.map(j => io.ooo_to_mem.loadFastMatch(LduCnt + i)(j))
    hybridUnits(i).io.ldu_io.l2l_fwd_in.valid := VecInit(fastValidVec).asUInt.orR
    hybridUnits(i).io.ldu_io.l2l_fwd_in.data := ParallelPriorityMux(fastValidVec, fastDataVec)
    hybridUnits(i).io.ldu_io.l2l_fwd_in.dly_ld_err := ParallelPriorityMux(fastValidVec, fastErrorVec)
    val fastMatch = ParallelPriorityMux(fastValidVec, fastMatchVec)
    hybridUnits(i).io.ldu_io.ld_fast_match := fastMatch
    hybridUnits(i).io.ldu_io.ld_fast_imm := io.ooo_to_mem.loadFastImm(LduCnt + i)
    hybridUnits(i).io.ldu_io.ld_fast_fuOpType := io.ooo_to_mem.loadFastFuOpType(LduCnt + i)
    hybridUnits(i).io.ldu_io.replay <> lsq.io.replay(LduCnt + i)
    hybridUnits(i).io.ldu_io.l2_hint <> io.l2_hint

    // uncache writeback / raw data ports share the LSQ slots after the load units
    lsq.io.ldout.drop(LduCnt)(i) <> hybridUnits(i).io.ldu_io.lsq.uncache
    lsq.io.ld_raw_data.drop(LduCnt)(i) <> hybridUnits(i).io.ldu_io.lsq.ld_raw_data


    // passdown to lsq (load s2); non-cacheable load-in path is unused here
    hybridUnits(i).io.ldu_io.lsq.nc_ldin.valid := false.B
    hybridUnits(i).io.ldu_io.lsq.nc_ldin.bits := DontCare
    lsq.io.ldu.ldin(LduCnt + i) <> hybridUnits(i).io.ldu_io.lsq.ldin
    // Lsq to sta unit
    lsq.io.sta.storeMaskIn(StaCnt + i) <> hybridUnits(i).io.stu_io.st_mask_out

    // Lsq to std unit's rs; suppress valid for atomic store data
    lsq.io.std.storeDataIn(StaCnt + i) := stData(StaCnt + i)
    lsq.io.std.storeDataIn(StaCnt + i).valid := stData(StaCnt + i).valid && !st_data_atomics(StaCnt + i)
    // prefetch
    hybridUnits(i).io.stu_io.prefetch_req <> sbuffer.io.store_prefetch(StaCnt + i)

    io.mem_to_ooo.s3_delayed_load_error(LduCnt + i) := hybridUnits(i).io.ldu_io.s3_dly_ld_err

    // ------------------------------------
    //  Store Port
    // ------------------------------------
    // Hybrid store-address traffic uses the last HyuCnt LSQ store-address ports.
    hybridUnits(i).io.stu_io.lsq <> lsq.io.sta.storeAddrIn.takeRight(HyuCnt)(i)
    hybridUnits(i).io.stu_io.lsq_replenish <> lsq.io.sta.storeAddrInRe.takeRight(HyuCnt)(i)

    lsq.io.sta.storeMaskIn.takeRight(HyuCnt)(i) <> hybridUnits(i).io.stu_io.st_mask_out
    io.mem_to_ooo.stIn.takeRight(HyuCnt)(i).valid := hybridUnits(i).io.stu_io.issue.valid
    io.mem_to_ooo.stIn.takeRight(HyuCnt)(i).bits := hybridUnits(i).io.stu_io.issue.bits

    // ------------------------------------
    //  Vector Store Port
    // ------------------------------------
    hybridUnits(i).io.vec_stu_io.isFirstIssue := true.B

    // -------------------------
    // Store Triggers (debug/trace watchpoints from CSR)
    // -------------------------
    hybridUnits(i).io.fromCsrTrigger.tdataVec := tdata
    hybridUnits(i).io.fromCsrTrigger.tEnableVec := tEnable
    hybridUnits(i).io.fromCsrTrigger.triggerCanRaiseBpExp := triggerCanRaiseBpExp
    hybridUnits(i).io.fromCsrTrigger.debugMode := debugMode
  }
1159c590fb32Scz4e
  // misalignBuffer: both misalign buffers mirror the same ROB commit/pending
  // state that the LSQ receives (see the identical lsq.io.rob wiring below).
  loadMisalignBuffer.io.redirect                <> redirect
  loadMisalignBuffer.io.rob.lcommit             := io.ooo_to_mem.lsqio.lcommit
  loadMisalignBuffer.io.rob.scommit             := io.ooo_to_mem.lsqio.scommit
  loadMisalignBuffer.io.rob.pendingMMIOld       := io.ooo_to_mem.lsqio.pendingMMIOld
  loadMisalignBuffer.io.rob.pendingld           := io.ooo_to_mem.lsqio.pendingld
  loadMisalignBuffer.io.rob.pendingst           := io.ooo_to_mem.lsqio.pendingst
  loadMisalignBuffer.io.rob.pendingVst          := io.ooo_to_mem.lsqio.pendingVst
  loadMisalignBuffer.io.rob.commit              := io.ooo_to_mem.lsqio.commit
  loadMisalignBuffer.io.rob.pendingPtr          := io.ooo_to_mem.lsqio.pendingPtr
  loadMisalignBuffer.io.rob.pendingPtrNext      := io.ooo_to_mem.lsqio.pendingPtrNext

  // Back-pressure / speculation-control signals between LSQ and the load misalign buffer.
  lsq.io.loadMisalignFull                       := loadMisalignBuffer.io.loadMisalignFull
  lsq.io.misalignAllowSpec                      := misalign_allow_spec

  storeMisalignBuffer.io.redirect               <> redirect
  storeMisalignBuffer.io.rob.lcommit            := io.ooo_to_mem.lsqio.lcommit
  storeMisalignBuffer.io.rob.scommit            := io.ooo_to_mem.lsqio.scommit
  storeMisalignBuffer.io.rob.pendingMMIOld      := io.ooo_to_mem.lsqio.pendingMMIOld
  storeMisalignBuffer.io.rob.pendingld          := io.ooo_to_mem.lsqio.pendingld
  storeMisalignBuffer.io.rob.pendingst          := io.ooo_to_mem.lsqio.pendingst
  storeMisalignBuffer.io.rob.pendingVst         := io.ooo_to_mem.lsqio.pendingVst
  storeMisalignBuffer.io.rob.commit             := io.ooo_to_mem.lsqio.commit
  storeMisalignBuffer.io.rob.pendingPtr         := io.ooo_to_mem.lsqio.pendingPtr
  storeMisalignBuffer.io.rob.pendingPtrNext     := io.ooo_to_mem.lsqio.pendingPtrNext

  // Store-queue control channel for misaligned stores.
  lsq.io.maControl                              <> storeMisalignBuffer.io.sqControl

  // Cache-maintenance operation request/response between LSQ and dcache.
  lsq.io.cmoOpReq <> dcache.io.cmoOpReq
  lsq.io.cmoOpResp <> dcache.io.cmoOpResp
1190c590fb32Scz4e
  // Prefetcher DTLB ports: the stream (L1) prefetcher sits after all load-type
  // requestors; the SMS prefetcher and the L2-to-L1 channel use the pf range.
  val StreamDTLBPortIndex = TlbStartVec(dtlb_ld_idx) + LduCnt + HyuCnt
  val PrefetcherDTLBPortIndex = TlbStartVec(dtlb_pf_idx)
  val L2toL1DLBPortIndex = TlbStartVec(dtlb_pf_idx) + 1
  prefetcherOpt match {
  case Some(pf) =>
    dtlb_reqs(PrefetcherDTLBPortIndex) <> pf.io.tlb_req
    pf.io.pmp_resp := pmp_check(PrefetcherDTLBPortIndex).resp
  case None =>
    // No prefetcher configured: tie the port off (never request, always drain responses).
    dtlb_reqs(PrefetcherDTLBPortIndex) := DontCare
    dtlb_reqs(PrefetcherDTLBPortIndex).req.valid := false.B
    dtlb_reqs(PrefetcherDTLBPortIndex).resp.ready := true.B
  }
  l1PrefetcherOpt match {
    case Some(pf) =>
      dtlb_reqs(StreamDTLBPortIndex) <> pf.io.tlb_req
      pf.io.pmp_resp := pmp_check(StreamDTLBPortIndex).resp
    case None =>
        // Same tie-off pattern for the stream prefetcher port.
        dtlb_reqs(StreamDTLBPortIndex) := DontCare
        dtlb_reqs(StreamDTLBPortIndex).req.valid := false.B
        dtlb_reqs(StreamDTLBPortIndex).resp.ready := true.B
  }
  // L2 TLB-request channel: responses are always accepted.
  dtlb_reqs(L2toL1DLBPortIndex) <> io.l2_tlb_req
  dtlb_reqs(L2toL1DLBPortIndex).resp.ready := true.B
  io.l2_pmp_resp := pmp_check(L2toL1DLBPortIndex).resp
1216c590fb32Scz4e
1217c590fb32Scz4e  // StoreUnit
1218c590fb32Scz4e  for (i <- 0 until StdCnt) {
1219c590fb32Scz4e    stdExeUnits(i).io.flush <> redirect
1220c590fb32Scz4e    stdExeUnits(i).io.in.valid := io.ooo_to_mem.issueStd(i).valid
1221c590fb32Scz4e    io.ooo_to_mem.issueStd(i).ready := stdExeUnits(i).io.in.ready
1222c590fb32Scz4e    stdExeUnits(i).io.in.bits := io.ooo_to_mem.issueStd(i).bits
1223c590fb32Scz4e  }
1224c590fb32Scz4e
  // Store-address units: per-pipe wiring of TLB/PMP, LSQ, misalign buffer,
  // prefetch training, and vector issue. Note the when/otherwise blocks rely
  // on Chisel last-connect semantics — keep statement order intact.
  for (i <- 0 until StaCnt) {
    val stu = storeUnits(i)

    stu.io.redirect      <> redirect
    stu.io.csrCtrl       <> csrCtrl
    stu.io.dcache        <> dcache.io.lsu.sta(i)
    stu.io.feedback_slow <> io.mem_to_ooo.staIqFeedback(i).feedbackSlow
    stu.io.stin         <> io.ooo_to_mem.issueSta(i)
    stu.io.lsq          <> lsq.io.sta.storeAddrIn(i)
    stu.io.lsq_replenish <> lsq.io.sta.storeAddrInRe(i)
    // dtlb
    stu.io.tlb          <> dtlb_st.head.requestor(i)
    // PMP ports for stores come after all load/hybrid ports plus one (presumably
    // the stream-prefetcher port) — TODO confirm against the pmp_check layout.
    stu.io.pmp          <> pmp_check(LduCnt + HyuCnt + 1 + i).resp

    // -------------------------
    // Store Triggers (debug/trace watchpoints from CSR)
    // -------------------------
    stu.io.fromCsrTrigger.tdataVec := tdata
    stu.io.fromCsrTrigger.tEnableVec := tEnable
    stu.io.fromCsrTrigger.triggerCanRaiseBpExp := triggerCanRaiseBpExp
    stu.io.fromCsrTrigger.debugMode := debugMode

    // prefetch
    stu.io.prefetch_req <> sbuffer.io.store_prefetch(i)

    // store unit does not need fast feedback
    io.mem_to_ooo.staIqFeedback(i).feedbackFast := DontCare

    // Lsq to sta unit
    lsq.io.sta.storeMaskIn(i) <> stu.io.st_mask_out

    // connect misalignBuffer
    storeMisalignBuffer.io.enq(i) <> stu.io.misalign_enq

    // Only pipe 0 services split requests from the store misalign buffer.
    if (i == 0) {
      stu.io.misalign_stin  <> storeMisalignBuffer.io.splitStoreReq
      stu.io.misalign_stout <> storeMisalignBuffer.io.splitStoreResp
    } else {
      stu.io.misalign_stin.valid := false.B
      stu.io.misalign_stin.bits := DontCare
    }

    // Lsq to std unit's rs: vector store data (vstd) takes priority over scalar
    // store data on the shared storeDataIn port; scalar data stalls meanwhile.
    if (i < VstuCnt) {
      when (vsSplit(i).io.vstd.get.valid) {
        lsq.io.std.storeDataIn(i).valid := true.B
        lsq.io.std.storeDataIn(i).bits := vsSplit(i).io.vstd.get.bits
        stData(i).ready := false.B
      }.otherwise {
        lsq.io.std.storeDataIn(i).valid := stData(i).valid && !st_data_atomics(i)
        lsq.io.std.storeDataIn(i).bits.uop := stData(i).bits.uop
        lsq.io.std.storeDataIn(i).bits.data := stData(i).bits.data
        lsq.io.std.storeDataIn(i).bits.mask.map(_ := 0.U)
        lsq.io.std.storeDataIn(i).bits.vdIdx.map(_ := 0.U)
        lsq.io.std.storeDataIn(i).bits.vdIdxInField.map(_ := 0.U)
        stData(i).ready := true.B
      }
    } else {
        // Pipes without a vector-split partner always forward scalar store data.
        lsq.io.std.storeDataIn(i).valid := stData(i).valid && !st_data_atomics(i)
        lsq.io.std.storeDataIn(i).bits.uop := stData(i).bits.uop
        lsq.io.std.storeDataIn(i).bits.data := stData(i).bits.data
        lsq.io.std.storeDataIn(i).bits.mask.map(_ := 0.U)
        lsq.io.std.storeDataIn(i).bits.vdIdx.map(_ := 0.U)
        lsq.io.std.storeDataIn(i).bits.vdIdxInField.map(_ := 0.U)
        stData(i).ready := true.B
    }
    // NOTE(review): these two lines touch ALL storeDataIn ports and are
    // re-applied on every loop iteration — harmless, but could be hoisted.
    lsq.io.std.storeDataIn.map(_.bits.debug := 0.U.asTypeOf(new DebugBundle))
    lsq.io.std.storeDataIn.foreach(_.bits.isFromLoadUnit := DontCare)


    // store prefetch train
    l1PrefetcherOpt.foreach(pf => {
      // stream will train on all load sources
      pf.io.st_in(i).valid := false.B
      pf.io.st_in(i).bits := DontCare
    })

    // SMS prefetcher store training; optionally restricted to first-issue misses.
    prefetcherOpt.foreach(pf => {
      pf.io.st_in(i).valid := Mux(pf_train_on_hit,
        stu.io.prefetch_train.valid,
        stu.io.prefetch_train.valid && stu.io.prefetch_train.bits.isFirstIssue && (
          stu.io.prefetch_train.bits.miss
          )
      )
      pf.io.st_in(i).bits := stu.io.prefetch_train.bits
      // Two-stage capture so the PC stays aligned with the s2 training payload.
      pf.io.st_in(i).bits.uop.pc := RegEnable(RegEnable(io.ooo_to_mem.storePc(i), stu.io.s1_prefetch_spec), stu.io.s2_prefetch_spec)
    })

    // 1. sync issue info to store set LFST
    // 2. when store issue, broadcast issued sqPtr to wake up the following insts
    // io.stIn(i).valid := io.issue(exuParameters.LduCnt + i).valid
    // io.stIn(i).bits := io.issue(exuParameters.LduCnt + i).bits
    io.mem_to_ooo.stIn(i).valid := stu.io.issue.valid
    io.mem_to_ooo.stIn(i).bits := stu.io.issue.bits

    // Store writeback is never back-pressured here (stOut arbitration handled below).
    stu.io.stout.ready := true.B

    // vector
    if (i < VstuCnt) {
      stu.io.vecstin <> vsSplit(i).io.out
      // vsFlowQueue.io.pipeFeedback(i) <> stu.io.vec_feedback_slow // need connect
    } else {
      stu.io.vecstin.valid := false.B
      stu.io.vecstin.bits := DontCare
      stu.io.vecstout.ready := false.B
    }
    stu.io.vec_isFirstIssue := true.B // TODO
  }
1333c590fb32Scz4e
  // Merge the two StoreQueue writeback sources (mmio and cbo.zero) into one
  // channel; the assert guarantees they are mutually exclusive, with cbo.zero
  // given mux priority.
  val sqOtherStout = WireInit(0.U.asTypeOf(DecoupledIO(new MemExuOutput)))
  sqOtherStout.valid := lsq.io.mmioStout.valid || lsq.io.cboZeroStout.valid
  sqOtherStout.bits  := Mux(lsq.io.cboZeroStout.valid, lsq.io.cboZeroStout.bits, lsq.io.mmioStout.bits)
  assert(!(lsq.io.mmioStout.valid && lsq.io.cboZeroStout.valid), "Cannot writeback to mmio and cboZero at the same time.")

  // Store writeback by StoreQueue:
  //   1. cbo Zero
  //   2. mmio
  // Currently, the two should not be present at the same time, so simply make cbo zero a higher priority.
  val otherStout = WireInit(0.U.asTypeOf(lsq.io.mmioStout))
  NewPipelineConnect(
    sqOtherStout, otherStout, otherStout.fire,
    false.B,
    Option("otherStoutConnect")
  )
  // otherStout only drains into stOut(0) when pipe 0 has no regular store writeback;
  // the when-block below overrides the default ready := false.B (last-connect).
  otherStout.ready := false.B
  when (otherStout.valid && !storeUnits(0).io.stout.valid) {
    stOut(0).valid := true.B
    stOut(0).bits  := otherStout.bits
    otherStout.ready := true.B
  }
  // Both StoreQueue sources share the pipelined channel's back-pressure.
  lsq.io.mmioStout.ready := sqOtherStout.ready
  lsq.io.cboZeroStout.ready := sqOtherStout.ready

  // vec mmio writeback
  lsq.io.vecmmioStout.ready := false.B

  // miss align buffer will overwrite stOut(0): lowest priority — only when
  // neither otherStout nor pipe 0's scalar/vector writeback is active.
  val storeMisalignCanWriteBack = !otherStout.valid && !storeUnits(0).io.stout.valid && !storeUnits(0).io.vecstout.valid
  storeMisalignBuffer.io.writeBack.ready := storeMisalignCanWriteBack
  storeMisalignBuffer.io.storeOutValid := storeUnits(0).io.stout.valid
  storeMisalignBuffer.io.storeVecOutValid := storeUnits(0).io.vecstout.valid
  when (storeMisalignBuffer.io.writeBack.valid && storeMisalignCanWriteBack) {
    stOut(0).valid := true.B
    stOut(0).bits  := storeMisalignBuffer.io.writeBack.bits
  }
1370c590fb32Scz4e
  // Uncache: outstanding-write capability is CSR-controlled and mirrored to the LSQ.
  uncache.io.enableOutstanding := io.ooo_to_mem.csrCtrl.uncache_write_outstanding_enable
  uncache.io.hartId := io.hartId
  lsq.io.uncacheOutstanding := io.ooo_to_mem.csrCtrl.uncache_write_outstanding_enable

  // Lsq <-> ROB: export mmio/uop status, import commit counters and pending state.
  io.mem_to_ooo.lsqio.mmio       := lsq.io.rob.mmio
  io.mem_to_ooo.lsqio.uop        := lsq.io.rob.uop
  lsq.io.rob.lcommit             := io.ooo_to_mem.lsqio.lcommit
  lsq.io.rob.scommit             := io.ooo_to_mem.lsqio.scommit
  lsq.io.rob.pendingMMIOld       := io.ooo_to_mem.lsqio.pendingMMIOld
  lsq.io.rob.pendingld           := io.ooo_to_mem.lsqio.pendingld
  lsq.io.rob.pendingst           := io.ooo_to_mem.lsqio.pendingst
  lsq.io.rob.pendingVst          := io.ooo_to_mem.lsqio.pendingVst
  lsq.io.rob.commit              := io.ooo_to_mem.lsqio.commit
  lsq.io.rob.pendingPtr          := io.ooo_to_mem.lsqio.pendingPtr
  lsq.io.rob.pendingPtrNext      := io.ooo_to_mem.lsqio.pendingPtrNext

  //  lsq.io.rob            <> io.lsqio.rob
  lsq.io.enq            <> io.ooo_to_mem.enqLsq
  lsq.io.brqRedirect    <> redirect
1392c590fb32Scz4e
1393c590fb32Scz4e  //  violation rollback
1394c590fb32Scz4e  def selectOldestRedirect(xs: Seq[Valid[Redirect]]): Vec[Bool] = {
1395c590fb32Scz4e    val compareVec = (0 until xs.length).map(i => (0 until i).map(j => isAfter(xs(j).bits.robIdx, xs(i).bits.robIdx)))
1396c590fb32Scz4e    val resultOnehot = VecInit((0 until xs.length).map(i => Cat((0 until xs.length).map(j =>
1397c590fb32Scz4e      (if (j < i) !xs(j).valid || compareVec(i)(j)
1398c590fb32Scz4e      else if (j == i) xs(i).valid
1399c590fb32Scz4e      else !xs(j).valid || !compareVec(j)(i))
1400c590fb32Scz4e    )).andR))
1401c590fb32Scz4e    resultOnehot
1402c590fb32Scz4e  }
  // Gather all rollback (memory-violation redirect) sources and forward only
  // the oldest one to the backend.
  val allRedirect = loadUnits.map(_.io.rollback) ++ hybridUnits.map(_.io.ldu_io.rollback) ++ lsq.io.nack_rollback ++ lsq.io.nuke_rollback
  val oldestOneHot = selectOldestRedirect(allRedirect)
  val oldestRedirect = WireDefault(Mux1H(oldestOneHot, allRedirect))
  // memory replay would not cause IAF/IPF/IGPF
  oldestRedirect.bits.cfiUpdate.backendIAF := false.B
  oldestRedirect.bits.cfiUpdate.backendIPF := false.B
  oldestRedirect.bits.cfiUpdate.backendIGPF := false.B
  io.mem_to_ooo.memoryViolation := oldestRedirect
  io.mem_to_ooo.lsqio.lqCanAccept  := lsq.io.lqCanAccept
  io.mem_to_ooo.lsqio.sqCanAccept  := lsq.io.sqCanAccept
1413c590fb32Scz4e
  // lsq.io.uncache        <> uncache.io.lsq
  // Arbitration FSM between scalar (LSQ) and vector uncache traffic. All ports
  // get inactive defaults first; the when-blocks below override them
  // (Chisel last-connect), so statement order here is load-bearing.
  val s_idle :: s_scalar_uncache :: s_vector_uncache :: Nil = Enum(3)
  val uncacheState = RegInit(s_idle)
  val uncacheReq = Wire(Decoupled(new UncacheWordReq))
  val uncacheIdResp = uncache.io.lsq.idResp
  val uncacheResp = Wire(Decoupled(new UncacheWordResp))

  // Default tie-offs; overridden below when a request/response is routed.
  uncacheReq.bits := DontCare
  uncacheReq.valid := false.B
  uncacheReq.ready := false.B
  uncacheResp.bits := DontCare
  uncacheResp.valid := false.B
  uncacheResp.ready := false.B
  lsq.io.uncache.req.ready := false.B
  lsq.io.uncache.idResp.valid := false.B
  lsq.io.uncache.idResp.bits := DontCare
  lsq.io.uncache.resp.valid := false.B
  lsq.io.uncache.resp.bits := DontCare

  switch (uncacheState) {
    is (s_idle) {
      when (uncacheReq.fire) {
        when (lsq.io.uncache.req.valid) {
          // Non-NC requests (or NC with outstanding writes disabled) must wait
          // for their response before the next request is issued.
          when (!lsq.io.uncache.req.bits.nc || !io.ooo_to_mem.csrCtrl.uncache_write_outstanding_enable) {
            uncacheState := s_scalar_uncache
          }
        }.otherwise {
          // val isStore = vsFlowQueue.io.uncache.req.bits.cmd === MemoryOpConstants.M_XWR
          when (!io.ooo_to_mem.csrCtrl.uncache_write_outstanding_enable) {
            uncacheState := s_vector_uncache
          }
        }
      }
    }

    is (s_scalar_uncache) {
      when (uncacheResp.fire) {
        uncacheState := s_idle
      }
    }

    is (s_vector_uncache) {
      when (uncacheResp.fire) {
        uncacheState := s_idle
      }
    }
  }

  // Route the scalar request through; responses route back either immediately
  // (outstanding mode) or only while the scalar transaction is in flight.
  when (lsq.io.uncache.req.valid) {
    uncacheReq <> lsq.io.uncache.req
  }
  when (io.ooo_to_mem.csrCtrl.uncache_write_outstanding_enable) {
    lsq.io.uncache.resp <> uncacheResp
    lsq.io.uncache.idResp <> uncacheIdResp
  }.otherwise {
    when (uncacheState === s_scalar_uncache) {
      lsq.io.uncache.resp <> uncacheResp
      lsq.io.uncache.idResp <> uncacheIdResp
    }
  }
  // delay dcache refill for 1 cycle for better timing
  AddPipelineReg(uncacheReq, uncache.io.lsq.req, false.B)
  AddPipelineReg(uncache.io.lsq.resp, uncacheResp, false.B)
1477c590fb32Scz4e
  //lsq.io.refill         := delayedDcacheRefill
  lsq.io.release        := dcache.io.lsu.release
  lsq.io.lqCancelCnt <> io.mem_to_ooo.lqCancelCnt
  lsq.io.sqCancelCnt <> io.mem_to_ooo.sqCancelCnt
  lsq.io.lqDeq <> io.mem_to_ooo.lqDeq
  lsq.io.sqDeq <> io.mem_to_ooo.sqDeq
  // Todo: assign these
  io.mem_to_ooo.sqDeqPtr := lsq.io.sqDeqPtr
  io.mem_to_ooo.lqDeqPtr := lsq.io.lqDeqPtr
  lsq.io.tl_d_channel <> dcache.io.lsu.tl_d_channel

  // LSQ to store buffer. After the bulk connect, port 0 is overridden
  // (last-connect) so the segment unit can share it; Mux1H relies on the
  // two valids being mutually exclusive.
  lsq.io.sbuffer        <> sbuffer.io.in
  sbuffer.io.in(0).valid := lsq.io.sbuffer(0).valid || vSegmentUnit.io.sbuffer.valid
  sbuffer.io.in(0).bits  := Mux1H(Seq(
    vSegmentUnit.io.sbuffer.valid -> vSegmentUnit.io.sbuffer.bits,
    lsq.io.sbuffer(0).valid       -> lsq.io.sbuffer(0).bits
  ))
  vSegmentUnit.io.sbuffer.ready := sbuffer.io.in(0).ready
  lsq.io.sqEmpty        <> sbuffer.io.sqempty
  dcache.io.force_write := lsq.io.force_write
1499c590fb32Scz4e
  // Default initialization for when difftest is disabled.
  sbuffer.io.vecDifftestInfo      := DontCare
  lsq.io.sbufferVecDifftestInfo   := DontCare
  vSegmentUnit.io.vecDifftestInfo := DontCare
  if (env.EnableDifftest) {
    sbuffer.io.vecDifftestInfo .zipWithIndex.map{ case (sbufferPort, index) =>
      if (index == 0) {
        // Port 0 mirrors the datapath sharing above: the segment unit's
        // difftest info takes priority over the LSQ's on this port.
        val vSegmentDifftestValid = vSegmentUnit.io.vecDifftestInfo.valid
        sbufferPort.valid := Mux(vSegmentDifftestValid, vSegmentUnit.io.vecDifftestInfo.valid, lsq.io.sbufferVecDifftestInfo(0).valid)
        sbufferPort.bits  := Mux(vSegmentDifftestValid, vSegmentUnit.io.vecDifftestInfo.bits, lsq.io.sbufferVecDifftestInfo(0).bits)

        vSegmentUnit.io.vecDifftestInfo.ready  := sbufferPort.ready
        lsq.io.sbufferVecDifftestInfo(0).ready := sbufferPort.ready
      } else {
         sbufferPort <> lsq.io.sbufferVecDifftestInfo(index)
      }
    }
  }
1518c590fb32Scz4e
1519c590fb32Scz4e  // lsq.io.vecStoreRetire <> vsFlowQueue.io.sqRelease
1520c590fb32Scz4e  // lsq.io.vecWriteback.valid := vlWrapper.io.uopWriteback.fire &&
1521c590fb32Scz4e  //   vlWrapper.io.uopWriteback.bits.uop.vpu.lastUop
1522c590fb32Scz4e  // lsq.io.vecWriteback.bits := vlWrapper.io.uopWriteback.bits
1523c590fb32Scz4e
1524c590fb32Scz4e  // vector
1525c590fb32Scz4e  val vLoadCanAccept  = (0 until VlduCnt).map(i =>
1526c590fb32Scz4e    vlSplit(i).io.in.ready && VlduType.isVecLd(io.ooo_to_mem.issueVldu(i).bits.uop.fuOpType)
1527c590fb32Scz4e  )
1528c590fb32Scz4e  val vStoreCanAccept = (0 until VstuCnt).map(i =>
1529c590fb32Scz4e    vsSplit(i).io.in.ready && VstuType.isVecSt(io.ooo_to_mem.issueVldu(i).bits.uop.fuOpType)
1530c590fb32Scz4e  )
1531c590fb32Scz4e  val isSegment     = io.ooo_to_mem.issueVldu.head.valid && isVsegls(io.ooo_to_mem.issueVldu.head.bits.uop.fuType)
1532c590fb32Scz4e  val isFixVlUop    = io.ooo_to_mem.issueVldu.map{x =>
1533c590fb32Scz4e    x.bits.uop.vpu.isVleff && x.bits.uop.vpu.lastUop && x.valid
1534c590fb32Scz4e  }
1535c590fb32Scz4e
1536c590fb32Scz4e  // init port
  /**
   * TODO: the split vsMergeBuffer may be removable if one RS can accept two
   * feedbacks, or if the RS does not need to replay uops.
   * for now:
   *  RS0 -> VsSplit0 -> stu0 -> vsMergebuffer0 -> feedback -> RS0
   *  RS1 -> VsSplit1 -> stu1 -> vsMergebuffer1 -> feedback -> RS1
   *
   * vector load doesn't need feedback
   *
   *  RS0 -> VlSplit0  -> ldu0 -> |
   *  RS1 -> VlSplit1  -> ldu1 -> |  -> vlMergebuffer
   *        replayIO   -> ldu3 -> |
   * */
  // Default-initialize each vector-store merge buffer, then hook up the
  // flush/index control coming from the store misalign buffer.
  (0 until VstuCnt).foreach{i =>
    vsMergeBuffer(i).io.fromPipeline := DontCare
    vsMergeBuffer(i).io.fromSplit := DontCare

    vsMergeBuffer(i).io.fromMisalignBuffer.get.flush := storeMisalignBuffer.io.toVecStoreMergeBuffer(i).flush
    vsMergeBuffer(i).io.fromMisalignBuffer.get.mbIndex := storeMisalignBuffer.io.toVecStoreMergeBuffer(i).mbIndex
  }
1556c590fb32Scz4e
  // Vector-store split units: accept vector stores from the shared issueVldu
  // ports (segments are handled elsewhere) and pipe the split uops into the
  // store units, flushing in-flight entries on redirect.
  (0 until VstuCnt).foreach{i =>
    vsSplit(i).io.redirect <> redirect
    vsSplit(i).io.in <> io.ooo_to_mem.issueVldu(i)
    // Override the bulk-connected valid: only fire for accepted vector stores.
    vsSplit(i).io.in.valid := io.ooo_to_mem.issueVldu(i).valid &&
                              vStoreCanAccept(i) && !isSegment
    vsSplit(i).io.toMergeBuffer <> vsMergeBuffer(i).io.fromSplit.head
    // Registered handoff into the store pipe; entries needing flush are dropped.
    NewPipelineConnect(
      vsSplit(i).io.out, storeUnits(i).io.vecstin, storeUnits(i).io.vecstin.fire,
      Mux(vsSplit(i).io.out.fire, vsSplit(i).io.out.bits.uop.robIdx.needFlush(io.redirect), storeUnits(i).io.vecstin.bits.uop.robIdx.needFlush(io.redirect)),
      Option("VsSplitConnectStu")
    )
    vsSplit(i).io.vstd.get := DontCare // Todo: Discuss how to pass vector store data

    vsSplit(i).io.vstdMisalign.get.storeMisalignBufferEmpty := !storeMisalignBuffer.io.full
    vsSplit(i).io.vstdMisalign.get.storePipeEmpty := !storeUnits(i).io.s0_s1_valid

  }
  (0 until VlduCnt).foreach{i =>
    vlSplit(i).io.redirect <> redirect
    vlSplit(i).io.in <> io.ooo_to_mem.issueVldu(i)
    // Accept only genuine vector loads; segment uops go to vSegmentUnit and
    // vleff last-uops (vl fixup) go to vfofBuffer, not through the split path.
    vlSplit(i).io.in.valid := io.ooo_to_mem.issueVldu(i).valid &&
                              vLoadCanAccept(i) && !isSegment && !isFixVlUop(i)
    vlSplit(i).io.toMergeBuffer <> vlMergeBuffer.io.fromSplit(i)
    // Throttle splitting when the merge buffer signals pressure; the LQ deq
    // pointer is passed along as the threshold reference.
    vlSplit(i).io.threshold.get.valid := vlMergeBuffer.io.toSplit.get.threshold
    vlSplit(i).io.threshold.get.bits  := lsq.io.lqDeqPtr
    // Pipeline register between split and load unit (flush source chosen by
    // which side currently owns the uop).
    NewPipelineConnect(
      vlSplit(i).io.out, loadUnits(i).io.vecldin, loadUnits(i).io.vecldin.fire,
      Mux(vlSplit(i).io.out.fire, vlSplit(i).io.out.bits.uop.robIdx.needFlush(io.redirect), loadUnits(i).io.vecldin.bits.uop.robIdx.needFlush(io.redirect)),
      Option("VlSplitConnectLdu")
    )

    // Subsequent instructions will be blocked (vfofBuffer observes every
    // issued uop on this port).
    vfofBuffer.io.in(i).valid := io.ooo_to_mem.issueVldu(i).valid
    vfofBuffer.io.in(i).bits  := io.ooo_to_mem.issueVldu(i).bits
  }
  // Route vector-load writebacks from the load units into vlMergeBuffer.
  // Port MisalignWBPort is time-shared with loadMisalignBuffer's writeback;
  // the load unit's own output has priority on that port.
  (0 until LduCnt).foreach{i=>
    loadUnits(i).io.vecldout.ready         := vlMergeBuffer.io.fromPipeline(i).ready
    // NOTE(review): ready is driven true unconditionally (last connect in this
    // loop), so a misalign-buffer writeback losing arbitration below is not
    // back-pressured — confirm the misalign buffer re-issues in that case.
    loadMisalignBuffer.io.vecWriteBack.ready := true.B

    if (i == MisalignWBPort) {
      when(loadUnits(i).io.vecldout.valid) {
        vlMergeBuffer.io.fromPipeline(i).valid := loadUnits(i).io.vecldout.valid
        vlMergeBuffer.io.fromPipeline(i).bits  := loadUnits(i).io.vecldout.bits
      } .otherwise {
        vlMergeBuffer.io.fromPipeline(i).valid   := loadMisalignBuffer.io.vecWriteBack.valid
        vlMergeBuffer.io.fromPipeline(i).bits    := loadMisalignBuffer.io.vecWriteBack.bits
      }
    } else {
      vlMergeBuffer.io.fromPipeline(i).valid := loadUnits(i).io.vecldout.valid
      vlMergeBuffer.io.fromPipeline(i).bits  := loadUnits(i).io.vecldout.bits
    }
  }
1609c590fb32Scz4e
  // Route vector-store writebacks into the per-port vsMergeBuffer. Each port is
  // time-shared with storeMisalignBuffer's writeback; the store unit's own
  // output has priority (it is always accepted: vecstout.ready is tied high).
  (0 until StaCnt).foreach{i=>
    if(i < VstuCnt){
      storeUnits(i).io.vecstout.ready := true.B
      storeMisalignBuffer.io.vecWriteBack(i).ready := vsMergeBuffer(i).io.fromPipeline.head.ready

      when(storeUnits(i).io.vecstout.valid) {
        vsMergeBuffer(i).io.fromPipeline.head.valid := storeUnits(i).io.vecstout.valid
        vsMergeBuffer(i).io.fromPipeline.head.bits  := storeUnits(i).io.vecstout.bits
      } .otherwise {
        vsMergeBuffer(i).io.fromPipeline.head.valid   := storeMisalignBuffer.io.vecWriteBack(i).valid
        vsMergeBuffer(i).io.fromPipeline.head.bits    := storeMisalignBuffer.io.vecWriteBack(i).bits
      }
    }
  }

  // The shared issue port is consumed when either side can take the uop.
  (0 until VlduCnt).foreach{i=>
    io.ooo_to_mem.issueVldu(i).ready := vLoadCanAccept(i) || vStoreCanAccept(i)
  }
1628c590fb32Scz4e
  vlMergeBuffer.io.redirect <> redirect
  vsMergeBuffer.map(_.io.redirect <> redirect)
  // Vector load/store completion feedback to the LSQ.
  (0 until VlduCnt).foreach{i=>
    vlMergeBuffer.io.toLsq(i) <> lsq.io.ldvecFeedback(i)
  }
  (0 until VstuCnt).foreach{i=>
    vsMergeBuffer(i).io.toLsq.head <> lsq.io.stvecFeedback(i)
  }

  // Vector loads never need fast feedback to the issue queues.
  (0 until VlduCnt).foreach{i=>
    // send to RS
    vlMergeBuffer.io.feedback(i) <> io.mem_to_ooo.vlduIqFeedback(i).feedbackSlow
    io.mem_to_ooo.vlduIqFeedback(i).feedbackFast := DontCare
  }
  (0 until VstuCnt).foreach{i =>
    // send to RS
    if (i == 0){
      // Port 0 is shared with vSegmentUnit's feedback.
      // NOTE(review): Mux1H assumes the two feedback valids are mutually
      // exclusive (segment execution blocks the normal vector store path).
      io.mem_to_ooo.vstuIqFeedback(i).feedbackSlow.valid := vsMergeBuffer(i).io.feedback.head.valid || vSegmentUnit.io.feedback.valid
      io.mem_to_ooo.vstuIqFeedback(i).feedbackSlow.bits := Mux1H(Seq(
        vSegmentUnit.io.feedback.valid -> vSegmentUnit.io.feedback.bits,
        vsMergeBuffer(i).io.feedback.head.valid ->  vsMergeBuffer(i).io.feedback.head.bits
      ))
      io.mem_to_ooo.vstuIqFeedback(i).feedbackFast := DontCare
    } else {
      vsMergeBuffer(i).io.feedback.head <> io.mem_to_ooo.vstuIqFeedback(i).feedbackSlow
      io.mem_to_ooo.vstuIqFeedback(i).feedbackFast := DontCare
    }
  }
1657c590fb32Scz4e
  // Arbitrate the per-port vector writeback to the backend.
  // Priority (PriorityMux order): special unit (vSegmentUnit on port 0,
  // vfofBuffer on port 1) > vlMergeBuffer > vsMergeBuffer; the ready signals
  // below mirror that priority so exactly one producer fires per cycle.
  (0 until VlduCnt).foreach{i=>
    if (i == 0){ // for segmentUnit, segmentUnit use port0 writeback
      io.mem_to_ooo.writebackVldu(i).valid := vlMergeBuffer.io.uopWriteback(i).valid || vsMergeBuffer(i).io.uopWriteback.head.valid || vSegmentUnit.io.uopwriteback.valid
      io.mem_to_ooo.writebackVldu(i).bits := PriorityMux(Seq(
        vSegmentUnit.io.uopwriteback.valid          -> vSegmentUnit.io.uopwriteback.bits,
        vlMergeBuffer.io.uopWriteback(i).valid      -> vlMergeBuffer.io.uopWriteback(i).bits,
        vsMergeBuffer(i).io.uopWriteback.head.valid -> vsMergeBuffer(i).io.uopWriteback.head.bits,
      ))
      vlMergeBuffer.io.uopWriteback(i).ready := io.mem_to_ooo.writebackVldu(i).ready && !vSegmentUnit.io.uopwriteback.valid
      vsMergeBuffer(i).io.uopWriteback.head.ready := io.mem_to_ooo.writebackVldu(i).ready && !vlMergeBuffer.io.uopWriteback(i).valid && !vSegmentUnit.io.uopwriteback.valid
      vSegmentUnit.io.uopwriteback.ready := io.mem_to_ooo.writebackVldu(i).ready
    } else if (i == 1) {
      // Port 1 is shared with vfofBuffer (vleff vl writeback).
      io.mem_to_ooo.writebackVldu(i).valid := vlMergeBuffer.io.uopWriteback(i).valid || vsMergeBuffer(i).io.uopWriteback.head.valid || vfofBuffer.io.uopWriteback.valid
      io.mem_to_ooo.writebackVldu(i).bits := PriorityMux(Seq(
        vfofBuffer.io.uopWriteback.valid            -> vfofBuffer.io.uopWriteback.bits,
        vlMergeBuffer.io.uopWriteback(i).valid      -> vlMergeBuffer.io.uopWriteback(i).bits,
        vsMergeBuffer(i).io.uopWriteback.head.valid -> vsMergeBuffer(i).io.uopWriteback.head.bits,
      ))
      vlMergeBuffer.io.uopWriteback(i).ready := io.mem_to_ooo.writebackVldu(i).ready && !vfofBuffer.io.uopWriteback.valid
      vsMergeBuffer(i).io.uopWriteback.head.ready := io.mem_to_ooo.writebackVldu(i).ready && !vlMergeBuffer.io.uopWriteback(i).valid && !vfofBuffer.io.uopWriteback.valid
      vfofBuffer.io.uopWriteback.ready := io.mem_to_ooo.writebackVldu(i).ready
    } else {
      io.mem_to_ooo.writebackVldu(i).valid := vlMergeBuffer.io.uopWriteback(i).valid || vsMergeBuffer(i).io.uopWriteback.head.valid
      io.mem_to_ooo.writebackVldu(i).bits := PriorityMux(Seq(
        vlMergeBuffer.io.uopWriteback(i).valid -> vlMergeBuffer.io.uopWriteback(i).bits,
        vsMergeBuffer(i).io.uopWriteback.head.valid -> vsMergeBuffer(i).io.uopWriteback.head.bits,
      ))
      vlMergeBuffer.io.uopWriteback(i).ready := io.mem_to_ooo.writebackVldu(i).ready
      vsMergeBuffer(i).io.uopWriteback.head.ready := io.mem_to_ooo.writebackVldu(i).ready && !vlMergeBuffer.io.uopWriteback(i).valid
    }

    // vfofBuffer snoops every vl-merge writeback to track vleff completion.
    vfofBuffer.io.mergeUopWriteback(i).valid := vlMergeBuffer.io.uopWriteback(i).valid
    vfofBuffer.io.mergeUopWriteback(i).bits  := vlMergeBuffer.io.uopWriteback(i).bits
  }


  vfofBuffer.io.redirect <> redirect
1695c590fb32Scz4e
  // Sbuffer
  sbuffer.io.csrCtrl    <> csrCtrl
  sbuffer.io.dcache     <> dcache.io.lsu.store
  sbuffer.io.memSetPattenDetected := dcache.io.memSetPattenDetected
  sbuffer.io.force_write <> lsq.io.force_write
  // flush sbuffer
  // Flush requesters: CMO (via LSQ), fence (from backend), and atomics /
  // vector segment execution.
  val cmoFlush = lsq.io.flushSbuffer.valid
  val fenceFlush = io.ooo_to_mem.flushSb
  val atomicsFlush = atomicsUnit.io.flush_sbuffer.valid || vSegmentUnit.io.flush_sbuffer.valid
  // Stores are fully drained only when both sbuffer and uncache are empty.
  val stIsEmpty = sbuffer.io.flush.empty && uncache.io.flush.empty
  io.mem_to_ooo.sbIsEmpty := RegNext(stIsEmpty)

  // if both of them tries to flush sbuffer at the same time
  // something must have gone wrong
  assert(!(fenceFlush && atomicsFlush && cmoFlush))
  sbuffer.io.flush.valid := RegNext(fenceFlush || atomicsFlush || cmoFlush)
  uncache.io.flush.valid := sbuffer.io.flush.valid
1713c590fb32Scz4e
  // AtomicsUnit: AtomicsUnit will override other control signals,
  // as atomics insts (LR/SC/AMO) will block the pipeline.
  // One state per STA/HYU issue port plus the normal (non-atomic) state.
  val s_normal +: s_atomics = Enum(StaCnt + HyuCnt + 1)
  val state = RegInit(s_normal)

  // Per-port "an AMO is being issued here this cycle" flags (STA ports first,
  // then hybrid ports).
  val st_atomics = Seq.tabulate(StaCnt)(i =>
    io.ooo_to_mem.issueSta(i).valid && FuType.storeIsAMO((io.ooo_to_mem.issueSta(i).bits.uop.fuType))
  ) ++ Seq.tabulate(HyuCnt)(i =>
    io.ooo_to_mem.issueHya(i).valid && FuType.storeIsAMO((io.ooo_to_mem.issueHya(i).bits.uop.fuType))
  )

  // Hijack the issuing port: the AMO goes to atomicsUnit instead of the
  // store/hybrid unit, and the state records which port it came from.
  for (i <- 0 until StaCnt) when(st_atomics(i)) {
    io.ooo_to_mem.issueSta(i).ready := atomicsUnit.io.in.ready
    storeUnits(i).io.stin.valid := false.B

    state := s_atomics(i)
  }
  for (i <- 0 until HyuCnt) when(st_atomics(StaCnt + i)) {
    io.ooo_to_mem.issueHya(i).ready := atomicsUnit.io.in.ready
    hybridUnits(i).io.lsin.valid := false.B

    state := s_atomics(StaCnt + i)
    // At most one port may issue an AMO in a given cycle.
    assert(!st_atomics.zipWithIndex.filterNot(_._2 == StaCnt + i).unzip._1.reduce(_ || _))
  }
  when (atomicsUnit.io.out.valid) {
    state := s_normal
  }

  // Mux1H is safe here: the assertion above guarantees st_atomics is one-hot
  // whenever any bit is set.
  atomicsUnit.io.in.valid := st_atomics.reduce(_ || _)
  atomicsUnit.io.in.bits  := Mux1H(Seq.tabulate(StaCnt)(i =>
    st_atomics(i) -> io.ooo_to_mem.issueSta(i).bits) ++
    Seq.tabulate(HyuCnt)(i => st_atomics(StaCnt+i) -> io.ooo_to_mem.issueHya(i).bits))
  // Store-data path for the AMO (st_data_atomics / stData defined earlier).
  atomicsUnit.io.storeDataIn.zipWithIndex.foreach { case (stdin, i) =>
    stdin.valid := st_data_atomics(i)
    stdin.bits := stData(i).bits
  }
  atomicsUnit.io.redirect <> redirect
1751c590fb32Scz4e
  // TODO: complete amo's pmp support
  // AtomicsUnit borrows load unit 0's TLB requestor and PMP checker; the
  // response path defaults to invalid and is only driven while an atomic is
  // in flight (see the `state =/= s_normal` block below).
  val amoTlb = dtlb_ld(0).requestor(0)
  atomicsUnit.io.dtlb.resp.valid := false.B
  atomicsUnit.io.dtlb.resp.bits  := DontCare
  atomicsUnit.io.dtlb.req.ready  := amoTlb.req.ready
  atomicsUnit.io.pmpResp := pmp_check(0).resp

  atomicsUnit.io.dcache <> dcache.io.lsu.atomics
  atomicsUnit.io.flush_sbuffer.empty := stIsEmpty

  atomicsUnit.io.csrCtrl := csrCtrl

  // for atomicsUnit, it uses loadUnit(0)'s TLB port

  when (state =/= s_normal) {
    // use store wb port instead of load
    loadUnits(0).io.ldout.ready := false.B
    // use load_0's TLB
    atomicsUnit.io.dtlb <> amoTlb

    // hw prefetch should be disabled while executing atomic insts
    loadUnits.map(i => i.io.prefetch_req.valid := false.B)

    // make sure there's no in-flight uops in load unit
    assert(!loadUnits(0).io.ldout.valid)
  }

  lsq.io.flushSbuffer.empty := sbuffer.io.sbempty

  // While an atomic occupies a port, its slow feedback replaces the normal
  // store/hybrid unit feedback on that port.
  for (i <- 0 until StaCnt) {
    when (state === s_atomics(i)) {
      io.mem_to_ooo.staIqFeedback(i).feedbackSlow := atomicsUnit.io.feedbackSlow
      assert(!storeUnits(i).io.feedback_slow.valid)
    }
  }
  for (i <- 0 until HyuCnt) {
    when (state === s_atomics(StaCnt + i)) {
      io.mem_to_ooo.hyuIqFeedback(i).feedbackSlow := atomicsUnit.io.feedbackSlow
      assert(!hybridUnits(i).io.feedback_slow.valid)
    }
  }
1793c590fb32Scz4e
  lsq.io.exceptionAddr.isStore := io.ooo_to_mem.isStoreException
  // Exception address is used several cycles after flush.
  // We delay it by 10 cycles to ensure its flush safety.
  // Sticky flag: set when atomicsUnit reports an exception, cleared by a
  // (delayed) redirect.
  val atomicsException = RegInit(false.B)
  when (DelayN(redirect.valid, 10) && atomicsException) {
    atomicsException := false.B
  }.elsewhen (atomicsUnit.io.exceptionInfo.valid) {
    atomicsException := true.B
  }

  // Misalign buffers may overwrite the default LSQ exception info; loads take
  // precedence over stores in each of these muxes.
  val misalignBufExceptionOverwrite = loadMisalignBuffer.io.overwriteExpBuf.valid || storeMisalignBuffer.io.overwriteExpBuf.valid
  val misalignBufExceptionVaddr = Mux(loadMisalignBuffer.io.overwriteExpBuf.valid,
    loadMisalignBuffer.io.overwriteExpBuf.vaddr,
    storeMisalignBuffer.io.overwriteExpBuf.vaddr
  )
  val misalignBufExceptionIsHyper = Mux(loadMisalignBuffer.io.overwriteExpBuf.valid,
    loadMisalignBuffer.io.overwriteExpBuf.isHyper,
    storeMisalignBuffer.io.overwriteExpBuf.isHyper
  )
  val misalignBufExceptionGpaddr = Mux(loadMisalignBuffer.io.overwriteExpBuf.valid,
    loadMisalignBuffer.io.overwriteExpBuf.gpaddr,
    storeMisalignBuffer.io.overwriteExpBuf.gpaddr
  )
  val misalignBufExceptionIsForVSnonLeafPTE = Mux(loadMisalignBuffer.io.overwriteExpBuf.valid,
    loadMisalignBuffer.io.overwriteExpBuf.isForVSnonLeafPTE,
    storeMisalignBuffer.io.overwriteExpBuf.isForVSnonLeafPTE
  )

  // Same sticky-flag scheme for vector segment exceptions.
  val vSegmentException = RegInit(false.B)
  when (DelayN(redirect.valid, 10) && vSegmentException) {
    vSegmentException := false.B
  }.elsewhen (vSegmentUnit.io.exceptionInfo.valid) {
    vSegmentException := true.B
  }
  // Capture exception payloads when the corresponding unit reports them.
  val atomicsExceptionAddress = RegEnable(atomicsUnit.io.exceptionInfo.bits.vaddr, atomicsUnit.io.exceptionInfo.valid)
  val vSegmentExceptionVstart = RegEnable(vSegmentUnit.io.exceptionInfo.bits.vstart, vSegmentUnit.io.exceptionInfo.valid)
  val vSegmentExceptionVl     = RegEnable(vSegmentUnit.io.exceptionInfo.bits.vl, vSegmentUnit.io.exceptionInfo.valid)
  val vSegmentExceptionAddress = RegEnable(vSegmentUnit.io.exceptionInfo.bits.vaddr, vSegmentUnit.io.exceptionInfo.valid)
  val atomicsExceptionGPAddress = RegEnable(atomicsUnit.io.exceptionInfo.bits.gpaddr, atomicsUnit.io.exceptionInfo.valid)
  val vSegmentExceptionGPAddress = RegEnable(vSegmentUnit.io.exceptionInfo.bits.gpaddr, vSegmentUnit.io.exceptionInfo.valid)
  val atomicsExceptionIsForVSnonLeafPTE = RegEnable(atomicsUnit.io.exceptionInfo.bits.isForVSnonLeafPTE, atomicsUnit.io.exceptionInfo.valid)
  val vSegmentExceptionIsForVSnonLeafPTE = RegEnable(vSegmentUnit.io.exceptionInfo.bits.isForVSnonLeafPTE, vSegmentUnit.io.exceptionInfo.valid)

  // Exception vaddr priority: atomics > misalign buffer > vector segment > LSQ.
  val exceptionVaddr = Mux(
    atomicsException,
    atomicsExceptionAddress,
    Mux(misalignBufExceptionOverwrite,
      misalignBufExceptionVaddr,
      Mux(vSegmentException,
        vSegmentExceptionAddress,
        lsq.io.exceptionAddr.vaddr
      )
    )
  )
  // whether vaddr need ext or is hyper inst:
  // VaNeedExt: atomicsException -> false; misalignBufExceptionOverwrite -> true; vSegmentException -> false
  // IsHyper: atomicsException -> false; vSegmentException -> false
  val exceptionVaNeedExt = !atomicsException &&
    (misalignBufExceptionOverwrite ||
      (!vSegmentException && lsq.io.exceptionAddr.vaNeedExt))
  val exceptionIsHyper = !atomicsException &&
    (misalignBufExceptionOverwrite && misalignBufExceptionIsHyper ||
      (!vSegmentException && lsq.io.exceptionAddr.isHyper && !misalignBufExceptionOverwrite))
1857c590fb32Scz4e
1858168f1995SXu, Zefan  def GenExceptionVa(
1859168f1995SXu, Zefan    mode: UInt, isVirt: Bool, vaNeedExt: Bool,
1860c590fb32Scz4e    satp: TlbSatpBundle, vsatp: TlbSatpBundle, hgatp: TlbHgatpBundle,
1861168f1995SXu, Zefan    vaddr: UInt
1862168f1995SXu, Zefan  ) = {
1863c590fb32Scz4e    require(VAddrBits >= 50)
1864c590fb32Scz4e
1865168f1995SXu, Zefan    val satpNone = satp.mode === 0.U
1866168f1995SXu, Zefan    val satpSv39 = satp.mode === 8.U
1867168f1995SXu, Zefan    val satpSv48 = satp.mode === 9.U
1868c590fb32Scz4e
1869168f1995SXu, Zefan    val vsatpNone = vsatp.mode === 0.U
1870168f1995SXu, Zefan    val vsatpSv39 = vsatp.mode === 8.U
1871168f1995SXu, Zefan    val vsatpSv48 = vsatp.mode === 9.U
1872168f1995SXu, Zefan
1873168f1995SXu, Zefan    val hgatpNone = hgatp.mode === 0.U
1874168f1995SXu, Zefan    val hgatpSv39x4 = hgatp.mode === 8.U
1875168f1995SXu, Zefan    val hgatpSv48x4 = hgatp.mode === 9.U
1876168f1995SXu, Zefan
1877168f1995SXu, Zefan    // For !isVirt, mode check is necessary, as we don't want virtual memory in M-mode.
1878168f1995SXu, Zefan    // For isVirt, mode check is unnecessary, as virt won't be 1 in M-mode.
1879168f1995SXu, Zefan    // Also, isVirt includes Hyper Insts, which don't care mode either.
1880168f1995SXu, Zefan
1881168f1995SXu, Zefan    val useBareAddr =
1882168f1995SXu, Zefan      (isVirt && vsatpNone && hgatpNone) ||
1883168f1995SXu, Zefan      (!isVirt && (mode === CSRConst.ModeM)) ||
1884168f1995SXu, Zefan      (!isVirt && (mode =/= CSRConst.ModeM) && satpNone)
1885168f1995SXu, Zefan    val useSv39Addr =
1886168f1995SXu, Zefan      (isVirt && vsatpSv39) ||
1887168f1995SXu, Zefan      (!isVirt && (mode =/= CSRConst.ModeM) && satpSv39)
1888168f1995SXu, Zefan    val useSv48Addr =
1889168f1995SXu, Zefan      (isVirt && vsatpSv48) ||
1890168f1995SXu, Zefan      (!isVirt && (mode =/= CSRConst.ModeM) && satpSv48)
1891168f1995SXu, Zefan    val useSv39x4Addr = isVirt && vsatpNone && hgatpSv39x4
1892168f1995SXu, Zefan    val useSv48x4Addr = isVirt && vsatpNone && hgatpSv48x4
1893c590fb32Scz4e
1894c590fb32Scz4e    val bareAddr   = ZeroExt(vaddr(PAddrBits - 1, 0), XLEN)
1895c590fb32Scz4e    val sv39Addr   = SignExt(vaddr.take(39), XLEN)
1896c590fb32Scz4e    val sv39x4Addr = ZeroExt(vaddr.take(39 + 2), XLEN)
1897c590fb32Scz4e    val sv48Addr   = SignExt(vaddr.take(48), XLEN)
1898c590fb32Scz4e    val sv48x4Addr = ZeroExt(vaddr.take(48 + 2), XLEN)
1899c590fb32Scz4e
1900c590fb32Scz4e    val ExceptionVa = Wire(UInt(XLEN.W))
1901c590fb32Scz4e    when (vaNeedExt) {
1902c590fb32Scz4e      ExceptionVa := Mux1H(Seq(
1903168f1995SXu, Zefan        (useBareAddr)   -> bareAddr,
1904168f1995SXu, Zefan        (useSv39Addr)   -> sv39Addr,
1905168f1995SXu, Zefan        (useSv48Addr)   -> sv48Addr,
1906168f1995SXu, Zefan        (useSv39x4Addr) -> sv39x4Addr,
1907168f1995SXu, Zefan        (useSv48x4Addr) -> sv48x4Addr,
1908c590fb32Scz4e      ))
1909c590fb32Scz4e    } .otherwise {
1910c590fb32Scz4e      ExceptionVa := vaddr
1911c590fb32Scz4e    }
1912c590fb32Scz4e
1913c590fb32Scz4e    ExceptionVa
1914c590fb32Scz4e  }
1915c590fb32Scz4e
  // Report the (possibly re-extended) exception vaddr to the backend, one
  // cycle delayed.
  io.mem_to_ooo.lsqio.vaddr := RegNext(
    GenExceptionVa(tlbcsr.priv.dmode, tlbcsr.priv.virt || exceptionIsHyper, exceptionVaNeedExt,
    tlbcsr.satp, tlbcsr.vsatp, tlbcsr.hgatp, exceptionVaddr)
  )

  // vsegment instruction is executed atomic, which mean atomicsException and vSegmentException should not raise at the same time.
  XSError(atomicsException && vSegmentException, "atomicsException and vSegmentException raise at the same time!")
  // vstart / vl come from the segment unit when it raised the exception,
  // otherwise from the LSQ.
  io.mem_to_ooo.lsqio.vstart := RegNext(Mux(vSegmentException,
                                            vSegmentExceptionVstart,
                                            lsq.io.exceptionAddr.vstart)
  )
  io.mem_to_ooo.lsqio.vl     := RegNext(Mux(vSegmentException,
                                            vSegmentExceptionVl,
                                            lsq.io.exceptionAddr.vl)
  )

  XSError(atomicsException && atomicsUnit.io.in.valid, "new instruction before exception triggers\n")
  // Guest-physical address / VS-non-leaf-PTE flag: same priority order as the
  // vaddr mux (atomics > misalign buffer > vector segment > LSQ).
  io.mem_to_ooo.lsqio.gpaddr := RegNext(Mux(
    atomicsException,
    atomicsExceptionGPAddress,
    Mux(misalignBufExceptionOverwrite,
      misalignBufExceptionGpaddr,
      Mux(vSegmentException,
        vSegmentExceptionGPAddress,
        lsq.io.exceptionAddr.gpaddr
      )
    )
  ))
  io.mem_to_ooo.lsqio.isForVSnonLeafPTE := RegNext(Mux(
    atomicsException,
    atomicsExceptionIsForVSnonLeafPTE,
    Mux(misalignBufExceptionOverwrite,
      misalignBufExceptionIsForVSnonLeafPTE,
      Mux(vSegmentException,
        vSegmentExceptionIsForVSnonLeafPTE,
        lsq.io.exceptionAddr.isForVSnonLeafPTE
      )
    )
  ))
  // Pass-through signals from the tile top to the backend (interrupt lines
  // from the diplomatic sinks, MSI / CLINT time with one-cycle delay).
  io.mem_to_ooo.topToBackendBypass match { case x =>
    x.hartId            := io.hartId
    x.l2FlushDone       := RegNext(io.l2_flush_done)
    x.externalInterrupt.msip  := outer.clint_int_sink.in.head._1(0)
    x.externalInterrupt.mtip  := outer.clint_int_sink.in.head._1(1)
    x.externalInterrupt.meip  := outer.plic_int_sink.in.head._1(0)
    x.externalInterrupt.seip  := outer.plic_int_sink.in.last._1(0)
    x.externalInterrupt.debug := outer.debug_int_sink.in.head._1(0)
    // nmi_31 also fires on a local bus-error interrupt.
    x.externalInterrupt.nmi.nmi_31 := outer.nmi_int_sink.in.head._1(0) | outer.beu_local_int_sink.in.head._1(0)
    x.externalInterrupt.nmi.nmi_43 := outer.nmi_int_sink.in.head._1(1)
    x.msiInfo           := DelayNWithValid(io.fromTopToBackend.msiInfo, 1)
    x.clintTime         := DelayNWithValid(io.fromTopToBackend.clintTime, 1)
  }
1968c590fb32Scz4e
  // Occupancy info for the backend (registered).
  io.memInfo.sqFull := RegNext(lsq.io.sqFull)
  io.memInfo.lqFull := RegNext(lsq.io.lqFull)
  io.memInfo.dcacheMSHRFull := RegNext(dcache.io.mshrFull)

  // Tile-level pass-through / control signals.
  io.inner_hartId := io.hartId
  io.inner_reset_vector := RegNext(io.outer_reset_vector)
  io.outer_cpu_halt := io.ooo_to_mem.backendToTopBypass.cpuHalted
  io.outer_l2_flush_en := io.ooo_to_mem.csrCtrl.flush_l2_enable
  io.outer_power_down_en := io.ooo_to_mem.csrCtrl.power_down_enable
  io.outer_cpu_critical_error := io.ooo_to_mem.backendToTopBypass.cpuCriticalError
  io.outer_msi_ack := io.ooo_to_mem.backendToTopBypass.msiAck
  io.outer_beu_errors_icache := RegNext(io.inner_beu_errors_icache)
  io.inner_hc_perfEvents <> RegNext(io.outer_hc_perfEvents)

  // vector segmentUnit
  // The segment unit reuses load port 0's TLB/PMP/dcache response paths
  // (request-side muxing is done elsewhere).
  vSegmentUnit.io.in.bits <> io.ooo_to_mem.issueVldu.head.bits
  vSegmentUnit.io.in.valid := isSegment && io.ooo_to_mem.issueVldu.head.valid// is segment instruction
  vSegmentUnit.io.dtlb.resp.bits <> dtlb_reqs.take(LduCnt).head.resp.bits
  vSegmentUnit.io.dtlb.resp.valid <> dtlb_reqs.take(LduCnt).head.resp.valid
  vSegmentUnit.io.pmpResp <> pmp_check.head.resp
  vSegmentUnit.io.flush_sbuffer.empty := stIsEmpty
  vSegmentUnit.io.redirect <> redirect
  vSegmentUnit.io.rdcache.resp.bits := dcache.io.lsu.load(0).resp.bits
  vSegmentUnit.io.rdcache.resp.valid := dcache.io.lsu.load(0).resp.valid
  vSegmentUnit.io.rdcache.s2_bank_conflict := dcache.io.lsu.load(0).s2_bank_conflict
  // -------------------------
  // Vector Segment Triggers
  // -------------------------
  vSegmentUnit.io.fromCsrTrigger.tdataVec := tdata
  vSegmentUnit.io.fromCsrTrigger.tEnableVec := tEnable
  vSegmentUnit.io.fromCsrTrigger.triggerCanRaiseBpExp := triggerCanRaiseBpExp
  vSegmentUnit.io.fromCsrTrigger.debugMode := debugMode
2001c590fb32Scz4e
  // reset tree of MemBlock
  // Two physical reset sub-trees (left/right floorplan halves); only built
  // when ResetGen is enabled, otherwise the backend reset is left unconnected.
  if (p(DebugOptionsKey).ResetGen) {
    val leftResetTree = ResetGenNode(
      Seq(
        ModuleNode(ptw),
        ModuleNode(ptw_to_l2_buffer),
        ModuleNode(lsq),
        ModuleNode(dtlb_st_tlb_st),
        ModuleNode(dtlb_prefetch_tlb_prefetch),
        ModuleNode(pmp)
      )
      ++ pmp_checkers.map(ModuleNode(_))
      ++ (if (prefetcherOpt.isDefined) Seq(ModuleNode(prefetcherOpt.get)) else Nil)
      ++ (if (l1PrefetcherOpt.isDefined) Seq(ModuleNode(l1PrefetcherOpt.get)) else Nil)
    )
    val rightResetTree = ResetGenNode(
      Seq(
        ModuleNode(sbuffer),
        ModuleNode(dtlb_ld_tlb_ld),
        ModuleNode(dcache),
        ModuleNode(l1d_to_l2_buffer),
        CellNode(io.reset_backend)
      )
    )
    ResetGen(leftResetTree, reset, sim = false, io.dft_reset)
    ResetGen(rightResetTree, reset, sim = false, io.dft_reset)
  } else {
    io.reset_backend := DontCare
  }
  io.resetInFrontendBypass.toL2Top := io.resetInFrontendBypass.fromFrontend
  // trace interface
  // One-cycle staging between backend trace output and the L2Top-side encoder;
  // RegEnable is used for fields only meaningful when their group is valid.
  val traceToL2Top = io.traceCoreInterfaceBypass.toL2Top
  val traceFromBackend = io.traceCoreInterfaceBypass.fromBackend
  traceFromBackend.fromEncoder := RegNext(traceToL2Top.fromEncoder)
  // trap info is captured only on a valid group whose itype is a trap
  traceToL2Top.toEncoder.trap  := RegEnable(
    traceFromBackend.toEncoder.trap,
    traceFromBackend.toEncoder.groups(0).valid && Itype.isTrap(traceFromBackend.toEncoder.groups(0).bits.itype)
  )
  traceToL2Top.toEncoder.priv := RegEnable(
    traceFromBackend.toEncoder.priv,
    traceFromBackend.toEncoder.groups(0).valid
  )
  (0 until TraceGroupNum).foreach { i =>
    traceToL2Top.toEncoder.groups(i).valid := RegNext(traceFromBackend.toEncoder.groups(i).valid)
    traceToL2Top.toEncoder.groups(i).bits.iretire := RegNext(traceFromBackend.toEncoder.groups(i).bits.iretire)
    traceToL2Top.toEncoder.groups(i).bits.itype := RegNext(traceFromBackend.toEncoder.groups(i).bits.itype)
    traceToL2Top.toEncoder.groups(i).bits.ilastsize := RegEnable(
      traceFromBackend.toEncoder.groups(i).bits.ilastsize,
      traceFromBackend.toEncoder.groups(i).valid
    )
    // iaddr = block base address + (ftqOffset << instOffsetBits)
    traceToL2Top.toEncoder.groups(i).bits.iaddr := RegEnable(
      traceFromBackend.toEncoder.groups(i).bits.iaddr,
      traceFromBackend.toEncoder.groups(i).valid
    ) + (RegEnable(
      traceFromBackend.toEncoder.groups(i).bits.ftqOffset.getOrElse(0.U),
      traceFromBackend.toEncoder.groups(i).valid
    ) << instOffsetBits)
  }
2060c590fb32Scz4e
2061c590fb32Scz4e
2062c590fb32Scz4e  io.mem_to_ooo.storeDebugInfo := DontCare
2063c590fb32Scz4e  // store event difftest information
2064c590fb32Scz4e  if (env.EnableDifftest) {
2065c590fb32Scz4e    (0 until EnsbufferWidth).foreach{i =>
2066c590fb32Scz4e        io.mem_to_ooo.storeDebugInfo(i).robidx := sbuffer.io.vecDifftestInfo(i).bits.robIdx
2067c590fb32Scz4e        sbuffer.io.vecDifftestInfo(i).bits.pc := io.mem_to_ooo.storeDebugInfo(i).pc
2068c590fb32Scz4e    }
2069c590fb32Scz4e  }
2070c590fb32Scz4e
2071c590fb32Scz4e  // top-down info
2072c590fb32Scz4e  dcache.io.debugTopDown.robHeadVaddr := io.debugTopDown.robHeadVaddr
2073c590fb32Scz4e  dtlbRepeater.io.debugTopDown.robHeadVaddr := io.debugTopDown.robHeadVaddr
2074c590fb32Scz4e  lsq.io.debugTopDown.robHeadVaddr := io.debugTopDown.robHeadVaddr
2075c590fb32Scz4e  io.debugTopDown.toCore.robHeadMissInDCache := dcache.io.debugTopDown.robHeadMissInDCache
2076c590fb32Scz4e  io.debugTopDown.toCore.robHeadTlbReplay := lsq.io.debugTopDown.robHeadTlbReplay
2077c590fb32Scz4e  io.debugTopDown.toCore.robHeadTlbMiss := lsq.io.debugTopDown.robHeadTlbMiss
2078c590fb32Scz4e  io.debugTopDown.toCore.robHeadLoadVio := lsq.io.debugTopDown.robHeadLoadVio
2079c590fb32Scz4e  io.debugTopDown.toCore.robHeadLoadMSHR := lsq.io.debugTopDown.robHeadLoadMSHR
2080c590fb32Scz4e  dcache.io.debugTopDown.robHeadOtherReplay := lsq.io.debugTopDown.robHeadOtherReplay
2081c590fb32Scz4e  dcache.io.debugRolling := io.debugRolling
2082c590fb32Scz4e
2083c590fb32Scz4e  lsq.io.noUopsIssued := io.topDownInfo.toBackend.noUopsIssued
2084c590fb32Scz4e  io.topDownInfo.toBackend.lqEmpty := lsq.io.lqEmpty
2085c590fb32Scz4e  io.topDownInfo.toBackend.sqEmpty := lsq.io.sqEmpty
2086c590fb32Scz4e  io.topDownInfo.toBackend.l1Miss := dcache.io.l1Miss
2087c590fb32Scz4e  io.topDownInfo.toBackend.l2TopMiss.l2Miss := RegNext(io.topDownInfo.fromL2Top.l2Miss)
2088c590fb32Scz4e  io.topDownInfo.toBackend.l2TopMiss.l3Miss := RegNext(io.topDownInfo.fromL2Top.l3Miss)
2089c590fb32Scz4e
2090c590fb32Scz4e  val hyLdDeqCount = PopCount(io.ooo_to_mem.issueHya.map(x => x.valid && FuType.isLoad(x.bits.uop.fuType)))
2091c590fb32Scz4e  val hyStDeqCount = PopCount(io.ooo_to_mem.issueHya.map(x => x.valid && FuType.isStore(x.bits.uop.fuType)))
2092c590fb32Scz4e  val ldDeqCount = PopCount(io.ooo_to_mem.issueLda.map(_.valid)) +& hyLdDeqCount
2093c590fb32Scz4e  val stDeqCount = PopCount(io.ooo_to_mem.issueSta.take(StaCnt).map(_.valid)) +& hyStDeqCount
2094c590fb32Scz4e  val iqDeqCount = ldDeqCount +& stDeqCount
2095c590fb32Scz4e  XSPerfAccumulate("load_iq_deq_count", ldDeqCount)
2096c590fb32Scz4e  XSPerfHistogram("load_iq_deq_count", ldDeqCount, true.B, 0, LdExuCnt + 1)
2097c590fb32Scz4e  XSPerfAccumulate("store_iq_deq_count", stDeqCount)
2098c590fb32Scz4e  XSPerfHistogram("store_iq_deq_count", stDeqCount, true.B, 0, StAddrCnt + 1)
2099c590fb32Scz4e  XSPerfAccumulate("ls_iq_deq_count", iqDeqCount)
2100c590fb32Scz4e
2101c590fb32Scz4e  val pfevent = Module(new PFEvent)
2102c590fb32Scz4e  pfevent.io.distribute_csr := csrCtrl.distribute_csr
2103c590fb32Scz4e  val csrevents = pfevent.io.hpmevent.slice(16,24)
2104c590fb32Scz4e
2105c590fb32Scz4e  val perfFromUnits = (loadUnits ++ Seq(sbuffer, lsq, dcache)).flatMap(_.getPerfEvents)
2106c590fb32Scz4e  val perfFromPTW = perfEventsPTW.map(x => ("PTW_" + x._1, x._2))
2107c590fb32Scz4e  val perfBlock     = Seq(("ldDeqCount", ldDeqCount),
2108c590fb32Scz4e                          ("stDeqCount", stDeqCount))
2109c590fb32Scz4e  // let index = 0 be no event
2110c590fb32Scz4e  val allPerfEvents = Seq(("noEvent", 0.U)) ++ perfFromUnits ++ perfFromPTW ++ perfBlock
2111c590fb32Scz4e
2112c590fb32Scz4e  if (printEventCoding) {
2113c590fb32Scz4e    for (((name, inc), i) <- allPerfEvents.zipWithIndex) {
2114c590fb32Scz4e      println("MemBlock perfEvents Set", name, inc, i)
2115c590fb32Scz4e    }
2116c590fb32Scz4e  }
2117c590fb32Scz4e
2118c590fb32Scz4e  val allPerfInc = allPerfEvents.map(_._2.asTypeOf(new PerfEvent))
2119c590fb32Scz4e  val perfEvents = HPerfMonitor(csrevents, allPerfInc).getPerfEvents
2120c590fb32Scz4e  generatePerfEvent()
21214b2c87baS梁森 Liang Sen
21224b2c87baS梁森 Liang Sen  private val mbistPl = MbistPipeline.PlaceMbistPipeline(Int.MaxValue, "MbistPipeMemBlk", hasMbist)
21234b2c87baS梁森 Liang Sen  private val mbistIntf = if(hasMbist) {
21244b2c87baS梁森 Liang Sen    val params = mbistPl.get.nodeParams
21254b2c87baS梁森 Liang Sen    val intf = Some(Module(new MbistInterface(
21264b2c87baS梁森 Liang Sen      params = Seq(params),
21274b2c87baS梁森 Liang Sen      ids = Seq(mbistPl.get.childrenIds),
21284b2c87baS梁森 Liang Sen      name = s"MbistIntfMemBlk",
21294b2c87baS梁森 Liang Sen      pipelineNum = 1
21304b2c87baS梁森 Liang Sen    )))
21314b2c87baS梁森 Liang Sen    intf.get.toPipeline.head <> mbistPl.get.mbist
21324b2c87baS梁森 Liang Sen    mbistPl.get.registerCSV(intf.get.info, "MbistMemBlk")
21334b2c87baS梁森 Liang Sen    intf.get.mbist := DontCare
21344b2c87baS梁森 Liang Sen    dontTouch(intf.get.mbist)
21354b2c87baS梁森 Liang Sen    //TODO: add mbist controller connections here
21364b2c87baS梁森 Liang Sen    intf
21374b2c87baS梁森 Liang Sen  } else {
21384b2c87baS梁森 Liang Sen    None
21394b2c87baS梁森 Liang Sen  }
214030f35717Scz4e  private val sigFromSrams = if (hasDFT) Some(SramHelper.genBroadCastBundleTop()) else None
21414b2c87baS梁森 Liang Sen  private val cg = ClockGate.genTeSrc
21424b2c87baS梁森 Liang Sen  dontTouch(cg)
2143602aa9f1Scz4e
21444b2c87baS梁森 Liang Sen  if (hasMbist) {
214530f35717Scz4e    cg.cgen := io.dft.get.cgen
21464b2c87baS梁森 Liang Sen  } else {
21474b2c87baS梁森 Liang Sen    cg.cgen := false.B
21484b2c87baS梁森 Liang Sen  }
2149602aa9f1Scz4e
2150602aa9f1Scz4e  // sram debug
215130f35717Scz4e  sigFromSrams.foreach({ case sig => sig := DontCare })
215230f35717Scz4e  sigFromSrams.zip(io.dft).foreach {
215330f35717Scz4e    case (sig, dft) =>
215430f35717Scz4e      if (hasMbist) {
215530f35717Scz4e        sig.ram_hold := dft.ram_hold
215630f35717Scz4e        sig.ram_bypass := dft.ram_bypass
215730f35717Scz4e        sig.ram_bp_clken := dft.ram_bp_clken
215830f35717Scz4e        sig.ram_aux_clk := dft.ram_aux_clk
215930f35717Scz4e        sig.ram_aux_ckbp := dft.ram_aux_ckbp
216030f35717Scz4e        sig.ram_mcp_hold := dft.ram_mcp_hold
2161*e5325730Scz4e        sig.cgen := dft.cgen
2162602aa9f1Scz4e      }
2163602aa9f1Scz4e      if (hasSramCtl) {
216430f35717Scz4e        sig.ram_ctl := RegNext(dft.ram_ctl)
2165602aa9f1Scz4e      }
2166c590fb32Scz4e  }
216730f35717Scz4e  io.dft_frnt.zip(sigFromSrams).foreach({ case (a, b) => a := b })
216830f35717Scz4e  io.dft_reset_frnt.zip(io.dft_reset).foreach({ case (a, b) => a := b })
216930f35717Scz4e  io.dft_bcknd.zip(sigFromSrams).foreach({ case (a, b) => a := b })
217030f35717Scz4e  io.dft_reset_bcknd.zip(io.dft_reset).foreach({ case (a, b) => a := b })
217130f35717Scz4e}
2172c590fb32Scz4e
2173c590fb32Scz4eclass MemBlock()(implicit p: Parameters) extends LazyModule
2174c590fb32Scz4e  with HasXSParameter {
2175c590fb32Scz4e  override def shouldBeInlined: Boolean = false
2176c590fb32Scz4e
2177c590fb32Scz4e  val inner = LazyModule(new MemBlockInlined())
2178c590fb32Scz4e
2179c590fb32Scz4e  lazy val module = new MemBlockImp(this)
2180c590fb32Scz4e}
2181c590fb32Scz4e
2182c590fb32Scz4eclass MemBlockImp(wrapper: MemBlock) extends LazyModuleImp(wrapper) {
2183c590fb32Scz4e  val io = IO(wrapper.inner.module.io.cloneType)
2184c590fb32Scz4e  val io_perf = IO(wrapper.inner.module.io_perf.cloneType)
2185c590fb32Scz4e  io <> wrapper.inner.module.io
2186c590fb32Scz4e  io_perf <> wrapper.inner.module.io_perf
2187c590fb32Scz4e
2188c590fb32Scz4e  if (p(DebugOptionsKey).ResetGen) {
2189602aa9f1Scz4e    ResetGen(
2190602aa9f1Scz4e      ResetGenNode(Seq(ModuleNode(wrapper.inner.module))),
219130f35717Scz4e      reset, sim = false, io.dft_reset
2192602aa9f1Scz4e    )
2193c590fb32Scz4e  }
2194c590fb32Scz4e}
2195