xref: /XiangShan/src/main/scala/xiangshan/mem/pipeline/LoadUnit.scala (revision ee46cd6e78eef60f4d73a2afdc0db624d8ccba6b)
1024ee227SWilliam Wangpackage xiangshan.mem
2024ee227SWilliam Wang
3024ee227SWilliam Wangimport chisel3._
4024ee227SWilliam Wangimport chisel3.util._
5024ee227SWilliam Wangimport utils._
6024ee227SWilliam Wangimport xiangshan._
779460b79SLinJiaweiimport xiangshan.backend.decode.ImmUnion
81279060fSWilliam Wangimport xiangshan.cache._
91279060fSWilliam Wang// import xiangshan.cache.{DCacheWordIO, TlbRequestIO, TlbCmd, MemoryOpConstants, TlbReq, DCacheLoadReq, DCacheWordResp}
10024ee227SWilliam Wangimport xiangshan.backend.LSUOpType
11024ee227SWilliam Wang
// Signals exchanged between a LoadUnit and the load/store queue (LSQ).
class LoadToLsqIO extends XSBundle {
  // result of load stage 2, pushed into the LSQ (LoadUnit drives it from load_s2.io.out)
  val loadIn = Valid(new LsPipelineBundle)
  // writeback offered by the LSQ; LoadUnit forwards it to its own ldout
  // only when stage 2 has no writeback of its own in that cycle
  val ldout = Flipped(Decoupled(new ExuOutput))
  // driven from load_s2.io.dataForwarded: dcache missed but forwarding covered every needed byte
  val loadDataForwarded = Output(Bool())
  // driven from load_s2.io.needReplayFromRS (dcache asked for a replay)
  val needReplayFromRS = Output(Bool())
  // store-to-load forward query: request is issued in s1, forwardMask/forwardData are consumed in s2
  val forward = new MaskedLoadForwardQueryIO
}
19024ee227SWilliam Wang
// Load Pipeline Stage 0
// Generate addr, use addr to query DCache and DTLB
//
// The virtual address is src1 + sign-extended imm(11,0). The DTLB and DCache
// requests are both raised in the cycle the load is presented on io.in; the
// stage only hands the load to S1 when the DCache accepts its request.
class LoadUnit_S0 extends XSModule {
  val io = IO(new Bundle() {
    val in = Flipped(Decoupled(new ExuInput))
    val out = Decoupled(new LsPipelineBundle)
    val dtlbReq = DecoupledIO(new TlbReq)
    val dcacheReq = DecoupledIO(new DCacheWordReq)
    val rsIdx = Input(UInt(log2Up(IssQueSize).W))
    val isFirstIssue = Input(Bool())
  })

  val s0_uop = io.in.bits.uop

  // Split address adder (equivalent to src1 + SignExt(imm(11,0), VAddrBits)):
  //  - low part : 12-bit src1 + imm12 taken as unsigned; the zero-extension makes the
  //               result 13 bits wide, so bit 12 is the carry out of the low add
  //  - high part: src1's upper bits corrected by the carry and by the sign of imm12
  val imm12 = WireInit(s0_uop.ctrl.imm(11,0))
  val s0_vaddr_lo = io.in.bits.src1(11,0) + Cat(0.U(1.W), imm12)
  val s0_vaddr_hi = Mux(s0_vaddr_lo(12),
    // carry out: a negative imm cancels the carry, a non-negative imm propagates it (+1)
    Mux(imm12(11), io.in.bits.src1(VAddrBits-1, 12), io.in.bits.src1(VAddrBits-1, 12)+1.U),
    // no carry: a negative imm borrows from the high part
    // (SignExt of a 1-bit 1 is presumably all ones, i.e. -1 -- verify against SignExt),
    // a non-negative imm leaves it unchanged
    Mux(imm12(11), io.in.bits.src1(VAddrBits-1, 12)+SignExt(1.U, VAddrBits-12), io.in.bits.src1(VAddrBits-1, 12))
  )
  val s0_vaddr = Cat(s0_vaddr_hi, s0_vaddr_lo(11,0))
  val s0_mask = genWmask(s0_vaddr_lo, s0_uop.ctrl.fuOpType(1,0))

  // query DTLB
  io.dtlbReq.valid := io.in.valid
  io.dtlbReq.bits.vaddr := s0_vaddr
  io.dtlbReq.bits.cmd := TlbCmd.read
  io.dtlbReq.bits.roqIdx := s0_uop.roqIdx
  io.dtlbReq.bits.debug.pc := s0_uop.cf.pc
  io.dtlbReq.bits.debug.isFirstIssue := io.isFirstIssue

  // query DCache
  io.dcacheReq.valid := io.in.valid
  io.dcacheReq.bits.cmd  := MemoryOpConstants.M_XRD
  io.dcacheReq.bits.addr := s0_vaddr
  io.dcacheReq.bits.mask := s0_mask
  io.dcacheReq.bits.data := DontCare

  // TODO: update cache meta
  io.dcacheReq.bits.id   := DontCare

  // natural-alignment check, selected by the access size in fuOpType(1, 0)
  val addrAligned = LookupTree(s0_uop.ctrl.fuOpType(1, 0), List(
    "b00".U   -> true.B,                   //b
    "b01".U   -> (s0_vaddr(0)    === 0.U), //h
    "b10".U   -> (s0_vaddr(1, 0) === 0.U), //w
    "b11".U   -> (s0_vaddr(2, 0) === 0.U)  //d
  ))

  // the load only moves on to S1 when the DCache has taken the request
  io.out.valid := io.in.valid && io.dcacheReq.ready

  // fields not set below are left as DontCare in this stage
  io.out.bits := DontCare
  io.out.bits.vaddr := s0_vaddr
  io.out.bits.mask := s0_mask
  io.out.bits.uop := s0_uop
  io.out.bits.uop.cf.exceptionVec(loadAddrMisaligned) := !addrAligned
  io.out.bits.rsIdx := io.rsIdx

  io.in.ready := !io.in.valid || (io.out.ready && io.dcacheReq.ready)

  XSDebug(io.dcacheReq.fire(),
    p"[DCACHE LOAD REQ] pc ${Hexadecimal(s0_uop.cf.pc)}, vaddr ${Hexadecimal(s0_vaddr)}\n"
  )
  XSPerf("in", io.in.valid)
  XSPerf("stall_out", io.out.valid && !io.out.ready && io.dcacheReq.ready)
  // Gate on io.in.valid rather than io.out.valid: io.out.valid already requires
  // io.dcacheReq.ready, so pairing it with !io.dcacheReq.ready could never be true
  // and the counter would stay at zero.
  XSPerf("stall_dcache", io.in.valid && io.out.ready && !io.dcacheReq.ready)
}
87024ee227SWilliam Wang
887962cc88SWilliam Wang
// Load Pipeline Stage 1
// TLB resp (send paddr to dcache)
//
// Takes the DTLB response, sends the physical address (and a kill signal) to the
// DCache, and issues the store-to-load forward queries to the sbuffer and the LSQ.
class LoadUnit_S1 extends XSModule {
  val io = IO(new Bundle() {
    val in = Flipped(Decoupled(new LsPipelineBundle))
    val out = Decoupled(new LsPipelineBundle)
    val dtlbResp = Flipped(DecoupledIO(new TlbResp))
    val dcachePAddr = Output(UInt(PAddrBits.W))
    val dcacheKill = Output(Bool())
    val sbuffer = new LoadForwardQueryIO
    val lsq = new MaskedLoadForwardQueryIO
  })

  // values carried over from S0
  val s1_uop = io.in.bits.uop
  val s1_mask = io.in.bits.mask

  // values taken from the DTLB response
  val s1_paddr = io.dtlbResp.bits.paddr
  val s1_tlb_miss = io.dtlbResp.bits.miss
  // the mmio flag of the response is ignored while the TLB reports a miss
  val s1_mmio = io.dtlbResp.bits.mmio && !s1_tlb_miss
  // evaluated on the outgoing exception vector, so the page/access faults
  // written into io.out below are taken into account as well
  val s1_exception = selectLoad(io.out.bits.uop.cf.exceptionVec, false).asUInt.orR

  // the TLB response is always accepted
  io.dtlbResp.ready := true.B

  // TODO: PMA check
  io.dcachePAddr := s1_paddr
  // cancel the DCache access on a TLB miss, an exception, or an MMIO address
  io.dcacheKill := Seq(s1_tlb_miss, s1_exception, s1_mmio).reduce(_ || _)

  // load forward query datapath: sbuffer
  io.sbuffer.valid := io.in.valid
  io.sbuffer.uop := s1_uop
  io.sbuffer.sqIdx := s1_uop.sqIdx
  io.sbuffer.paddr := s1_paddr
  io.sbuffer.mask := s1_mask
  io.sbuffer.pc := s1_uop.cf.pc // FIXME: remove it

  // load forward query datapath: lsq
  io.lsq.valid := io.in.valid
  io.lsq.uop := s1_uop
  io.lsq.sqIdx := s1_uop.sqIdx
  io.lsq.sqIdxMask := DontCare // will be overwritten by sqIdxMask pre-generated in s0
  io.lsq.paddr := s1_paddr
  io.lsq.mask := s1_mask
  io.lsq.pc := s1_uop.cf.pc // FIXME: remove it

  // Start from the S0 payload, then overwrite what this stage resolves.
  // The forwardMask / forwardData fields are not filled in here.
  io.out.valid := io.in.valid// && !s1_tlb_miss
  io.out.bits := io.in.bits
  io.out.bits.paddr := s1_paddr
  io.out.bits.tlbMiss := s1_tlb_miss
  io.out.bits.mmio := s1_mmio && !s1_exception
  io.out.bits.ptwBack := io.dtlbResp.bits.ptwBack
  io.out.bits.rsIdx := io.in.bits.rsIdx
  io.out.bits.uop.cf.exceptionVec(loadPageFault) := io.dtlbResp.bits.excp.pf.ld
  io.out.bits.uop.cf.exceptionVec(loadAccessFault) := io.dtlbResp.bits.excp.af.ld

  // an empty stage is always ready; an occupied one needs the next stage to be ready
  io.in.ready := io.out.ready || !io.in.valid

  XSPerf("in", io.in.valid)
  XSPerf("tlb_miss", io.in.valid && s1_tlb_miss)
  XSPerf("stall_out", io.out.valid && !io.out.ready)
}
1487962cc88SWilliam Wang
1497962cc88SWilliam Wang
// Load Pipeline Stage 2
// DCache resp
//
// Merges the DCache response with the bytes forwarded from the LSQ and the sbuffer,
// selects / extends the loaded data, and reports hit / replay feedback.
class LoadUnit_S2 extends XSModule with HasLoadHelper {
  val io = IO(new Bundle() {
    val in = Flipped(Decoupled(new LsPipelineBundle))
    val out = Decoupled(new LsPipelineBundle)
    val tlbFeedback = ValidIO(new TlbFeedback)
    val dcacheResp = Flipped(DecoupledIO(new DCacheWordResp))
    val lsq = new LoadForwardQueryIO
    val sbuffer = new LoadForwardQueryIO
    val dataForwarded = Output(Bool())
    val needReplayFromRS = Output(Bool())
  })

  val s2_uop = io.in.bits.uop
  val s2_mask = io.in.bits.mask
  val s2_paddr = io.in.bits.paddr
  val s2_tlb_miss = io.in.bits.tlbMiss
  val s2_exception = selectLoad(io.in.bits.uop.cf.exceptionVec, false).asUInt.orR
  val s2_mmio = io.in.bits.mmio && !s2_exception
  val s2_cache_miss = io.dcacheResp.bits.miss
  val s2_cache_replay = io.dcacheResp.bits.replay

  io.dcacheResp.ready := true.B
  // a response is only expected when the access was not killed in s1
  // (s1 kills it on tlb miss / exception / mmio)
  val dcacheShouldResp = !(s2_tlb_miss || s2_exception || s2_mmio)
  assert(!(io.in.valid && dcacheShouldResp && !io.dcacheResp.valid), "DCache response got lost")

  // feedback tlb result to RS
  io.tlbFeedback.valid := io.in.valid
  // "hit" requires a TLB hit and no dcache replay request;
  // a replay request is ignored for mmio accesses and for loads with an exception
  io.tlbFeedback.bits.hit := !s2_tlb_miss && (!s2_cache_replay || s2_mmio || s2_exception)
  io.tlbFeedback.bits.rsIdx := io.in.bits.rsIdx
  io.tlbFeedback.bits.flushState := io.in.bits.ptwBack
  io.needReplayFromRS := s2_cache_replay

  // merge forward result
  // lsq has higher priority than sbuffer
  val forwardMask = Wire(Vec(8, Bool()))
  val forwardData = Wire(Vec(8, UInt(8.W)))

  // every byte selected by the load mask is covered by forwarded data
  val fullForward = (~forwardMask.asUInt & s2_mask) === 0.U
  // only forwardMask / forwardData of these ports are used in this stage
  io.lsq := DontCare
  io.sbuffer := DontCare

  // generate XLEN/8 Muxs
  for (i <- 0 until XLEN / 8) {
    forwardMask(i) := io.lsq.forwardMask(i) || io.sbuffer.forwardMask(i)
    forwardData(i) := Mux(io.lsq.forwardMask(i), io.lsq.forwardData(i), io.sbuffer.forwardData(i))
  }

  // Print both forwarding sources that are merged above (lsq + sbuffer).
  // io.in.bits.forwardData / forwardMask are not driven by the earlier stages,
  // so they carry no useful information here.
  XSDebug(io.out.fire(), "[FWD LOAD RESP] pc %x fwd %x(%b) + %x(%b)\n",
    s2_uop.cf.pc,
    io.lsq.forwardData.asUInt, io.lsq.forwardMask.asUInt,
    io.sbuffer.forwardData.asUInt, io.sbuffer.forwardMask.asUInt
  )

  // data merge: forwarded bytes override the bytes returned by the dcache
  val rdataVec = VecInit((0 until XLEN / 8).map(j =>
    Mux(forwardMask(j), forwardData(j), io.dcacheResp.bits.data(8*(j+1)-1, 8*j))))
  val rdata = rdataVec.asUInt
  // move the addressed byte down to bit 0
  val rdataSel = LookupTree(s2_paddr(2, 0), List(
    "b000".U -> rdata(63, 0),
    "b001".U -> rdata(63, 8),
    "b010".U -> rdata(63, 16),
    "b011".U -> rdata(63, 24),
    "b100".U -> rdata(63, 32),
    "b101".U -> rdata(63, 40),
    "b110".U -> rdata(63, 48),
    "b111".U -> rdata(63, 56)
  ))
  val rdataPartialLoad = rdataHelper(s2_uop, rdataSel)

  io.out.valid := io.in.valid && !s2_tlb_miss
  // Inst will be canceled in store queue / lsq,
  // so we do not need to care about flush in load / store unit's out.valid
  io.out.bits := io.in.bits
  io.out.bits.data := rdataPartialLoad
  // when exception occurs, set it to not miss and let it write back to roq (via int port)
  io.out.bits.miss := s2_cache_miss && !s2_exception
  io.out.bits.uop.ctrl.fpWen := io.in.bits.uop.ctrl.fpWen && !s2_exception
  io.out.bits.mmio := s2_mmio

  // For timing reasons, we can not let
  // io.out.bits.miss := s2_cache_miss && !s2_exception && !fullForward
  // We use io.dataForwarded instead. It means forward logic have prepared all data needed,
  // and dcache query is no longer needed.
  // Such inst will be writebacked from load queue.
  io.dataForwarded := s2_cache_miss && fullForward && !s2_exception
  // io.out.bits.forwardX will be send to lq
  io.out.bits.forwardMask := forwardMask
  // data retrieved from dcache is also included in io.out.bits.forwardData
  io.out.bits.forwardData := rdataVec

  io.in.ready := io.out.ready || !io.in.valid

  XSDebug(io.out.fire(), "[DCACHE LOAD RESP] pc %x rdata %x <- D$ %x + fwd %x(%b)\n",
    s2_uop.cf.pc, rdataPartialLoad, io.dcacheResp.bits.data,
    forwardData.asUInt, forwardMask.asUInt
  )

  XSPerf("in", io.in.valid)
  XSPerf("dcache_miss", io.in.valid && s2_cache_miss)
  XSPerf("full_forward", io.in.valid && fullForward)
  XSPerf("dcache_miss_full_forward", io.in.valid && s2_cache_miss && fullForward)
  XSPerf("replay",  io.tlbFeedback.valid && !io.tlbFeedback.bits.hit)
  XSPerf("replay_tlb_miss", io.tlbFeedback.valid && !io.tlbFeedback.bits.hit && s2_tlb_miss)
  XSPerf("replay_cache", io.tlbFeedback.valid && !io.tlbFeedback.bits.hit && !s2_tlb_miss && s2_cache_replay)
  XSPerf("stall_out", io.out.valid && !io.out.ready)
}
2587962cc88SWilliam Wang
// Top level of the load pipeline: instantiates stages S0 / S1 / S2, connects them
// to the DTLB, DCache, sbuffer and LSQ, and arbitrates the writeback port.
class LoadUnit extends XSModule with HasLoadHelper {
  val io = IO(new Bundle() {
    val ldin = Flipped(Decoupled(new ExuInput))
    val ldout = Decoupled(new ExuOutput)
    val redirect = Flipped(ValidIO(new Redirect))
    val flush = Input(Bool())
    val tlbFeedback = ValidIO(new TlbFeedback)
    val rsIdx = Input(UInt(log2Up(IssQueSize).W))
    val isFirstIssue = Input(Bool())
    val dcache = new DCacheLoadIO
    val dtlb = new TlbRequestIO()
    val sbuffer = new LoadForwardQueryIO
    val lsq = new LoadToLsqIO
    val fastUop = ValidIO(new MicroOp) // early wakeup signal generated in load_s1
  })

  val load_s0 = Module(new LoadUnit_S0)
  val load_s1 = Module(new LoadUnit_S1)
  val load_s2 = Module(new LoadUnit_S2)

  // flush check of a stage's outgoing load against the current redirect / flush
  private def flushed(stageOut: LsPipelineBundle) =
    stageOut.uop.roqIdx.needFlush(io.redirect, io.flush)

  // ---- stage 0: address generation, DTLB / DCache request ----
  load_s0.io.in <> io.ldin
  load_s0.io.dtlbReq <> io.dtlb.req
  load_s0.io.dcacheReq <> io.dcache.req
  load_s0.io.rsIdx := io.rsIdx
  load_s0.io.isFirstIssue := io.isFirstIssue

  PipelineConnect(load_s0.io.out, load_s1.io.in, true.B, flushed(load_s0.io.out.bits))

  // ---- stage 1: DTLB response, paddr to DCache, forward queries ----
  load_s1.io.dtlbResp <> io.dtlb.resp
  io.dcache.s1_paddr <> load_s1.io.dcachePAddr
  io.dcache.s1_kill <> load_s1.io.dcacheKill
  load_s1.io.sbuffer <> io.sbuffer
  load_s1.io.lsq <> io.lsq.forward

  PipelineConnect(load_s1.io.out, load_s2.io.in, true.B, flushed(load_s1.io.out.bits))

  // ---- stage 2: DCache response and forward results ----
  load_s2.io.dcacheResp <> io.dcache.resp
  load_s2.io.lsq.forwardData <> io.lsq.forward.forwardData
  load_s2.io.lsq.forwardMask <> io.lsq.forward.forwardMask
  load_s2.io.sbuffer.forwardData <> io.sbuffer.forwardData
  load_s2.io.sbuffer.forwardMask <> io.sbuffer.forwardMask
  load_s2.io.dataForwarded <> io.lsq.loadDataForwarded
  // feedback is registered; a load flushed in its s2 cycle produces no feedback
  io.tlbFeedback.bits := RegNext(load_s2.io.tlbFeedback.bits)
  io.tlbFeedback.valid := RegNext(load_s2.io.tlbFeedback.valid && !flushed(load_s2.io.out.bits))
  io.lsq.needReplayFromRS := load_s2.io.needReplayFromRS

  // pre-calculate sqIdx mask in s0, then send it to lsq in s1 for forwarding
  val sqIdxMaskReg = RegNext(UIntToMask(load_s0.io.in.bits.uop.sqIdx.value, StoreQueueSize))
  io.lsq.forward.sqIdxMask := sqIdxMaskReg

  // // use s2_hit_way to select data received in s1
  // load_s2.io.dcacheResp.bits.data := Mux1H(RegNext(io.dcache.s1_hit_way), RegNext(io.dcache.s1_data))
  // assert(load_s2.io.dcacheResp.bits.data === io.dcache.resp.bits.data)

  // early wakeup: s1 holds a load, some dcache way hit, and fast wakeup is not disabled
  io.fastUop.valid := io.dcache.s1_hit_way.orR && !io.dcache.s1_disable_fast_wakeup && load_s1.io.in.valid
  io.fastUop.bits := load_s1.io.out.bits.uop

  XSDebug(load_s0.io.out.valid,
    p"S0: pc ${Hexadecimal(load_s0.io.out.bits.uop.cf.pc)}, lId ${Hexadecimal(load_s0.io.out.bits.uop.lqIdx.asUInt)}, " +
    p"vaddr ${Hexadecimal(load_s0.io.out.bits.vaddr)}, mask ${Hexadecimal(load_s0.io.out.bits.mask)}\n")
  XSDebug(load_s1.io.out.valid,
    p"S1: pc ${Hexadecimal(load_s1.io.out.bits.uop.cf.pc)}, lId ${Hexadecimal(load_s1.io.out.bits.uop.lqIdx.asUInt)}, tlb_miss ${io.dtlb.resp.bits.miss}, " +
    p"paddr ${Hexadecimal(load_s1.io.out.bits.paddr)}, mmio ${load_s1.io.out.bits.mmio}\n")

  // writeback to LSQ
  // Current dcache use MSHR
  // Load queue will be updated at s2 for both hit/miss int/fp load
  io.lsq.loadIn.valid := load_s2.io.out.valid
  io.lsq.loadIn.bits := load_s2.io.out.bits

  // s2 never stalls: its output is always consumed
  load_s2.io.out.ready := true.B

  // write to rob and writeback bus
  val s2_wb_valid = load_s2.io.out.valid && !load_s2.io.out.bits.miss

  // Int load, if hit, will be writebacked at s2
  val hitLoadOut = Wire(Valid(new ExuOutput))
  hitLoadOut.valid := s2_wb_valid
  hitLoadOut.bits.uop := load_s2.io.out.bits.uop
  hitLoadOut.bits.data := load_s2.io.out.bits.data
  hitLoadOut.bits.redirectValid := false.B
  hitLoadOut.bits.redirect := DontCare
  hitLoadOut.bits.fflags := DontCare
  hitLoadOut.bits.debug.isMMIO := load_s2.io.out.bits.mmio
  hitLoadOut.bits.debug.isPerfCnt := false.B
  hitLoadOut.bits.debug.paddr := load_s2.io.out.bits.paddr

  // the s2 writeback has priority; the LSQ writeback only gets the port when s2 is idle
  io.lsq.ldout.ready := !hitLoadOut.valid
  io.ldout.valid := hitLoadOut.valid || io.lsq.ldout.valid
  io.ldout.bits := Mux(hitLoadOut.valid, hitLoadOut.bits, io.lsq.ldout.bits)

  XSDebug(io.ldout.fire(), "ldout %x\n", io.ldout.bits.uop.cf.pc)
}
355