// xref: /XiangShan/src/main/scala/xiangshan/mem/pipeline/LoadUnit.scala (revision 3136ee6a0600eb77c23effda5389fe3f858a813c)
package xiangshan.mem

import chisel3._
import chisel3.util._
import utils._
import xiangshan._
import xiangshan.cache.{DCacheWordIO, TlbRequestIO, TlbCmd, MemoryOpConstants}
import xiangshan.backend.LSUOpType
import xiangshan.backend.fu.fpu.boxF32ToF64

/** Port bundle connecting a load unit to the LSROQ.
  *
  * Directions are given from the load unit's point of view:
  *  - `loadIn`:  valid-qualified pipeline bundle sent to the LSROQ.
  *  - `ldout`:   flipped decoupled `ExuOutput`, i.e. the LSROQ is the producer
  *               and the load unit is the consumer.
  *  - `forward`: forward query port (`LoadForwardQueryIO`).
  */
class LoadToLsroqIO extends XSBundle {
  val loadIn = ValidIO(new LsPipelineBundle)
  val ldout = Flipped(DecoupledIO(new ExuOutput))
  val forward = new LoadForwardQueryIO
}

/** Load pipeline unit.
  *
  * The pipeline is organised in the stages named in the section headers below:
  *  - Stage 2 (`l2_*`): compute the virtual address (`src1 + imm`), query the
  *    DTLB, check alignment, and issue the request to the DCache when the DTLB
  *    hits and the physical address is not MMIO.
  *  - Stage 3 (`l3_*`): register the stage-2 result, report TLB feedback and
  *    kill the in-flight DCache request if the uop is flushed by a redirect.
  *  - Stage 4 (`l4_*`): take the DCache response (or the registered bundle when
  *    no response fires), query LSROQ and store buffer for forwarded bytes.
  *  - Stage 5 (`l5_in`): merge forwarded bytes with cache data, select and
  *    extend the loaded value, write back through an arbiter shared with the
  *    LSROQ's `ldout`, and report the load to the LSROQ.
  *
  * Ports:
  *  - `ldin`:        incoming load uop and operands.
  *  - `ldout`:       writeback output (arbitrated between hit loads and `lsroq.ldout`).
  *  - `redirect`:    redirect used to flush uops in every stage.
  *  - `tlbFeedback`: per-load DTLB hit/miss feedback, driven from stage 3.
  *  - `dcache`:      DCache word request/response port.
  *  - `dtlb`:        DTLB request/response port.
  *  - `sbuffer`:     forward query to the store buffer.
  *  - `lsroq`:       LSROQ interface (see `LoadToLsroqIO`).
  */
class LoadUnit extends XSModule {
  val io = IO(new Bundle() {
    val ldin = Flipped(Decoupled(new ExuInput))
    val ldout = Decoupled(new ExuOutput)
    val redirect = Flipped(ValidIO(new Redirect))
    val tlbFeedback = ValidIO(new TlbFeedback)
    val dcache = new DCacheWordIO
    val dtlb = new TlbRequestIO()
    val sbuffer = new LoadForwardQueryIO
    val lsroq = new LoadToLsroqIO
  })

  when(io.ldin.valid){
    XSDebug("load enpipe %x iw %x fw %x\n", io.ldin.bits.uop.cf.pc, io.ldin.bits.uop.ctrl.rfWen, io.ldin.bits.uop.ctrl.fpWen)
  }

  //-------------------------------------------------------
  // Load Pipeline
  //-------------------------------------------------------

  val l2_out = Wire(Decoupled(new LsPipelineBundle))
  val l4_out = Wire(Decoupled(new LsPipelineBundle))
  val l5_in  = Wire(Flipped(Decoupled(new LsPipelineBundle)))

  //-------------------------------------------------------
  // LD Pipeline Stage 2
  // Generate addr, use addr to query DCache Tag and DTLB
  //-------------------------------------------------------

  // Classification of the current stage-2 request, derived from the DTLB response.
  val l2_dtlb_hit  = Wire(Bool())
  val l2_dtlb_miss = Wire(Bool())
  val l2_dcache = Wire(Bool())
  val l2_mmio = Wire(Bool())
  val isMMIOReq = Wire(Bool())

  // send req to dtlb
  io.dtlb.req.valid := l2_out.valid
  io.dtlb.req.bits.vaddr := l2_out.bits.vaddr
  io.dtlb.req.bits.cmd := TlbCmd.read
  io.dtlb.req.bits.roqIdx := l2_out.bits.uop.roqIdx
  io.dtlb.req.bits.debug.pc := l2_out.bits.uop.cf.pc
  io.dtlb.req.bits.debug.lsroqIdx := l2_out.bits.uop.lsroqIdx // FIXME: need update

  l2_dtlb_hit  := io.dtlb.resp.valid && !io.dtlb.resp.bits.miss
  l2_dtlb_miss := io.dtlb.resp.valid && io.dtlb.resp.bits.miss
  isMMIOReq := AddressSpace.isMMIO(io.dtlb.resp.bits.paddr)
  // A DTLB hit goes either to the DCache or is marked MMIO, never both.
  l2_dcache := l2_dtlb_hit && !isMMIOReq
  l2_mmio   := l2_dtlb_hit && isMMIOReq

  // l2_out is used to generate dcache req
  // Default everything to DontCare first; the field connections below override it.
  l2_out.bits := DontCare
  l2_out.bits.vaddr := io.ldin.bits.src1 + io.ldin.bits.uop.ctrl.imm
  l2_out.bits.paddr := io.dtlb.resp.bits.paddr
  l2_out.bits.mask  := genWmask(l2_out.bits.vaddr, io.ldin.bits.uop.ctrl.fuOpType(1,0))
  l2_out.bits.uop   := io.ldin.bits.uop
  l2_out.bits.miss  := false.B
  l2_out.bits.mmio  := l2_mmio
  l2_out.valid := io.ldin.valid && !io.ldin.bits.uop.roqIdx.needFlush(io.redirect)
  // when we are sure it's a MMIO req, we do not need to wait for cache ready
  // (a DTLB miss also leaves stage 2 without waiting for the cache)
  l2_out.ready := (l2_dcache && io.dcache.req.ready) || l2_mmio || l2_dtlb_miss
  io.ldin.ready := l2_out.ready

  // exception check
  // fuOpType(1,0) selects the access size: byte / half / word / double.
  val addrAligned = LookupTree(io.ldin.bits.uop.ctrl.fuOpType(1,0), List(
    "b00".U   -> true.B,              //b
    "b01".U   -> (l2_out.bits.vaddr(0) === 0.U),   //h
    "b10".U   -> (l2_out.bits.vaddr(1,0) === 0.U), //w
    "b11".U   -> (l2_out.bits.vaddr(2,0) === 0.U)  //d
  ))
  // These must stay after `l2_out.bits.uop := io.ldin.bits.uop` so that they
  // override the corresponding exceptionVec entries copied from the input uop.
  l2_out.bits.uop.cf.exceptionVec(loadAddrMisaligned) := !addrAligned
  l2_out.bits.uop.cf.exceptionVec(loadPageFault) := io.dtlb.resp.bits.excp.pf.ld

  // send result to dcache
  // never send tlb missed or MMIO reqs to dcache
  io.dcache.req.valid     := l2_dcache

  io.dcache.req.bits.cmd  := MemoryOpConstants.M_XRD
  // TODO: vaddr
  io.dcache.req.bits.addr := io.dtlb.resp.bits.paddr
  io.dcache.req.bits.data := DontCare
  io.dcache.req.bits.mask := l2_out.bits.mask

  // Metadata travels with the request and is read back from the response in stage 4.
  io.dcache.req.bits.meta.id       := DontCare
  io.dcache.req.bits.meta.vaddr    := l2_out.bits.vaddr
  io.dcache.req.bits.meta.paddr    := io.dtlb.resp.bits.paddr
  io.dcache.req.bits.meta.uop      := l2_out.bits.uop
  io.dcache.req.bits.meta.mmio     := isMMIOReq
  io.dcache.req.bits.meta.tlb_miss := io.dtlb.resp.bits.miss
  io.dcache.req.bits.meta.mask     := l2_out.bits.mask
  io.dcache.req.bits.meta.replay   := false.B


  // TLB feedback is computed here and reported one cycle later from stage 3.
  val l2_tlbFeedback = Wire(new TlbFeedback)
  l2_tlbFeedback.hit := !io.dtlb.resp.bits.miss
  l2_tlbFeedback.roqIdx := l2_out.bits.uop.roqIdx

  // dump l2
  XSDebug(l2_out.valid, "L2: pc 0x%x addr 0x%x -> 0x%x op %b data 0x%x mask %x dltb_miss %b dcache %b mmio %b\n",
    l2_out.bits.uop.cf.pc, l2_out.bits.vaddr, l2_out.bits.paddr,
    l2_out.bits.uop.ctrl.fuOpType, l2_out.bits.data, l2_out.bits.mask,
    l2_dtlb_miss, l2_dcache, l2_mmio)

  XSDebug(l2_out.fire(), "load req: pc 0x%x addr 0x%x -> 0x%x op %b\n",
    l2_out.bits.uop.cf.pc, l2_out.bits.vaddr, l2_out.bits.paddr, l2_out.bits.uop.ctrl.fuOpType)

  XSDebug(io.dcache.req.valid, p"dcache req(${io.dcache.req.valid} ${io.dcache.req.ready}): pc:0x${Hexadecimal(io.dcache.req.bits.meta.uop.cf.pc)} roqIdx:${io.dcache.req.bits.meta.uop.roqIdx} lsroqIdx:${io.dcache.req.bits.meta.uop.lsroqIdx} addr:0x${Hexadecimal(io.dcache.req.bits.addr)} vaddr:0x${Hexadecimal(io.dcache.req.bits.meta.vaddr)} paddr:0x${Hexadecimal(io.dcache.req.bits.meta.paddr)} mmio:${io.dcache.req.bits.meta.mmio} tlb_miss:${io.dcache.req.bits.meta.tlb_miss} mask:${io.dcache.req.bits.meta.mask}\n")

  //-------------------------------------------------------
  // LD Pipeline Stage 3
  // Compare tag, use addr to query DCache Data
  //-------------------------------------------------------

  // Stage-3 state is captured only when stage 2 fires.
  val l3_valid = RegNext(l2_out.fire(), false.B)
  val l3_dtlb_miss = RegEnable(next = l2_dtlb_miss, enable = l2_out.fire(), init = false.B)
  val l3_dcache = RegEnable(next = l2_dcache, enable = l2_out.fire(), init = false.B)
  val l3_tlbFeedback = RegEnable(next = l2_tlbFeedback, enable = l2_out.fire())
  val l3_bundle = RegEnable(next = l2_out.bits, enable = l2_out.fire())
  val l3_uop = l3_bundle.uop
  // dtlb-miss requests end here: they are not passed down to stage 4
  val l3_passdown = l3_valid && !l3_dtlb_miss && !l3_uop.roqIdx.needFlush(io.redirect)

  io.tlbFeedback.valid := l3_valid
  io.tlbFeedback.bits := l3_tlbFeedback
  // Kill the DCache access issued last cycle if its uop is being flushed.
  io.dcache.s1_kill := l3_valid && l3_dcache && l3_uop.roqIdx.needFlush(io.redirect)

  // dump l3
  XSDebug(l3_valid, "l3: pc 0x%x addr 0x%x -> 0x%x op %b data 0x%x mask %x dltb_miss %b dcache %b mmio %b\n",
    l3_bundle.uop.cf.pc, l3_bundle.vaddr, l3_bundle.paddr,
    l3_bundle.uop.ctrl.fuOpType, l3_bundle.data, l3_bundle.mask,
    l3_dtlb_miss, l3_dcache, l3_bundle.mmio)

  XSDebug(io.tlbFeedback.valid, "tlbFeedback: hit %b roqIdx %d\n",
    io.tlbFeedback.bits.hit, io.tlbFeedback.bits.roqIdx.asUInt)

  XSDebug(io.dcache.s1_kill, "l3: dcache s1_kill\n")

  // Done in Dcache

  //-------------------------------------------------------
  // LD Pipeline Stage 4
  // Dcache return result, do tag ecc check and forward check
  //-------------------------------------------------------

  val l4_valid = RegNext(l3_passdown, false.B)
  val l4_dcache = RegNext(l3_dcache, false.B)
  val l4_bundle = RegNext(l3_bundle)

  // True when every byte requested by the load mask is covered by forwarded data.
  val fullForward = Wire(Bool())

  // Whenever this stage expects a DCache response, the response must be valid.
  assert(!(io.dcache.resp.ready && !io.dcache.resp.valid), "DCache response got lost")
  io.dcache.resp.ready := l4_valid && l4_dcache
  when (io.dcache.resp.fire()) {
    // DCache path: rebuild the bundle from the response and its metadata.
    l4_out.bits := DontCare
    l4_out.bits.data  := io.dcache.resp.bits.data
    l4_out.bits.paddr := io.dcache.resp.bits.meta.paddr
    l4_out.bits.uop   := io.dcache.resp.bits.meta.uop
    l4_out.bits.mmio  := io.dcache.resp.bits.meta.mmio
    l4_out.bits.mask  := io.dcache.resp.bits.meta.mask
    // when we can get the data completely from forward
    // we no longer need to access dcache
    // treat nack as miss
    l4_out.bits.miss  := Mux(fullForward, false.B,
      io.dcache.resp.bits.miss || io.dcache.resp.bits.nack)
    XSDebug(io.dcache.resp.fire(), p"DcacheResp(l4): data:0x${Hexadecimal(io.dcache.resp.bits.data)} paddr:0x${Hexadecimal(io.dcache.resp.bits.meta.paddr)} pc:0x${Hexadecimal(io.dcache.resp.bits.meta.uop.cf.pc)} roqIdx:${io.dcache.resp.bits.meta.uop.roqIdx} lsroqIdx:${io.dcache.resp.bits.meta.uop.lsroqIdx} miss:${io.dcache.resp.bits.miss}\n")
  } .otherwise {
    // No DCache response this cycle: use the bundle registered from stage 3.
    l4_out.bits := l4_bundle
  }
  l4_out.valid := l4_valid && !l4_out.bits.uop.roqIdx.needFlush(io.redirect)

  // Store addr forward match
  // If match, get data / fmask from store queue / store buffer

  // io.lsroq.forward := DontCare
  // NOTE(review): paddr/uop come from l4_out, but mask and valid are taken
  // directly from the dcache response; these differ when no response fires
  // (the `.otherwise` branch above) - confirm this is intended.
  io.lsroq.forward.paddr := l4_out.bits.paddr
  io.lsroq.forward.mask := io.dcache.resp.bits.meta.mask
  io.lsroq.forward.lsroqIdx := l4_out.bits.uop.lsroqIdx
  io.lsroq.forward.sqIdx := l4_out.bits.uop.sqIdx
  io.lsroq.forward.uop := l4_out.bits.uop
  io.lsroq.forward.pc := l4_out.bits.uop.cf.pc
  io.lsroq.forward.valid := io.dcache.resp.valid //TODO: opt timing

  io.sbuffer.paddr := l4_out.bits.paddr
  io.sbuffer.mask := io.dcache.resp.bits.meta.mask
  io.sbuffer.lsroqIdx := l4_out.bits.uop.lsroqIdx
  io.sbuffer.sqIdx := l4_out.bits.uop.sqIdx
  io.sbuffer.uop := DontCare
  io.sbuffer.pc := l4_out.bits.uop.cf.pc
  io.sbuffer.valid := l4_out.valid

  // Start from the store-buffer result; bytes forwarded by the LSROQ override it.
  val forwardVec = WireInit(io.sbuffer.forwardData)
  val forwardMask = WireInit(io.sbuffer.forwardMask)
  // generate XLEN/8 Muxs
  (0 until XLEN/8).foreach { j =>
    when(io.lsroq.forward.forwardMask(j)) {
      forwardMask(j) := true.B
      forwardVec(j) := io.lsroq.forward.forwardData(j)
    }
  }
  // These connections come after the when/otherwise on l4_out.bits above, so by
  // last-connect semantics they override forwardMask/forwardData in both branches.
  l4_out.bits.forwardMask := forwardMask
  l4_out.bits.forwardData := forwardVec
  fullForward := (~l4_out.bits.forwardMask.asUInt & l4_out.bits.mask) === 0.U

  // Stage 4 -> stage 5 register. The third argument is high when the hit load
  // writes back, or when a valid stage-5 entry is a miss or MMIO access.
  // NOTE(review): presumably this is the "stage 5 entry leaves" condition and the
  // last argument is a flush - confirm against utils.PipelineConnect.
  PipelineConnect(l4_out, l5_in, io.ldout.fire() || (l5_in.bits.miss || l5_in.bits.mmio) && l5_in.valid, false.B)

  XSDebug(l4_valid, "l4: out.valid:%d pc 0x%x addr 0x%x -> 0x%x op %b data 0x%x mask %x forwardData: 0x%x forwardMask: %x dcache %b mmio %b miss:%d\n",
    l4_out.valid, l4_out.bits.uop.cf.pc, l4_out.bits.vaddr, l4_out.bits.paddr,
    l4_out.bits.uop.ctrl.fuOpType, l4_out.bits.data, l4_out.bits.mask,
    l4_out.bits.forwardData.asUInt, l4_out.bits.forwardMask.asUInt, l4_dcache, l4_out.bits.mmio, l4_out.bits.miss)

  XSDebug(l5_in.valid, "L5(%d %d): pc 0x%x addr 0x%x -> 0x%x op %b data 0x%x mask %x forwardData: 0x%x forwardMask: %x\n",
    l5_in.valid, l5_in.ready, l5_in.bits.uop.cf.pc,  l5_in.bits.vaddr, l5_in.bits.paddr,
    l5_in.bits.uop.ctrl.fuOpType , l5_in.bits.data,  l5_in.bits.mask,
    l5_in.bits.forwardData.asUInt, l5_in.bits.forwardMask.asUInt)

  XSDebug(l4_valid, "l4: sbuffer forwardData: 0x%x forwardMask: %x\n",
    io.sbuffer.forwardData.asUInt, io.sbuffer.forwardMask.asUInt)

  XSDebug(l4_valid, "l4: lsroq forwardData: 0x%x forwardMask: %x\n",
    io.lsroq.forward.forwardData.asUInt, io.lsroq.forward.forwardMask.asUInt)

  XSDebug(io.redirect.valid,
    p"Redirect: excp:${io.redirect.bits.isException} flushPipe:${io.redirect.bits.isFlushPipe} misp:${io.redirect.bits.isMisPred} " +
    p"replay:${io.redirect.bits.isReplay} pc:0x${Hexadecimal(io.redirect.bits.pc)} target:0x${Hexadecimal(io.redirect.bits.target)} " +
    p"brTag:${io.redirect.bits.brTag} l2:${io.ldin.bits.uop.roqIdx.needFlush(io.redirect)} l3:${l3_uop.roqIdx.needFlush(io.redirect)} " +
    p"l4:${l4_out.bits.uop.roqIdx.needFlush(io.redirect)}\n"
  )
  //-------------------------------------------------------
  // LD Pipeline Stage 5
  // Do data ecc check, merge result and write back to LS ROQ
  // If cache hit, return writeback result to CDB
  //-------------------------------------------------------

  val loadWriteBack = l5_in.fire()

  // data merge
  // Per byte: take the forwarded byte when its mask bit is set, else the cache byte.
  val rdata = VecInit((0 until 8).map(j => {
    Mux(l5_in.bits.forwardMask(j),
      l5_in.bits.forwardData(j),
      l5_in.bits.data(8*(j+1)-1, 8*j)
    )
  })).asUInt
  val func = l5_in.bits.uop.ctrl.fuOpType
  val raddr = l5_in.bits.paddr
  // Shift the addressed byte down to bit 0 using the low three address bits.
  val rdataSel = LookupTree(raddr(2, 0), List(
    "b000".U -> rdata(63, 0),
    "b001".U -> rdata(63, 8),
    "b010".U -> rdata(63, 16),
    "b011".U -> rdata(63, 24),
    "b100".U -> rdata(63, 32),
    "b101".U -> rdata(63, 40),
    "b110".U -> rdata(63, 48),
    "b111".U -> rdata(63, 56)
  ))
  // Truncate to the access width and sign-/zero-extend (or box, for flw) per op type.
  val rdataPartialLoad = LookupTree(func, List(
    LSUOpType.lb   -> SignExt(rdataSel(7, 0) , XLEN),
    LSUOpType.lh   -> SignExt(rdataSel(15, 0), XLEN),
    LSUOpType.lw   -> SignExt(rdataSel(31, 0), XLEN),
    LSUOpType.ld   -> SignExt(rdataSel(63, 0), XLEN),
    LSUOpType.lbu  -> ZeroExt(rdataSel(7, 0) , XLEN),
    LSUOpType.lhu  -> ZeroExt(rdataSel(15, 0), XLEN),
    LSUOpType.lwu  -> ZeroExt(rdataSel(31, 0), XLEN),
    LSUOpType.flw  -> boxF32ToF64(rdataSel(31, 0))
  ))

  // ecc check
  // TODO

  // if hit, writeback result to CDB
  // val ldout = Vec(2, Decoupled(new ExuOutput))
  // when io.loadIn(i).fire() && !io.io.loadIn(i).miss, commit load to cdb
  val hitLoadOut = Wire(Decoupled(new ExuOutput))
  hitLoadOut.bits.uop := l5_in.bits.uop
  hitLoadOut.bits.data := rdataPartialLoad
  hitLoadOut.bits.fflags := DontCare
  hitLoadOut.bits.redirectValid := false.B
  hitLoadOut.bits.redirect := DontCare
  hitLoadOut.bits.brUpdate := DontCare
  hitLoadOut.bits.debug.isMMIO := l5_in.bits.mmio
  hitLoadOut.valid := l5_in.valid && !l5_in.bits.mmio && !l5_in.bits.miss // MMIO will be done in lsroq
  XSDebug(hitLoadOut.fire(), "load writeback: pc %x data %x (%x + %x(%b))\n",
    hitLoadOut.bits.uop.cf.pc, rdataPartialLoad, l5_in.bits.data,
    l5_in.bits.forwardData.asUInt, l5_in.bits.forwardMask.asUInt
  )

  // writeback to LSROQ
  // Current dcache use MSHR

  io.lsroq.loadIn.bits := l5_in.bits
  // Overrides the data field of the whole-bundle connection just above.
  io.lsroq.loadIn.bits.data := rdataPartialLoad // for debug
  io.lsroq.loadIn.valid := loadWriteBack

  // pipeline control
  l5_in.ready := io.ldout.ready

  // Writeback arbiter: input 0 is the hit load from this pipeline, input 1 is
  // the LSROQ's ldout. chisel3.util.Arbiter gives priority to the lower index.
  val cdbArb = Module(new Arbiter(new ExuOutput, 2))
  io.ldout <> cdbArb.io.out
  hitLoadOut <> cdbArb.io.in(0)
  io.lsroq.ldout <> cdbArb.io.in(1) // missLoadOut

  when(io.ldout.fire()){
    XSDebug("ldout %x iw %x fw %x\n", io.ldout.bits.uop.cf.pc, io.ldout.bits.uop.ctrl.rfWen, io.ldout.bits.uop.ctrl.fpWen)
  }
}