// xref: /XiangShan/src/main/scala/xiangshan/mem/pipeline/LoadUnit.scala (revision 4d8915fda0e6d6f5e1468fbbcaadb531c95e25c4)
package xiangshan.mem

import chisel3._
import chisel3.util._
import utils._
import xiangshan._
import xiangshan.cache.{DCacheWordIO, TlbRequestIO, TlbCmd, MemoryOpConstants}
import xiangshan.backend.LSUOpType
/** IO bundle between the load pipeline and the load/store reorder queue (lsroq). */
class LoadToLsroqIO extends XSBundle {
  // load pipeline result written back into the lsroq
  val loadIn = ValidIO(new LsPipelineBundle)
  // writeback driven by the lsroq (e.g. loads completed after a miss) back through this unit
  val ldout = Flipped(DecoupledIO(new ExuOutput))
  // store-to-load forwarding query sent to the lsroq
  val forward = new LoadForwardQueryIO
}
15
/**
 * Load pipeline (stages l2 .. l5):
 *   l2: generate the address, query DTLB and send the DCache request
 *   l3: DCache tag compare / data access (done inside DCache); report TLB feedback
 *   l4: receive the DCache response, query sbuffer/lsroq for store-to-load forwarding
 *   l5: merge forwarded bytes with cache data, select/extend, write back to CDB and lsroq
 */
class LoadUnit extends XSModule {
  val io = IO(new Bundle() {
    val ldin = Flipped(Decoupled(new ExuInput))
    val ldout = Decoupled(new ExuOutput)
    val redirect = Flipped(ValidIO(new Redirect))
    val tlbFeedback = ValidIO(new TlbFeedback)
    val dcache = new DCacheWordIO
    val dtlb = new TlbRequestIO()
    val sbuffer = new LoadForwardQueryIO
    val lsroq = new LoadToLsroqIO
  })

  when(io.ldin.valid){
    XSDebug("load enpipe %x iw %x fw %x\n", io.ldin.bits.uop.cf.pc, io.ldin.bits.uop.ctrl.rfWen, io.ldin.bits.uop.ctrl.fpWen)
  }

  //-------------------------------------------------------
  // Load Pipeline
  //-------------------------------------------------------

  val l2_out = Wire(Decoupled(new LsPipelineBundle))
  val l4_out = Wire(Decoupled(new LsPipelineBundle))
  val l5_in  = Wire(Flipped(Decoupled(new LsPipelineBundle)))

  //-------------------------------------------------------
  // LD Pipeline Stage 2
  // Generate addr, use addr to query DCache Tag and DTLB
  //-------------------------------------------------------

  // NOTE: `Wire(Bool())` replaces the deprecated `Wire(new Bool())` form.
  val l2_dtlb_hit  = Wire(Bool())
  val l2_dtlb_miss = Wire(Bool())
  val l2_dcache = Wire(Bool())  // request goes to DCache (TLB hit, non-MMIO)
  val l2_mmio = Wire(Bool())    // request is MMIO (TLB hit, MMIO address)
  val isMMIOReq = Wire(Bool())

  // send req to dtlb
  io.dtlb.req.valid := l2_out.valid
  io.dtlb.req.bits.vaddr := l2_out.bits.vaddr
  io.dtlb.req.bits.cmd := TlbCmd.read
  io.dtlb.req.bits.roqIdx := l2_out.bits.uop.roqIdx
  io.dtlb.req.bits.debug.pc := l2_out.bits.uop.cf.pc
  io.dtlb.req.bits.debug.lsroqIdx := l2_out.bits.uop.lsroqIdx // FIXME: need update

  l2_dtlb_hit  := io.dtlb.resp.valid && !io.dtlb.resp.bits.miss
  l2_dtlb_miss := io.dtlb.resp.valid && io.dtlb.resp.bits.miss
  isMMIOReq := AddressSpace.isMMIO(io.dtlb.resp.bits.paddr)
  l2_dcache := l2_dtlb_hit && !isMMIOReq
  l2_mmio   := l2_dtlb_hit && isMMIOReq

  // l2_out is used to generate dcache req
  l2_out.bits := DontCare
  l2_out.bits.vaddr := io.ldin.bits.src1 + io.ldin.bits.uop.ctrl.imm
  l2_out.bits.paddr := io.dtlb.resp.bits.paddr
  l2_out.bits.mask  := genWmask(l2_out.bits.vaddr, io.ldin.bits.uop.ctrl.fuOpType(1,0))
  l2_out.bits.uop   := io.ldin.bits.uop
  l2_out.bits.miss  := false.B
  l2_out.bits.mmio  := l2_mmio
  l2_out.valid := io.ldin.valid && !io.ldin.bits.uop.roqIdx.needFlush(io.redirect)
  // when we are sure it's a MMIO req, we do not need to wait for cache ready
  l2_out.ready := (l2_dcache && io.dcache.req.ready) || l2_mmio || l2_dtlb_miss
  io.ldin.ready := l2_out.ready

  // exception check: fuOpType(1,0) encodes the access size (b/h/w/d)
  val addrAligned = LookupTree(io.ldin.bits.uop.ctrl.fuOpType(1,0), List(
    "b00".U   -> true.B,                           //b
    "b01".U   -> (l2_out.bits.vaddr(0) === 0.U),   //h
    "b10".U   -> (l2_out.bits.vaddr(1,0) === 0.U), //w
    "b11".U   -> (l2_out.bits.vaddr(2,0) === 0.U)  //d
  ))
  // these overrides intentionally come after `l2_out.bits.uop := ...` (last connect wins)
  l2_out.bits.uop.cf.exceptionVec(loadAddrMisaligned) := !addrAligned
  l2_out.bits.uop.cf.exceptionVec(loadPageFault) := io.dtlb.resp.bits.excp.pf.ld

  // send result to dcache
  // never send tlb missed or MMIO reqs to dcache
  io.dcache.req.valid     := l2_dcache

  io.dcache.req.bits.cmd  := MemoryOpConstants.M_XRD
  // TODO: vaddr
  io.dcache.req.bits.addr := io.dtlb.resp.bits.paddr
  io.dcache.req.bits.data := DontCare
  io.dcache.req.bits.mask := l2_out.bits.mask

  io.dcache.req.bits.meta.id       := DontCare
  io.dcache.req.bits.meta.vaddr    := l2_out.bits.vaddr
  io.dcache.req.bits.meta.paddr    := io.dtlb.resp.bits.paddr
  io.dcache.req.bits.meta.uop      := l2_out.bits.uop
  io.dcache.req.bits.meta.mmio     := isMMIOReq
  io.dcache.req.bits.meta.tlb_miss := io.dtlb.resp.bits.miss
  io.dcache.req.bits.meta.mask     := l2_out.bits.mask
  io.dcache.req.bits.meta.replay   := false.B

  // TLB hit/miss feedback for the reservation station (registered and sent in l3)
  val l2_tlbFeedback = Wire(new TlbFeedback)
  l2_tlbFeedback.hit := !io.dtlb.resp.bits.miss
  l2_tlbFeedback.roqIdx := l2_out.bits.uop.roqIdx

  // dump l2
  XSDebug(l2_out.valid, "L2: pc 0x%x addr 0x%x -> 0x%x op %b data 0x%x mask %x dltb_miss %b dcache %b mmio %b\n",
    l2_out.bits.uop.cf.pc, l2_out.bits.vaddr, l2_out.bits.paddr,
    l2_out.bits.uop.ctrl.fuOpType, l2_out.bits.data, l2_out.bits.mask,
    l2_dtlb_miss, l2_dcache, l2_mmio)

  XSDebug(l2_out.fire(), "load req: pc 0x%x addr 0x%x -> 0x%x op %b\n",
    l2_out.bits.uop.cf.pc, l2_out.bits.vaddr, l2_out.bits.paddr, l2_out.bits.uop.ctrl.fuOpType)

  XSDebug(io.dcache.req.valid, p"dcache req(${io.dcache.req.valid} ${io.dcache.req.ready}): pc:0x${Hexadecimal(io.dcache.req.bits.meta.uop.cf.pc)} roqIdx:${io.dcache.req.bits.meta.uop.roqIdx} lsroqIdx:${io.dcache.req.bits.meta.uop.lsroqIdx} addr:0x${Hexadecimal(io.dcache.req.bits.addr)} vaddr:0x${Hexadecimal(io.dcache.req.bits.meta.vaddr)} paddr:0x${Hexadecimal(io.dcache.req.bits.meta.paddr)} mmio:${io.dcache.req.bits.meta.mmio} tlb_miss:${io.dcache.req.bits.meta.tlb_miss} mask:${io.dcache.req.bits.meta.mask}\n")

  //-------------------------------------------------------
  // LD Pipeline Stage 3
  // Compare tag, use addr to query DCache Data
  //-------------------------------------------------------

  val l3_valid = RegNext(l2_out.fire(), false.B)
  val l3_dtlb_miss = RegEnable(next = l2_dtlb_miss, enable = l2_out.fire(), init = false.B)
  val l3_dcache = RegEnable(next = l2_dcache, enable = l2_out.fire(), init = false.B)
  val l3_tlbFeedback = RegEnable(next = l2_tlbFeedback, enable = l2_out.fire())
  val l3_bundle = RegEnable(next = l2_out.bits, enable = l2_out.fire())
  val l3_uop = l3_bundle.uop
  // dltb miss reqs ends here
  val l3_passdown = l3_valid && !l3_dtlb_miss && !l3_uop.roqIdx.needFlush(io.redirect)

  io.tlbFeedback.valid := l3_valid
  io.tlbFeedback.bits := l3_tlbFeedback
  // kill the in-flight DCache access if the uop got flushed by a redirect
  io.dcache.s1_kill := l3_valid && l3_dcache && l3_uop.roqIdx.needFlush(io.redirect)

  // dump l3
  XSDebug(l3_valid, "l3: pc 0x%x addr 0x%x -> 0x%x op %b data 0x%x mask %x dltb_miss %b dcache %b mmio %b\n",
    l3_bundle.uop.cf.pc, l3_bundle.vaddr, l3_bundle.paddr,
    l3_bundle.uop.ctrl.fuOpType, l3_bundle.data, l3_bundle.mask,
    l3_dtlb_miss, l3_dcache, l3_bundle.mmio)

  XSDebug(io.tlbFeedback.valid, "tlbFeedback: hit %b roqIdx %d\n",
    io.tlbFeedback.bits.hit, io.tlbFeedback.bits.roqIdx.asUInt)

  XSDebug(io.dcache.s1_kill, "l3: dcache s1_kill\n")

  // Done in Dcache

  //-------------------------------------------------------
  // LD Pipeline Stage 4
  // Dcache return result, do tag ecc check and forward check
  //-------------------------------------------------------

  val l4_valid = RegNext(l3_passdown, false.B)
  val l4_dcache = RegNext(l3_dcache, false.B)
  val l4_bundle = RegNext(l3_bundle)

  // true when store-to-load forwarding covers every byte requested by the load mask
  val fullForward = Wire(Bool())

  assert(!(io.dcache.resp.ready && !io.dcache.resp.valid), "DCache response got lost")
  io.dcache.resp.ready := l4_valid && l4_dcache
  when (io.dcache.resp.fire()) {
    l4_out.bits := DontCare
    l4_out.bits.data  := io.dcache.resp.bits.data
    l4_out.bits.paddr := io.dcache.resp.bits.meta.paddr
    l4_out.bits.uop   := io.dcache.resp.bits.meta.uop
    l4_out.bits.mmio  := io.dcache.resp.bits.meta.mmio
    l4_out.bits.mask  := io.dcache.resp.bits.meta.mask
    // when we can get the data completely from forward
    // we no longer need to access dcache
    // treat nack as miss
    l4_out.bits.miss  := Mux(fullForward, false.B,
      io.dcache.resp.bits.miss || io.dcache.resp.bits.nack)
    XSDebug(io.dcache.resp.fire(), p"DcacheResp(l4): data:0x${Hexadecimal(io.dcache.resp.bits.data)} paddr:0x${Hexadecimal(io.dcache.resp.bits.meta.paddr)} pc:0x${Hexadecimal(io.dcache.resp.bits.meta.uop.cf.pc)} roqIdx:${io.dcache.resp.bits.meta.uop.roqIdx} lsroqIdx:${io.dcache.resp.bits.meta.uop.lsroqIdx} miss:${io.dcache.resp.bits.miss}\n")
  } .otherwise {
    // non-dcache paths (e.g. MMIO) just carry the pipeline bundle forward
    l4_out.bits := l4_bundle
  }
  l4_out.valid := l4_valid && !l4_out.bits.uop.roqIdx.needFlush(io.redirect)

  // Store addr forward match
  // If match, get data / fmask from store queue / store buffer

  // io.lsroq.forward := DontCare
  io.lsroq.forward.paddr := l4_out.bits.paddr
  io.lsroq.forward.mask := io.dcache.resp.bits.meta.mask
  io.lsroq.forward.lsroqIdx := l4_out.bits.uop.lsroqIdx
  io.lsroq.forward.sqIdx := l4_out.bits.uop.sqIdx
  io.lsroq.forward.uop := l4_out.bits.uop
  io.lsroq.forward.pc := l4_out.bits.uop.cf.pc
  io.lsroq.forward.valid := io.dcache.resp.valid //TODO: opt timing

  io.sbuffer.paddr := l4_out.bits.paddr
  io.sbuffer.mask := io.dcache.resp.bits.meta.mask
  io.sbuffer.lsroqIdx := l4_out.bits.uop.lsroqIdx
  io.sbuffer.sqIdx := l4_out.bits.uop.sqIdx
  io.sbuffer.uop := DontCare
  io.sbuffer.pc := l4_out.bits.uop.cf.pc
  io.sbuffer.valid := l4_out.valid

  // merge forwarding results: lsroq (younger stores) overrides sbuffer per byte
  val forwardVec = WireInit(io.sbuffer.forwardData)
  val forwardMask = WireInit(io.sbuffer.forwardMask)
  // generate XLEN/8 Muxs
  (0 until XLEN/8).map(j => {
    when(io.lsroq.forward.forwardMask(j)) {
      forwardMask(j) := true.B
      forwardVec(j) := io.lsroq.forward.forwardData(j)
    }
  })
  // these assignments intentionally follow the when/otherwise above (last connect wins)
  l4_out.bits.forwardMask := forwardMask
  l4_out.bits.forwardData := forwardVec
  fullForward := (~l4_out.bits.forwardMask.asUInt & l4_out.bits.mask) === 0.U

  // missed or MMIO loads leave the pipeline here (handled by lsroq), others flow to l5
  PipelineConnect(l4_out, l5_in, io.ldout.fire() || (l5_in.bits.miss || l5_in.bits.mmio) && l5_in.valid, false.B)

  XSDebug(l4_valid, "l4: out.valid:%d pc 0x%x addr 0x%x -> 0x%x op %b data 0x%x mask %x forwardData: 0x%x forwardMask: %x dcache %b mmio %b miss:%d\n",
    l4_out.valid, l4_out.bits.uop.cf.pc, l4_out.bits.vaddr, l4_out.bits.paddr,
    l4_out.bits.uop.ctrl.fuOpType, l4_out.bits.data, l4_out.bits.mask,
    l4_out.bits.forwardData.asUInt, l4_out.bits.forwardMask.asUInt, l4_dcache, l4_out.bits.mmio, l4_out.bits.miss)

  XSDebug(l5_in.valid, "L5(%d %d): pc 0x%x addr 0x%x -> 0x%x op %b data 0x%x mask %x forwardData: 0x%x forwardMask: %x\n",
    l5_in.valid, l5_in.ready, l5_in.bits.uop.cf.pc,  l5_in.bits.vaddr, l5_in.bits.paddr,
    l5_in.bits.uop.ctrl.fuOpType , l5_in.bits.data,  l5_in.bits.mask,
    l5_in.bits.forwardData.asUInt, l5_in.bits.forwardMask.asUInt)

  XSDebug(l4_valid, "l4: sbuffer forwardData: 0x%x forwardMask: %x\n",
    io.sbuffer.forwardData.asUInt, io.sbuffer.forwardMask.asUInt)

  XSDebug(l4_valid, "l4: lsroq forwardData: 0x%x forwardMask: %x\n",
    io.lsroq.forward.forwardData.asUInt, io.lsroq.forward.forwardMask.asUInt)

  XSDebug(io.redirect.valid,
    p"Redirect: excp:${io.redirect.bits.isException} flushPipe:${io.redirect.bits.isFlushPipe} misp:${io.redirect.bits.isMisPred} " +
    p"replay:${io.redirect.bits.isReplay} pc:0x${Hexadecimal(io.redirect.bits.pc)} target:0x${Hexadecimal(io.redirect.bits.target)} " +
    p"brTag:${io.redirect.bits.brTag} l2:${io.ldin.bits.uop.roqIdx.needFlush(io.redirect)} l3:${l3_uop.roqIdx.needFlush(io.redirect)} " +
    p"l4:${l4_out.bits.uop.roqIdx.needFlush(io.redirect)}\n"
  )
  //-------------------------------------------------------
  // LD Pipeline Stage 5
  // Do data ecc check, merge result and write back to LS ROQ
  // If cache hit, return writeback result to CDB
  //-------------------------------------------------------

  val loadWriteBack = l5_in.fire()

  // data merge: byte-wise select forwarded store data over dcache data
  val rdata = VecInit((0 until 8).map(j => {
    Mux(l5_in.bits.forwardMask(j),
      l5_in.bits.forwardData(j),
      l5_in.bits.data(8*(j+1)-1, 8*j)
    )
  })).asUInt
  val func = l5_in.bits.uop.ctrl.fuOpType
  val raddr = l5_in.bits.paddr
  // shift the 64-bit word right by the byte offset within the doubleword
  val rdataSel = LookupTree(raddr(2, 0), List(
    "b000".U -> rdata(63, 0),
    "b001".U -> rdata(63, 8),
    "b010".U -> rdata(63, 16),
    "b011".U -> rdata(63, 24),
    "b100".U -> rdata(63, 32),
    "b101".U -> rdata(63, 40),
    "b110".U -> rdata(63, 48),
    "b111".U -> rdata(63, 56)
  ))
  // sign-/zero-extend according to the load opcode
  val rdataPartialLoad = LookupTree(func, List(
      LSUOpType.lb   -> SignExt(rdataSel(7, 0) , XLEN),
      LSUOpType.lh   -> SignExt(rdataSel(15, 0), XLEN),
      LSUOpType.lw   -> SignExt(rdataSel(31, 0), XLEN),
      LSUOpType.ld   -> SignExt(rdataSel(63, 0), XLEN),
      LSUOpType.lbu  -> ZeroExt(rdataSel(7, 0) , XLEN),
      LSUOpType.lhu  -> ZeroExt(rdataSel(15, 0), XLEN),
      LSUOpType.lwu  -> ZeroExt(rdataSel(31, 0), XLEN)
  ))

  // ecc check
  // TODO

  // if hit, writeback result to CDB
  // val ldout = Vec(2, Decoupled(new ExuOutput))
  // when io.loadIn(i).fire() && !io.io.loadIn(i).miss, commit load to cdb
  val hitLoadOut = Wire(Decoupled(new ExuOutput))
  hitLoadOut.bits.uop := l5_in.bits.uop
  hitLoadOut.bits.data := rdataPartialLoad
  hitLoadOut.bits.redirectValid := false.B
  hitLoadOut.bits.redirect := DontCare
  hitLoadOut.bits.brUpdate := DontCare
  hitLoadOut.bits.debug.isMMIO := l5_in.bits.mmio
  hitLoadOut.valid := l5_in.valid && !l5_in.bits.mmio && !l5_in.bits.miss // MMIO will be done in lsroq
  XSDebug(hitLoadOut.fire(), "load writeback: pc %x data %x (%x + %x(%b))\n",
    hitLoadOut.bits.uop.cf.pc, rdataPartialLoad, l5_in.bits.data,
    l5_in.bits.forwardData.asUInt, l5_in.bits.forwardMask.asUInt
  )

  // writeback to LSROQ
  // Current dcache use MSHR

  io.lsroq.loadIn.bits := l5_in.bits
  io.lsroq.loadIn.bits.data := rdataPartialLoad // for debug
  io.lsroq.loadIn.valid := loadWriteBack

  // pipeline control
  l5_in.ready := io.ldout.ready

  // arbitrate CDB writeback between hit loads (port 0) and lsroq miss loads (port 1)
  val cdbArb = Module(new Arbiter(new ExuOutput, 2))
  io.ldout <> cdbArb.io.out
  hitLoadOut <> cdbArb.io.in(0)
  io.lsroq.ldout <> cdbArb.io.in(1) // missLoadOut

  when(io.ldout.fire()){
    XSDebug("ldout %x iw %x fw %x\n", io.ldout.bits.uop.cf.pc, io.ldout.bits.uop.ctrl.rfWen, io.ldout.bits.uop.ctrl.fpWen)
  }
}
317