package xiangshan.mem

import chisel3._
import chisel3.util._
import utils._
import xiangshan._
import xiangshan.cache._
// import xiangshan.cache.{DCacheWordIO, TlbRequestIO, TlbCmd, MemoryOpConstants, TlbReq, DCacheLoadReq, DCacheWordResp}
import xiangshan.backend.LSUOpType

// Interface between the load unit and the load/store queue (LSQ).
//   loadIn  : stage-2 result of every load, sent to the LSQ (valid-only, no back-pressure)
//   ldout   : load results handed back by the LSQ; LoadUnit accepts them only
//             when the matching writeback port is not taken by a stage-2 hit
//   forward : store-to-load forwarding query port
class LoadToLsqIO extends XSBundle {
  val loadIn = ValidIO(new LsPipelineBundle)
  val ldout = Flipped(DecoupledIO(new ExuOutput))
  val forward = new LoadForwardQueryIO
}

// Load Pipeline Stage 0
// Generate addr, use addr to query DCache and DTLB
//
// The virtual address is src1 + imm. The same request is presented to the
// DTLB and to the DCache in the same cycle; only the DCache's `ready` gates
// progress (dtlbReq.ready is not consulted in this module).
class LoadUnit_S0 extends XSModule {
  val io = IO(new Bundle() {
    val in = Flipped(Decoupled(new ExuInput))
    val out = Decoupled(new LsPipelineBundle)
    val dtlbReq = DecoupledIO(new TlbReq)
    val dcacheReq = DecoupledIO(new DCacheLoadReq)
  })

  val s0_uop = io.in.bits.uop
  val s0_vaddr = io.in.bits.src1 + s0_uop.ctrl.imm
  // fuOpType(1,0) selects the access size (b/h/w/d, see addrAligned below)
  val s0_mask = genWmask(s0_vaddr, s0_uop.ctrl.fuOpType(1,0))

  // query DTLB
  io.dtlbReq.valid := io.in.valid
  io.dtlbReq.bits.vaddr := s0_vaddr
  io.dtlbReq.bits.cmd := TlbCmd.read
  io.dtlbReq.bits.roqIdx := s0_uop.roqIdx
  io.dtlbReq.bits.debug.pc := s0_uop.cf.pc

  // query DCache
  io.dcacheReq.valid := io.in.valid
  io.dcacheReq.bits.cmd := MemoryOpConstants.M_XRD
  io.dcacheReq.bits.addr := s0_vaddr
  io.dcacheReq.bits.mask := s0_mask
  io.dcacheReq.bits.data := DontCare

  // TODO: update cache meta
  io.dcacheReq.bits.meta.id := DontCare
  io.dcacheReq.bits.meta.vaddr := s0_vaddr
  io.dcacheReq.bits.meta.paddr := DontCare
  io.dcacheReq.bits.meta.uop := s0_uop
  io.dcacheReq.bits.meta.mmio := false.B
  io.dcacheReq.bits.meta.tlb_miss := false.B
  io.dcacheReq.bits.meta.mask := s0_mask
  io.dcacheReq.bits.meta.replay := false.B

  // Natural-alignment check, indexed by access size
  val addrAligned = LookupTree(s0_uop.ctrl.fuOpType(1, 0), List(
    "b00".U -> true.B, //b
    "b01".U -> (s0_vaddr(0) === 0.U), //h
    "b10".U -> (s0_vaddr(1, 0) === 0.U), //w
    "b11".U -> (s0_vaddr(2, 0) === 0.U) //d
  ))

  // The load only moves on to stage 1 if the DCache accepted the request
  io.out.valid := io.in.valid && io.dcacheReq.ready

  // Fields not listed below are left unconnected here; paddr, mmio and
  // tlbMiss are driven in stage 1
  io.out.bits := DontCare
  io.out.bits.vaddr := s0_vaddr
  io.out.bits.mask := s0_mask
  io.out.bits.uop := s0_uop
  io.out.bits.uop.cf.exceptionVec(loadAddrMisaligned) := !addrAligned

  io.in.ready := !io.in.valid || (io.out.ready && io.dcacheReq.ready)

  XSDebug(io.dcacheReq.fire(),
    p"[DCACHE LOAD REQ] pc ${Hexadecimal(s0_uop.cf.pc)}, vaddr ${Hexadecimal(s0_vaddr)}\n"
  )
}


// Load Pipeline Stage 1
// TLB resp (send paddr to dcache)
//
// Also reports TLB hit/miss back through `tlbFeedback` and issues the
// store-to-load forwarding queries to both the sbuffer and the LSQ.
// A load that misses in the TLB is not passed on to stage 2.
class LoadUnit_S1 extends XSModule {
  val io = IO(new Bundle() {
    val in = Flipped(Decoupled(new LsPipelineBundle))
    val out = Decoupled(new LsPipelineBundle)
    val dtlbResp = Flipped(DecoupledIO(new TlbResp))
    val tlbFeedback = ValidIO(new TlbFeedback)
    val dcachePAddr = Output(UInt(PAddrBits.W))
    val sbuffer = new LoadForwardQueryIO
    val lsq = new LoadForwardQueryIO
  })

  val s1_uop = io.in.bits.uop
  val s1_paddr = io.dtlbResp.bits.paddr
  val s1_tlb_miss = io.dtlbResp.bits.miss
  // A load is MMIO only if translation succeeded and no exception is pending
  // (the exception vector read here includes the page-fault bit set below)
  val s1_mmio = !s1_tlb_miss && AddressSpace.isMMIO(s1_paddr) && !io.out.bits.uop.cf.exceptionVec.asUInt.orR
  val s1_mask = io.in.bits.mask

  io.out.bits := io.in.bits // forwardXX field will be updated in s1

  io.dtlbResp.ready := true.B
  // feedback tlb result to RS
  io.tlbFeedback.valid := io.in.valid
  io.tlbFeedback.bits.hit := !s1_tlb_miss
  io.tlbFeedback.bits.roqIdx := s1_uop.roqIdx

  io.dcachePAddr := s1_paddr

  // load forward query datapath
  io.sbuffer.valid := io.in.valid
  io.sbuffer.paddr := s1_paddr
  io.sbuffer.uop := s1_uop
  io.sbuffer.sqIdx := s1_uop.sqIdx
  io.sbuffer.mask := s1_mask
  io.sbuffer.pc := s1_uop.cf.pc // FIXME: remove it

  io.lsq.valid := io.in.valid
  io.lsq.paddr := s1_paddr
  io.lsq.uop := s1_uop
  io.lsq.sqIdx := s1_uop.sqIdx
  io.lsq.mask := s1_mask
  io.lsq.pc := s1_uop.cf.pc // FIXME: remove it

  // Drop the load on a TLB miss; the miss is reported via tlbFeedback.hit
  io.out.valid := io.in.valid && !s1_tlb_miss
  io.out.bits.paddr := s1_paddr
  io.out.bits.mmio := s1_mmio
  io.out.bits.tlbMiss := s1_tlb_miss
  io.out.bits.uop.cf.exceptionVec(loadPageFault) := io.dtlbResp.bits.excp.pf.ld

  io.in.ready := !io.in.valid || io.out.ready

}


// Load Pipeline Stage 2
// DCache resp
//
// Merges the DCache data with bytes forwarded from the sbuffer / LSQ, shifts
// the result according to the low address bits and passes it to rdataHelper.
// The load is marked as a miss only if the DCache missed or nacked AND
// forwarding did not cover every byte selected by the mask.
class LoadUnit_S2 extends XSModule with HasLoadHelper {
  val io = IO(new Bundle() {
    val in = Flipped(Decoupled(new LsPipelineBundle))
    val out = Decoupled(new LsPipelineBundle)
    val dcacheResp = Flipped(DecoupledIO(new DCacheWordResp))
    val lsq = new LoadForwardQueryIO
    val sbuffer = new LoadForwardQueryIO
  })

  val s2_uop = io.in.bits.uop
  val s2_mask = io.in.bits.mask
  val s2_paddr = io.in.bits.paddr
  val s2_cache_miss = io.dcacheResp.bits.miss
  val s2_cache_nack = io.dcacheResp.bits.nack


  io.dcacheResp.ready := true.B
  // A valid load in this stage must always see a DCache response in the same cycle
  assert(!(io.in.valid && !io.dcacheResp.valid), "DCache response got lost")

  // These alias the *merged* forward result on io.out (see "merge forward result" below)
  val forwardMask = io.out.bits.forwardMask
  val forwardData = io.out.bits.forwardData
  // True when every byte requested by the load mask is supplied by forwarding
  val fullForward = (~forwardMask.asUInt & s2_mask) === 0.U

  XSDebug(io.out.fire(), "[FWD LOAD RESP] pc %x fwd %x(%b) + %x(%b)\n",
    s2_uop.cf.pc,
    io.lsq.forwardData.asUInt, io.lsq.forwardMask.asUInt,
    io.in.bits.forwardData.asUInt, io.in.bits.forwardMask.asUInt
  )

  // data merge: per byte, forwarded data overrides the DCache data
  val rdata = VecInit((0 until XLEN / 8).map(j =>
    Mux(forwardMask(j), forwardData(j), io.dcacheResp.bits.data(8*(j+1)-1, 8*j)))).asUInt
  // Shift the addressed byte down to bit 0
  val rdataSel = LookupTree(s2_paddr(2, 0), List(
    "b000".U -> rdata(63, 0),
    "b001".U -> rdata(63, 8),
    "b010".U -> rdata(63, 16),
    "b011".U -> rdata(63, 24),
    "b100".U -> rdata(63, 32),
    "b101".U -> rdata(63, 40),
    "b110".U -> rdata(63, 48),
    "b111".U -> rdata(63, 56)
  ))
  val rdataPartialLoad = rdataHelper(s2_uop, rdataSel)

  // TODO: ECC check

  io.out.valid := io.in.valid
  // Inst will be canceled in store queue / lsq,
  // so we do not need to care about flush in load / store unit's out.valid
  io.out.bits := io.in.bits
  io.out.bits.data := rdataPartialLoad
  io.out.bits.miss := (s2_cache_miss || s2_cache_nack) && !fullForward
  io.out.bits.mmio := io.in.bits.mmio

  io.in.ready := io.out.ready || !io.in.valid

  // merge forward result
  // lsq has higher priority than sbuffer
  // (the lsq `when` comes last, so it wins under last-connect semantics)
  // Only forwardMask / forwardData of these two ports are read in this stage;
  // everything else on them is marked DontCare.
  io.lsq := DontCare
  io.sbuffer := DontCare
  // generate XLEN/8 Muxs
  for (i <- 0 until XLEN / 8) {
    when (io.sbuffer.forwardMask(i)) {
      io.out.bits.forwardMask(i) := true.B
      io.out.bits.forwardData(i) := io.sbuffer.forwardData(i)
    }
    when (io.lsq.forwardMask(i)) {
      io.out.bits.forwardMask(i) := true.B
      io.out.bits.forwardData(i) := io.lsq.forwardData(i)
    }
  }

  XSDebug(io.out.fire(), "[DCACHE LOAD RESP] pc %x rdata %x <- D$ %x + fwd %x(%b)\n",
    s2_uop.cf.pc, rdataPartialLoad, io.dcacheResp.bits.data,
    io.out.bits.forwardData.asUInt, io.out.bits.forwardMask.asUInt
  )
}

// Top level of the load pipeline: wires the three stages together and
// arbitrates the writeback ports between stage-2 hits and results coming
// back from the LSQ.
//   ldout : integer load writeback
//   fpout : floating-point load writeback (one cycle later, after recoding)
// Note that neither io.ldout.ready nor io.fpout.ready is consulted here.
class LoadUnit extends XSModule with HasLoadHelper {
  val io = IO(new Bundle() {
    val ldin = Flipped(Decoupled(new ExuInput))
    val ldout = Decoupled(new ExuOutput)
    val fpout = Decoupled(new ExuOutput)
    val redirect = Flipped(ValidIO(new Redirect))
    val tlbFeedback = ValidIO(new TlbFeedback)
    val dcache = new DCacheLoadIO
    val dtlb = new TlbRequestIO()
    val sbuffer = new LoadForwardQueryIO
    val lsq = new LoadToLsqIO
  })

  val load_s0 = Module(new LoadUnit_S0)
  val load_s1 = Module(new LoadUnit_S1)
  val load_s2 = Module(new LoadUnit_S2)

  load_s0.io.in <> io.ldin
  load_s0.io.dtlbReq <> io.dtlb.req
  load_s0.io.dcacheReq <> io.dcache.req

  PipelineConnect(load_s0.io.out, load_s1.io.in, true.B, load_s0.io.out.bits.uop.roqIdx.needFlush(io.redirect))

  load_s1.io.dtlbResp <> io.dtlb.resp
  load_s1.io.tlbFeedback <> io.tlbFeedback
  io.dcache.s1_paddr <> load_s1.io.dcachePAddr
  io.dcache.s1_kill := DontCare // FIXME
  load_s1.io.sbuffer <> io.sbuffer
  load_s1.io.lsq <> io.lsq.forward

  PipelineConnect(load_s1.io.out, load_s2.io.in, true.B, load_s1.io.out.bits.uop.roqIdx.needFlush(io.redirect))

  load_s2.io.dcacheResp <> io.dcache.resp
  // The forward query is issued from s1; its result is consumed by s2
  load_s2.io.lsq.forwardData <> io.lsq.forward.forwardData
  load_s2.io.lsq.forwardMask <> io.lsq.forward.forwardMask
  load_s2.io.sbuffer.forwardData <> io.sbuffer.forwardData
  load_s2.io.sbuffer.forwardMask <> io.sbuffer.forwardMask

  XSDebug(load_s0.io.out.valid,
    p"S0: pc ${Hexadecimal(load_s0.io.out.bits.uop.cf.pc)}, lId ${Hexadecimal(load_s0.io.out.bits.uop.lqIdx.asUInt)}, " +
    p"vaddr ${Hexadecimal(load_s0.io.out.bits.vaddr)}, mask ${Hexadecimal(load_s0.io.out.bits.mask)}\n")
  XSDebug(load_s1.io.out.valid,
    p"S1: pc ${Hexadecimal(load_s1.io.out.bits.uop.cf.pc)}, lId ${Hexadecimal(load_s1.io.out.bits.uop.lqIdx.asUInt)}, tlb_miss ${io.dtlb.resp.bits.miss}, " +
    p"paddr ${Hexadecimal(load_s1.io.out.bits.paddr)}, mmio ${load_s1.io.out.bits.mmio}\n")

  // writeback to LSQ
  // Current dcache use MSHR
  // Load queue will be updated at s2 for both hit/miss int/fp load
  io.lsq.loadIn.valid := load_s2.io.out.valid
  io.lsq.loadIn.bits := load_s2.io.out.bits
  // A load writes back directly from s2 if it did not miss, or if it carries an exception
  val s2Valid = load_s2.io.out.valid && (!load_s2.io.out.bits.miss || load_s2.io.out.bits.uop.cf.exceptionVec.asUInt.orR)
  // Whether the result currently offered by the LSQ targets the fp register file
  val refillFpLoad = io.lsq.ldout.bits.uop.ctrl.fpWen

  // Int load, if hit, will be written back at s2
  val intHitLoadOut = Wire(Valid(new ExuOutput))
  intHitLoadOut.valid := s2Valid && !load_s2.io.out.bits.uop.ctrl.fpWen
  intHitLoadOut.bits.uop := load_s2.io.out.bits.uop
  intHitLoadOut.bits.data := load_s2.io.out.bits.data
  intHitLoadOut.bits.redirectValid := false.B
  intHitLoadOut.bits.redirect := DontCare
  intHitLoadOut.bits.brUpdate := DontCare
  intHitLoadOut.bits.debug.isMMIO := load_s2.io.out.bits.mmio
  intHitLoadOut.bits.fflags := DontCare

  load_s2.io.out.ready := true.B

  // An s2 hit has priority over a result coming back from the LSQ
  io.ldout.bits := Mux(intHitLoadOut.valid, intHitLoadOut.bits, io.lsq.ldout.bits)
  io.ldout.valid := intHitLoadOut.valid || io.lsq.ldout.valid && !refillFpLoad

  // Fp load, if hit, will be sent to the recoder at s2, then it will be recoded & written back at s3
  val fpHitLoadOut = Wire(Valid(new ExuOutput))
  fpHitLoadOut.valid := s2Valid && load_s2.io.out.bits.uop.ctrl.fpWen
  fpHitLoadOut.bits := intHitLoadOut.bits

  val fpLoadOut = Wire(Valid(new ExuOutput))
  fpLoadOut.bits := Mux(fpHitLoadOut.valid, fpHitLoadOut.bits, io.lsq.ldout.bits)
  fpLoadOut.valid := fpHitLoadOut.valid || io.lsq.ldout.valid && refillFpLoad

  val fpLoadOutReg = RegNext(fpLoadOut)
  io.fpout.bits := fpLoadOutReg.bits
  io.fpout.bits.data := fpRdataHelper(fpLoadOutReg.bits.uop, fpLoadOutReg.bits.data) // recode
  // The flush check must look at the uop that is actually being registered.
  // fpLoadOut may carry a result from the LSQ rather than the s2 load, so
  // checking load_s2's roqIdx would make the decision on an unrelated uop.
  // On the s2-hit path both uops are identical, so that path is unchanged.
  io.fpout.valid := RegNext(fpLoadOut.valid && !fpLoadOut.bits.uop.roqIdx.needFlush(io.redirect))

  // Accept an LSQ result only when the port it needs is not taken by an s2 hit
  io.lsq.ldout.ready := Mux(refillFpLoad, !fpHitLoadOut.valid, !intHitLoadOut.valid)

  when(io.ldout.fire()){
    XSDebug("ldout %x\n", io.ldout.bits.uop.cf.pc)
  }

  when(io.fpout.fire()){
    XSDebug("fpout %x\n", io.fpout.bits.uop.cf.pc)
  }
}