package xiangshan.mem

import chisel3._
import chisel3.util._
import utils._
import xiangshan._
import xiangshan.cache._
// import xiangshan.cache.{DCacheWordIO, TlbRequestIO, TlbCmd, MemoryOpConstants, TlbReq, DCacheLoadReq, DCacheWordResp}
import xiangshan.backend.LSUOpType
import xiangshan.backend.fu.fpu.boxF32ToF64

/** Interface between the load unit and the load/store queue (LSQ).
  *
  *  - `loadIn`  : result of load pipeline stage 2, driven by [[LoadUnit]] every time S2 has a valid output.
  *  - `ldout`   : writeback coming from the LSQ; [[LoadUnit]] forwards it to its own `ldout`
  *                port whenever stage 2 has no hit result to write back in the same cycle.
  *  - `forward` : forward query driven by stage 1; its `forwardMask` / `forwardData`
  *                are consumed by stage 2.
  */
class LoadToLsqIO extends XSBundle {
  val loadIn = ValidIO(new LsPipelineBundle)
  val ldout = Flipped(DecoupledIO(new ExuOutput))
  val forward = new LoadForwardQueryIO
}

// Load Pipeline Stage 0
// Generate addr, use addr to query DCache and DTLB
class LoadUnit_S0 extends XSModule {
  val io = IO(new Bundle() {
    val in = Flipped(Decoupled(new ExuInput))
    val out = Decoupled(new LsPipelineBundle)
    val redirect = Flipped(ValidIO(new Redirect))
    val dtlbReq = DecoupledIO(new TlbReq)
    val dtlbResp = Flipped(DecoupledIO(new TlbResp))
    val tlbFeedback = ValidIO(new TlbFeedback)
    val dcacheReq = DecoupledIO(new DCacheLoadReq)
  })

  val s0_uop = io.in.bits.uop
  // virtual address = base register + immediate
  val s0_vaddr = io.in.bits.src1 + s0_uop.ctrl.imm
  // paddr / miss are taken from the DTLB response in this same stage
  val s0_paddr = io.dtlbResp.bits.paddr
  val s0_tlb_miss = io.dtlbResp.bits.miss
  // byte mask derived from the address and the low two bits of fuOpType
  val s0_mask = genWmask(s0_vaddr, s0_uop.ctrl.fuOpType(1,0))

  // query DTLB
  // NOTE(review): the request is raised whenever io.in.valid is set, independent of
  // io.out.ready / io.dcacheReq.ready -- confirm this is intended.
  io.dtlbReq.valid := io.in.valid
  io.dtlbReq.bits.vaddr := s0_vaddr
  io.dtlbReq.bits.cmd := TlbCmd.read
  io.dtlbReq.bits.roqIdx := s0_uop.roqIdx
  io.dtlbReq.bits.debug.pc := s0_uop.cf.pc
  io.dtlbResp.ready := true.B

  // feedback tlb result to RS
  // Note: can be moved to s1
  io.tlbFeedback.valid := io.out.valid
  io.tlbFeedback.bits.hit := !s0_tlb_miss
  io.tlbFeedback.bits.roqIdx := s0_uop.roqIdx

  // query DCache
  io.dcacheReq.valid := io.in.valid
  io.dcacheReq.bits.cmd := MemoryOpConstants.M_XRD
  io.dcacheReq.bits.addr := s0_vaddr
  io.dcacheReq.bits.mask := s0_mask
  io.dcacheReq.bits.data := DontCare

  // TODO: update cache meta
  io.dcacheReq.bits.meta.id := DontCare
  io.dcacheReq.bits.meta.vaddr := s0_vaddr
  io.dcacheReq.bits.meta.paddr := DontCare
  io.dcacheReq.bits.meta.uop := s0_uop
  io.dcacheReq.bits.meta.mmio := false.B
  io.dcacheReq.bits.meta.tlb_miss := false.B
  io.dcacheReq.bits.meta.mask := s0_mask
  io.dcacheReq.bits.meta.replay := false.B

  // alignment check keyed on the low two bits of fuOpType
  val addrAligned = LookupTree(s0_uop.ctrl.fuOpType(1, 0), List(
    "b00".U -> true.B, //b
    "b01".U -> (s0_vaddr(0) === 0.U), //h
    "b10".U -> (s0_vaddr(1, 0) === 0.U), //w
    "b11".U -> (s0_vaddr(2, 0) === 0.U) //d
  ))

  // the instruction only leaves S0 if the dcache accepted the request and it is not flushed
  io.out.valid := io.in.valid && io.dcacheReq.ready && !s0_uop.roqIdx.needFlush(io.redirect)

  io.out.bits := DontCare
  io.out.bits.vaddr := s0_vaddr
  io.out.bits.paddr := s0_paddr
  io.out.bits.tlbMiss := s0_tlb_miss
  io.out.bits.mask := s0_mask
  io.out.bits.uop := s0_uop
  io.out.bits.uop.cf.exceptionVec(loadAddrMisaligned) := !addrAligned
  io.out.bits.uop.cf.exceptionVec(loadPageFault) := io.dtlbResp.bits.excp.pf.ld

  io.in.ready := !io.in.valid || (io.out.ready && io.dcacheReq.ready)

  XSDebug(io.dcacheReq.fire(),
    p"[DCACHE LOAD REQ] pc ${Hexadecimal(s0_uop.cf.pc)}, " +
    p"vaddr ${Hexadecimal(s0_vaddr)}, paddr will be ${Hexadecimal(s0_paddr)}\n"
  )
}


// Load Pipeline Stage 1
// TLB resp (send paddr to dcache)
class LoadUnit_S1 extends XSModule {
  val io = IO(new Bundle() {
    val in = Flipped(Decoupled(new LsPipelineBundle))
    val out = Decoupled(new LsPipelineBundle)
    val redirect = Flipped(ValidIO(new Redirect))
    val s1_paddr = Output(UInt(PAddrBits.W))
    val sbuffer = new LoadForwardQueryIO
    val lsq = new LoadForwardQueryIO
  })

  val s1_uop = io.in.bits.uop
  val s1_paddr = io.in.bits.paddr
  val s1_tlb_miss = io.in.bits.tlbMiss
  // MMIO only if the translation succeeded, the paddr is in MMIO space and no exception is pending.
  // io.out.bits.uop is a plain copy of io.in.bits.uop (see below), so the exception
  // vector is read from the input side instead of reading back the module's own output.
  val s1_mmio = !s1_tlb_miss && AddressSpace.isMMIO(s1_paddr) && !s1_uop.cf.exceptionVec.asUInt.orR
  val s1_mask = io.in.bits.mask

  io.out.bits := io.in.bits // forwardXX field will be updated in s1
  io.s1_paddr := s1_paddr

  // load forward query datapath
  io.sbuffer.valid := io.in.valid
  io.sbuffer.paddr := s1_paddr
  io.sbuffer.uop := s1_uop
  io.sbuffer.sqIdx := s1_uop.sqIdx
  io.sbuffer.mask := s1_mask
  io.sbuffer.pc := s1_uop.cf.pc // FIXME: remove it

  io.lsq.valid := io.in.valid
  io.lsq.paddr := s1_paddr
  io.lsq.uop := s1_uop
  io.lsq.sqIdx := s1_uop.sqIdx
  io.lsq.mask := s1_mask
  io.lsq.pc := s1_uop.cf.pc // FIXME: remove it

  // only the sbuffer forward result is captured here; the LSQ forward result is merged in S2
  io.out.bits.forwardMask := io.sbuffer.forwardMask
  io.out.bits.forwardData := io.sbuffer.forwardData

  // a TLB miss or a flush stops the instruction at this stage
  io.out.valid := io.in.valid && !s1_tlb_miss && !s1_uop.roqIdx.needFlush(io.redirect)
  io.out.bits.paddr := s1_paddr
  io.out.bits.mmio := s1_mmio
  io.out.bits.tlbMiss := s1_tlb_miss

  io.in.ready := !io.in.valid || io.out.ready

}


// Load Pipeline Stage 2
// DCache resp
class LoadUnit_S2 extends XSModule {
  val io = IO(new Bundle() {
    val in = Flipped(Decoupled(new LsPipelineBundle))
    val out = Decoupled(new LsPipelineBundle)
    val dcacheResp = Flipped(DecoupledIO(new DCacheWordResp))
    val lsq = new LoadForwardQueryIO
  })

  val s2_uop = io.in.bits.uop
  val s2_mask = io.in.bits.mask
  val s2_paddr = io.in.bits.paddr
  val s2_cache_miss = io.dcacheResp.bits.miss
  val s2_cache_nack = io.dcacheResp.bits.nack


  io.dcacheResp.ready := true.B
  assert(!(io.in.valid && !io.dcacheResp.valid), "DCache response got lost")

  // These are read back from io.out.bits, so (by last-connect semantics) they already
  // include the per-byte LSQ override applied in the "merge forward result" loop below.
  val forwardMask = io.out.bits.forwardMask
  val forwardData = io.out.bits.forwardData
  // every byte requested by the load mask is covered by forwarded data
  val fullForward = (~forwardMask.asUInt & s2_mask) === 0.U

  XSDebug(io.out.fire(), "[FWD LOAD RESP] pc %x fwd %x(%b) + %x(%b)\n",
    s2_uop.cf.pc,
    io.lsq.forwardData.asUInt, io.lsq.forwardMask.asUInt,
    io.in.bits.forwardData.asUInt, io.in.bits.forwardMask.asUInt
  )

  // data merge
  // per byte: forwarded data wins over the dcache response
  val rdata = VecInit((0 until XLEN / 8).map(j =>
    Mux(forwardMask(j), forwardData(j), io.dcacheResp.bits.data(8*(j+1)-1, 8*j)))).asUInt
  // shift the merged data right by the byte offset of the access
  val rdataSel = LookupTree(s2_paddr(2, 0), List(
    "b000".U -> rdata(63, 0),
    "b001".U -> rdata(63, 8),
    "b010".U -> rdata(63, 16),
    "b011".U -> rdata(63, 24),
    "b100".U -> rdata(63, 32),
    "b101".U -> rdata(63, 40),
    "b110".U -> rdata(63, 48),
    "b111".U -> rdata(63, 56)
  ))
  // sign / zero extend (or NaN-box for flw) according to the load type
  val rdataPartialLoad = LookupTree(s2_uop.ctrl.fuOpType, List(
    LSUOpType.lb -> SignExt(rdataSel(7, 0) , XLEN),
    LSUOpType.lh -> SignExt(rdataSel(15, 0), XLEN),
    LSUOpType.lw -> SignExt(rdataSel(31, 0), XLEN),
    LSUOpType.ld -> SignExt(rdataSel(63, 0), XLEN),
    LSUOpType.lbu -> ZeroExt(rdataSel(7, 0) , XLEN),
    LSUOpType.lhu -> ZeroExt(rdataSel(15, 0), XLEN),
    LSUOpType.lwu -> ZeroExt(rdataSel(31, 0), XLEN),
    LSUOpType.flw -> boxF32ToF64(rdataSel(31, 0))
  ))

  // TODO: ECC check

  io.out.valid := io.in.valid
  // Inst will be canceled in store queue / lsq,
  // so we do not need to care about flush in load / store unit's out.valid
  io.out.bits := io.in.bits
  io.out.bits.data := rdataPartialLoad
  // a cache miss / nack does not count as a miss when all requested bytes were forwarded
  io.out.bits.miss := (s2_cache_miss || s2_cache_nack) && !fullForward
  io.out.bits.mmio := io.in.bits.mmio

  io.in.ready := io.out.ready || !io.in.valid

  // merge forward result
  io.lsq := DontCare
  // generate XLEN/8 Muxs
  // bytes forwarded by the LSQ override the values carried in from S1
  for (i <- 0 until XLEN / 8) {
    when(io.lsq.forwardMask(i)) {
      io.out.bits.forwardMask(i) := true.B
      io.out.bits.forwardData(i) := io.lsq.forwardData(i)
    }
  }

  XSDebug(io.out.fire(), "[DCACHE LOAD RESP] pc %x rdata %x <- D$ %x + fwd %x(%b)\n",
    s2_uop.cf.pc, rdataPartialLoad, io.dcacheResp.bits.data,
    io.out.bits.forwardData.asUInt, io.out.bits.forwardMask.asUInt
  )

}

/** Top level of the load pipeline: chains [[LoadUnit_S0]], [[LoadUnit_S1]] and
  * [[LoadUnit_S2]] with `PipelineConnect`, wires them to the DTLB, DCache, store
  * buffer and LSQ, and multiplexes the writeback port between a stage-2 hit and
  * the LSQ's own writeback.
  */
class LoadUnit extends XSModule {
  val io = IO(new Bundle() {
    val ldin = Flipped(Decoupled(new ExuInput))
    val ldout = Decoupled(new ExuOutput)
    val redirect = Flipped(ValidIO(new Redirect))
    val tlbFeedback = ValidIO(new TlbFeedback)
    val dcache = new DCacheLoadIO
    val dtlb = new TlbRequestIO()
    val sbuffer = new LoadForwardQueryIO
    val lsq = new LoadToLsqIO
  })

  val load_s0 = Module(new LoadUnit_S0)
  val load_s1 = Module(new LoadUnit_S1)
  val load_s2 = Module(new LoadUnit_S2)

  load_s0.io.in <> io.ldin
  load_s0.io.redirect <> io.redirect
  load_s0.io.dtlbReq <> io.dtlb.req
  load_s0.io.dtlbResp <> io.dtlb.resp
  load_s0.io.dcacheReq <> io.dcache.req
  load_s0.io.tlbFeedback <> io.tlbFeedback

  PipelineConnect(load_s0.io.out, load_s1.io.in, true.B, false.B)

  io.dcache.s1_paddr := load_s1.io.out.bits.paddr
  load_s1.io.redirect <> io.redirect
  io.dcache.s1_kill := DontCare // FIXME
  load_s1.io.sbuffer <> io.sbuffer
  load_s1.io.lsq <> io.lsq.forward

  PipelineConnect(load_s1.io.out, load_s2.io.in, true.B, false.B)

  load_s2.io.dcacheResp <> io.dcache.resp
  // S2 only consumes the LSQ forward result; the query itself is driven by S1 above
  load_s2.io.lsq := DontCare
  load_s2.io.lsq.forwardData <> io.lsq.forward.forwardData
  load_s2.io.lsq.forwardMask <> io.lsq.forward.forwardMask

  XSDebug(load_s0.io.out.valid,
    p"S0: pc ${Hexadecimal(load_s0.io.out.bits.uop.cf.pc)}, lId ${Hexadecimal(load_s0.io.out.bits.uop.lqIdx.asUInt)}, " +
    p"vaddr ${Hexadecimal(load_s0.io.out.bits.vaddr)}, mask ${Hexadecimal(load_s0.io.out.bits.mask)}\n")
  // tlb_miss is taken from the S1 pipeline bundle: io.dtlb.resp is wired to S0 and
  // therefore describes a different instruction than the one being logged here.
  XSDebug(load_s1.io.out.valid,
    p"S1: pc ${Hexadecimal(load_s1.io.out.bits.uop.cf.pc)}, lId ${Hexadecimal(load_s1.io.out.bits.uop.lqIdx.asUInt)}, tlb_miss ${load_s1.io.out.bits.tlbMiss}, " +
    p"paddr ${Hexadecimal(load_s1.io.out.bits.paddr)}, mmio ${load_s1.io.out.bits.mmio}\n")

  // writeback to LSQ
  // Current dcache use MSHR
  io.lsq.loadIn.valid := load_s2.io.out.valid
  io.lsq.loadIn.bits := load_s2.io.out.bits

  // direct writeback: S2 result that either did not miss or carries an exception
  val hitLoadOut = Wire(Valid(new ExuOutput))
  hitLoadOut.valid := load_s2.io.out.valid && (!load_s2.io.out.bits.miss || load_s2.io.out.bits.uop.cf.exceptionVec.asUInt.orR)
  hitLoadOut.bits.uop := load_s2.io.out.bits.uop
  hitLoadOut.bits.data := load_s2.io.out.bits.data
  hitLoadOut.bits.redirectValid := false.B
  hitLoadOut.bits.redirect := DontCare
  hitLoadOut.bits.brUpdate := DontCare
  hitLoadOut.bits.debug.isMMIO := load_s2.io.out.bits.mmio
  hitLoadOut.bits.fflags := DontCare

  // TODO: arbiter
  // if hit, writeback result to CDB
  // val ldout = Vec(2, Decoupled(new ExuOutput))
  // when io.loadIn(i).fire() && !io.io.loadIn(i).miss, commit load to cdb
  // val cdbArb = Module(new Arbiter(new ExuOutput, 2))
  // io.ldout <> cdbArb.io.out
  // hitLoadOut <> cdbArb.io.in(0)
  // io.lsq.ldout <> cdbArb.io.in(1) // missLoadOut
  // Fixed-priority mux: a hit from S2 always wins, the LSQ writeback is stalled meanwhile.
  // NOTE(review): io.ldout.ready is not used for back-pressure here (S2 is always
  // drained and io.lsq.ldout.ready ignores it) -- confirm the consumer is always ready.
  load_s2.io.out.ready := true.B
  io.lsq.ldout.ready := !hitLoadOut.valid
  io.ldout.bits := Mux(hitLoadOut.valid, hitLoadOut.bits, io.lsq.ldout.bits)
  io.ldout.valid := hitLoadOut.valid || io.lsq.ldout.valid

  when(io.ldout.fire()){
    XSDebug("ldout %x iw %x fw %x\n", io.ldout.bits.uop.cf.pc, io.ldout.bits.uop.ctrl.rfWen, io.ldout.bits.uop.ctrl.fpWen)
  }
}