package xiangshan.mem

import chisel3._
import chisel3.util._
import utils._
import xiangshan._
import xiangshan.cache.{DCacheWordIO, TlbRequestIO, TlbCmd, MemoryOpConstants}
import xiangshan.backend.LSUOpType

class LoadToLsroqIO extends XSBundle {
  val loadIn = ValidIO(new LsPipelineBundle)
  val ldout = Flipped(DecoupledIO(new ExuOutput))
  val forward = new LoadForwardQueryIO
}

class LoadUnit extends XSModule {
  val io = IO(new Bundle() {
    val ldin = Flipped(Decoupled(new ExuInput))
    val ldout = Decoupled(new ExuOutput)
    val redirect = Flipped(ValidIO(new Redirect))
    val tlbFeedback = ValidIO(new TlbFeedback)
    val dcache = new DCacheWordIO
    val dtlb = new TlbRequestIO()
    val sbuffer = new LoadForwardQueryIO
    val lsroq = new LoadToLsroqIO
  })

  when(io.ldin.valid){
    XSDebug("load enters pipeline %x iw %x fw %x\n", io.ldin.bits.uop.cf.pc, io.ldin.bits.uop.ctrl.rfWen, io.ldin.bits.uop.ctrl.fpWen)
  }

  //-------------------------------------------------------
  // Load Pipeline
  //-------------------------------------------------------

  val l2_out = Wire(Decoupled(new LsPipelineBundle))
  val l4_out = Wire(Decoupled(new LsPipelineBundle))
  val l5_in = Wire(Flipped(Decoupled(new LsPipelineBundle)))

  //-------------------------------------------------------
  // LD Pipeline Stage 2
  // Generate addr, use addr to query DCache tag and DTLB
  //-------------------------------------------------------

  val l2_dtlb_hit = Wire(Bool())
  val l2_dtlb_miss = Wire(Bool())
  val l2_dcache = Wire(Bool())
  val l2_mmio = Wire(Bool())
  val isMMIOReq = Wire(Bool())

  // send req to dtlb
  io.dtlb.req.valid := l2_out.valid
  io.dtlb.req.bits.vaddr := l2_out.bits.vaddr
  io.dtlb.req.bits.cmd := TlbCmd.read
  io.dtlb.req.bits.roqIdx := l2_out.bits.uop.roqIdx
  io.dtlb.req.bits.debug.pc := l2_out.bits.uop.cf.pc
  io.dtlb.req.bits.debug.lsroqIdx := l2_out.bits.uop.lsroqIdx // FIXME: needs update

  l2_dtlb_hit := io.dtlb.resp.valid && !io.dtlb.resp.bits.miss
  l2_dtlb_miss := io.dtlb.resp.valid && io.dtlb.resp.bits.miss
  isMMIOReq := AddressSpace.isMMIO(io.dtlb.resp.bits.paddr)
  l2_dcache := l2_dtlb_hit && !isMMIOReq
  l2_mmio := l2_dtlb_hit && isMMIOReq

  // l2_out is used to generate the dcache req
  l2_out.bits := DontCare
  l2_out.bits.vaddr := io.ldin.bits.src1 + io.ldin.bits.uop.ctrl.imm
  l2_out.bits.paddr := io.dtlb.resp.bits.paddr
  l2_out.bits.mask := genWmask(l2_out.bits.vaddr, io.ldin.bits.uop.ctrl.fuOpType(1,0))
  l2_out.bits.uop := io.ldin.bits.uop
  l2_out.bits.miss := false.B
  l2_out.bits.mmio := l2_mmio
  l2_out.valid := io.ldin.valid && !io.ldin.bits.uop.roqIdx.needFlush(io.redirect)
  // when we are sure it's an MMIO req, we do not need to wait for dcache ready
  l2_out.ready := (l2_dcache && io.dcache.req.ready) || l2_mmio || l2_dtlb_miss
  io.ldin.ready := l2_out.ready

  // exception check
  val addrAligned = LookupTree(io.ldin.bits.uop.ctrl.fuOpType(1,0), List(
    "b00".U -> true.B,                           // b
    "b01".U -> (l2_out.bits.vaddr(0) === 0.U),   // h
    "b10".U -> (l2_out.bits.vaddr(1,0) === 0.U), // w
    "b11".U -> (l2_out.bits.vaddr(2,0) === 0.U)  // d
  ))
  l2_out.bits.uop.cf.exceptionVec(loadAddrMisaligned) := !addrAligned
  l2_out.bits.uop.cf.exceptionVec(loadPageFault) := io.dtlb.resp.bits.excp.pf.ld
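  // Example: an ld (fuOpType(1,0) = "b11") to vaddr 0x8004 has
  // vaddr(2,0) = "b100", so addrAligned is false and the uop carries a
  // loadAddrMisaligned exception down the pipeline.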
  // send req to dcache
  // never send TLB-missed or MMIO reqs to dcache
  io.dcache.req.valid := l2_dcache

  io.dcache.req.bits.cmd := MemoryOpConstants.M_XRD
  // TODO: vaddr
  io.dcache.req.bits.addr := io.dtlb.resp.bits.paddr
  io.dcache.req.bits.data := DontCare
  io.dcache.req.bits.mask := l2_out.bits.mask

  io.dcache.req.bits.meta.id := DontCare
  io.dcache.req.bits.meta.vaddr := l2_out.bits.vaddr
  io.dcache.req.bits.meta.paddr := io.dtlb.resp.bits.paddr
  io.dcache.req.bits.meta.uop := l2_out.bits.uop
  io.dcache.req.bits.meta.mmio := isMMIOReq
  io.dcache.req.bits.meta.tlb_miss := io.dtlb.resp.bits.miss
  io.dcache.req.bits.meta.mask := l2_out.bits.mask
  io.dcache.req.bits.meta.replay := false.B

  val l2_tlbFeedback = Wire(new TlbFeedback)
  l2_tlbFeedback.hit := !io.dtlb.resp.bits.miss
  l2_tlbFeedback.roqIdx := l2_out.bits.uop.roqIdx

  // dump l2
  XSDebug(l2_out.valid, "L2: pc 0x%x addr 0x%x -> 0x%x op %b data 0x%x mask %x dtlb_miss %b dcache %b mmio %b\n",
    l2_out.bits.uop.cf.pc, l2_out.bits.vaddr, l2_out.bits.paddr,
    l2_out.bits.uop.ctrl.fuOpType, l2_out.bits.data, l2_out.bits.mask,
    l2_dtlb_miss, l2_dcache, l2_mmio)

  XSDebug(l2_out.fire(), "load req: pc 0x%x addr 0x%x -> 0x%x op %b\n",
    l2_out.bits.uop.cf.pc, l2_out.bits.vaddr, l2_out.bits.paddr, l2_out.bits.uop.ctrl.fuOpType)

  XSDebug(io.dcache.req.valid, p"dcache req(${io.dcache.req.valid} ${io.dcache.req.ready}): pc:0x${Hexadecimal(io.dcache.req.bits.meta.uop.cf.pc)} roqIdx:${io.dcache.req.bits.meta.uop.roqIdx} lsroqIdx:${io.dcache.req.bits.meta.uop.lsroqIdx} addr:0x${Hexadecimal(io.dcache.req.bits.addr)} vaddr:0x${Hexadecimal(io.dcache.req.bits.meta.vaddr)} paddr:0x${Hexadecimal(io.dcache.req.bits.meta.paddr)} mmio:${io.dcache.req.bits.meta.mmio} tlb_miss:${io.dcache.req.bits.meta.tlb_miss} mask:${io.dcache.req.bits.meta.mask}\n")

  //-------------------------------------------------------
  // LD Pipeline Stage 3
  // Compare tags, use addr to query DCache data
  //-------------------------------------------------------

  val l3_valid = RegNext(l2_out.fire(), false.B)
  val l3_dtlb_miss = RegEnable(next = l2_dtlb_miss, enable = l2_out.fire(), init = false.B)
  val l3_dcache = RegEnable(next = l2_dcache, enable = l2_out.fire(), init = false.B)
  val l3_tlbFeedback = RegEnable(next = l2_tlbFeedback, enable = l2_out.fire())
  val l3_bundle = RegEnable(next = l2_out.bits, enable = l2_out.fire())
  val l3_uop = l3_bundle.uop
  // dtlb-missed reqs end here
  val l3_passdown = l3_valid && !l3_dtlb_miss && !l3_uop.roqIdx.needFlush(io.redirect)

  io.tlbFeedback.valid := l3_valid
  io.tlbFeedback.bits := l3_tlbFeedback
  io.dcache.s1_kill := l3_valid && l3_dcache && l3_uop.roqIdx.needFlush(io.redirect)

  // dump l3
  XSDebug(l3_valid, "l3: pc 0x%x addr 0x%x -> 0x%x op %b data 0x%x mask %x dtlb_miss %b dcache %b mmio %b\n",
    l3_bundle.uop.cf.pc, l3_bundle.vaddr, l3_bundle.paddr,
    l3_bundle.uop.ctrl.fuOpType, l3_bundle.data, l3_bundle.mask,
    l3_dtlb_miss, l3_dcache, l3_bundle.mmio)

  XSDebug(io.tlbFeedback.valid, "tlbFeedback: hit %b roqIdx %d\n",
    io.tlbFeedback.bits.hit, io.tlbFeedback.bits.roqIdx.asUInt)

  XSDebug(io.dcache.s1_kill, "l3: dcache s1_kill\n")

  // tag compare is done inside DCache

  //-------------------------------------------------------
  // LD Pipeline Stage 4
  // DCache returns the result, do tag ECC check and forward check
  //-------------------------------------------------------

  val l4_valid = RegNext(l3_passdown, false.B)
  val l4_dcache = RegNext(l3_dcache, false.B)
  val l4_bundle = RegNext(l3_bundle)

  val fullForward = Wire(Bool())
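  // fullForward means every byte selected by the load mask is covered by
  // forwarded store data (see the merge below); such a load can ignore a
  // dcache miss or nack because none of its bytes depend on the cache line.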
  assert(!(io.dcache.resp.ready && !io.dcache.resp.valid), "DCache response got lost")
  io.dcache.resp.ready := l4_valid && l4_dcache
  when (io.dcache.resp.fire()) {
    l4_out.bits := DontCare
    l4_out.bits.data := io.dcache.resp.bits.data
    l4_out.bits.paddr := io.dcache.resp.bits.meta.paddr
    l4_out.bits.uop := io.dcache.resp.bits.meta.uop
    l4_out.bits.mmio := io.dcache.resp.bits.meta.mmio
    l4_out.bits.mask := io.dcache.resp.bits.meta.mask
    // when we can get the data entirely from forwarding,
    // we no longer need to access dcache
    // treat nack as miss
    l4_out.bits.miss := Mux(fullForward, false.B,
      io.dcache.resp.bits.miss || io.dcache.resp.bits.nack)
    XSDebug(io.dcache.resp.fire(), p"DcacheResp(l4): data:0x${Hexadecimal(io.dcache.resp.bits.data)} paddr:0x${Hexadecimal(io.dcache.resp.bits.meta.paddr)} pc:0x${Hexadecimal(io.dcache.resp.bits.meta.uop.cf.pc)} roqIdx:${io.dcache.resp.bits.meta.uop.roqIdx} lsroqIdx:${io.dcache.resp.bits.meta.uop.lsroqIdx} miss:${io.dcache.resp.bits.miss}\n")
  } .otherwise {
    l4_out.bits := l4_bundle
  }
  l4_out.valid := l4_valid && !l4_out.bits.uop.roqIdx.needFlush(io.redirect)

  // store addr forward match
  // if it matches, get data / forward mask from the store queue / store buffer

  // io.lsroq.forward := DontCare
  io.lsroq.forward.paddr := l4_out.bits.paddr
  io.lsroq.forward.mask := io.dcache.resp.bits.meta.mask
  io.lsroq.forward.lsroqIdx := l4_out.bits.uop.lsroqIdx
  io.lsroq.forward.sqIdx := l4_out.bits.uop.sqIdx
  io.lsroq.forward.uop := l4_out.bits.uop
  io.lsroq.forward.pc := l4_out.bits.uop.cf.pc
  io.lsroq.forward.valid := io.dcache.resp.valid // TODO: optimize timing

  io.sbuffer.paddr := l4_out.bits.paddr
  io.sbuffer.mask := io.dcache.resp.bits.meta.mask
  io.sbuffer.lsroqIdx := l4_out.bits.uop.lsroqIdx
  io.sbuffer.sqIdx := l4_out.bits.uop.sqIdx
  io.sbuffer.uop := DontCare
  io.sbuffer.pc := l4_out.bits.uop.cf.pc
  io.sbuffer.valid := l4_out.valid

  val forwardVec = WireInit(io.sbuffer.forwardData)
  val forwardMask = WireInit(io.sbuffer.forwardMask)
  // generate XLEN/8 muxes, one per byte lane
  (0 until XLEN/8).map(j => {
    when(io.lsroq.forward.forwardMask(j)) {
      forwardMask(j) := true.B
      forwardVec(j) := io.lsroq.forward.forwardData(j)
    }
  })
  l4_out.bits.forwardMask := forwardMask
  l4_out.bits.forwardData := forwardVec
  fullForward := (~l4_out.bits.forwardMask.asUInt & l4_out.bits.mask) === 0.U
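  // Forwarding priority: forwardVec/forwardMask are initialized from the
  // sbuffer query and then overwritten above by matching store-queue (lsroq)
  // bytes, so bytes still in the store queue take precedence over older data
  // that has already been drained into the store buffer.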
  PipelineConnect(l4_out, l5_in, io.ldout.fire() || (l5_in.bits.miss || l5_in.bits.mmio) && l5_in.valid, false.B)

  XSDebug(l4_valid, "l4: out.valid:%d pc 0x%x addr 0x%x -> 0x%x op %b data 0x%x mask %x forwardData: 0x%x forwardMask: %x dcache %b mmio %b miss:%d\n",
    l4_out.valid, l4_out.bits.uop.cf.pc, l4_out.bits.vaddr, l4_out.bits.paddr,
    l4_out.bits.uop.ctrl.fuOpType, l4_out.bits.data, l4_out.bits.mask,
    l4_out.bits.forwardData.asUInt, l4_out.bits.forwardMask.asUInt, l4_dcache, l4_out.bits.mmio, l4_out.bits.miss)

  XSDebug(l5_in.valid, "L5(%d %d): pc 0x%x addr 0x%x -> 0x%x op %b data 0x%x mask %x forwardData: 0x%x forwardMask: %x\n",
    l5_in.valid, l5_in.ready, l5_in.bits.uop.cf.pc, l5_in.bits.vaddr, l5_in.bits.paddr,
    l5_in.bits.uop.ctrl.fuOpType, l5_in.bits.data, l5_in.bits.mask,
    l5_in.bits.forwardData.asUInt, l5_in.bits.forwardMask.asUInt)

  XSDebug(l4_valid, "l4: sbuffer forwardData: 0x%x forwardMask: %x\n",
    io.sbuffer.forwardData.asUInt, io.sbuffer.forwardMask.asUInt)

  XSDebug(l4_valid, "l4: lsroq forwardData: 0x%x forwardMask: %x\n",
    io.lsroq.forward.forwardData.asUInt, io.lsroq.forward.forwardMask.asUInt)

  XSDebug(io.redirect.valid,
    p"Redirect: excp:${io.redirect.bits.isException} flushPipe:${io.redirect.bits.isFlushPipe} misp:${io.redirect.bits.isMisPred} " +
    p"replay:${io.redirect.bits.isReplay} pc:0x${Hexadecimal(io.redirect.bits.pc)} target:0x${Hexadecimal(io.redirect.bits.target)} " +
    p"brTag:${io.redirect.bits.brTag} l2:${io.ldin.bits.uop.roqIdx.needFlush(io.redirect)} l3:${l3_uop.roqIdx.needFlush(io.redirect)} " +
    p"l4:${l4_out.bits.uop.roqIdx.needFlush(io.redirect)}\n"
  )

  //-------------------------------------------------------
  // LD Pipeline Stage 5
  // Do data ECC check, merge the result, and write it back to the lsroq
  // If cache hit, return the writeback result to the CDB
  //-------------------------------------------------------

  val loadWriteBack = l5_in.fire()

  // data merge: for each byte, take forwarded store data if present,
  // otherwise the dcache data
  val rdata = VecInit((0 until 8).map(j => {
    Mux(l5_in.bits.forwardMask(j),
      l5_in.bits.forwardData(j),
      l5_in.bits.data(8*(j+1)-1, 8*j)
    )
  })).asUInt
  val func = l5_in.bits.uop.ctrl.fuOpType
  val raddr = l5_in.bits.paddr
  // shift the 64-bit beat right by the byte offset raddr(2,0)
  // (a plain-Scala sketch of this selection is at the end of this file)
  val rdataSel = LookupTree(raddr(2, 0), List(
    "b000".U -> rdata(63, 0),
    "b001".U -> rdata(63, 8),
    "b010".U -> rdata(63, 16),
    "b011".U -> rdata(63, 24),
    "b100".U -> rdata(63, 32),
    "b101".U -> rdata(63, 40),
    "b110".U -> rdata(63, 48),
    "b111".U -> rdata(63, 56)
  ))
  // sign- or zero-extend the selected bytes according to the load type
  val rdataPartialLoad = LookupTree(func, List(
    LSUOpType.lb  -> SignExt(rdataSel(7, 0) , XLEN),
    LSUOpType.lh  -> SignExt(rdataSel(15, 0), XLEN),
    LSUOpType.lw  -> SignExt(rdataSel(31, 0), XLEN),
    LSUOpType.ld  -> SignExt(rdataSel(63, 0), XLEN),
    LSUOpType.lbu -> ZeroExt(rdataSel(7, 0) , XLEN),
    LSUOpType.lhu -> ZeroExt(rdataSel(15, 0), XLEN),
    LSUOpType.lwu -> ZeroExt(rdataSel(31, 0), XLEN)
  ))

  // ECC check
  // TODO

  // if hit, write the result back to the CDB
  // val ldout = Vec(2, Decoupled(new ExuOutput))
  // when io.loadIn(i).fire() && !io.loadIn(i).bits.miss, commit load to cdb
  val hitLoadOut = Wire(Decoupled(new ExuOutput))
  hitLoadOut.bits.uop := l5_in.bits.uop
  hitLoadOut.bits.data := rdataPartialLoad
  hitLoadOut.bits.redirectValid := false.B
  hitLoadOut.bits.redirect := DontCare
  hitLoadOut.bits.brUpdate := DontCare
  hitLoadOut.bits.debug.isMMIO := l5_in.bits.mmio
  hitLoadOut.valid := l5_in.valid && !l5_in.bits.mmio && !l5_in.bits.miss // MMIO is handled in the lsroq

  XSDebug(hitLoadOut.fire(), "load writeback: pc %x data %x (%x + %x(%b))\n",
    hitLoadOut.bits.uop.cf.pc, rdataPartialLoad, l5_in.bits.data,
    l5_in.bits.forwardData.asUInt, l5_in.bits.forwardMask.asUInt
  )

  // write back to the lsroq
  // the current dcache uses MSHRs
  io.lsroq.loadIn.bits := l5_in.bits
  io.lsroq.loadIn.bits.data := rdataPartialLoad // for debug
  io.lsroq.loadIn.valid := loadWriteBack

  // pipeline control
  l5_in.ready := io.ldout.ready

  val cdbArb = Module(new Arbiter(new ExuOutput, 2))
  io.ldout <> cdbArb.io.out
  hitLoadOut <> cdbArb.io.in(0)
  io.lsroq.ldout <> cdbArb.io.in(1) // missLoadOut

  when(io.ldout.fire()){
    XSDebug("ldout %x iw %x fw %x\n", io.ldout.bits.uop.cf.pc, io.ldout.bits.uop.ctrl.rfWen, io.ldout.bits.uop.ctrl.fpWen)
  }
}
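// ----------------------------------------------------------------
// Reference sketch (illustration only, not part of the synthesized
// design): a minimal plain-Scala model of the stage-5 data selection
// above, assuming XLEN = 64. `LoadDataRefModel` is a hypothetical
// helper introduced here to mirror rdataSel (byte shift by the low
// paddr bits) and rdataPartialLoad (sign/zero extension per opcode).
object LoadDataRefModel {
  private val mask64 = (BigInt(1) << 64) - 1

  // Shift the merged 64-bit beat right by the byte offset, as rdataSel does.
  def byteSelect(beat: BigInt, paddrLow3: Int): BigInt =
    (beat >> (8 * paddrLow3)) & mask64

  // Sign-extend the low `width` bits of x, as SignExt does for lb/lh/lw/ld.
  def signExt(x: BigInt, width: Int): BigInt = {
    val v = x & ((BigInt(1) << width) - 1)
    if (v.testBit(width - 1)) v - (BigInt(1) << width) else v
  }

  // Zero-extend the low `width` bits of x, as ZeroExt does for lbu/lhu/lwu.
  def zeroExt(x: BigInt, width: Int): BigInt =
    x & ((BigInt(1) << width) - 1)

  // Example: an lb of the byte 0x80 at byte offset 3 of the beat
  // 0x0000_0000_8000_0000 selects 0x80 and sign-extends it to -128,
  // while an lbu would zero-extend it to 128.
  def example(): Unit = {
    val beat = BigInt("0000000080000000", 16)
    assert(byteSelect(beat, 3) == BigInt(0x80))
    assert(signExt(byteSelect(beat, 3), 8) == BigInt(-128))
    assert(zeroExt(byteSelect(beat, 3), 8) == BigInt(128))
  }
}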