/***************************************************************************************
 * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
 * Copyright (c) 2020-2021 Peng Cheng Laboratory
 *
 * XiangShan is licensed under Mulan PSL v2.
 * You can use this software according to the terms and conditions of the Mulan PSL v2.
 * You may obtain a copy of Mulan PSL v2 at:
 *          http://license.coscl.org.cn/MulanPSL2
 *
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 *
 * See the Mulan PSL v2 for more details.
 ***************************************************************************************/

package xiangshan.mem

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import utils._
import utility._
import xiangshan.ExceptionNO._
import xiangshan._
import xiangshan.backend.rob.RobPtr
import xiangshan.backend.Bundles._
import xiangshan.mem._
import xiangshan.backend.fu.vector.Bundles._
import xiangshan.backend.fu.FuConfig._
import xiangshan.backend.fu.FuType


class VSplitPipeline(isVStore: Boolean = false)(implicit p: Parameters) extends VLSUModule{
  val io = IO(new VSplitPipelineIO(isVStore))
  // will be overridden later
  def us_whole_reg(fuOpType: UInt): Bool = false.B
  def us_mask(fuOpType: UInt): Bool = false.B
  def us_fof(fuOpType: UInt): Bool = false.B
  // TODO: vdIdxReg should no longer be useful; don't delete it for now
  val vdIdxReg = RegInit(0.U(3.W))

  val s1_ready = WireInit(false.B)
  io.in.ready := s1_ready

  /**-----------------------------------------------------------
   * s0 stage
   * decode and generate AlignedType, uop mask, preIsSplit
   * ----------------------------------------------------------
   */
  val s0_uop = io.in.bits.uop
  val s0_vtype = s0_uop.vpu.vtype
  val s0_sew = s0_vtype.vsew
  val s0_eew = s0_uop.vpu.veew
  val s0_lmul = s0_vtype.vlmul
  // for whole-register loads or unit-stride mask loads, emul should be 1
  val s0_fuOpType = s0_uop.fuOpType
  val s0_mop = s0_fuOpType(6, 5)
  val s0_nf = Mux(us_whole_reg(s0_fuOpType), 0.U, s0_uop.vpu.nf)
  val s0_vm = s0_uop.vpu.vm
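  // s0_emul is kept in log2 form: log2(EMUL) = log2(EEW) - log2(SEW) + log2(LMUL) (RVV: EMUL = (EEW / SEW) * LMUL);
  // whole-register accesses derive it from nf instead, and unit-stride mask accesses use EMUL = 1 (log2 form 0.U)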
  val s0_emul = Mux(us_whole_reg(s0_fuOpType), GenUSWholeEmul(s0_uop.vpu.nf), Mux(us_mask(s0_fuOpType), 0.U(mulBits.W), EewLog2(s0_eew) - s0_sew + s0_lmul))
  val s0_preIsSplit = !isUnitStride(s0_mop)
  val s0_nfield = s0_nf +& 1.U

  val s0_valid = Wire(Bool())
  val s0_kill = io.in.bits.uop.robIdx.needFlush(io.redirect)
  val s0_can_go = s1_ready
  val s0_fire = s0_valid && s0_can_go
  val s0_out = Wire(new VLSBundle(isVStore))

  val isUsWholeReg = isUnitStride(s0_mop) && us_whole_reg(s0_fuOpType)
  val isMaskReg = isUnitStride(s0_mop) && us_mask(s0_fuOpType)
  val isSegment = s0_nf =/= 0.U && !us_whole_reg(s0_fuOpType)
  val instType = Cat(isSegment, s0_mop)
  val uopIdx = io.in.bits.uop.vpu.vuopIdx
  val uopIdxInField = GenUopIdxInField(instType, s0_emul, s0_lmul, uopIdx)
  val vdIdxInField = GenVdIdxInField(instType, s0_emul, s0_lmul, uopIdxInField)
  val lmulLog2 = Mux(s0_lmul.asSInt >= 0.S, 0.U, s0_lmul)
  val emulLog2 = Mux(s0_emul.asSInt >= 0.S, 0.U, s0_emul)
  val numEewLog2 = emulLog2 - EewLog2(s0_eew)
  val numSewLog2 = lmulLog2 - s0_sew
  val numFlowsSameVdLog2 = Mux(
    isIndexed(instType),
    log2Up(VLENB).U - s0_sew(1,0),
    log2Up(VLENB).U - s0_eew
  )
  // numUops = nf * max(lmul, emul)
  val lmulLog2Pos = Mux(s0_lmul.asSInt < 0.S, 0.U, s0_lmul)
  val emulLog2Pos = Mux(s0_emul.asSInt < 0.S, 0.U, s0_emul)
  val numUops = Mux(
    isIndexed(s0_mop) && s0_lmul.asSInt > s0_emul.asSInt,
    (s0_nf +& 1.U) << lmulLog2Pos,
    (s0_nf +& 1.U) << emulLog2Pos
  )

  val vvl = io.in.bits.src_vl.asTypeOf(VConfig()).vl
  val evl = Mux(isUsWholeReg,
                GenUSWholeRegVL(io.in.bits.uop.vpu.nf +& 1.U, s0_eew),
                Mux(isMaskReg,
                    GenUSMaskRegVL(vvl),
                    vvl))
  val vvstart = io.in.bits.uop.vpu.vstart
  val alignedType = Mux(isIndexed(instType), s0_sew(1, 0), s0_eew)
  val broadenAligendType = Mux(s0_preIsSplit, Cat("b0".U, alignedType), "b100".U) // if unit-stride, use 128-bit memory accesses
  val flowsLog2 = GenRealFlowLog2(instType, s0_emul, s0_lmul, s0_eew, s0_sew)
  val flowsPrevThisUop = (uopIdxInField << flowsLog2).asUInt // number of flows before this uop in a field
  val flowsPrevThisVd = (vdIdxInField << numFlowsSameVdLog2).asUInt // number of flows before this vd in a field
  val flowsIncludeThisUop = ((uopIdxInField +& 1.U) << flowsLog2).asUInt // number of flows up to and including this uop in a field
  val flowNum = io.in.bits.flowNum.get
  // max index in vd, only used by indexed instructions to calculate the index
  val maxIdxInVdIndex = GenVLMAX(Mux(s0_emul.asSInt > 0.S, 0.U, s0_emul), s0_eew)
  val indexVlMaxInVd = GenVlMaxMask(maxIdxInVdIndex, elemIdxBits)

  // For vector indexed instructions:
  //   When emul is greater than lmul, multiple uops correspond to one vd, e.g.:
  //     vsetvli    t1,t0,e8,m1,ta,ma    lmul = 1
  //     vluxei16.v v2,(a0),v8           emul = 2
  //   In this case the flow mask must be right-shifted by flowsPrevThisUop, while the mask passed to the
  //   merge buffer is right-shifted by flowsPrevThisVd, e.g.:
  //     vl = 9
  //     srcMask = 0x1FF
  //     uopIdxInField = 0 and vdIdxInField = 0, flowMask = 0x00FF, toMergeBuffMask = 0x01FF
  //     uopIdxInField = 1 and vdIdxInField = 0, flowMask = 0x0001, toMergeBuffMask = 0x01FF
  //     uopIdxInField = 0 and vdIdxInField = 0, flowMask = 0x0000, toMergeBuffMask = 0x0000
  //     uopIdxInField = 0 and vdIdxInField = 0, flowMask = 0x0000, toMergeBuffMask = 0x0000
  val isSpecialIndexed = isIndexed(instType) && s0_emul.asSInt > s0_lmul.asSInt

  val srcMask = GenFlowMask(Mux(s0_vm, Fill(VLEN, 1.U(1.W)), io.in.bits.src_mask), vvstart, evl, true)
  val srcMaskShiftBits = Mux(isSpecialIndexed, flowsPrevThisUop, flowsPrevThisVd)

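  // flowMask keeps only the srcMask bits that belong to this uop and shifts them down so that bit 0 is the
  // first flow of this uop (or of this vd for the special indexed case); indexedSrcMask keeps the mask of the
  // whole vd for the merge buffer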
  val flowMask = ((srcMask &
    UIntToMask(flowsIncludeThisUop.asUInt, VLEN + 1) &
    (~UIntToMask(flowsPrevThisUop.asUInt, VLEN)).asUInt
  ) >> srcMaskShiftBits)(VLENB - 1, 0)
  val indexedSrcMask = (srcMask >> flowsPrevThisVd).asUInt // only for indexed instructions

  // Used to calculate the element index.
  // See 'splitIdxOffset' in the split buffer and 'merge data' in the merge buffer.
  val indexedSplitOffset = Mux(isSpecialIndexed, flowsPrevThisUop - flowsPrevThisVd, 0.U) // only for indexed instructions with emul > lmul
  val vlmax = GenVLMAX(s0_lmul, s0_sew)

  // connect
  s0_out := DontCare
  s0_out match {case x =>
    x.uop := io.in.bits.uop
    x.uop.imm := 0.U
    x.uop.vpu.vl := evl
    x.uop.uopIdx := uopIdx
    x.uop.numUops := numUops
    x.uop.lastUop := (uopIdx +& 1.U) === numUops
    x.uop.vpu.nf := s0_nf
    x.rawNf := io.in.bits.uop.vpu.nf
    x.flowMask := flowMask
    x.indexedSrcMask := indexedSrcMask // only vector indexed instructions use it
    x.indexedSplitOffset := indexedSplitOffset
    x.byteMask := GenUopByteMask(flowMask, Cat("b0".U, alignedType))(VLENB - 1, 0)
    x.fof := isUnitStride(s0_mop) && us_fof(s0_fuOpType)
    x.baseAddr := io.in.bits.src_rs1
    x.stride := io.in.bits.src_stride
    x.flowNum := flowNum
    x.nfields := s0_nfield
    x.vm := s0_vm
    x.usWholeReg := isUsWholeReg
    x.usMaskReg := isMaskReg
    x.eew := s0_eew
    x.sew := s0_sew
    x.emul := s0_emul
    x.lmul := s0_lmul
    x.vlmax := Mux(isUsWholeReg, evl, vlmax)
    x.instType := instType
    x.data := io.in.bits.src_vs3
    x.vdIdxInField := vdIdxInField
    x.preIsSplit := s0_preIsSplit
    x.alignedType := broadenAligendType
    x.indexVlMaxInVd := indexVlMaxInVd
  }
  s0_valid := io.in.valid && !s0_kill
  /**-------------------------------------
   * s1 stage
   * ------------------------------------
   * generate UopOffset
   */
  val s1_valid = RegInit(false.B)
  val s1_kill = Wire(Bool())
  val s1_in = Wire(new VLSBundle(isVStore))
  val s1_can_go = io.out.ready && io.toMergeBuffer.req.ready
  val s1_fire = s1_valid && !s1_kill && s1_can_go

  s1_ready := s1_kill || !s1_valid || s1_can_go

  when(s0_fire){
    s1_valid := true.B
  }.elsewhen(s1_fire){
    s1_valid := false.B
  }.elsewhen(s1_kill){
    s1_valid := false.B
  }
  s1_in := RegEnable(s0_out, s0_fire)

  val s1_flowNum = s1_in.flowNum
  val s1_uop = s1_in.uop
  val s1_uopidx = s1_uop.vpu.vuopIdx
  val s1_nf = s1_uop.vpu.nf
  val s1_nfields = s1_in.nfields
  val s1_eew = s1_in.eew
  val s1_emul = s1_in.emul
  val s1_lmul = s1_in.lmul
  val s1_instType = s1_in.instType
  val s1_stride = s1_in.stride
  val s1_vmask = FillInterleaved(8, s1_in.byteMask)(VLEN-1, 0)
  val s1_alignedType = s1_in.alignedType
  val s1_isSpecialIndexed = isIndexed(s1_instType) && s1_emul.asSInt > s1_lmul.asSInt
  val s1_mask = Mux(s1_isSpecialIndexed, s1_in.indexedSrcMask, s1_in.flowMask)
  val s1_vdIdx = s1_in.vdIdxInField
  val s1_fof = s1_in.fof
  val s1_notIndexedStride = Mux( // stride for strided/unit-stride instructions
    isStrided(s1_instType),
    s1_stride(XLEN - 1, 0), // for strided load, stride = x[rs2]
    s1_nfields << s1_eew // for unit-stride load, stride = eew * NFIELDS
  )

  val stride = Mux(isIndexed(s1_instType), s1_stride, s1_notIndexedStride).asUInt // for indexed instructions, the per-element index is picked during split
  val uopOffset = genVUopOffset(s1_instType, s1_fof, s1_uopidx, s1_nf, s1_eew, stride, s1_alignedType)
  // for unit-stride, if the uop's addr is 128-bit aligned, split it into one flow, otherwise into two
  val usLowBitsAddr = getCheckAddrLowBits(s1_in.baseAddr, maxMemByteNum) + getCheckAddrLowBits(uopOffset, maxMemByteNum)
  val usMask = Cat(0.U(VLENB.W), s1_in.byteMask) << getCheckAddrLowBits(usLowBitsAddr, maxMemByteNum)
  val usAligned128 = getCheckAddrLowBits(usLowBitsAddr, maxMemByteNum) === 0.U // addr 128-bit aligned
  val usMaskLowActive = genUSSplitMask(usMask.asTypeOf(UInt(32.W)), 1.U).asUInt.orR
  val usMaskHighActive = genUSSplitMask(usMask.asTypeOf(UInt(32.W)), 0.U).asUInt.orR
  val usActiveNum = Mux(
    usMaskLowActive && usMaskHighActive,
    VecMemUnitStrideMaxFlowNum.U,
    Mux(usMaskLowActive || usMaskHighActive, (VecMemUnitStrideMaxFlowNum - 1).U, 0.U)
  )

  val activeNum = Mux(s1_in.preIsSplit, PopCount(s1_in.flowMask), usActiveNum)

  s1_kill := s1_in.uop.robIdx.needFlush(io.redirect)

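  // The merge-buffer entry is requested together with the s1 output: activeNum tells the merge buffer how many
  // active flows this uop will produce, and the returned mBIndex is carried by every split flow so their
  // writebacks can be merged per uop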
  // query mergeBuffer
  io.toMergeBuffer.req.valid := io.out.ready && s1_valid // only request a MergeBuffer entry when s1 can go
  io.toMergeBuffer.req.bits.flowNum := activeNum
  io.toMergeBuffer.req.bits.data := s1_in.data
  io.toMergeBuffer.req.bits.uop := s1_in.uop
  io.toMergeBuffer.req.bits.uop.vpu.nf := s1_in.rawNf
  io.toMergeBuffer.req.bits.mask := s1_mask
  io.toMergeBuffer.req.bits.vaddr := s1_in.baseAddr
  io.toMergeBuffer.req.bits.vdIdx := s1_vdIdx // TODO: vdIdxReg should no longer be useful; don't delete it for now
  io.toMergeBuffer.req.bits.fof := s1_in.fof
  io.toMergeBuffer.req.bits.vlmax := s1_in.vlmax
//  io.toMergeBuffer.req.bits.vdOffset :=

  // TODO: vdIdxReg should no longer be useful; don't delete it for now
//  when (s1_in.uop.lastUop && s1_fire || s1_kill) {
//    vdIdxReg := 0.U
//  }.elsewhen(s1_fire) {
//    vdIdxReg := vdIdxReg + 1.U
//    XSError(vdIdxReg + 1.U === 0.U, s"Overflow! The number of vd should be less than 8\n")
//  }
  // out connect
  io.out.valid := s1_valid && io.toMergeBuffer.resp.valid && (activeNum =/= 0.U) // if activeNum == 0, this uop does nothing and can be killed
  io.out.bits := s1_in
  io.out.bits.uopOffset := uopOffset
  io.out.bits.uopAddr := s1_in.baseAddr + uopOffset
  io.out.bits.stride := stride
  io.out.bits.mBIndex := io.toMergeBuffer.resp.bits.mBIndex
  io.out.bits.usLowBitsAddr := usLowBitsAddr
  io.out.bits.usAligned128 := usAligned128
  io.out.bits.usMask := usMask
  io.out.bits.uop.vpu.nf := s1_in.rawNf

  XSPerfAccumulate("split_out", io.out.fire)
  XSPerfAccumulate("pipe_block", io.out.valid && !io.out.ready)
  XSPerfAccumulate("mbuffer_block", s1_valid && io.out.ready && !io.toMergeBuffer.resp.valid)
}

abstract class VSplitBuffer(isVStore: Boolean = false)(implicit p: Parameters) extends VLSUModule{
  val io = IO(new VSplitBufferIO(isVStore))
  lazy val fuCfg = if(isVStore) VstuCfg else VlduCfg

  val uopq = Reg(new VLSBundle(isVStore))
  val allocated = RegInit(false.B)
  val needCancel = WireInit(false.B)
  val activeIssue = Wire(Bool())
  val inActiveIssue = Wire(Bool())
  val splitFinish = WireInit(false.B)

  // for split
  val splitIdx = RegInit(0.U(flowIdxBits.W))
  val strideOffsetReg = RegInit(0.U(VLEN.W))

  /**
   * Redirect
   */
  val cancelEnq = io.in.bits.uop.robIdx.needFlush(io.redirect)
  val canEnqueue = io.in.valid
  val needEnqueue = canEnqueue && !cancelEnq

  // enqueue
  val offset = PopCount(needEnqueue)
  val canAccept = !allocated || allocated && splitFinish && (activeIssue || inActiveIssue) // an occupied entry can only be replaced after splitting finishes and its last flow handshakes
  io.in.ready := canAccept
  val doEnqueue = canAccept && needEnqueue

  when(doEnqueue){
    uopq := io.in.bits
  }

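  // Split and issue: splitIdx walks through the flows of the buffered uop, normally advancing by one flow per
  // handshake (by two when a unit-stride access needs no split); the entry is released once splitting finishes
  // and its last flow has been issued or found inactive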
  // split uops
  val issueValid = allocated && !needCancel
  val issueEntry = uopq
  val issueMbIndex = issueEntry.mBIndex
  val issueFlowNum = issueEntry.flowNum
  val issueBaseAddr = issueEntry.baseAddr
  val issueUopAddr = issueEntry.uopAddr
  val issueUop = issueEntry.uop
  val issueUopIdx = issueUop.vpu.vuopIdx
  val issueInstType = issueEntry.instType
  val issueUopOffset = issueEntry.uopOffset
  val issueEew = issueEntry.eew
  val issueSew = issueEntry.sew
  val issueLmul = issueEntry.lmul
  val issueEmul = issueEntry.emul
  val issueAlignedType = issueEntry.alignedType
  val issuePreIsSplit = issueEntry.preIsSplit
  val issueByteMask = issueEntry.byteMask
  val issueUsMask = issueEntry.usMask
  val issueVLMAXMask = issueEntry.vlmax - 1.U
  val issueIsWholeReg = issueEntry.usWholeReg
  val issueVLMAXLog2 = GenVLMAXLog2(issueEntry.lmul, issueSew)
  val issueVlMaxInVd = issueEntry.indexVlMaxInVd
  val issueUsLowBitsAddr = issueEntry.usLowBitsAddr
  val issueUsAligned128 = issueEntry.usAligned128
  val elemIdx = GenElemIdx(
    instType = issueInstType,
    emul = issueEmul,
    lmul = issueLmul,
    eew = issueEew,
    sew = issueSew,
    uopIdx = issueUopIdx,
    flowIdx = splitIdx
  ) // elemIdx inside an inst, for exception

  val splitIdxOffset = issueEntry.indexedSplitOffset + splitIdx

  val indexFlowInnerIdx = elemIdx & issueVlMaxInVd
  val nfIdx = Mux(issueIsWholeReg, 0.U, elemIdx >> issueVLMAXLog2)
  val fieldOffset = nfIdx << issueAlignedType // field offset inside a segment

  val indexedStride = IndexAddr( // index for indexed instructions
    index = issueEntry.stride,
    flow_inner_idx = indexFlowInnerIdx,
    eew = issueEew
  )
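  // Per-flow address: vaddr = uopAddr (baseAddr + uopOffset, computed in the split pipeline) plus a per-flow
  // offset, which is the accumulated strideOffsetReg for strided accesses or the selected index for indexed accesses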
  val issueStride = Mux(isIndexed(issueInstType), indexedStride, strideOffsetReg)
  val vaddr = issueUopAddr + issueStride
  val mask = genVWmask128(vaddr, issueAlignedType) // scalar mask for flow
  val flowMask = issueEntry.flowMask
  /*
   * Unit-stride is split into one flow or two flows:
   * if the uop's addr is 128-bit aligned, split it into one flow, otherwise into two.
   */
  val usSplitMask = genUSSplitMask(issueUsMask, splitIdx)
  val usMaskInSingleUop = (genUSSplitMask(issueUsMask, 1.U) === 0.U) // if the second split mask is zero, this uop does not need to be split
  val usNoSplit = (issueUsAligned128 || usMaskInSingleUop) &&
                  !issuePreIsSplit &&
                  (splitIdx === 0.U) // unit-stride uop doesn't need to be split into two flows
  val usSplitVaddr = genUSSplitAddr(issueUopAddr, splitIdx, XLEN)
  val regOffset = getCheckAddrLowBits(issueUsLowBitsAddr, maxMemByteNum) // offset in the 256-bit vd
  XSError((splitIdx > 1.U && usNoSplit) || (splitIdx > 1.U && !issuePreIsSplit), "Unit-Stride addr split error!\n")

  val vecActive = Mux(!issuePreIsSplit, usSplitMask.orR, (flowMask & UIntToOH(splitIdx)).orR)
  // non-unit-stride can trigger misalign
  val addrAligned = LookupTree(issueEew, List(
    "b00".U -> true.B,                // b
    "b01".U -> (vaddr(0) === 0.U),    // h
    "b10".U -> (vaddr(1, 0) === 0.U), // w
    "b11".U -> (vaddr(2, 0) === 0.U)  // d
  )) || !issuePreIsSplit

  // data
  io.out.bits match { case x =>
    x.uop := issueUop
    x.uop.imm := 0.U
    x.uop.exceptionVec := ExceptionNO.selectByFu(issueUop.exceptionVec, fuCfg)
    x.vaddr := Mux(!issuePreIsSplit, usSplitVaddr, vaddr)
    x.basevaddr := issueBaseAddr
    x.alignedType := issueAlignedType
    x.isvec := true.B
    x.mask := Mux(!issuePreIsSplit, usSplitMask, mask)
    x.reg_offset := regOffset // for merging unit-stride data
    x.vecActive := vecActive // currently, unit-stride flows are always sent to the pipeline
    x.is_first_ele := DontCare
    x.usSecondInv := usNoSplit
    x.elemIdx := elemIdx
    x.elemIdxInsideVd := splitIdxOffset // for unit-stride, elemIdx is the index of the 2 split memory requests (for data merging)
    x.uop_unit_stride_fof := DontCare
    x.isFirstIssue := DontCare
    x.mBIndex := issueMbIndex
  }

  // redirect
  needCancel := uopq.uop.robIdx.needFlush(io.redirect) && allocated

  /* Execute logic */
  /** Issue to scalar pipeline **/

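  // misalignedCanGo gates the issue of misaligned flows: it defaults to true here, and the store split buffer
  // overrides it to wait until the store pipeline and the store misalign buffer are both empty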
  lazy val misalignedCanGo = true.B
  val allowIssue = (addrAligned || misalignedCanGo) && io.out.ready
  val issueCount = Mux(usNoSplit, 2.U, (PopCount(inActiveIssue) + PopCount(activeIssue))) // a unit-stride that needs no split issues both flows at once
  splitFinish := splitIdx >= (issueFlowNum - issueCount)

  // handshake
  activeIssue := issueValid && allowIssue && vecActive // active issue, currently used by non-unit-stride
  inActiveIssue := issueValid && !vecActive
  when (!issueEntry.uop.robIdx.needFlush(io.redirect)) {
    when (!splitFinish) {
      when (activeIssue || inActiveIssue) {
        // The uop has not been entirely split yet
        splitIdx := splitIdx + issueCount
        strideOffsetReg := Mux(!issuePreIsSplit, 0.U, strideOffsetReg + issueEntry.stride) // normal unit-stride doesn't use strideOffsetReg
      }
    }.otherwise {
      when (activeIssue || inActiveIssue) {
        // The uop is done splitting
        splitIdx := 0.U(flowIdxBits.W) // initialize flowIdx
        strideOffsetReg := 0.U
      }
    }
  }.otherwise {
    splitIdx := 0.U(flowIdxBits.W) // initialize flowIdx
    strideOffsetReg := 0.U
  }
  // allocated
  when(doEnqueue){ // if the enqueue needs to be cancelled, doEnqueue is false, so this has high priority
    allocated := true.B
  }.elsewhen(needCancel) { // redirect
    allocated := false.B
  }.elsewhen(splitFinish && (activeIssue || inActiveIssue)){ // dequeue
    allocated := false.B
  }

  // out connect
  io.out.valid := issueValid && vecActive && (addrAligned || misalignedCanGo) // TODO: inactive unit-stride uops are not sent to the pipeline

  XSPerfAccumulate("out_valid", io.out.valid)
  XSPerfAccumulate("out_fire", io.out.fire)
  XSPerfAccumulate("out_fire_unitstride", io.out.fire && !issuePreIsSplit)
  XSPerfAccumulate("unitstride_vlenAlign", io.out.fire && !issuePreIsSplit && getCheckAddrLowBits(io.out.bits.vaddr, maxMemByteNum) === 0.U)
  XSPerfAccumulate("unitstride_invalid", io.out.ready && issueValid && !issuePreIsSplit && PopCount(io.out.bits.mask).orR)
}

class VSSplitBufferImp(implicit p: Parameters) extends VSplitBuffer(isVStore = true){
  override lazy val misalignedCanGo = io.vstdMisalign.get.storePipeEmpty && io.vstdMisalign.get.storeMisalignBufferEmpty

  // split data
  val splitData = genVSData(
    data = issueEntry.data.asUInt,
    elemIdx = splitIdxOffset,
    alignedType = issueAlignedType
  )
  val flowData = genVWdata(splitData, issueAlignedType)
  val usSplitData = genUSSplitData(issueEntry.data.asUInt, splitIdx, vaddr(3,0))

  val sqIdx = issueUop.sqIdx + splitIdx
  io.out.bits.uop.sqIdx := sqIdx
  io.out.bits.uop.exceptionVec(storeAddrMisaligned) := !addrAligned && !issuePreIsSplit && io.out.bits.mask.orR

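  // Store data path: besides the address flow sent to the store pipeline, the split data (vstd) is written to
  // store queue entry sqIdx + splitIdx, using the 128-bit split data for unit-stride and the element data otherwise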
  // send data to sq
  val vstd = io.vstd.get
  vstd.valid := issueValid && (vecActive || !issuePreIsSplit)
  vstd.bits.uop := issueUop
  vstd.bits.uop.sqIdx := sqIdx
  vstd.bits.uop.fuType := FuType.vstu.U
  vstd.bits.data := Mux(!issuePreIsSplit, usSplitData, flowData)
  vstd.bits.debug := DontCare
  vstd.bits.vdIdx.get := DontCare
  vstd.bits.vdIdxInField.get := DontCare
  vstd.bits.isFromLoadUnit := DontCare
  vstd.bits.mask.get := Mux(!issuePreIsSplit, usSplitMask, mask)

}

class VLSplitBufferImp(implicit p: Parameters) extends VSplitBuffer(isVStore = false){
  io.out.bits.uop.lqIdx := issueUop.lqIdx + splitIdx
  io.out.bits.uop.exceptionVec(loadAddrMisaligned) := !addrAligned && !issuePreIsSplit && io.out.bits.mask.orR
  io.out.bits.uop.fuType := FuType.vldu.U
}

class VSSplitPipelineImp(implicit p: Parameters) extends VSplitPipeline(isVStore = true){
  override def us_whole_reg(fuOpType: UInt): Bool = fuOpType === VstuType.vsr
  override def us_mask(fuOpType: UInt): Bool = fuOpType === VstuType.vsm
  override def us_fof(fuOpType: UInt): Bool = false.B // there is no vector fof store
}

class VLSplitPipelineImp(implicit p: Parameters) extends VSplitPipeline(isVStore = false){

  override def us_whole_reg(fuOpType: UInt): Bool = fuOpType === VlduType.vlr
  override def us_mask(fuOpType: UInt): Bool = fuOpType === VlduType.vlm
  override def us_fof(fuOpType: UInt): Bool = fuOpType === VlduType.vleff
}

class VLSplitImp(implicit p: Parameters) extends VLSUModule{
  val io = IO(new VSplitIO(isVStore=false))
  val splitPipeline = Module(new VLSplitPipelineImp())
  val splitBuffer = Module(new VLSplitBufferImp())
  val mergeBufferNack = io.threshold.get.valid && io.threshold.get.bits =/= io.in.bits.uop.lqIdx
  // Split Pipeline
  splitPipeline.io.in <> io.in
  io.in.ready := splitPipeline.io.in.ready && !mergeBufferNack
  splitPipeline.io.redirect <> io.redirect
  io.toMergeBuffer <> splitPipeline.io.toMergeBuffer

  // skid buffer
  skidBuffer(splitPipeline.io.out, splitBuffer.io.in,
    Mux(splitPipeline.io.out.fire,
      splitPipeline.io.out.bits.uop.robIdx.needFlush(io.redirect),
      splitBuffer.io.in.bits.uop.robIdx.needFlush(io.redirect)),
    "VSSplitSkidBuffer")

  // Split Buffer
  splitBuffer.io.redirect <> io.redirect
  io.out <> splitBuffer.io.out
}

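// Top level of the vector store split stage: the split pipeline decodes and pre-splits each uop, a skid buffer
// decouples it from the split buffer, which emits the per-flow addresses and drives the store data (vstd) path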
class VSSplitImp(implicit p: Parameters) extends VLSUModule{
  val io = IO(new VSplitIO(isVStore=true))
  val splitPipeline = Module(new VSSplitPipelineImp())
  val splitBuffer = Module(new VSSplitBufferImp())
  // Split Pipeline
  splitPipeline.io.in <> io.in
  splitPipeline.io.redirect <> io.redirect
  io.toMergeBuffer <> splitPipeline.io.toMergeBuffer

  // skid buffer
  skidBuffer(splitPipeline.io.out, splitBuffer.io.in,
    Mux(splitPipeline.io.out.fire,
      splitPipeline.io.out.bits.uop.robIdx.needFlush(io.redirect),
      splitBuffer.io.in.bits.uop.robIdx.needFlush(io.redirect)),
    "VSSplitSkidBuffer")

  // Split Buffer
  splitBuffer.io.redirect <> io.redirect
  io.out <> splitBuffer.io.out
  io.vstd.get <> splitBuffer.io.vstd.get

  io.vstdMisalign.get <> splitBuffer.io.vstdMisalign.get
}