/***************************************************************************************
 * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
 * Copyright (c) 2020-2021 Peng Cheng Laboratory
 *
 * XiangShan is licensed under Mulan PSL v2.
 * You can use this software according to the terms and conditions of the Mulan PSL v2.
 * You may obtain a copy of Mulan PSL v2 at:
 *          http://license.coscl.org.cn/MulanPSL2
 *
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 *
 * See the Mulan PSL v2 for more details.
 ***************************************************************************************/

package xiangshan.mem

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import utils._
import utility._
import xiangshan.ExceptionNO._
import xiangshan._
import xiangshan.backend.rob.RobPtr
import xiangshan.backend.Bundles._
import xiangshan.mem._
import xiangshan.backend.fu.vector.Bundles._
import xiangshan.backend.fu.FuConfig._
import xiangshan.backend.fu.FuType


class VSplitPipeline(isVStore: Boolean = false)(implicit p: Parameters) extends VLSUModule{
  val io = IO(new VSplitPipelineIO(isVStore))
  // will be overridden later
  def us_whole_reg(fuOpType: UInt): Bool = false.B
  def us_mask(fuOpType: UInt): Bool = false.B
  def us_fof(fuOpType: UInt): Bool = false.B
  // TODO: vdIdxReg should no longer be useful; don't delete it for now
  val vdIdxReg = RegInit(0.U(3.W))

  val s1_ready = WireInit(false.B)
  io.in.ready := s1_ready

  /**-----------------------------------------------------------
    * s0 stage
    * decode and generate AlignedType, uop mask, preIsSplit
    * ----------------------------------------------------------
    */
  val s0_uop = io.in.bits.uop
  val s0_vtype = s0_uop.vpu.vtype
  val s0_sew = s0_vtype.vsew
  val s0_eew = s0_uop.vpu.veew
  val s0_lmul = s0_vtype.vlmul
  // when loading a whole register or for unit-stride masked ops, emul should be 1
  val s0_fuOpType = s0_uop.fuOpType
  val s0_mop = s0_fuOpType(6, 5)
  val s0_nf = Mux(us_whole_reg(s0_fuOpType), 0.U, s0_uop.vpu.nf)
  val s0_vm = s0_uop.vpu.vm
  val s0_emul = Mux(us_whole_reg(s0_fuOpType), GenUSWholeEmul(s0_uop.vpu.nf), Mux(us_mask(s0_fuOpType), 0.U(mulBits.W), EewLog2(s0_eew) - s0_sew + s0_lmul))
  val s0_preIsSplit = !isUnitStride(s0_mop)
  val s0_nfield = s0_nf +& 1.U

  val s0_valid = Wire(Bool())
  val s0_kill = io.in.bits.uop.robIdx.needFlush(io.redirect)
  val s0_can_go = s1_ready
  val s0_fire = s0_valid && s0_can_go
  val s0_out = Wire(new VLSBundle(isVStore))

  val isUsWholeReg = isUnitStride(s0_mop) && us_whole_reg(s0_fuOpType)
  val isMaskReg = isUnitStride(s0_mop) && us_mask(s0_fuOpType)
  val isSegment = s0_nf =/= 0.U && !us_whole_reg(s0_fuOpType)
  val instType = Cat(isSegment, s0_mop)
  val uopIdx = io.in.bits.uop.vpu.vuopIdx
  val uopIdxInField = GenUopIdxInField(instType, s0_emul, s0_lmul, uopIdx)
  val vdIdxInField = GenVdIdxInField(instType, s0_emul, s0_lmul, uopIdxInField)
  val lmulLog2 = Mux(s0_lmul.asSInt >= 0.S, 0.U, s0_lmul)
  val emulLog2 = Mux(s0_emul.asSInt >= 0.S, 0.U, s0_emul)
  val numEewLog2 = emulLog2 - EewLog2(s0_eew)
  val numSewLog2 = lmulLog2 - s0_sew
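  // Illustrative example (not from the original source) of the emul/numUops arithmetic
  // above and below, assuming standard RVV semantics: for vle64.v under vsetvli e32,m4
  // (sew = 32, lmul = 4, eew = 64), emul = (eew / sew) * lmul = 8; with nf = 0 the
  // instruction is decoded into (nf + 1) * max(lmul, emul) = 8 uops, matching the
  // "numUops = nf * max(lmul, emul)" note below.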
  val numFlowsSameVdLog2 = Mux(
    isIndexed(instType),
    log2Up(VLENB).U - s0_sew(1,0),
    log2Up(VLENB).U - s0_eew
  )
  // numUops = nf * max(lmul, emul)
  val lmulLog2Pos = Mux(s0_lmul.asSInt < 0.S, 0.U, s0_lmul)
  val emulLog2Pos = Mux(s0_emul.asSInt < 0.S, 0.U, s0_emul)
  val numUops = Mux(
    isIndexed(s0_mop) && s0_lmul.asSInt > s0_emul.asSInt,
    (s0_nf +& 1.U) << lmulLog2Pos,
    (s0_nf +& 1.U) << emulLog2Pos
  )

  val vvl = io.in.bits.src_vl.asTypeOf(VConfig()).vl
  val evl = Mux(isUsWholeReg,
    GenUSWholeRegVL(io.in.bits.uop.vpu.nf +& 1.U, s0_eew),
    Mux(isMaskReg,
      GenUSMaskRegVL(vvl),
      vvl))
  val vvstart = io.in.bits.uop.vpu.vstart
  val alignedType = Mux(isIndexed(instType), s0_sew(1, 0), s0_eew)
  val broadenAligendType = Mux(s0_preIsSplit, Cat("b0".U, alignedType), "b100".U) // if unit-stride, use 128-bit memory accesses
  val flowsLog2 = GenRealFlowLog2(instType, s0_emul, s0_lmul, s0_eew, s0_sew)
  val flowsPrevThisUop = (uopIdxInField << flowsLog2).asUInt // # of flows before this uop in a field
  val flowsPrevThisVd = (vdIdxInField << numFlowsSameVdLog2).asUInt // # of flows before this vd in a field
  val flowsIncludeThisUop = ((uopIdxInField +& 1.U) << flowsLog2).asUInt // # of flows up to and including this uop
  val flowNum = io.in.bits.flowNum.get
  // max index in vd; only used by indexed instructions to calculate the index
  val maxIdxInVdIndex = GenVLMAX(Mux(s0_emul.asSInt > 0.S, 0.U, s0_emul), s0_eew)
  val indexVlMaxInVd = GenVlMaxMask(maxIdxInVdIndex, elemIdxBits)

  // For vector indexed instructions:
  // When emul is greater than lmul, multiple uops correspond to one vd, e.g.:
  //   vsetvli t1,t0,e8,m1,ta,ma    lmul = 1
  //   vluxei16.v v2,(a0),v8        emul = 2
  // In this case the flow mask must be right-shifted by flowsPrevThisUop, whereas the mask
  // passed to the merge buffer is right-shifted by flowsPrevThisVd, e.g.:
  //   vl = 9
  //   srcMask = 0x1FF
  //   uopIdxInField = 0 and vdIdxInField = 0, flowMask = 0x00FF, toMergeBuffMask = 0x01FF
  //   uopIdxInField = 1 and vdIdxInField = 0, flowMask = 0x0001, toMergeBuffMask = 0x01FF
  //   uopIdxInField = 2 and vdIdxInField = 1, flowMask = 0x0000, toMergeBuffMask = 0x0000
  //   uopIdxInField = 3 and vdIdxInField = 1, flowMask = 0x0000, toMergeBuffMask = 0x0000
  val isSpecialIndexed = isIndexed(instType) && s0_emul.asSInt > s0_lmul.asSInt

  val srcMask = GenFlowMask(Mux(s0_vm, Fill(VLEN, 1.U(1.W)), io.in.bits.src_mask), vvstart, evl, true)
  val srcMaskShiftBits = Mux(isSpecialIndexed, flowsPrevThisUop, flowsPrevThisVd)

  val flowMask = ((srcMask &
    UIntToMask(flowsIncludeThisUop.asUInt, VLEN + 1) &
    (~UIntToMask(flowsPrevThisUop.asUInt, VLEN)).asUInt
  ) >> srcMaskShiftBits)(VLENB - 1, 0)
  val indexedSrcMask = (srcMask >> flowsPrevThisVd).asUInt // only used by indexed instructions
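  // Worked step for the vl = 9 example above (illustrative): for uopIdxInField = 1,
  // flowsPrevThisUop = 8 and flowsIncludeThisUop = 16, so srcMask (0x1FF) is masked down
  // to bits [15:8] = 0x100; because emul > lmul the shift amount is flowsPrevThisUop,
  // giving flowMask = 0x01, while indexedSrcMask keeps the whole field mask shifted by
  // flowsPrevThisVd (0x1FF for vdIdxInField = 0).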

  // Used to calculate the element index.
  // See 'splitbuffer' for 'io.out.splitIdxOffset' and 'mergebuffer' for 'merge data'.
  val indexedSplitOffset = Mux(isSpecialIndexed, flowsPrevThisUop - flowsPrevThisVd, 0.U) // only for indexed instructions with emul > lmul
  val vlmax = GenVLMAX(s0_lmul, s0_sew)

  // connect
  s0_out := DontCare
  s0_out match {case x =>
    x.uop := io.in.bits.uop
    x.uop.imm := 0.U
    x.uop.vpu.vl := evl
    x.uop.uopIdx := uopIdx
    x.uop.numUops := numUops
    x.uop.lastUop := (uopIdx +& 1.U) === numUops
    x.uop.vpu.nf := s0_nf
    x.rawNf := io.in.bits.uop.vpu.nf
    x.flowMask := flowMask
    x.indexedSrcMask := indexedSrcMask // only vector indexed instructions use it
    x.indexedSplitOffset := indexedSplitOffset
    x.byteMask := GenUopByteMask(flowMask, Cat("b0".U, alignedType))(VLENB - 1, 0)
    x.fof := isUnitStride(s0_mop) && us_fof(s0_fuOpType)
    x.baseAddr := io.in.bits.src_rs1
    x.stride := io.in.bits.src_stride
    x.flowNum := flowNum
    x.nfields := s0_nfield
    x.vm := s0_vm
    x.usWholeReg := isUsWholeReg
    x.usMaskReg := isMaskReg
    x.eew := s0_eew
    x.sew := s0_sew
    x.emul := s0_emul
    x.lmul := s0_lmul
    x.vlmax := Mux(isUsWholeReg, evl, vlmax)
    x.instType := instType
    x.data := io.in.bits.src_vs3
    x.vdIdxInField := vdIdxInField
    x.preIsSplit := s0_preIsSplit
    x.alignedType := broadenAligendType
    x.indexVlMaxInVd := indexVlMaxInVd
  }
  s0_valid := io.in.valid && !s0_kill
  /**-------------------------------------
    * s1 stage
    * ------------------------------------
    * generate UopOffset
    */
  val s1_valid = RegInit(false.B)
  val s1_kill = Wire(Bool())
  val s1_in = Wire(new VLSBundle(isVStore))
  val s1_can_go = io.out.ready && io.toMergeBuffer.req.ready
  val s1_fire = s1_valid && !s1_kill && s1_can_go

  s1_ready := s1_kill || !s1_valid || s1_can_go

  when(s0_fire){
    s1_valid := true.B
  }.elsewhen(s1_fire){
    s1_valid := false.B
  }.elsewhen(s1_kill){
    s1_valid := false.B
  }
  s1_in := RegEnable(s0_out, s0_fire)

  val s1_flowNum = s1_in.flowNum
  val s1_uop = s1_in.uop
  val s1_uopidx = s1_uop.vpu.vuopIdx
  val s1_nf = s1_uop.vpu.nf
  val s1_nfields = s1_in.nfields
  val s1_eew = s1_in.eew
  val s1_emul = s1_in.emul
  val s1_lmul = s1_in.lmul
  val s1_instType = s1_in.instType
  val s1_stride = s1_in.stride
  val s1_vmask = FillInterleaved(8, s1_in.byteMask)(VLEN-1, 0)
  val s1_alignedType = s1_in.alignedType
  val s1_isSpecialIndexed = isIndexed(s1_instType) && s1_emul.asSInt > s1_lmul.asSInt
  val s1_mask = Mux(s1_isSpecialIndexed, s1_in.indexedSrcMask, s1_in.flowMask)
  val s1_vdIdx = s1_in.vdIdxInField
  val s1_fof = s1_in.fof
  val s1_notIndexedStride = Mux( // stride for strided/unit-stride instructions
    isStrided(s1_instType),
    s1_stride(XLEN - 1, 0), // for strided load, stride = x[rs2]
    s1_nfields << s1_eew // for unit-stride load, stride = eew * NFIELDS
  )

  val stride = Mux(isIndexed(s1_instType), s1_stride, s1_notIndexedStride).asUInt // for indexed instructions, the index is fetched at split time
  val uopOffset = genVUopOffset(s1_instType, s1_fof, s1_uopidx, s1_nf, s1_eew, stride, s1_alignedType)
  // for unit-stride, if the uop's addr is 128-bit aligned, split it into one flow, otherwise into two
  val usLowBitsAddr = getCheckAddrLowBits(s1_in.baseAddr, maxMemByteNum) + getCheckAddrLowBits(uopOffset, maxMemByteNum)
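  // Illustrative note (assuming getCheckAddrLowBits returns the byte offset within a
  // 128-bit line): usLowBitsAddr adds the low bits of the base address and of the uop
  // offset, e.g. offsets 0x8 and 0x4 give 0xC, so a 16-byte unit-stride uop starting
  // there straddles a 128-bit boundary and is later split into two flows.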
  val usMask = Cat(0.U(VLENB.W), s1_in.byteMask) << getCheckAddrLowBits(usLowBitsAddr, maxMemByteNum)
  val usAligned128 = getCheckAddrLowBits(usLowBitsAddr, maxMemByteNum) === 0.U // addr 128-bit aligned
  val usMaskLowActive = genUSSplitMask(usMask.asTypeOf(UInt(32.W)), 1.U).asUInt.orR
  val usMaskHighActive = genUSSplitMask(usMask.asTypeOf(UInt(32.W)), 0.U).asUInt.orR
  val usActiveNum = Mux(
    usMaskLowActive && usMaskHighActive,
    VecMemUnitStrideMaxFlowNum.U,
    Mux(usMaskLowActive || usMaskHighActive, (VecMemUnitStrideMaxFlowNum - 1).U, 0.U)
  )

  val activeNum = Mux(s1_in.preIsSplit, PopCount(s1_in.flowMask), usActiveNum)


  s1_kill := s1_in.uop.robIdx.needFlush(io.redirect)

  // query mergeBuffer
  io.toMergeBuffer.req.valid := io.out.ready && s1_valid // only when it can go will it be granted a MergeBuffer entry
  io.toMergeBuffer.req.bits.flowNum := activeNum
  io.toMergeBuffer.req.bits.data := s1_in.data
  io.toMergeBuffer.req.bits.uop := s1_in.uop
  io.toMergeBuffer.req.bits.uop.vpu.nf := s1_in.rawNf
  io.toMergeBuffer.req.bits.mask := s1_mask
  io.toMergeBuffer.req.bits.vaddr := s1_in.baseAddr
  io.toMergeBuffer.req.bits.vdIdx := s1_vdIdx // TODO: vdIdxReg should no longer be useful; don't delete it for now
  io.toMergeBuffer.req.bits.fof := s1_in.fof
  io.toMergeBuffer.req.bits.vlmax := s1_in.vlmax
//  io.toMergeBuffer.req.bits.vdOffset :=

  // TODO: vdIdxReg should no longer be useful; don't delete it for now
//  when (s1_in.uop.lastUop && s1_fire || s1_kill) {
//    vdIdxReg := 0.U
//  }.elsewhen(s1_fire) {
//    vdIdxReg := vdIdxReg + 1.U
//    XSError(vdIdxReg + 1.U === 0.U, s"Overflow! The number of vd should be less than 8\n")
//  }
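  // Note on the merge-buffer handshake: the request above is raised whenever s1 holds a
  // valid uop and the downstream is ready, while io.out.valid below additionally waits
  // for toMergeBuffer.resp.valid, so a split uop only leaves s1 after a merge-buffer
  // entry has been allocated for it.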
  // out connect
  io.out.valid := s1_valid && io.toMergeBuffer.resp.valid && (activeNum =/= 0.U) // if activeNum == 0, this uop does nothing and can be killed
  io.out.bits := s1_in
  io.out.bits.uopOffset := uopOffset
  io.out.bits.uopAddr := s1_in.baseAddr + uopOffset
  io.out.bits.stride := stride
  io.out.bits.mBIndex := io.toMergeBuffer.resp.bits.mBIndex
  io.out.bits.usLowBitsAddr := usLowBitsAddr
  io.out.bits.usAligned128 := usAligned128
  io.out.bits.usMask := usMask
  io.out.bits.uop.vpu.nf := s1_in.rawNf

  XSPerfAccumulate("split_out", io.out.fire)
  XSPerfAccumulate("pipe_block", io.out.valid && !io.out.ready)
  XSPerfAccumulate("mbuffer_block", s1_valid && io.out.ready && !io.toMergeBuffer.resp.valid)
}

abstract class VSplitBuffer(isVStore: Boolean = false)(implicit p: Parameters) extends VLSUModule{
  val io = IO(new VSplitBufferIO(isVStore))
  lazy val fuCfg = if(isVStore) VstuCfg else VlduCfg

  val uopq = Reg(new VLSBundle(isVStore))
  val allocated = RegInit(false.B)
  val needCancel = WireInit(false.B)
  val activeIssue = Wire(Bool())
  val inActiveIssue = Wire(Bool())
  val splitFinish = WireInit(false.B)

  // for split
  val splitIdx = RegInit(0.U(flowIdxBits.W))
  val strideOffsetReg = RegInit(0.U(VLEN.W))

  /**
    * Redirect
    */
  val cancelEnq = io.in.bits.uop.robIdx.needFlush(io.redirect)
  val canEnqueue = io.in.valid
  val needEnqueue = canEnqueue && !cancelEnq

  // enqueue
  val offset = PopCount(needEnqueue)
  val canAccept = !allocated || allocated && splitFinish && (activeIssue || inActiveIssue) // a valid entry can only be replaced after splitting finishes and the last uop is sent
  io.in.ready := canAccept
  val doEnqueue = canAccept && needEnqueue

  when(doEnqueue){
    uopq := io.in.bits
  }

  // split uops
  val issueValid = allocated && !needCancel
  val issueEntry = uopq
  val issueMbIndex = issueEntry.mBIndex
  val issueFlowNum = issueEntry.flowNum
  val issueBaseAddr = issueEntry.baseAddr
  val issueUopAddr = issueEntry.uopAddr
  val issueUop = issueEntry.uop
  val issueUopIdx = issueUop.vpu.vuopIdx
  val issueInstType = issueEntry.instType
  val issueUopOffset = issueEntry.uopOffset
  val issueEew = issueEntry.eew
  val issueSew = issueEntry.sew
  val issueLmul = issueEntry.lmul
  val issueEmul = issueEntry.emul
  val issueAlignedType = issueEntry.alignedType
  val issuePreIsSplit = issueEntry.preIsSplit
  val issueByteMask = issueEntry.byteMask
  val issueUsMask = issueEntry.usMask
  val issueVLMAXMask = issueEntry.vlmax - 1.U
  val issueIsWholeReg = issueEntry.usWholeReg
  val issueVLMAXLog2 = GenVLMAXLog2(issueEntry.lmul, issueSew)
  val issueVlMaxInVd = issueEntry.indexVlMaxInVd
  val issueUsLowBitsAddr = issueEntry.usLowBitsAddr
  val issueUsAligned128 = issueEntry.usAligned128
  val elemIdx = GenElemIdx(
    instType = issueInstType,
    emul = issueEmul,
    lmul = issueLmul,
    eew = issueEew,
    sew = issueSew,
    uopIdx = issueUopIdx,
    flowIdx = splitIdx
  ) // elemIdx inside an inst, for exceptions

  val splitIdxOffset = issueEntry.indexedSplitOffset + splitIdx

  val indexFlowInnerIdx = elemIdx & issueVlMaxInVd
  val nfIdx = Mux(issueIsWholeReg, 0.U, elemIdx >> issueVLMAXLog2)
  val fieldOffset = nfIdx << issueAlignedType // field offset inside a segment

  val indexedStride = IndexAddr( // index for indexed instructions
    index = issueEntry.stride,
    flow_inner_idx = indexFlowInnerIdx,
    eew = issueEew
  )
  val issueStride = Mux(isIndexed(issueInstType), indexedStride, strideOffsetReg)
  val vaddr = issueUopAddr + issueStride
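  // Per-flow address generation (summary of the code above): indexed accesses take their
  // offset from the index element selected by indexFlowInnerIdx, while strided accesses
  // use strideOffsetReg, which accumulates issueEntry.stride as flows are issued; either
  // offset is added to the per-uop base address issueUopAddr to form vaddr.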
  val mask = genVWmask128(vaddr, issueAlignedType) // mask for this flow
  val flowMask = issueEntry.flowMask
  /*
   * Unit-stride is split into one flow or two flows:
   * if the uop's addr is 128-bit aligned, split it into one flow, otherwise into two.
   */
  val usSplitMask = genUSSplitMask(issueUsMask, splitIdx)
  val usMaskInSingleUop = (genUSSplitMask(issueUsMask, 1.U) === 0.U) // if the second split mask is zero, this uop does not need to be split
  val usNoSplit = (issueUsAligned128 || usMaskInSingleUop) &&
    !issuePreIsSplit &&
    (splitIdx === 0.U) // this unit-stride uop doesn't need to be split into two flows
  val usSplitVaddr = genUSSplitAddr(issueUopAddr, splitIdx, XLEN)
  val regOffset = getCheckAddrLowBits(issueUsLowBitsAddr, maxMemByteNum) // offset in 256-bit vd
  XSError((splitIdx > 1.U && usNoSplit) || (splitIdx > 1.U && !issuePreIsSplit), "Unit-Stride addr split error!\n")

  val vecActive = Mux(!issuePreIsSplit, usSplitMask.orR, (flowMask & UIntToOH(splitIdx)).orR)
  // non-unit-stride accesses can trigger misalignment
  val addrAligned = LookupTree(issueEew, List(
    "b00".U -> true.B, //b
    "b01".U -> (vaddr(0) === 0.U), //h
    "b10".U -> (vaddr(1, 0) === 0.U), //w
    "b11".U -> (vaddr(2, 0) === 0.U) //d
  )) || !issuePreIsSplit

  // data
  io.out.bits match { case x =>
    x.uop := issueUop
    x.uop.imm := 0.U
    x.uop.exceptionVec := ExceptionNO.selectByFu(issueUop.exceptionVec, fuCfg)
    x.vaddr := Mux(!issuePreIsSplit, usSplitVaddr, vaddr)
    x.basevaddr := issueBaseAddr
    x.alignedType := issueAlignedType
    x.isvec := true.B
    x.mask := Mux(!issuePreIsSplit, usSplitMask, mask)
    x.reg_offset := regOffset // for merging unit-stride data
    x.vecActive := vecActive // currently, unit-stride flows are always sent to the pipeline
    x.is_first_ele := DontCare
    x.usSecondInv := usNoSplit
    x.elemIdx := elemIdx
    x.elemIdxInsideVd := splitIdxOffset // for unit-stride, this is the index of the 2 split mem requests (for merging data)
    x.uop_unit_stride_fof := DontCare
    x.isFirstIssue := DontCare
    x.mBIndex := issueMbIndex
  }

  // redirect
  needCancel := uopq.uop.robIdx.needFlush(io.redirect) && allocated

  /* Execute logic */
  /** Issue to scalar pipeline **/

  lazy val misalignedCanGo = true.B
  val allowIssue = (addrAligned || misalignedCanGo) && io.out.ready
  val issueCount = Mux(usNoSplit, 2.U, (PopCount(inActiveIssue) + PopCount(activeIssue))) // a unit-stride uop that doesn't need splitting issues two flows at once
  splitFinish := splitIdx >= (issueFlowNum - issueCount)

  // handshake
  activeIssue := issueValid && allowIssue && vecActive // active issue, currently used for non-unit-stride
  inActiveIssue := issueValid && !vecActive
  when (!issueEntry.uop.robIdx.needFlush(io.redirect)) {
    when (!splitFinish) {
      when (activeIssue || inActiveIssue) {
        // The uop has not been entirely split yet
        splitIdx := splitIdx + issueCount
        strideOffsetReg := Mux(!issuePreIsSplit, 0.U, strideOffsetReg + issueEntry.stride) // normal unit-stride doesn't use strideOffsetReg
      }
    }.otherwise {
      when (activeIssue || inActiveIssue) {
        // The uop is done splitting
        splitIdx := 0.U(flowIdxBits.W) // initialize flowIdx
        strideOffsetReg := 0.U
      }
    }
  }.otherwise {
    splitIdx := 0.U(flowIdxBits.W) // initialize flowIdx
    strideOffsetReg := 0.U
  }
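  // Illustrative split sequence (assuming a strided uop with issueFlowNum = 4 and all
  // elements active): splitIdx advances 0 -> 1 -> 2 -> 3, one flow per accepted cycle;
  // splitFinish is raised on the last flow (splitIdx >= issueFlowNum - issueCount), and
  // once that flow issues both splitIdx and strideOffsetReg are cleared for the next uop.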
  // allocated
  when(doEnqueue){ // if the enqueue is cancelled by a redirect, doEnqueue is already false, so this branch can safely take the highest priority
    allocated := true.B
  }.elsewhen(needCancel) { // redirect
    allocated := false.B
  }.elsewhen(splitFinish && (activeIssue || inActiveIssue)){ // dequeue
    allocated := false.B
  }

  // out connect
  io.out.valid := issueValid && vecActive && (addrAligned || misalignedCanGo) // TODO: inactive unit-stride uops do not need to be sent to the pipeline

  XSPerfAccumulate("out_valid", io.out.valid)
  XSPerfAccumulate("out_fire", io.out.fire)
  XSPerfAccumulate("out_fire_unitstride", io.out.fire && !issuePreIsSplit)
  XSPerfAccumulate("unitstride_vlenAlign", io.out.fire && !issuePreIsSplit && getCheckAddrLowBits(io.out.bits.vaddr, maxMemByteNum) === 0.U)
  XSPerfAccumulate("unitstride_invalid", io.out.ready && issueValid && !issuePreIsSplit && PopCount(io.out.bits.mask).orR)
}

class VSSplitBufferImp(implicit p: Parameters) extends VSplitBuffer(isVStore = true){
  override lazy val misalignedCanGo = io.vstdMisalign.get.storePipeEmpty && io.vstdMisalign.get.storeMisalignBufferEmpty

  // split data
  val splitData = genVSData(
    data = issueEntry.data.asUInt,
    elemIdx = splitIdxOffset,
    alignedType = issueAlignedType
  )
  val flowData = genVWdata(splitData, issueAlignedType)
  val usSplitData = genUSSplitData(issueEntry.data.asUInt, splitIdx, vaddr(3,0))

  val sqIdx = issueUop.sqIdx + splitIdx
  io.out.bits.uop.sqIdx := sqIdx
  io.out.bits.uop.exceptionVec(storeAddrMisaligned) := !addrAligned && !issuePreIsSplit && io.out.bits.mask.orR

  // send data to sq
  val vstd = io.vstd.get
  vstd.valid := issueValid && (vecActive || !issuePreIsSplit)
  vstd.bits.uop := issueUop
  vstd.bits.uop.sqIdx := sqIdx
  vstd.bits.uop.fuType := FuType.vstu.U
  vstd.bits.data := Mux(!issuePreIsSplit, usSplitData, flowData)
  vstd.bits.debug := DontCare
  vstd.bits.vdIdx.get := DontCare
  vstd.bits.vdIdxInField.get := DontCare
  vstd.bits.isFromLoadUnit := DontCare
  vstd.bits.mask.get := Mux(!issuePreIsSplit, usSplitMask, mask)

}

class VLSplitBufferImp(implicit p: Parameters) extends VSplitBuffer(isVStore = false){
  io.out.bits.uop.lqIdx := issueUop.lqIdx + splitIdx
  io.out.bits.uop.exceptionVec(loadAddrMisaligned) := !addrAligned && !issuePreIsSplit && io.out.bits.mask.orR
  io.out.bits.uop.fuType := FuType.vldu.U
}

class VSSplitPipelineImp(implicit p: Parameters) extends VSplitPipeline(isVStore = true){
  override def us_whole_reg(fuOpType: UInt): Bool = fuOpType === VstuType.vsr
  override def us_mask(fuOpType: UInt): Bool = fuOpType === VstuType.vsm
  override def us_fof(fuOpType: UInt): Bool = false.B // there is no vector fof store
}

class VLSplitPipelineImp(implicit p: Parameters) extends VSplitPipeline(isVStore = false){

  override def us_whole_reg(fuOpType: UInt): Bool = fuOpType === VlduType.vlr
  override def us_mask(fuOpType: UInt): Bool = fuOpType === VlduType.vlm
  override def us_fof(fuOpType: UInt): Bool = fuOpType === VlduType.vleff
}

class VLSplitImp(implicit p: Parameters) extends VLSUModule{
  val io = IO(new VSplitIO(isVStore=false))
  val splitPipeline = Module(new VLSplitPipelineImp())
  val splitBuffer = Module(new VLSplitBufferImp())
  val mergeBufferNack = io.threshold.get.valid && io.threshold.get.bits =/= io.in.bits.uop.lqIdx
  // Split Pipeline
  splitPipeline.io.in <> io.in
  io.in.ready := splitPipeline.io.in.ready && !mergeBufferNack
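  // Back-pressure note on mergeBufferNack above (descriptive): while io.threshold is
  // valid and its lqIdx does not match the incoming uop's lqIdx, the input is stalled
  // here, presumably to keep the vector load split from running ahead of the merge
  // buffer.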
  splitPipeline.io.redirect <> io.redirect
  io.toMergeBuffer <> splitPipeline.io.toMergeBuffer

  // skid buffer
  skidBuffer(splitPipeline.io.out, splitBuffer.io.in,
    Mux(splitPipeline.io.out.fire,
      splitPipeline.io.out.bits.uop.robIdx.needFlush(io.redirect),
      splitBuffer.io.in.bits.uop.robIdx.needFlush(io.redirect)),
    "VSSplitSkidBuffer")

  // Split Buffer
  splitBuffer.io.redirect <> io.redirect
  io.out <> splitBuffer.io.out
}

class VSSplitImp(implicit p: Parameters) extends VLSUModule{
  val io = IO(new VSplitIO(isVStore=true))
  val splitPipeline = Module(new VSSplitPipelineImp())
  val splitBuffer = Module(new VSSplitBufferImp())
  // Split Pipeline
  splitPipeline.io.in <> io.in
  splitPipeline.io.redirect <> io.redirect
  io.toMergeBuffer <> splitPipeline.io.toMergeBuffer

  // skid buffer
  skidBuffer(splitPipeline.io.out, splitBuffer.io.in,
    Mux(splitPipeline.io.out.fire,
      splitPipeline.io.out.bits.uop.robIdx.needFlush(io.redirect),
      splitBuffer.io.in.bits.uop.robIdx.needFlush(io.redirect)),
    "VSSplitSkidBuffer")

  // Split Buffer
  splitBuffer.io.redirect <> io.redirect
  io.out <> splitBuffer.io.out
  io.vstd.get <> splitBuffer.io.vstd.get

  io.vstdMisalign.get <> splitBuffer.io.vstdMisalign.get
}