/***************************************************************************************
  * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
  * Copyright (c) 2020-2021 Peng Cheng Laboratory
  *
  * XiangShan is licensed under Mulan PSL v2.
  * You can use this software according to the terms and conditions of the Mulan PSL v2.
  * You may obtain a copy of Mulan PSL v2 at:
  *          http://license.coscl.org.cn/MulanPSL2
  *
  * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
  * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
  * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
  *
  * See the Mulan PSL v2 for more details.
  ***************************************************************************************/

package xiangshan.mem

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import utils._
import utility._
import xiangshan.ExceptionNO._
import xiangshan._
import xiangshan.backend.rob.RobPtr
import xiangshan.backend.Bundles._
import xiangshan.mem._
import xiangshan.backend.fu.vector.Bundles._
import xiangshan.backend.fu.FuConfig._
import xiangshan.backend.fu.FuType


class VSplitPipeline(isVStore: Boolean = false)(implicit p: Parameters) extends VLSUModule{
  val io = IO(new VSplitPipelineIO(isVStore))
  // overridden by the load/store implementations below
  def us_whole_reg(fuOpType: UInt): Bool = false.B
  def us_mask(fuOpType: UInt): Bool = false.B
  def us_fof(fuOpType: UInt): Bool = false.B
  // TODO: vdIdxReg should no longer be needed; keep it for now
  val vdIdxReg = RegInit(0.U(3.W))

  val s1_ready = WireInit(false.B)
  io.in.ready := s1_ready

  /**-----------------------------------------------------------
    * s0 stage
    * decode and generate AlignedType, uop mask, preIsSplit
    * ----------------------------------------------------------
    */
  val s0_uop = io.in.bits.uop
  val s0_vtype = s0_uop.vpu.vtype
  val s0_sew = s0_vtype.vsew
  val s0_eew = s0_uop.vpu.veew
  val s0_lmul = s0_vtype.vlmul
  // for whole-register loads and unit-stride mask accesses, emul should be 1
  val s0_fuOpType = s0_uop.fuOpType
  val s0_mop = s0_fuOpType(6, 5)
  val s0_nf = Mux(us_whole_reg(s0_fuOpType), 0.U, s0_uop.vpu.nf)
  val s0_vm = s0_uop.vpu.vm
  val s0_emul = Mux(us_whole_reg(s0_fuOpType), GenUSWholeEmul(s0_uop.vpu.nf), Mux(us_mask(s0_fuOpType), 0.U(mulBits.W), EewLog2(s0_eew) - s0_sew + s0_lmul))
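  // Worked example (RVV spec: EMUL = (EEW / SEW) * LMUL, computed here in signed log2 form):
  //   EEW = 16 bits, SEW = 8 bits, LMUL = 2  =>  emul = 1 - 0 + 1 = 2, i.e. EMUL = 4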
  val s0_preIsSplit = !isUnitStride(s0_mop)
  val s0_nfield        = s0_nf +& 1.U

  val s0_valid         = Wire(Bool())
  val s0_kill          = io.in.bits.uop.robIdx.needFlush(io.redirect)
  val s0_can_go        = s1_ready
  val s0_fire          = s0_valid && s0_can_go
  val s0_out           = Wire(new VLSBundle(isVStore))

  val isUsWholeReg = isUnitStride(s0_mop) && us_whole_reg(s0_fuOpType)
  val isMaskReg = isUnitStride(s0_mop) && us_mask(s0_fuOpType)
  val isSegment = s0_nf =/= 0.U && !us_whole_reg(s0_fuOpType)
  val instType = Cat(isSegment, s0_mop)
  val uopIdx = io.in.bits.uop.vpu.vuopIdx
  val uopIdxInField = GenUopIdxInField(instType, s0_emul, s0_lmul, uopIdx)
  val vdIdxInField = GenVdIdxInField(instType, s0_emul, s0_lmul, uopIdxInField)
  val lmulLog2 = Mux(s0_lmul.asSInt >= 0.S, 0.U, s0_lmul)
  val emulLog2 = Mux(s0_emul.asSInt >= 0.S, 0.U, s0_emul)
  val numEewLog2 = emulLog2 - EewLog2(s0_eew)
  val numSewLog2 = lmulLog2 - s0_sew
  val numFlowsSameVdLog2 = Mux(
    isIndexed(instType),
    log2Up(VLENB).U - s0_sew(1,0),
    log2Up(VLENB).U - s0_eew
  )
  // numUops = nf * max(lmul, emul)
  val lmulLog2Pos = Mux(s0_lmul.asSInt < 0.S, 0.U, s0_lmul)
  val emulLog2Pos = Mux(s0_emul.asSInt < 0.S, 0.U, s0_emul)
  val numUops = Mux(
    isIndexed(s0_mop) && s0_lmul.asSInt > s0_emul.asSInt,
    (s0_nf +& 1.U) << lmulLog2Pos,
    (s0_nf +& 1.U) << emulLog2Pos
  )
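  // Worked example: a segment access with nf = 2 (NFIELDS = 3) and
  // max(LMUL, EMUL) = 2 (log2 form 1) yields numUops = 3 << 1 = 6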

  val vvl = io.in.bits.src_vl.asTypeOf(VConfig()).vl
  val evl = Mux(isUsWholeReg,
                GenUSWholeRegVL(io.in.bits.uop.vpu.nf +& 1.U, s0_eew),
                Mux(isMaskReg,
                    GenUSMaskRegVL(vvl),
                    vvl))
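  // evl per the RVV spec: whole-register accesses operate on NFIELDS * VLEN / EEW
  // elements and mask accesses (vlm/vsm) on ceil(vl / 8) elements, everything else on vl;
  // GenUSWholeRegVL/GenUSMaskRegVL are assumed to implement those two cases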
  val vvstart = io.in.bits.uop.vpu.vstart
  val alignedType = Mux(isIndexed(instType), s0_sew(1, 0), s0_eew)
  val broadenAlignedType = Mux(s0_preIsSplit, Cat("b0".U, alignedType), "b100".U) // for unit-stride, use 128-bit memory accesses
  val flowsLog2 = GenRealFlowLog2(instType, s0_emul, s0_lmul, s0_eew, s0_sew)
  val flowsPrevThisUop = (uopIdxInField << flowsLog2).asUInt // # of flows before this uop in a field
  val flowsPrevThisVd = (vdIdxInField << numFlowsSameVdLog2).asUInt // # of flows before this vd in a field
  val flowsIncludeThisUop = ((uopIdxInField +& 1.U) << flowsLog2).asUInt // # of flows up to and including this uop in a field
  val flowNum = io.in.bits.flowNum.get
  // max index in vd, only used by indexed instructions to calculate the index
  val maxIdxInVdIndex = GenVLMAX(Mux(s0_emul.asSInt > 0.S, 0.U, s0_emul), s0_eew)
  val indexVlMaxInVd = GenVlMaxMask(maxIdxInVdIndex, elemIdxBits)

  // For vector indexed instructions:
  //  When emul is greater than lmul, multiple uops correspond to one vd, e.g.:
  //    vsetvli    t1,t0,e8,m1,ta,ma    lmul = 1
  //    vluxei16.v v2,(a0),v8           emul = 2
  //    In this case the flow mask must be right-shifted by flowsPrevThisUop, whereas the mask passed to the merge buffer is right-shifted by flowsPrevThisVd, e.g.:
  //      vl = 9
  //      srcMask = 0x1FF
  //      uopIdxInField = 0 and vdIdxInField = 0, flowMask = 0x00FF, toMergeBuffMask = 0x01FF
  //      uopIdxInField = 1 and vdIdxInField = 0, flowMask = 0x0001, toMergeBuffMask = 0x01FF
  //      uopIdxInField = 2 and vdIdxInField = 1, flowMask = 0x0000, toMergeBuffMask = 0x0000
  //      uopIdxInField = 3 and vdIdxInField = 1, flowMask = 0x0000, toMergeBuffMask = 0x0000
  val isSpecialIndexed = isIndexed(instType) && s0_emul.asSInt > s0_lmul.asSInt

  val srcMask = GenFlowMask(Mux(s0_vm, Fill(VLEN, 1.U(1.W)), io.in.bits.src_mask), vvstart, evl, true)
  val srcMaskShiftBits = Mux(isSpecialIndexed, flowsPrevThisUop, flowsPrevThisVd)

  val flowMask = ((srcMask &
    UIntToMask(flowsIncludeThisUop.asUInt, VLEN + 1) &
    (~UIntToMask(flowsPrevThisUop.asUInt, VLEN)).asUInt
  ) >> srcMaskShiftBits)(VLENB - 1, 0)
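  // Worked example (continuing the indexed case above): srcMask = 0x1FF, flowsLog2 = 3,
  // uopIdxInField = 1  =>  flowsPrevThisUop = 8, flowsIncludeThisUop = 16, so
  // flowMask = ((0x1FF & 0xFFFF & ~0xFF) >> 8) = 0x0001, matching the table above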
  val indexedSrcMask = (srcMask >> flowsPrevThisVd).asUInt // only for indexed instructions

  // Used to calculate the element index.
  // See 'SplitBuffer' for 'io.out.splitIdxOffset' and 'MergeBuffer' for merging data
  val indexedSplitOffset = Mux(isSpecialIndexed, flowsPrevThisUop - flowsPrevThisVd, 0.U) // only for indexed instructions with emul > lmul
  val vlmax = GenVLMAX(s0_lmul, s0_sew)

  // connect
  s0_out := DontCare
  s0_out match { case x =>
    x.uop := io.in.bits.uop
    x.uop.imm := 0.U
    x.uop.vpu.vl := evl
    x.uop.uopIdx := uopIdx
    x.uop.numUops := numUops
    x.uop.lastUop := (uopIdx +& 1.U) === numUops
    x.uop.vpu.nf  := s0_nf
    x.rawNf := io.in.bits.uop.vpu.nf
    x.flowMask := flowMask
    x.indexedSrcMask := indexedSrcMask // Only vector indexed instructions use it
    x.indexedSplitOffset := indexedSplitOffset
    x.byteMask := GenUopByteMask(flowMask, Cat("b0".U, alignedType))(VLENB - 1, 0)
    x.fof := isUnitStride(s0_mop) && us_fof(s0_fuOpType)
    x.baseAddr := io.in.bits.src_rs1
    x.stride := io.in.bits.src_stride
    x.flowNum := flowNum
    x.nfields := s0_nfield
    x.vm := s0_vm
    x.usWholeReg := isUsWholeReg
    x.usMaskReg := isMaskReg
    x.eew := s0_eew
    x.sew := s0_sew
    x.emul := s0_emul
    x.lmul := s0_lmul
    x.vlmax := Mux(isUsWholeReg, evl, vlmax)
    x.instType := instType
    x.data := io.in.bits.src_vs3
    x.vdIdxInField := vdIdxInField
    x.preIsSplit  := s0_preIsSplit
    x.alignedType := broadenAlignedType
    x.indexVlMaxInVd := indexVlMaxInVd
  }
  s0_valid := io.in.valid && !s0_kill
  /**-------------------------------------
    * s1 stage
    * ------------------------------------
    * generate UopOffset
    */
  val s1_valid         = RegInit(false.B)
  val s1_kill          = Wire(Bool())
  val s1_in            = Wire(new VLSBundle(isVStore))
  val s1_can_go        = io.out.ready && io.toMergeBuffer.req.ready
  val s1_fire          = s1_valid && !s1_kill && s1_can_go

  s1_ready         := s1_kill || !s1_valid || s1_can_go

  when(s0_fire){
    s1_valid := true.B
  }.elsewhen(s1_fire){
    s1_valid := false.B
  }.elsewhen(s1_kill){
    s1_valid := false.B
  }
  s1_in := RegEnable(s0_out, s0_fire)

  val s1_flowNum          = s1_in.flowNum
  val s1_uop              = s1_in.uop
  val s1_uopidx           = s1_uop.vpu.vuopIdx
  val s1_nf               = s1_uop.vpu.nf
  val s1_nfields          = s1_in.nfields
  val s1_eew              = s1_in.eew
  val s1_emul             = s1_in.emul
  val s1_lmul             = s1_in.lmul
  val s1_instType         = s1_in.instType
  val s1_stride           = s1_in.stride
  val s1_vmask            = FillInterleaved(8, s1_in.byteMask)(VLEN-1, 0)
  val s1_alignedType      = s1_in.alignedType
  val s1_isSpecialIndexed = isIndexed(s1_instType) && s1_emul.asSInt > s1_lmul.asSInt
  val s1_mask             = Mux(s1_isSpecialIndexed, s1_in.indexedSrcMask, s1_in.flowMask)
  val s1_vdIdx            = s1_in.vdIdxInField
  val s1_fof              = s1_in.fof
  val s1_notIndexedStride = Mux( // stride for strided/unit-stride instructions
    isStrided(s1_instType),
    s1_stride(XLEN - 1, 0), // for strided load, stride = x[rs2]
    s1_nfields << s1_eew // for unit-stride load, stride = eew * NFIELDS
  )
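  // e.g. a unit-stride segment access with NFIELDS = 3 and EEW = 16 bits (log2 bytes = 1)
  // steps 3 << 1 = 6 bytes between consecutive elements of the same field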

  val stride            = Mux(isIndexed(s1_instType), s1_stride, s1_notIndexedStride).asUInt // for indexed instructions, the index is obtained during splitting
  val uopOffset         = genVUopOffset(s1_instType, s1_fof, s1_uopidx, s1_nf, s1_eew, stride, s1_alignedType)
  // for unit-stride, if the uop's address is 128-bit aligned, split into one flow, otherwise into two
  val usLowBitsAddr     = getCheckAddrLowBits(s1_in.baseAddr, maxMemByteNum) + getCheckAddrLowBits(uopOffset, maxMemByteNum)
  val usMask            = Cat(0.U(VLENB.W), s1_in.byteMask) << getCheckAddrLowBits(usLowBitsAddr, maxMemByteNum)
  val usAligned128      = getCheckAddrLowBits(usLowBitsAddr, maxMemByteNum) === 0.U // addr 128-bit aligned
  val usMaskLowActive   = genUSSplitMask(usMask.asTypeOf(UInt(32.W)), 1.U).asUInt.orR
  val usMaskHighActive  = genUSSplitMask(usMask.asTypeOf(UInt(32.W)), 0.U).asUInt.orR
  val usActiveNum       = Mux(
                            usMaskLowActive && usMaskHighActive,
                            VecMemUnitStrideMaxFlowNum.U,
                            Mux(usMaskLowActive || usMaskHighActive, (VecMemUnitStrideMaxFlowNum - 1).U, 0.U)
                          )
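  // A unit-stride uop touches at most two 16-byte-aligned chunks, so (assuming
  // VecMemUnitStrideMaxFlowNum = 2) the active flow count is 2 when both halves of the
  // shifted byte mask contain active bytes, 1 when only one half does, and 0 otherwise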

  val activeNum         = Mux(s1_in.preIsSplit, PopCount(s1_in.flowMask), usActiveNum)


  s1_kill               := s1_in.uop.robIdx.needFlush(io.redirect)

  // query mergeBuffer
  io.toMergeBuffer.req.valid             := io.out.ready && s1_valid // only a uop that can go will claim a MergeBuffer entry
  io.toMergeBuffer.req.bits.flowNum      := activeNum
  io.toMergeBuffer.req.bits.data         := s1_in.data
  io.toMergeBuffer.req.bits.uop          := s1_in.uop
  io.toMergeBuffer.req.bits.uop.vpu.nf   := s1_in.rawNf
  io.toMergeBuffer.req.bits.mask         := s1_mask
  io.toMergeBuffer.req.bits.vaddr        := s1_in.baseAddr
  io.toMergeBuffer.req.bits.vdIdx        := s1_vdIdx  // TODO: vdIdxReg should no longer be needed; keep it for now
  io.toMergeBuffer.req.bits.fof          := s1_in.fof
  io.toMergeBuffer.req.bits.vlmax        := s1_in.vlmax
//   io.toMergeBuffer.req.bits.vdOffset :=

  // TODO: vdIdxReg should no longer be needed; keep it for now
//  when (s1_in.uop.lastUop && s1_fire || s1_kill) {
//    vdIdxReg := 0.U
//  }.elsewhen(s1_fire) {
//    vdIdxReg := vdIdxReg + 1.U
//    XSError(vdIdxReg + 1.U === 0.U, s"Overflow! The number of vd should be less than 8\n")
//  }
  // out connect
  io.out.valid          := s1_valid && io.toMergeBuffer.resp.valid && (activeNum =/= 0.U) // if activeNum == 0, this uop does nothing and can be killed
  io.out.bits           := s1_in
  io.out.bits.uopOffset := uopOffset
  io.out.bits.uopAddr   := s1_in.baseAddr + uopOffset
  io.out.bits.stride    := stride
  io.out.bits.mBIndex   := io.toMergeBuffer.resp.bits.mBIndex
  io.out.bits.usLowBitsAddr := usLowBitsAddr
  io.out.bits.usAligned128  := usAligned128
  io.out.bits.usMask        := usMask
  io.out.bits.uop.vpu.nf    := s1_in.rawNf

  XSPerfAccumulate("split_out",     io.out.fire)
  XSPerfAccumulate("pipe_block",    io.out.valid && !io.out.ready)
  XSPerfAccumulate("mbuffer_block", s1_valid && io.out.ready && !io.toMergeBuffer.resp.valid)
}

abstract class VSplitBuffer(isVStore: Boolean = false)(implicit p: Parameters) extends VLSUModule{
  val io = IO(new VSplitBufferIO(isVStore))
  lazy val fuCfg    = if(isVStore) VstuCfg else VlduCfg

  val uopq          = Reg(new VLSBundle(isVStore))
  val allocated     = RegInit(false.B)
  val needCancel    = WireInit(false.B)
  val activeIssue   = Wire(Bool())
  val inActiveIssue = Wire(Bool())
  val splitFinish   = WireInit(false.B)

  // for split
  val splitIdx = RegInit(0.U(flowIdxBits.W))
  val strideOffsetReg = RegInit(0.U(VLEN.W))

  /**
    * Redirect
    */
  val cancelEnq    = io.in.bits.uop.robIdx.needFlush(io.redirect)
  val canEnqueue   = io.in.valid
  val needEnqueue  = canEnqueue && !cancelEnq

  // enqueue
  val offset    = PopCount(needEnqueue)
  val canAccept = !allocated || allocated && splitFinish && (activeIssue || inActiveIssue) // an occupied entry can accept a new uop only once splitting finishes and the last flow issues
  io.in.ready  := canAccept
  val doEnqueue = canAccept && needEnqueue

  when(doEnqueue){
    uopq := io.in.bits
  }

  // split uops
  val issueValid       = allocated && !needCancel
  val issueEntry       = uopq
  val issueMbIndex     = issueEntry.mBIndex
  val issueFlowNum     = issueEntry.flowNum
  val issueBaseAddr    = issueEntry.baseAddr
  val issueUopAddr     = issueEntry.uopAddr
  val issueUop         = issueEntry.uop
  val issueUopIdx      = issueUop.vpu.vuopIdx
  val issueInstType    = issueEntry.instType
  val issueUopOffset   = issueEntry.uopOffset
  val issueEew         = issueEntry.eew
  val issueSew         = issueEntry.sew
  val issueLmul        = issueEntry.lmul
  val issueEmul        = issueEntry.emul
  val issueAlignedType = issueEntry.alignedType
  val issuePreIsSplit  = issueEntry.preIsSplit
  val issueByteMask    = issueEntry.byteMask
  val issueUsMask      = issueEntry.usMask
  val issueVLMAXMask   = issueEntry.vlmax - 1.U
  val issueIsWholeReg  = issueEntry.usWholeReg
  val issueVLMAXLog2   = GenVLMAXLog2(issueEntry.lmul, issueSew)
  val issueVlMaxInVd   = issueEntry.indexVlMaxInVd
  val issueUsLowBitsAddr = issueEntry.usLowBitsAddr
  val issueUsAligned128  = issueEntry.usAligned128
  val elemIdx = GenElemIdx(
    instType = issueInstType,
    emul = issueEmul,
    lmul = issueLmul,
    eew = issueEew,
    sew = issueSew,
    uopIdx = issueUopIdx,
    flowIdx = splitIdx
  ) // elemIdx inside the instruction, used for exceptions

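  // Element index within the vd: for the special indexed case (emul > lmul) the
  // per-uop offset computed in s0 is added so that indices keep counting across
  // the multiple uops that share one vd (see indexedSplitOffset above)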
  val splitIdxOffset = issueEntry.indexedSplitOffset + splitIdx

  val indexFlowInnerIdx = elemIdx & issueVlMaxInVd
  val nfIdx = Mux(issueIsWholeReg, 0.U, elemIdx >> issueVLMAXLog2)
  val fieldOffset = nfIdx << issueAlignedType // field offset inside a segment

  val indexedStride    = IndexAddr( // index for indexed instructions
    index = issueEntry.stride,
    flow_inner_idx = indexFlowInnerIdx,
    eew = issueEew
  )
  val issueStride = Mux(isIndexed(issueInstType), indexedStride, strideOffsetReg)
  val vaddr = issueUopAddr + issueStride
  val mask = genVWmask128(vaddr, issueAlignedType) // scalar mask for the flow
  val flowMask = issueEntry.flowMask
  /*
   * A unit-stride uop is split into one flow or two flows:
   * if the uop's address is 128-bit aligned it is split into one flow, otherwise into two.
   */
  val usSplitMask      = genUSSplitMask(issueUsMask, splitIdx)
  val usMaskInSingleUop = (genUSSplitMask(issueUsMask, 1.U) === 0.U) // if the second split mask is zero, this uop does not need to be split
  val usNoSplit        = (issueUsAligned128 || usMaskInSingleUop) &&
                          !issuePreIsSplit &&
                          (splitIdx === 0.U) // this unit-stride uop doesn't need to be split into two flows
  val usSplitVaddr     = genUSSplitAddr(issueUopAddr, splitIdx, XLEN)
  val regOffset        = getCheckAddrLowBits(issueUsLowBitsAddr, maxMemByteNum) // byte offset within the 256-bit vd
  XSError((splitIdx > 1.U && usNoSplit) || (splitIdx > 1.U && !issuePreIsSplit), "Unit-Stride addr split error!\n")
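  // Worked example of the unit-stride split: with the uop address ending in 0x8 and a
  // full 16-byte mask, the data straddles two 16-byte chunks, so two flows are issued
  // (splitIdx = 0 and 1, presumably at consecutive 16-byte-aligned addresses via
  // genUSSplitAddr) and regOffset (= 8 here) realigns the data when merging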

  val vecActive = Mux(!issuePreIsSplit, usSplitMask.orR, (flowMask & UIntToOH(splitIdx)).orR)
  // only non-unit-stride accesses can trigger misalignment here (unit-stride is split into 128-bit aligned flows)
  val addrAligned = LookupTree(issueEew, List(
    "b00".U   -> true.B,                //b
    "b01".U   -> (vaddr(0)    === 0.U), //h
    "b10".U   -> (vaddr(1, 0) === 0.U), //w
    "b11".U   -> (vaddr(2, 0) === 0.U)  //d
  )) || !issuePreIsSplit

  // data
  io.out.bits match { case x =>
    x.uop                   := issueUop
    x.uop.imm               := 0.U
    x.uop.exceptionVec      := ExceptionNO.selectByFu(issueUop.exceptionVec, fuCfg)
    x.vaddr                 := Mux(!issuePreIsSplit, usSplitVaddr, vaddr)
    x.basevaddr             := issueBaseAddr
    x.alignedType           := issueAlignedType
    x.isvec                 := true.B
    x.mask                  := Mux(!issuePreIsSplit, usSplitMask, mask)
    x.reg_offset            := regOffset // for merging unit-stride data
    x.vecActive             := vecActive // currently, unit-stride flows are always sent to the pipeline
    x.is_first_ele          := DontCare
    x.usSecondInv           := usNoSplit
    x.elemIdx               := elemIdx
    x.elemIdxInsideVd       := splitIdxOffset // for unit-stride, the index of the two split memory requests (for merging data)
    x.uop_unit_stride_fof   := DontCare
    x.isFirstIssue          := DontCare
    x.mBIndex               := issueMbIndex
  }

  // redirect
  needCancel := uopq.uop.robIdx.needFlush(io.redirect) && allocated

  /* Execute logic */
  /** Issue to scalar pipeline **/

  lazy val misalignedCanGo = true.B
  val allowIssue = (addrAligned || misalignedCanGo) && io.out.ready
  val issueCount = Mux(usNoSplit, 2.U, (PopCount(inActiveIssue) + PopCount(activeIssue))) // a unit-stride uop that needs no split counts as two flows
  splitFinish := splitIdx >= (issueFlowNum - issueCount)
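  // e.g. flowNum = 8 with one flow issued per fire finishes once splitIdx reaches 7;
  // a usNoSplit uop (assuming unit-stride carries flowNum = 2, with issueCount = 2)
  // finishes in a single fire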

  // handshake
  activeIssue := issueValid && allowIssue && vecActive // active issue; currently used for non-unit-stride
  inActiveIssue := issueValid && !vecActive
  when (!issueEntry.uop.robIdx.needFlush(io.redirect)) {
    when (!splitFinish) {
      when (activeIssue || inActiveIssue) {
        // The uop has not been entirely split yet
        splitIdx := splitIdx + issueCount
        strideOffsetReg := Mux(!issuePreIsSplit, 0.U, strideOffsetReg + issueEntry.stride) // for plain unit-stride, strideOffsetReg is not used
      }
    }.otherwise {
      when (activeIssue || inActiveIssue) {
        // The uop is done splitting
        splitIdx := 0.U(flowIdxBits.W) // initialize flowIdx
        strideOffsetReg := 0.U
      }
    }
  }.otherwise {
    splitIdx := 0.U(flowIdxBits.W) // initialize flowIdx
    strideOffsetReg := 0.U
  }
  // allocated
  when(doEnqueue){ // if the enqueue needs to be cancelled, doEnqueue will be false, so this branch has the highest priority
    allocated := true.B
  }.elsewhen(needCancel) { // redirect
    allocated := false.B
  }.elsewhen(splitFinish && (activeIssue || inActiveIssue)){ // dequeue
    allocated := false.B
  }

  // out connect
  io.out.valid := issueValid && vecActive && (addrAligned || misalignedCanGo) // TODO: inactive unit-stride uops are not sent to the pipeline

  XSPerfAccumulate("out_valid",             io.out.valid)
  XSPerfAccumulate("out_fire",              io.out.fire)
  XSPerfAccumulate("out_fire_unitstride",   io.out.fire && !issuePreIsSplit)
  XSPerfAccumulate("unitstride_vlenAlign",  io.out.fire && !issuePreIsSplit && getCheckAddrLowBits(io.out.bits.vaddr, maxMemByteNum) === 0.U)
  XSPerfAccumulate("unitstride_invalid",    io.out.ready && issueValid && !issuePreIsSplit && PopCount(io.out.bits.mask).orR)
}

class VSSplitBufferImp(implicit p: Parameters) extends VSplitBuffer(isVStore = true){
  override lazy val misalignedCanGo = io.vstdMisalign.get.storePipeEmpty && io.vstdMisalign.get.storeMisalignBufferEmpty

  // split data
  val splitData = genVSData(
    data = issueEntry.data.asUInt,
    elemIdx = splitIdxOffset,
    alignedType = issueAlignedType
  )
  val flowData = genVWdata(splitData, issueAlignedType)
  val usSplitData      = genUSSplitData(issueEntry.data.asUInt, splitIdx, vaddr(3,0))

  val sqIdx = issueUop.sqIdx + splitIdx
  io.out.bits.uop.sqIdx := sqIdx
  io.out.bits.uop.exceptionVec(storeAddrMisaligned) := !addrAligned && !issuePreIsSplit && io.out.bits.mask.orR

  // send data to sq
  val vstd = io.vstd.get
  vstd.valid := issueValid && (vecActive || !issuePreIsSplit)
  vstd.bits.uop := issueUop
  vstd.bits.uop.sqIdx := sqIdx
  vstd.bits.uop.fuType := FuType.vstu.U
  vstd.bits.data := Mux(!issuePreIsSplit, usSplitData, flowData)
  vstd.bits.debug := DontCare
  vstd.bits.vdIdx.get := DontCare
  vstd.bits.vdIdxInField.get := DontCare
  vstd.bits.isFromLoadUnit   := DontCare
  vstd.bits.mask.get := Mux(!issuePreIsSplit, usSplitMask, mask)

}

class VLSplitBufferImp(implicit p: Parameters) extends VSplitBuffer(isVStore = false){
  io.out.bits.uop.lqIdx := issueUop.lqIdx + splitIdx
  io.out.bits.uop.exceptionVec(loadAddrMisaligned) := !addrAligned && !issuePreIsSplit && io.out.bits.mask.orR
  io.out.bits.uop.fuType := FuType.vldu.U
}

class VSSplitPipelineImp(implicit p: Parameters) extends VSplitPipeline(isVStore = true){
  override def us_whole_reg(fuOpType: UInt): Bool = fuOpType === VstuType.vsr
  override def us_mask(fuOpType: UInt): Bool      = fuOpType === VstuType.vsm
  override def us_fof(fuOpType: UInt): Bool       = false.B // vector stores don't have fof
}

class VLSplitPipelineImp(implicit p: Parameters) extends VSplitPipeline(isVStore = false){

  override def us_whole_reg(fuOpType: UInt): Bool = fuOpType === VlduType.vlr
  override def us_mask(fuOpType: UInt): Bool      = fuOpType === VlduType.vlm
  override def us_fof(fuOpType: UInt): Bool       = fuOpType === VlduType.vleff
}

class VLSplitImp(implicit p: Parameters) extends VLSUModule{
  val io = IO(new VSplitIO(isVStore=false))
  val splitPipeline = Module(new VLSplitPipelineImp())
  val splitBuffer = Module(new VLSplitBufferImp())
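  // Back-pressure hint from the merge buffer: while the threshold is valid and points
  // at an lqIdx other than the incoming uop's, new uops are blocked at the input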
  val mergeBufferNack = io.threshold.get.valid && io.threshold.get.bits =/= io.in.bits.uop.lqIdx
  // Split Pipeline
  splitPipeline.io.in <> io.in
  io.in.ready := splitPipeline.io.in.ready && !mergeBufferNack
  splitPipeline.io.redirect <> io.redirect
  io.toMergeBuffer <> splitPipeline.io.toMergeBuffer

  // skid buffer
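  // kill with the flush status of whichever uop the skid buffer will hold next:
  // the incoming bits when the pipeline output fires this cycle, otherwise the
  // bits it already buffered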
  skidBuffer(splitPipeline.io.out, splitBuffer.io.in,
    Mux(splitPipeline.io.out.fire,
      splitPipeline.io.out.bits.uop.robIdx.needFlush(io.redirect),
      splitBuffer.io.in.bits.uop.robIdx.needFlush(io.redirect)),
    "VLSplitSkidBuffer")

  // Split Buffer
  splitBuffer.io.redirect <> io.redirect
  io.out <> splitBuffer.io.out
}

class VSSplitImp(implicit p: Parameters) extends VLSUModule{
  val io = IO(new VSplitIO(isVStore=true))
  val splitPipeline = Module(new VSSplitPipelineImp())
  val splitBuffer = Module(new VSSplitBufferImp())
  // Split Pipeline
  splitPipeline.io.in <> io.in
  splitPipeline.io.redirect <> io.redirect
  io.toMergeBuffer <> splitPipeline.io.toMergeBuffer

  // skid buffer
  skidBuffer(splitPipeline.io.out, splitBuffer.io.in,
    Mux(splitPipeline.io.out.fire,
      splitPipeline.io.out.bits.uop.robIdx.needFlush(io.redirect),
      splitBuffer.io.in.bits.uop.robIdx.needFlush(io.redirect)),
    "VSSplitSkidBuffer")

  // Split Buffer
  splitBuffer.io.redirect <> io.redirect
  io.out <> splitBuffer.io.out
  io.vstd.get <> splitBuffer.io.vstd.get

  io.vstdMisalign.get <> splitBuffer.io.vstdMisalign.get
}