// Source: XiangShan/src/main/scala/xiangshan/backend/decode/DecodeUnitComp.scala (revision 25df626ec34ea3250afaec2b5e8ea334ab760b4a)
1/***************************************************************************************
2  * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
3  * Copyright (c) 2020-2021 Peng Cheng Laboratory
4  *
5  * XiangShan is licensed under Mulan PSL v2.
6  * You can use this software according to the terms and conditions of the Mulan PSL v2.
7  * You may obtain a copy of Mulan PSL v2 at:
8  *          http://license.coscl.org.cn/MulanPSL2
9  *
10  * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11  * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12  * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13  *
14  * See the Mulan PSL v2 for more details.
15  ***************************************************************************************/
16
17package xiangshan.backend.decode
18
19import org.chipsalliance.cde.config.Parameters
20import chisel3._
21import chisel3.util._
22import freechips.rocketchip.rocket.Instructions
23import freechips.rocketchip.util.uintToBitPat
24import utils._
25import utility._
26import xiangshan.ExceptionNO.illegalInstr
27import xiangshan._
28import xiangshan.backend.fu.fpu.FPU
29import xiangshan.backend.fu.FuType
30import freechips.rocketchip.rocket.Instructions._
31import xiangshan.backend.Bundles.{DecodedInst, StaticInst}
32import xiangshan.backend.decode.isa.bitfield.XSInstBitFields
33import xiangshan.backend.fu.vector.Bundles.{VSew, VType, VLmul}
34import yunsuan.VpermType
35import scala.collection.Seq
36import chisel3.util.experimental.decode.{QMCMinimizer, TruthTable, decoder}
37
/**
  * Elaboration-time lookup table for one uop of a non-segment indexed vector load/store.
  *
  * The uop position `uopIdx` is fixed when the module is constructed. At run time the
  * 4-bit selector `src` is decoded into two 3-bit register offsets. The table key is
  * `(emul << 2) | lmul`, where both fields range over 0 to 3 and are used as shift
  * amounts (group sizes `1 << emul` and `1 << lmul`).
  *
  * @param uopIdx index of the uop this table instance serves
  */
class indexedLSUopTable(uopIdx: Int) extends Module {
  /** Table key: bits [3:2] hold emul, bits [1:0] hold lmul. */
  val src = IO(Input(UInt(4.W)))
  /** Offset decoded from bits [5:3] of the table output. */
  val outOffsetVs2 = IO(Output(UInt(3.W)))
  /** Offset decoded from bits [2:0] of the table output. */
  val outOffsetVd = IO(Output(UInt(3.W)))

  /**
    * Computes the `(vs2 offset, vd offset)` pair of one uop.
    *
    * Only non-segment indexed load/store is considered.
    *
    * @param lmul   shift amount of the data register group (group size is `1 << lmul`)
    * @param emul   shift amount of the index register group (group size is `1 << emul`)
    * @param uopIdx index of the uop inside the instruction
    * @return `(offsetVs2, offsetVd)`; `(0, 0)` when `uopIdx` is outside
    *         `0 until (1 << max(lmul, emul))`
    */
  def genCsBundle_VEC_INDEXED_LDST(lmul: Int, emul: Int, uopIdx: Int): (Int, Int) = {
    // The larger of the two groups decides how many uops exist.
    val uopNum = 1 << (lmul max emul)
    if (uopIdx < 0 || uopIdx >= uopNum) {
      // This uop does not exist for the given configuration.
      (0, 0)
    } else if (lmul < emul) {
      // lmul < emul: uop num depends on emul; several index registers share one vd register.
      (uopIdx, uopIdx / (1 << (emul - lmul)))
    } else {
      // lmul >= emul: uop num depends on lmul; several vd registers share one index register.
      (uopIdx / (1 << (lmul - emul)), uopIdx)
    }
  }

  // Indexed load/store: one entry (emul, lmul, offsetVs2, offsetVd) per combination,
  // enumerated with emul as the outer index and lmul as the inner index.
  val combVemulNf: Seq[(Int, Int, Int, Int)] =
    for {
      emul <- 0 until 4
      lmul <- 0 until 4
    } yield {
      val (offsetVs2, offsetVd) = genCsBundle_VEC_INDEXED_LDST(lmul, emul, uopIdx)
      (emul, lmul, offsetVs2, offsetVd)
    }

  // Key is {emul, lmul}; value is {offsetVs2, offsetVd}. All 16 keys are listed,
  // so the BitPat.N(6) default is never selected.
  val out = decoder(QMCMinimizer, src, TruthTable(combVemulNf.map {
    case (emul, lmul, offsetVs2, offsetVd) =>
      (BitPat((emul << 2 | lmul).U(4.W)), BitPat((offsetVs2 << 3 | offsetVd).U(6.W)))
  }, BitPat.N(6)))
  outOffsetVs2 := out(5, 3)
  outOffsetVd := out(2, 0)
}
78
/** Numeric constants mixed into the vector decode logic. */
trait VectorConstants {
  /** Largest register-group size; used as the loop bound when expanding an instruction into uops. */
  val MAX_VLMUL: Int = 8

  /** Logical register index used as the temporary that carries vtype from the i2v uop to the vset uop. */
  val FP_TMP_REG_MV: Int = 32

  /** First temporary logical vector register; the range is 33~47, i.e. 15 registers. */
  val VECTOR_TMP_REG_LMUL: Int = 33

  /** NOTE(review): presumably the maximum uop count of an indexed load/store; no use is visible here, confirm. */
  val MAX_INDEXED_LS_UOPNUM: Int = 64
}
85
/**
  * Port bundle of [[DecodeUnitComp]].
  *
  * Field names, declaration order, directions and widths make up the hardware
  * interface and must stay exactly as declared.
  */
class DecodeUnitCompIO(implicit p: Parameters) extends XSBundle {
  // NOTE(review): presumably a pipeline flush request; its use is not visible here, confirm.
  val redirect = Input(Bool())

  // Custom CSR control inputs.
  val csrCtrl = Input(new CustomCSRCtrlIO)

  // Bypassed vtype, connected into the vpu fields of the vset uops.
  val vtypeBypass = Input(new VType)

  // When the first inst in decode vector is complex inst, pass it in.
  // The payload pairs the simple decoder's result with the uop-count information.
  val in = Flipped(DecoupledIO(new Bundle {
    val simpleDecodedInst = new DecodedInst
    val uopInfo = new UopInfo
  }))

  // Up to RenameWidth decoded uops, each with its own valid/ready handshake.
  val out = new Bundle {
    val complexDecodedInsts = Vec(RenameWidth, DecoupledIO(new DecodedInst))
  }

  // NOTE(review): presumably the number of uops offered on `out` this cycle; the driver is not visible here, confirm.
  val complexNum = Output(UInt(3.W))
}
100
101/**
102  * @author zly
103  */
104class DecodeUnitComp()(implicit p : Parameters) extends XSModule with DecodeUnitConstants with VectorConstants {
105  val io = IO(new DecodeUnitCompIO)
106
107  // alias
108  private val inReady = io.in.ready
109  private val inValid = io.in.valid
110  private val inDecodedInst = WireInit(io.in.bits.simpleDecodedInst)
111  private val inInstFields = io.in.bits.simpleDecodedInst.instr.asTypeOf(new XSInstBitFields)
112  private val inUopInfo = io.in.bits.uopInfo
113  private val outValids = io.out.complexDecodedInsts.map(_.valid)
114  private val outReadys = io.out.complexDecodedInsts.map(_.ready)
115  private val outDecodedInsts = io.out.complexDecodedInsts.map(_.bits)
116  private val outComplexNum = io.complexNum
117
118  val maxUopSize = MaxUopSize
119  when (io.in.fire && io.in.bits.simpleDecodedInst.isVset) {
120    when(inInstFields.RD === 0.U && inInstFields.RS1 === 0.U) {
121      inDecodedInst.fuOpType := VSETOpType.keepVl(io.in.bits.simpleDecodedInst.fuOpType)
122    }.elsewhen(inInstFields.RS1 === 0.U) {
123      inDecodedInst.fuOpType := VSETOpType.setVlmax(io.in.bits.simpleDecodedInst.fuOpType)
124    }
125  }
126
127  val latchedInst = RegEnable(inDecodedInst, inValid && inReady)
128  val latchedUopInfo = RegEnable(inUopInfo, inValid && inReady)
129  //input bits
130  private val instFields: XSInstBitFields = latchedInst.instr.asTypeOf(new XSInstBitFields)
131
132  val src1 = Cat(0.U(1.W), instFields.RS1)
133  val src2 = Cat(0.U(1.W), instFields.RS2)
134  val dest = Cat(0.U(1.W), instFields.RD)
135
136  val nf    = instFields.NF
137  val width = instFields.WIDTH(1, 0)
138
139  //output of DecodeUnit
140  val numOfUop = Wire(UInt(log2Up(maxUopSize).W))
141  val numOfWB = Wire(UInt(log2Up(maxUopSize).W))
142  val lmul = Wire(UInt(4.W))
143  val isVsetSimple = Wire(Bool())
144
145  val indexedLSRegOffset = Seq.tabulate(MAX_VLMUL)(i => Module(new indexedLSUopTable(i)))
146  indexedLSRegOffset.map(_.src := 0.U)
147
148  //pre decode
149  lmul := latchedUopInfo.lmul
150  isVsetSimple := latchedInst.isVset
151  val vlmulReg = latchedInst.vpu.vlmul
152  val vsewReg = latchedInst.vpu.vsew
153
154  //Type of uop Div
155  val typeOfSplit = latchedInst.uopSplitType
156  val src1Type = latchedInst.srcType(0)
157  val src1IsImm = src1Type === SrcType.imm
158  val src1IsFp = src1Type === SrcType.fp
159
160  numOfUop := latchedUopInfo.numOfUop
161  numOfWB := latchedUopInfo.numOfWB
162
163  //uops dispatch
164  val s_idle :: s_active :: Nil = Enum(2)
165  val state = RegInit(s_idle)
166  val stateNext = WireDefault(state)
167  val numDecodedUop = RegInit(0.U(log2Up(maxUopSize).W))
168  val uopRes = RegInit(0.U(log2Up(maxUopSize).W))
169  val uopResNext = WireInit(uopRes)
170  val e64 = 3.U(2.W)
171  val isUsSegment = instFields.MOP === 0.U && nf =/= 0.U && (instFields.LUMOP === 0.U || instFields.LUMOP === "b10000".U)
172  val isIxSegment = instFields.MOP(0) === 1.U && nf =/= 0.U
173  val isSdSegment = instFields.MOP === "b10".U && nf =/= 0.U
174
175  //uop div up to maxUopSize
176  val csBundle = Wire(Vec(maxUopSize, new DecodedInst))
177  csBundle.foreach { case dst =>
178    dst := latchedInst
179    dst.numUops := latchedUopInfo.numOfUop
180    dst.numWB := latchedUopInfo.numOfWB
181    dst.firstUop := false.B
182    dst.lastUop := false.B
183    dst.vlsInstr := false.B
184  }
185
186  csBundle(0).firstUop := true.B
187  csBundle(numOfUop - 1.U).lastUop := true.B
188
189  switch(typeOfSplit) {
190    is(UopSplitType.VSET) {
191      // In simple decoder, rfWen and vecWen are not set
192      when(isVsetSimple) {
193        // Default
194        // uop0 set rd, never flushPipe
195        csBundle(0).fuType := FuType.vsetiwi.U
196        csBundle(0).flushPipe := false.B
197        csBundle(0).rfWen := true.B
198        // uop1 set vl, vsetvl will flushPipe
199        csBundle(1).ldest := VCONFIG_IDX.U
200        csBundle(1).vecWen := true.B
201        when(VSETOpType.isVsetvli(latchedInst.fuOpType) && dest === 0.U && src1 === 0.U) {
202          // write nothing, uop0 is a nop instruction
203          csBundle(0).rfWen := false.B
204          csBundle(0).fpWen := false.B
205          csBundle(0).vecWen := false.B
206          csBundle(1).fuType := FuType.vsetfwf.U
207          csBundle(1).srcType(0) := SrcType.vp
208          csBundle(1).lsrc(0) := VCONFIG_IDX.U
209        }.elsewhen(VSETOpType.isVsetvl(latchedInst.fuOpType) && dest === 0.U && src1 === 0.U) {
210          // uop0: mv vtype gpr to vector region
211          csBundle(0).srcType(0) := SrcType.xp
212          csBundle(0).srcType(1) := SrcType.no
213          csBundle(0).lsrc(1) := 0.U
214          csBundle(0).ldest := FP_TMP_REG_MV.U
215          csBundle(0).fuType := FuType.i2v.U
216          csBundle(0).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
217          csBundle(0).rfWen := false.B
218          csBundle(0).fpWen := true.B
219          csBundle(0).vecWen := false.B
220          csBundle(0).flushPipe := false.B
221          // uop1: uvsetvcfg_vv
222          csBundle(1).fuType := FuType.vsetfwf.U
223          // vl
224          csBundle(1).srcType(0) := SrcType.vp
225          csBundle(1).lsrc(0) := VCONFIG_IDX.U
226          // vtype
227          csBundle(1).srcType(1) := SrcType.fp
228          csBundle(1).lsrc(1) := FP_TMP_REG_MV.U
229          csBundle(1).vecWen := true.B
230          csBundle(1).ldest := VCONFIG_IDX.U
231        }.elsewhen(dest === 0.U) {
232          // write nothing, uop0 is a nop instruction
233          csBundle(0).rfWen := false.B
234          csBundle(0).fpWen := false.B
235          csBundle(0).vecWen := false.B
236        }
237        // use bypass vtype from vtypeGen
238        csBundle(0).vpu.connectVType(io.vtypeBypass)
239        csBundle(1).vpu.connectVType(io.vtypeBypass)
240      }
241    }
242    is(UopSplitType.VEC_VVV) {
243      for (i <- 0 until MAX_VLMUL) {
244        csBundle(i).lsrc(0) := src1 + i.U
245        csBundle(i).lsrc(1) := src2 + i.U
246        csBundle(i).lsrc(2) := dest + i.U
247        csBundle(i).ldest := dest + i.U
248        csBundle(i).uopIdx := i.U
249      }
250    }
251    is(UopSplitType.VEC_VFV) {
252      /*
      f to vector move
254       */
255      csBundle(0).srcType(0) := SrcType.fp
256      csBundle(0).srcType(1) := SrcType.imm
257      csBundle(0).lsrc(1) := 0.U
258      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
259      csBundle(0).fuType := FuType.f2v.U
260      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
261      csBundle(0).vecWen := true.B
262      csBundle(0).vpu.isReverse := false.B
263      /*
264      LMUL
265       */
266      for (i <- 0 until MAX_VLMUL) {
267        csBundle(i + 1).srcType(0) := SrcType.vp
268        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
269        csBundle(i + 1).lsrc(1) := src2 + i.U
270        csBundle(i + 1).lsrc(2) := dest + i.U
271        csBundle(i + 1).ldest := dest + i.U
272        csBundle(i + 1).uopIdx := i.U
273      }
274    }
275    is(UopSplitType.VEC_EXT2) {
276      for (i <- 0 until MAX_VLMUL / 2) {
277        csBundle(2 * i).lsrc(1) := src2 + i.U
278        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
279        csBundle(2 * i).ldest := dest + (2 * i).U
280        csBundle(2 * i).uopIdx := (2 * i).U
281        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
282        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
283        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
284        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
285      }
286    }
287    is(UopSplitType.VEC_EXT4) {
288      for (i <- 0 until MAX_VLMUL / 4) {
289        csBundle(4 * i).lsrc(1) := src2 + i.U
290        csBundle(4 * i).lsrc(2) := dest + (4 * i).U
291        csBundle(4 * i).ldest := dest + (4 * i).U
292        csBundle(4 * i).uopIdx := (4 * i).U
293        csBundle(4 * i + 1).lsrc(1) := src2 + i.U
294        csBundle(4 * i + 1).lsrc(2) := dest + (4 * i + 1).U
295        csBundle(4 * i + 1).ldest := dest + (4 * i + 1).U
296        csBundle(4 * i + 1).uopIdx := (4 * i + 1).U
297        csBundle(4 * i + 2).lsrc(1) := src2 + i.U
298        csBundle(4 * i + 2).lsrc(2) := dest + (4 * i + 2).U
299        csBundle(4 * i + 2).ldest := dest + (4 * i + 2).U
300        csBundle(4 * i + 2).uopIdx := (4 * i + 2).U
301        csBundle(4 * i + 3).lsrc(1) := src2 + i.U
302        csBundle(4 * i + 3).lsrc(2) := dest + (4 * i + 3).U
303        csBundle(4 * i + 3).ldest := dest + (4 * i + 3).U
304        csBundle(4 * i + 3).uopIdx := (4 * i + 3).U
305      }
306    }
307    is(UopSplitType.VEC_EXT8) {
308      for (i <- 0 until MAX_VLMUL) {
309        csBundle(i).lsrc(1) := src2
310        csBundle(i).lsrc(2) := dest + i.U
311        csBundle(i).ldest := dest + i.U
312        csBundle(i).uopIdx := i.U
313      }
314    }
315    is(UopSplitType.VEC_0XV) {
316      /*
317      i/f to vector move
318       */
319      csBundle(0).srcType(0) := Mux(src1IsFp, SrcType.fp, SrcType.reg)
320      csBundle(0).srcType(1) := SrcType.imm
321      csBundle(0).lsrc(1) := 0.U
322      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
323      csBundle(0).fuType := Mux(src1IsFp, FuType.f2v.U, FuType.i2v.U)
324      csBundle(0).fuOpType := Cat(Mux(src1IsFp, IF2VectorType.fDup2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
325      csBundle(0).rfWen := false.B
326      csBundle(0).fpWen := false.B
327      csBundle(0).vecWen := true.B
328      /*
329      vmv.s.x
330       */
331      csBundle(1).srcType(0) := SrcType.vp
332      csBundle(1).srcType(1) := SrcType.imm
333      csBundle(1).srcType(2) := SrcType.vp
334      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
335      csBundle(1).lsrc(1) := 0.U
336      csBundle(1).lsrc(2) := dest
337      csBundle(1).ldest := dest
338      csBundle(1).rfWen := false.B
339      csBundle(1).fpWen := false.B
340      csBundle(1).vecWen := true.B
341      csBundle(1).uopIdx := 0.U
342    }
343    is(UopSplitType.VEC_VXV) {
344      /*
345      i to vector move
346       */
347      csBundle(0).srcType(0) := SrcType.reg
348      csBundle(0).srcType(1) := SrcType.imm
349      csBundle(0).lsrc(1) := 0.U
350      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
351      csBundle(0).fuType := FuType.i2v.U
352      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg)
353      csBundle(0).vecWen := true.B
354      csBundle(0).vpu.isReverse := false.B
355      /*
356      LMUL
357       */
358      for (i <- 0 until MAX_VLMUL) {
359        csBundle(i + 1).srcType(0) := SrcType.vp
360        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
361        csBundle(i + 1).lsrc(1) := src2 + i.U
362        csBundle(i + 1).lsrc(2) := dest + i.U
363        csBundle(i + 1).ldest := dest + i.U
364        csBundle(i + 1).uopIdx := i.U
365      }
366    }
367    is(UopSplitType.VEC_VVW) {
368      for (i <- 0 until MAX_VLMUL / 2) {
369        csBundle(2 * i).lsrc(0) := src1 + i.U
370        csBundle(2 * i).lsrc(1) := src2 + i.U
371        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
372        csBundle(2 * i).ldest := dest + (2 * i).U
373        csBundle(2 * i).uopIdx := (2 * i).U
374        csBundle(2 * i + 1).lsrc(0) := src1 + i.U
375        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
376        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
377        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
378        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
379      }
380    }
381    is(UopSplitType.VEC_VFW) {
382      /*
383      f to vector move
384       */
385      csBundle(0).srcType(0) := SrcType.fp
386      csBundle(0).srcType(1) := SrcType.imm
387      csBundle(0).lsrc(1) := 0.U
388      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
389      csBundle(0).fuType := FuType.f2v.U
390      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
391      csBundle(0).rfWen := false.B
392      csBundle(0).fpWen := false.B
393      csBundle(0).vecWen := true.B
394
395      for (i <- 0 until MAX_VLMUL / 2) {
396        csBundle(2 * i + 1).srcType(0) := SrcType.vp
397        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
398        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
399        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
400        csBundle(2 * i + 1).ldest := dest + (2 * i).U
401        csBundle(2 * i + 1).uopIdx := (2 * i).U
402        csBundle(2 * i + 2).srcType(0) := SrcType.vp
403        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
404        csBundle(2 * i + 2).lsrc(1) := src2 + i.U
405        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
406        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
407        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
408      }
409    }
410    is(UopSplitType.VEC_WVW) {
411      for (i <- 0 until MAX_VLMUL / 2) {
412        csBundle(2 * i).lsrc(0) := src1 + i.U
413        csBundle(2 * i).lsrc(1) := src2 + (2 * i).U
414        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
415        csBundle(2 * i).ldest := dest + (2 * i).U
416        csBundle(2 * i).uopIdx := (2 * i).U
417        csBundle(2 * i + 1).lsrc(0) := src1 + i.U
418        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i + 1).U
419        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
420        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
421        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
422      }
423    }
424    is(UopSplitType.VEC_VXW) {
425      /*
426      i to vector move
427       */
428      csBundle(0).srcType(0) := SrcType.reg
429      csBundle(0).srcType(1) := SrcType.imm
430      csBundle(0).lsrc(1) := 0.U
431      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
432      csBundle(0).fuType := FuType.i2v.U
433      csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg)
434      csBundle(0).vecWen := true.B
435
436      for (i <- 0 until MAX_VLMUL / 2) {
437        csBundle(2 * i + 1).srcType(0) := SrcType.vp
438        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
439        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
440        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
441        csBundle(2 * i + 1).ldest := dest + (2 * i).U
442        csBundle(2 * i + 1).uopIdx := (2 * i).U
443        csBundle(2 * i + 2).srcType(0) := SrcType.vp
444        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
445        csBundle(2 * i + 2).lsrc(1) := src2 + i.U
446        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
447        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
448        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
449      }
450    }
451    is(UopSplitType.VEC_WXW) {
452      /*
453      i to vector move
454       */
455      csBundle(0).srcType(0) := SrcType.reg
456      csBundle(0).srcType(1) := SrcType.imm
457      csBundle(0).lsrc(1) := 0.U
458      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
459      csBundle(0).fuType := FuType.i2v.U
460      csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg)
461      csBundle(0).vecWen := true.B
462
463      for (i <- 0 until MAX_VLMUL / 2) {
464        csBundle(2 * i + 1).srcType(0) := SrcType.vp
465        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
466        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U
467        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
468        csBundle(2 * i + 1).ldest := dest + (2 * i).U
469        csBundle(2 * i + 1).uopIdx := (2 * i).U
470        csBundle(2 * i + 2).srcType(0) := SrcType.vp
471        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
472        csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U
473        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
474        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
475        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
476      }
477    }
478    is(UopSplitType.VEC_WVV) {
479      for (i <- 0 until MAX_VLMUL / 2) {
480
481        csBundle(2 * i).lsrc(0) := src1 + i.U
482        csBundle(2 * i).lsrc(1) := src2 + (2 * i).U
483        csBundle(2 * i).lsrc(2) := dest + i.U
484        csBundle(2 * i).ldest := dest + i.U
485        csBundle(2 * i).uopIdx := (2 * i).U
486        csBundle(2 * i + 1).lsrc(0) := src1 + i.U
487        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i + 1).U
488        csBundle(2 * i + 1).lsrc(2) := dest + i.U
489        csBundle(2 * i + 1).ldest := dest + i.U
490        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
491      }
492    }
493    is(UopSplitType.VEC_WFW) {
494      /*
495      f to vector move
496       */
497      csBundle(0).srcType(0) := SrcType.fp
498      csBundle(0).srcType(1) := SrcType.imm
499      csBundle(0).lsrc(1) := 0.U
500      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
501      csBundle(0).fuType := FuType.f2v.U
502      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
503      csBundle(0).rfWen := false.B
504      csBundle(0).fpWen := false.B
505      csBundle(0).vecWen := true.B
506
507      for (i <- 0 until MAX_VLMUL / 2) {
508        csBundle(2 * i + 1).srcType(0) := SrcType.vp
509        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
510        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U
511        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
512        csBundle(2 * i + 1).ldest := dest + (2 * i).U
513        csBundle(2 * i + 1).uopIdx := (2 * i).U
514        csBundle(2 * i + 2).srcType(0) := SrcType.vp
515        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
516        csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U
517        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
518        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
519        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
520      }
521    }
522    is(UopSplitType.VEC_WXV) {
523      /*
524      i to vector move
525       */
526      csBundle(0).srcType(0) := SrcType.reg
527      csBundle(0).srcType(1) := SrcType.imm
528      csBundle(0).lsrc(1) := 0.U
529      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
530      csBundle(0).fuType := FuType.i2v.U
531      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg)
532      csBundle(0).vecWen := true.B
533
534      for (i <- 0 until MAX_VLMUL / 2) {
535        csBundle(2 * i + 1).srcType(0) := SrcType.vp
536        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
537        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U
538        csBundle(2 * i + 1).lsrc(2) := dest + i.U
539        csBundle(2 * i + 1).ldest := dest + i.U
540        csBundle(2 * i + 1).uopIdx := (2 * i).U
541        csBundle(2 * i + 2).srcType(0) := SrcType.vp
542        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
543        csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U
544        csBundle(2 * i + 2).lsrc(2) := dest + i.U
545        csBundle(2 * i + 2).ldest := dest + i.U
546        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
547      }
548    }
549    is(UopSplitType.VEC_VVM) {
550      csBundle(0).lsrc(2) := dest
551      csBundle(0).ldest := dest
552      csBundle(0).uopIdx := 0.U
553      for (i <- 1 until MAX_VLMUL) {
554        csBundle(i).lsrc(0) := src1 + i.U
555        csBundle(i).lsrc(1) := src2 + i.U
556        csBundle(i).lsrc(2) := dest
557        csBundle(i).ldest := dest
558        csBundle(i).uopIdx := i.U
559      }
560    }
561    is(UopSplitType.VEC_VFM) {
562      /*
563      f to vector move
564       */
565      csBundle(0).srcType(0) := SrcType.fp
566      csBundle(0).srcType(1) := SrcType.imm
567      csBundle(0).lsrc(1) := 0.U
568      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
569      csBundle(0).fuType := FuType.f2v.U
570      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
571      csBundle(0).rfWen := false.B
572      csBundle(0).fpWen := false.B
573      csBundle(0).vecWen := true.B
574      //LMUL
575      csBundle(1).srcType(0) := SrcType.vp
576      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
577      csBundle(1).lsrc(2) := dest
578      csBundle(1).ldest := dest
579      csBundle(1).uopIdx := 0.U
580      for (i <- 1 until MAX_VLMUL) {
581        csBundle(i + 1).srcType(0) := SrcType.vp
582        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
583        csBundle(i + 1).lsrc(1) := src2 + i.U
584        csBundle(i + 1).lsrc(2) := dest
585        csBundle(i + 1).ldest := dest
586        csBundle(i + 1).uopIdx := i.U
587      }
588      csBundle(numOfUop - 1.U).ldest := dest
589    }
590    is(UopSplitType.VEC_VXM) {
591      /*
592      i to vector move
593       */
594      csBundle(0).srcType(0) := SrcType.reg
595      csBundle(0).srcType(1) := SrcType.imm
596      csBundle(0).lsrc(1) := 0.U
597      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
598      csBundle(0).fuType := FuType.i2v.U
599      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg)
600      csBundle(0).vecWen := true.B
601      //LMUL
602      csBundle(1).srcType(0) := SrcType.vp
603      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
604      csBundle(1).lsrc(2) := dest
605      csBundle(1).ldest := dest
606      csBundle(1).uopIdx := 0.U
607      for (i <- 1 until MAX_VLMUL) {
608        csBundle(i + 1).srcType(0) := SrcType.vp
609        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
610        csBundle(i + 1).lsrc(1) := src2 + i.U
611        csBundle(i + 1).lsrc(2) := dest
612        csBundle(i + 1).ldest := dest
613        csBundle(i + 1).uopIdx := i.U
614      }
615      csBundle(numOfUop - 1.U).ldest := dest
616    }
617    is(UopSplitType.VEC_SLIDE1UP) {
618      /*
619      i to vector move
620       */
621      csBundle(0).srcType(0) := SrcType.reg
622      csBundle(0).srcType(1) := SrcType.imm
623      csBundle(0).lsrc(1) := 0.U
624      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
625      csBundle(0).fuType := FuType.i2v.U
626      csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg)
627      csBundle(0).vecWen := true.B
628      //LMUL
629      csBundle(1).srcType(0) := SrcType.vp
630      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
631      csBundle(1).lsrc(2) := dest
632      csBundle(1).ldest := dest
633      csBundle(1).uopIdx := 0.U
634      for (i <- 1 until MAX_VLMUL) {
635        csBundle(i + 1).srcType(0) := SrcType.vp
636        csBundle(i + 1).lsrc(0) := src2 + (i - 1).U
637        csBundle(i + 1).lsrc(1) := src2 + i.U
638        csBundle(i + 1).lsrc(2) := dest + i.U
639        csBundle(i + 1).ldest := dest + i.U
640        csBundle(i + 1).uopIdx := i.U
641      }
642    }
643    is(UopSplitType.VEC_FSLIDE1UP) {
644      /*
      f to vector move
646       */
647      csBundle(0).srcType(0) := SrcType.fp
648      csBundle(0).srcType(1) := SrcType.imm
649      csBundle(0).lsrc(1) := 0.U
650      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
651      csBundle(0).fuType := FuType.f2v.U
652      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
653      csBundle(0).rfWen := false.B
654      csBundle(0).fpWen := false.B
655      csBundle(0).vecWen := true.B
656      //LMUL
657      csBundle(1).srcType(0) := SrcType.vp
658      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
659      csBundle(1).lsrc(1) := src2
660      csBundle(1).lsrc(2) := dest
661      csBundle(1).ldest := dest
662      csBundle(1).uopIdx := 0.U
663      for (i <- 1 until MAX_VLMUL) {
664        csBundle(i + 1).srcType(0) := SrcType.vp
665        csBundle(i + 1).lsrc(0) := src2 + (i - 1).U
666        csBundle(i + 1).lsrc(1) := src2 + i.U
667        csBundle(i + 1).lsrc(2) := dest + i.U
668        csBundle(i + 1).ldest := dest + i.U
669        csBundle(i + 1).uopIdx := i.U
670      }
671    }
672    is(UopSplitType.VEC_SLIDE1DOWN) { // lmul+lmul = 16
673      /*
674      i to vector move
675       */
676      csBundle(0).srcType(0) := SrcType.reg
677      csBundle(0).srcType(1) := SrcType.imm
678      csBundle(0).lsrc(1) := 0.U
679      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
680      csBundle(0).fuType := FuType.i2v.U
681      csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg)
682      csBundle(0).vecWen := true.B
683      //LMUL
684      for (i <- 0 until MAX_VLMUL) {
685        csBundle(2 * i + 1).srcType(0) := SrcType.vp
686        csBundle(2 * i + 1).srcType(1) := SrcType.vp
687        csBundle(2 * i + 1).lsrc(0) := src2 + (i + 1).U
688        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
689        csBundle(2 * i + 1).lsrc(2) := dest + i.U
690        csBundle(2 * i + 1).ldest := VECTOR_TMP_REG_LMUL.U + 1.U
691        csBundle(2 * i + 1).uopIdx := (2 * i).U
692        if (2 * i + 2 < MAX_VLMUL * 2) {
693          csBundle(2 * i + 2).srcType(0) := SrcType.vp
694          csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
695          // csBundle(2 * i + 2).lsrc(1) := src2 + i.U         // DontCare
696          csBundle(2 * i + 2).lsrc(2) := VECTOR_TMP_REG_LMUL.U + 1.U
697          csBundle(2 * i + 2).ldest := dest + i.U
698          csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
699        }
700      }
701      csBundle(numOfUop - 1.U).srcType(0) := SrcType.vp
702      csBundle(numOfUop - 1.U).lsrc(0) := VECTOR_TMP_REG_LMUL.U
703      csBundle(numOfUop - 1.U).ldest := dest + lmul - 1.U
704    }
705    is(UopSplitType.VEC_FSLIDE1DOWN) {
706      /*
      f to vector move
708       */
709      csBundle(0).srcType(0) := SrcType.fp
710      csBundle(0).srcType(1) := SrcType.imm
711      csBundle(0).lsrc(1) := 0.U
712      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
713      csBundle(0).fuType := FuType.f2v.U
714      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
715      csBundle(0).rfWen := false.B
716      csBundle(0).fpWen := false.B
717      csBundle(0).vecWen := true.B
718      //LMUL
719      for (i <- 0 until MAX_VLMUL) {
720        csBundle(2 * i + 1).srcType(0) := SrcType.vp
721        csBundle(2 * i + 1).srcType(1) := SrcType.vp
722        csBundle(2 * i + 1).lsrc(0) := src2 + (i + 1).U
723        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
724        csBundle(2 * i + 1).lsrc(2) := dest + i.U
725        csBundle(2 * i + 1).ldest := VECTOR_TMP_REG_LMUL.U + 1.U
726        csBundle(2 * i + 1).uopIdx := (2 * i).U
727        if (2 * i + 2 < MAX_VLMUL * 2) {
728          csBundle(2 * i + 2).srcType(0) := SrcType.vp
729          csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
730          // csBundle(2 * i + 2).lsrc(1) := src2 + i.U         // DontCare
731          csBundle(2 * i + 2).lsrc(2) := VECTOR_TMP_REG_LMUL.U + 1.U
732          csBundle(2 * i + 2).ldest := dest + i.U
733          csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
734        }
735      }
736      csBundle(numOfUop - 1.U).srcType(0) := SrcType.vp
737      csBundle(numOfUop - 1.U).lsrc(0) := VECTOR_TMP_REG_LMUL.U
738      csBundle(numOfUop - 1.U).ldest := dest + lmul - 1.U
739    }
740    is(UopSplitType.VEC_VRED) {
741      when(vlmulReg === "b001".U) {
742        csBundle(0).srcType(2) := SrcType.DC
743        csBundle(0).lsrc(0) := src2 + 1.U
744        csBundle(0).lsrc(1) := src2
745        csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
746        csBundle(0).uopIdx := 0.U
747      }
748      when(vlmulReg === "b010".U) {
749        csBundle(0).srcType(2) := SrcType.DC
750        csBundle(0).lsrc(0) := src2 + 1.U
751        csBundle(0).lsrc(1) := src2
752        csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
753        csBundle(0).uopIdx := 0.U
754
755        csBundle(1).srcType(2) := SrcType.DC
756        csBundle(1).lsrc(0) := src2 + 3.U
757        csBundle(1).lsrc(1) := src2 + 2.U
758        csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
759        csBundle(1).uopIdx := 1.U
760
761        csBundle(2).srcType(2) := SrcType.DC
762        csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
763        csBundle(2).lsrc(1) := VECTOR_TMP_REG_LMUL.U
764        csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
765        csBundle(2).uopIdx := 2.U
766      }
767      when(vlmulReg === "b011".U) {
768        for (i <- 0 until MAX_VLMUL) {
769          if (i < MAX_VLMUL - MAX_VLMUL / 2) {
770            csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U
771            csBundle(i).lsrc(1) := src2 + (i * 2).U
772            csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
773          } else if (i < MAX_VLMUL - MAX_VLMUL / 4) {
774            csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + (i - MAX_VLMUL / 2) * 2 + 1).U
775            csBundle(i).lsrc(1) := (VECTOR_TMP_REG_LMUL + (i - MAX_VLMUL / 2) * 2).U
776            csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
777          } else if (i < MAX_VLMUL - MAX_VLMUL / 8) {
778            csBundle(6).lsrc(0) := (VECTOR_TMP_REG_LMUL + 5).U
779            csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
780            csBundle(6).ldest := (VECTOR_TMP_REG_LMUL + 6).U
781          }
782          csBundle(i).srcType(2) := SrcType.DC
783          csBundle(i).uopIdx := i.U
784        }
785      }
786      when(vlmulReg(2) === 0.U && vlmulReg(1, 0).orR) {
787        /*
788         * 2 <= vlmul <= 8
789         */
790        csBundle(numOfUop - 1.U).srcType(2) := SrcType.vp
791        csBundle(numOfUop - 1.U).lsrc(0) := src1
792        csBundle(numOfUop - 1.U).lsrc(1) := VECTOR_TMP_REG_LMUL.U + numOfUop - 2.U
793        csBundle(numOfUop - 1.U).lsrc(2) := dest
794        csBundle(numOfUop - 1.U).ldest := dest
795        csBundle(numOfUop - 1.U).uopIdx := numOfUop - 1.U
796      }
797    }
798    is(UopSplitType.VEC_VFRED) {
799      val vlmul = vlmulReg
800      val vsew = vsewReg
801      when(vlmul === VLmul.m8){
802        for (i <- 0 until 4) {
803          csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U
804          csBundle(i).lsrc(1) := src2 + (i * 2).U
805          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
806          csBundle(i).uopIdx := i.U
807        }
808        for (i <- 4 until 6) {
809          csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + (i - 4) * 2 + 1).U
810          csBundle(i).lsrc(1) := (VECTOR_TMP_REG_LMUL + (i - 4) * 2).U
811          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
812          csBundle(i).uopIdx := i.U
813        }
814        csBundle(6).lsrc(0) := (VECTOR_TMP_REG_LMUL + 5).U
815        csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
816        csBundle(6).ldest := (VECTOR_TMP_REG_LMUL + 6).U
817        csBundle(6).uopIdx := 6.U
818        when(vsew === VSew.e64) {
819          csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U
820          csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U
821          csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U
822          csBundle(7).vpu.fpu.isFoldTo1_2 := true.B
823          csBundle(7).uopIdx := 7.U
824          csBundle(8).lsrc(0) := src1
825          csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U
826          csBundle(8).ldest := dest
827          csBundle(8).uopIdx := 8.U
828        }
829        when(vsew === VSew.e32) {
830          csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U
831          csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U
832          csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U
833          csBundle(7).vpu.fpu.isFoldTo1_2 := true.B
834          csBundle(7).uopIdx := 7.U
835          csBundle(8).lsrc(0) := (VECTOR_TMP_REG_LMUL + 7).U
836          csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U
837          csBundle(8).ldest := (VECTOR_TMP_REG_LMUL + 8).U
838          csBundle(8).vpu.fpu.isFoldTo1_4 := true.B
839          csBundle(8).uopIdx := 8.U
840          csBundle(9).lsrc(0) := src1
841          csBundle(9).lsrc(1) := (VECTOR_TMP_REG_LMUL + 8).U
842          csBundle(9).ldest := dest
843          csBundle(9).uopIdx := 9.U
844        }
845        when(vsew === VSew.e16) {
846          csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U
847          csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U
848          csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U
849          csBundle(7).vpu.fpu.isFoldTo1_2 := true.B
850          csBundle(7).uopIdx := 7.U
851          csBundle(8).lsrc(0) := (VECTOR_TMP_REG_LMUL + 7).U
852          csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U
853          csBundle(8).ldest := (VECTOR_TMP_REG_LMUL + 8).U
854          csBundle(8).vpu.fpu.isFoldTo1_4 := true.B
855          csBundle(8).uopIdx := 8.U
856          csBundle(9).lsrc(0) := (VECTOR_TMP_REG_LMUL + 8).U
857          csBundle(9).lsrc(1) := (VECTOR_TMP_REG_LMUL + 8).U
858          csBundle(9).ldest := (VECTOR_TMP_REG_LMUL + 9).U
859          csBundle(9).vpu.fpu.isFoldTo1_8 := true.B
860          csBundle(9).uopIdx := 9.U
861          csBundle(10).lsrc(0) := src1
862          csBundle(10).lsrc(1) := (VECTOR_TMP_REG_LMUL + 9).U
863          csBundle(10).ldest := dest
864          csBundle(10).uopIdx := 10.U
865        }
866      }
867      when(vlmul === VLmul.m4) {
868        for (i <- 0 until 2) {
869          csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U
870          csBundle(i).lsrc(1) := src2 + (i * 2).U
871          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
872          csBundle(i).uopIdx := i.U
873        }
874        csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
875        csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
876        csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
877        csBundle(2).uopIdx := 2.U
878        when(vsew === VSew.e64) {
879          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
880          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
881          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
882          csBundle(3).vpu.fpu.isFoldTo1_2 := true.B
883          csBundle(3).uopIdx := 3.U
884          csBundle(4).lsrc(0) := src1
885          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
886          csBundle(4).ldest := dest
887          csBundle(4).uopIdx := 4.U
888        }
889        when(vsew === VSew.e32) {
890          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
891          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
892          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
893          csBundle(3).vpu.fpu.isFoldTo1_2 := true.B
894          csBundle(3).uopIdx := 3.U
895          csBundle(4).lsrc(0) := (VECTOR_TMP_REG_LMUL + 3).U
896          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
897          csBundle(4).ldest := (VECTOR_TMP_REG_LMUL + 4).U
898          csBundle(4).vpu.fpu.isFoldTo1_4 := true.B
899          csBundle(4).uopIdx := 4.U
900          csBundle(5).lsrc(0) := src1
901          csBundle(5).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
902          csBundle(5).ldest := dest
903          csBundle(5).uopIdx := 5.U
904        }
905        when(vsew === VSew.e16) {
906          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
907          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
908          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
909          csBundle(3).vpu.fpu.isFoldTo1_2 := true.B
910          csBundle(3).uopIdx := 3.U
911          csBundle(4).lsrc(0) := (VECTOR_TMP_REG_LMUL + 3).U
912          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
913          csBundle(4).ldest := (VECTOR_TMP_REG_LMUL + 4).U
914          csBundle(4).vpu.fpu.isFoldTo1_4 := true.B
915          csBundle(4).uopIdx := 4.U
916          csBundle(5).lsrc(0) := (VECTOR_TMP_REG_LMUL + 4).U
917          csBundle(5).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
918          csBundle(5).ldest := (VECTOR_TMP_REG_LMUL + 5).U
919          csBundle(5).vpu.fpu.isFoldTo1_8 := true.B
920          csBundle(5).uopIdx := 5.U
921          csBundle(6).lsrc(0) := src1
922          csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 5).U
923          csBundle(6).ldest := dest
924          csBundle(6).uopIdx := 6.U
925        }
926      }
927      when(vlmul === VLmul.m2) {
928        csBundle(0).lsrc(0) := src2 + 1.U
929        csBundle(0).lsrc(1) := src2 + 0.U
930        csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
931        csBundle(0).uopIdx := 0.U
932        when(vsew === VSew.e64) {
933          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
934          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
935          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
936          csBundle(1).vpu.fpu.isFoldTo1_2 := true.B
937          csBundle(1).uopIdx := 1.U
938          csBundle(2).lsrc(0) := src1
939          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
940          csBundle(2).ldest := dest
941          csBundle(2).uopIdx := 2.U
942        }
943        when(vsew === VSew.e32) {
944          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
945          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
946          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
947          csBundle(1).vpu.fpu.isFoldTo1_2 := true.B
948          csBundle(1).uopIdx := 1.U
949          csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
950          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
951          csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
952          csBundle(2).vpu.fpu.isFoldTo1_4 := true.B
953          csBundle(2).uopIdx := 2.U
954          csBundle(3).lsrc(0) := src1
955          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
956          csBundle(3).ldest := dest
957          csBundle(3).uopIdx := 3.U
958        }
959        when(vsew === VSew.e16) {
960          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
961          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
962          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
963          csBundle(1).vpu.fpu.isFoldTo1_2 := true.B
964          csBundle(1).uopIdx := 1.U
965          csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
966          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
967          csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
968          csBundle(2).vpu.fpu.isFoldTo1_4 := true.B
969          csBundle(2).uopIdx := 2.U
970          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
971          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
972          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
973          csBundle(3).vpu.fpu.isFoldTo1_8 := true.B
974          csBundle(3).uopIdx := 3.U
975          csBundle(4).lsrc(0) := src1
976          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
977          csBundle(4).ldest := dest
978          csBundle(4).uopIdx := 4.U
979        }
980      }
981      when(vlmul === VLmul.m1) {
982        when(vsew === VSew.e64) {
983          csBundle(0).lsrc(0) := src2
984          csBundle(0).lsrc(1) := src2
985          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
986          csBundle(0).vpu.fpu.isFoldTo1_2 := true.B
987          csBundle(0).uopIdx := 0.U
988          csBundle(1).lsrc(0) := src1
989          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
990          csBundle(1).ldest := dest
991          csBundle(1).uopIdx := 1.U
992        }
993        when(vsew === VSew.e32) {
994          csBundle(0).lsrc(0) := src2
995          csBundle(0).lsrc(1) := src2
996          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
997          csBundle(0).vpu.fpu.isFoldTo1_2 := true.B
998          csBundle(0).uopIdx := 0.U
999          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
1000          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1001          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
1002          csBundle(1).vpu.fpu.isFoldTo1_4 := true.B
1003          csBundle(1).uopIdx := 1.U
1004          csBundle(2).lsrc(0) := src1
1005          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
1006          csBundle(2).ldest := dest
1007          csBundle(2).uopIdx := 2.U
1008        }
1009        when(vsew === VSew.e16) {
1010          csBundle(0).lsrc(0) := src2
1011          csBundle(0).lsrc(1) := src2
1012          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
1013          csBundle(0).vpu.fpu.isFoldTo1_2 := true.B
1014          csBundle(0).uopIdx := 0.U
1015          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
1016          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1017          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
1018          csBundle(1).vpu.fpu.isFoldTo1_4 := true.B
1019          csBundle(1).uopIdx := 1.U
1020          csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
1021          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
1022          csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
1023          csBundle(2).vpu.fpu.isFoldTo1_8 := true.B
1024          csBundle(2).uopIdx := 2.U
1025          csBundle(3).lsrc(0) := src1
1026          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
1027          csBundle(3).ldest := dest
1028          csBundle(3).uopIdx := 3.U
1029        }
1030      }
1031      when(vlmul === VLmul.mf2) {
1032        when(vsew === VSew.e32) {
1033          csBundle(0).lsrc(0) := src2
1034          csBundle(0).lsrc(1) := src2
1035          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
1036          csBundle(0).vpu.fpu.isFoldTo1_4 := true.B
1037          csBundle(0).uopIdx := 0.U
1038          csBundle(1).lsrc(0) := src1
1039          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1040          csBundle(1).ldest := dest
1041          csBundle(1).uopIdx := 1.U
1042        }
1043        when(vsew === VSew.e16) {
1044          csBundle(0).lsrc(0) := src2
1045          csBundle(0).lsrc(1) := src2
1046          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
1047          csBundle(0).vpu.fpu.isFoldTo1_4 := true.B
1048          csBundle(0).uopIdx := 0.U
1049          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
1050          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1051          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
1052          csBundle(1).vpu.fpu.isFoldTo1_8 := true.B
1053          csBundle(1).uopIdx := 1.U
1054          csBundle(2).lsrc(0) := src1
1055          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
1056          csBundle(2).ldest := dest
1057          csBundle(2).uopIdx := 2.U
1058        }
1059      }
1060      when(vlmul === VLmul.mf4) {
1061        when(vsew === VSew.e16) {
1062          csBundle(0).lsrc(0) := src2
1063          csBundle(0).lsrc(1) := src2
1064          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
1065          csBundle(0).vpu.fpu.isFoldTo1_8 := true.B
1066          csBundle(0).uopIdx := 0.U
1067          csBundle(1).lsrc(0) := src1
1068          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1069          csBundle(1).ldest := dest
1070          csBundle(1).uopIdx := 1.U
1071        }
1072      }
1073    }
1074
1075    is(UopSplitType.VEC_VFREDOSUM) {
1076      import yunsuan.VfaluType
1077      val vlmul = vlmulReg
1078      val vsew = vsewReg
1079      val isWiden = latchedInst.fuOpType === VfaluType.vfwredosum
1080      when(vlmul === VLmul.m8) {
1081        when(vsew === VSew.e64) {
1082          val vlmax = 16
1083          for (i <- 0 until vlmax) {
1084            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1085            csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U)
1086            csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1087            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1088            csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B)
1089            csBundle(i).uopIdx := i.U
1090          }
1091        }
1092        when(vsew === VSew.e32) {
1093          val vlmax = 32
1094          for (i <- 0 until vlmax) {
1095            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1096            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1097            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1098            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1099            csBundle(i).vpu.fpu.isFoldTo1_4 := (if (i % 4 == 0) false.B else true.B)
1100            csBundle(i).uopIdx := i.U
1101          }
1102        }
1103        when(vsew === VSew.e16) {
1104          val vlmax = 64
1105          for (i <- 0 until vlmax) {
1106            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1107            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1108            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1109            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1110            csBundle(i).vpu.fpu.isFoldTo1_8 := (if (i % 8 == 0) false.B else true.B)
1111            csBundle(i).uopIdx := i.U
1112          }
1113        }
1114      }
1115      when(vlmul === VLmul.m4) {
1116        when(vsew === VSew.e64) {
1117          val vlmax = 8
1118          for (i <- 0 until vlmax) {
1119            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1120            csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U)
1121            csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1122            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1123            csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B)
1124            csBundle(i).uopIdx := i.U
1125          }
1126        }
1127        when(vsew === VSew.e32) {
1128          val vlmax = 16
1129          for (i <- 0 until vlmax) {
1130            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1131            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1132            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1133            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1134            csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B)
1135            csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B)
1136            csBundle(i).uopIdx := i.U
1137          }
1138        }
1139        when(vsew === VSew.e16) {
1140          val vlmax = 32
1141          for (i <- 0 until vlmax) {
1142            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1143            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1144            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1145            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1146            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1147            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1148            csBundle(i).uopIdx := i.U
1149          }
1150        }
1151      }
1152      when(vlmul === VLmul.m2) {
1153        when(vsew === VSew.e64) {
1154          val vlmax = 4
1155          for (i <- 0 until vlmax) {
1156            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1157            csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U)
1158            csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1159            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1160            csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B)
1161            csBundle(i).uopIdx := i.U
1162          }
1163        }
1164        when(vsew === VSew.e32) {
1165          val vlmax = 8
1166          for (i <- 0 until vlmax) {
1167            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1168            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1169            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1170            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1171            csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B)
1172            csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B)
1173            csBundle(i).uopIdx := i.U
1174          }
1175        }
1176        when(vsew === VSew.e16) {
1177          val vlmax = 16
1178          for (i <- 0 until vlmax) {
1179            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1180            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1181            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1182            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1183            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1184            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1185            csBundle(i).uopIdx := i.U
1186          }
1187        }
1188      }
1189      when(vlmul === VLmul.m1) {
1190        when(vsew === VSew.e64) {
1191          val vlmax = 2
1192          for (i <- 0 until vlmax) {
1193            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1194            csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U)
1195            csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1196            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1197            csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B)
1198            csBundle(i).uopIdx := i.U
1199          }
1200        }
1201        when(vsew === VSew.e32) {
1202          val vlmax = 4
1203          for (i <- 0 until vlmax) {
1204            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1205            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1206            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1207            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1208            csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B)
1209            csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B)
1210            csBundle(i).uopIdx := i.U
1211          }
1212        }
1213        when(vsew === VSew.e16) {
1214          val vlmax = 8
1215          for (i <- 0 until vlmax) {
1216            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1217            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1218            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1219            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1220            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1221            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1222            csBundle(i).uopIdx := i.U
1223          }
1224        }
1225      }
1226      when(vlmul === VLmul.mf2) {
1227        when(vsew === VSew.e32) {
1228          val vlmax = 2
1229          for (i <- 0 until vlmax) {
1230            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1231            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1232            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1233            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1234            csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B)
1235            csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B)
1236            csBundle(i).uopIdx := i.U
1237          }
1238        }
1239        when(vsew === VSew.e16) {
1240          val vlmax = 4
1241          for (i <- 0 until vlmax) {
1242            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1243            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1244            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1245            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1246            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1247            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1248            csBundle(i).uopIdx := i.U
1249          }
1250        }
1251      }
1252      when(vlmul === VLmul.mf4) {
1253        when(vsew === VSew.e16) {
1254          val vlmax = 2
1255          for (i <- 0 until vlmax) {
1256            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1257            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1258            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1259            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1260            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1261            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1262            csBundle(i).uopIdx := i.U
1263          }
1264        }
1265      }
1266    }
1267
1268    is(UopSplitType.VEC_SLIDEUP) {
1269      // i to vector move
1270      csBundle(0).srcType(0) := SrcType.reg
1271      csBundle(0).srcType(1) := SrcType.imm
1272      csBundle(0).lsrc(1) := 0.U
1273      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
1274      csBundle(0).fuType := FuType.i2v.U
1275      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
1276      csBundle(0).vecWen := true.B
1277      // LMUL
1278      for (i <- 0 until MAX_VLMUL)
1279        for (j <- 0 to i) {
1280          val old_vd = if (j == 0) {
1281            dest + i.U
1282          } else (VECTOR_TMP_REG_LMUL + j).U
1283          val vd = if (j == i) {
1284            dest + i.U
1285          } else (VECTOR_TMP_REG_LMUL + j + 1).U
1286          csBundle(i * (i + 1) / 2 + j + 1).srcType(0) := SrcType.vp
1287          csBundle(i * (i + 1) / 2 + j + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
1288          csBundle(i * (i + 1) / 2 + j + 1).lsrc(1) := src2 + j.U
1289          csBundle(i * (i + 1) / 2 + j + 1).lsrc(2) := old_vd
1290          csBundle(i * (i + 1) / 2 + j + 1).ldest := vd
1291          csBundle(i * (i + 1) / 2 + j + 1).uopIdx := (i * (i + 1) / 2 + j).U
1292        }
1293    }
1294
1295    is(UopSplitType.VEC_SLIDEDOWN) {
1296      // i to vector move
1297      csBundle(0).srcType(0) := SrcType.reg
1298      csBundle(0).srcType(1) := SrcType.imm
1299      csBundle(0).lsrc(1) := 0.U
1300      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
1301      csBundle(0).fuType := FuType.i2v.U
1302      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
1303      csBundle(0).vecWen := true.B
1304      // LMUL
1305      for (i <- 0 until MAX_VLMUL)
1306        for (j <- (0 to i).reverse) {
1307          when(i.U < lmul) {
1308            val old_vd = if (j == 0) {
1309              dest + lmul - 1.U - i.U
1310            } else (VECTOR_TMP_REG_LMUL + j).U
1311            val vd = if (j == i) {
1312              dest + lmul - 1.U - i.U
1313            } else (VECTOR_TMP_REG_LMUL + j + 1).U
1314            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).srcType(0) := SrcType.vp
1315            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).lsrc(0) := VECTOR_TMP_REG_LMUL.U
1316            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).lsrc(1) := src2 + lmul - 1.U - j.U
1317            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).lsrc(2) := old_vd
1318            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).ldest := vd
1319            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).uopIdx := numOfUop - (i * (i + 1) / 2 + i - j + 2).U
1320          }
1321        }
1322    }
1323
1324    is(UopSplitType.VEC_M0X) {
1325      // LMUL
1326      for (i <- 0 until MAX_VLMUL) {
1327        val srcType0 = if (i == 0) SrcType.DC else SrcType.vp
1328        val ldest = (VECTOR_TMP_REG_LMUL + i).U
1329        csBundle(i).srcType(0) := srcType0
1330        csBundle(i).srcType(1) := SrcType.vp
1331        csBundle(i).rfWen := false.B
1332        csBundle(i).fpWen := false.B
1333        csBundle(i).vecWen := true.B
1334        csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
1335        csBundle(i).lsrc(1) := src2
1336        // csBundle(i).lsrc(2) := dest + i.U  DontCare
1337        csBundle(i).ldest := ldest
1338        csBundle(i).uopIdx := i.U
1339      }
1340      csBundle(lmul - 1.U).rfWen := true.B
1341      csBundle(lmul - 1.U).fpWen := false.B
1342      csBundle(lmul - 1.U).vecWen := false.B
1343      csBundle(lmul - 1.U).ldest := dest
1344    }
1345
1346    is(UopSplitType.VEC_MVV) {
1347      // LMUL
1348      for (i <- 0 until MAX_VLMUL) {
1349        val srcType0 = if (i == 0) SrcType.DC else SrcType.vp
1350        csBundle(i * 2 + 0).srcType(0) := srcType0
1351        csBundle(i * 2 + 0).srcType(1) := SrcType.vp
1352        csBundle(i * 2 + 0).lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
1353        csBundle(i * 2 + 0).lsrc(1) := src2
1354        csBundle(i * 2 + 0).lsrc(2) := dest + i.U
1355        csBundle(i * 2 + 0).ldest := dest + i.U
1356        csBundle(i * 2 + 0).uopIdx := (i * 2 + 0).U
1357
1358        csBundle(i * 2 + 1).srcType(0) := srcType0
1359        csBundle(i * 2 + 1).srcType(1) := SrcType.vp
1360        csBundle(i * 2 + 1).lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
1361        csBundle(i * 2 + 1).lsrc(1) := src2
1362        // csBundle(i).lsrc(2) := dest + i.U  DontCare
1363        csBundle(i * 2 + 1).ldest := (VECTOR_TMP_REG_LMUL + i).U
1364        csBundle(i * 2 + 1).uopIdx := (i * 2 + 1).U
1365      }
1366    }
1367
1368    is(UopSplitType.VEC_M0X_VFIRST) {
1369      // LMUL
1370      csBundle(0).rfWen := true.B
1371      csBundle(0).fpWen := false.B
1372      csBundle(0).vecWen := false.B
1373      csBundle(0).ldest := dest
1374    }
1375    is(UopSplitType.VEC_VWW) {
1376      for (i <- 0 until MAX_VLMUL*2) {
1377        when(i.U < lmul){
1378          csBundle(i).srcType(2) := SrcType.DC
1379          csBundle(i).lsrc(0) := src2 + i.U
1380          csBundle(i).lsrc(1) := src2 + i.U
1381          // csBundle(i).lsrc(2) := dest + (2 * i).U
1382          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
1383          csBundle(i).uopIdx :=  i.U
1384        } otherwise {
1385          csBundle(i).srcType(2) := SrcType.DC
1386          csBundle(i).lsrc(0) := VECTOR_TMP_REG_LMUL.U + Cat((i.U-lmul),0.U(1.W)) + 1.U
1387          csBundle(i).lsrc(1) := VECTOR_TMP_REG_LMUL.U + Cat((i.U-lmul),0.U(1.W))
1388          // csBundle(i).lsrc(2) := dest + (2 * i).U
1389          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
1390          csBundle(i).uopIdx := i.U
1391        }
1392        csBundle(numOfUop-1.U).srcType(2) := SrcType.vp
1393        csBundle(numOfUop-1.U).lsrc(0) := src1
1394        csBundle(numOfUop-1.U).lsrc(2) := dest
1395        csBundle(numOfUop-1.U).ldest := dest
1396      }
1397    }
1398    is(UopSplitType.VEC_RGATHER) {
1399      def genCsBundle_VEC_RGATHER(len:Int): Unit ={
1400        for (i <- 0 until len)
1401          for (j <- 0 until len) {
1402            // csBundle(i * len + j).srcType(0) := SrcType.vp // SrcType.imm
1403            // csBundle(i * len + j).srcType(1) := SrcType.vp
1404            // csBundle(i * len + j).srcType(2) := SrcType.vp
1405            csBundle(i * len + j).lsrc(0) := src1 + i.U
1406            csBundle(i * len + j).lsrc(1) := src2 + j.U
1407            val vd_old = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j - 1).U
1408            csBundle(i * len + j).lsrc(2) := vd_old
1409            val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
1410            csBundle(i * len + j).ldest := vd
1411            csBundle(i * len + j).uopIdx := (i * len + j).U
1412          }
1413      }
1414      switch(vlmulReg) {
1415        is("b001".U ){
1416          genCsBundle_VEC_RGATHER(2)
1417        }
1418        is("b010".U ){
1419          genCsBundle_VEC_RGATHER(4)
1420        }
1421        is("b011".U ){
1422          genCsBundle_VEC_RGATHER(8)
1423        }
1424      }
1425    }
1426    is(UopSplitType.VEC_RGATHER_VX) {
1427      def genCsBundle_RGATHER_VX(len:Int): Unit ={
1428        for (i <- 0 until len)
1429          for (j <- 0 until len) {
1430            csBundle(i * len + j + 1).srcType(0) := SrcType.vp
1431            // csBundle(i * len + j + 1).srcType(1) := SrcType.vp
1432            // csBundle(i * len + j + 1).srcType(2) := SrcType.vp
1433            csBundle(i * len + j + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
1434            csBundle(i * len + j + 1).lsrc(1) := src2 + j.U
1435            val vd_old = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
1436            csBundle(i * len + j + 1).lsrc(2) := vd_old
1437            val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j + 1).U
1438            csBundle(i * len + j + 1).ldest := vd
1439            csBundle(i * len + j + 1).uopIdx := (i * len + j).U
1440          }
1441      }
1442      // i to vector move
1443      csBundle(0).srcType(0) := SrcType.reg
1444      csBundle(0).srcType(1) := SrcType.imm
1445      csBundle(0).lsrc(1) := 0.U
1446      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
1447      csBundle(0).fuType := FuType.i2v.U
1448      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
1449      csBundle(0).rfWen := false.B
1450      csBundle(0).fpWen := false.B
1451      csBundle(0).vecWen := true.B
1452      genCsBundle_RGATHER_VX(1)
1453      switch(vlmulReg) {
1454        is("b001".U ){
1455          genCsBundle_RGATHER_VX(2)
1456        }
1457        is("b010".U ){
1458          genCsBundle_RGATHER_VX(4)
1459        }
1460        is("b011".U ){
1461          genCsBundle_RGATHER_VX(8)
1462        }
1463      }
1464    }
1465    is(UopSplitType.VEC_RGATHEREI16) {
1466      def genCsBundle_VEC_RGATHEREI16_SEW8(len:Int): Unit ={
1467        for (i <- 0 until len)
1468          for (j <- 0 until len) {
1469            val vd_old0 = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j*2-1).U
1470            val vd0 = (VECTOR_TMP_REG_LMUL + j*2 ).U
1471            csBundle((i * len + j)*2+0).lsrc(0) := src1 + (i*2+0).U
1472            csBundle((i * len + j)*2+0).lsrc(1) := src2 + j.U
1473            csBundle((i * len + j)*2+0).lsrc(2) := vd_old0
1474            csBundle((i * len + j)*2+0).ldest := vd0
1475            csBundle((i * len + j)*2+0).uopIdx := ((i * len + j)*2+0).U
1476            val vd_old1 = (VECTOR_TMP_REG_LMUL + j*2).U
1477            val vd1 = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j*2+1 ).U
1478            csBundle((i * len + j)*2+1).lsrc(0) := src1 + (i*2+1).U
1479            csBundle((i * len + j)*2+1).lsrc(1) := src2 + j.U
1480            csBundle((i * len + j)*2+1).lsrc(2) := vd_old1
1481            csBundle((i * len + j)*2+1).ldest := vd1
1482            csBundle((i * len + j)*2+1).uopIdx := ((i * len + j)*2+1).U
1483          }
1484      }
1485      def genCsBundle_VEC_RGATHEREI16(len:Int): Unit ={
1486        for (i <- 0 until len)
1487          for (j <- 0 until len) {
1488            val vd_old = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j-1).U
1489            val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
1490            csBundle(i * len + j).lsrc(0) := src1 + i.U
1491            csBundle(i * len + j).lsrc(1) := src2 + j.U
1492            csBundle(i * len + j).lsrc(2) := vd_old
1493            csBundle(i * len + j).ldest := vd
1494            csBundle(i * len + j).uopIdx := (i * len + j).U
1495          }
1496      }
1497      def genCsBundle_VEC_RGATHEREI16_SEW32(len:Int): Unit ={
1498        for (i <- 0 until len)
1499          for (j <- 0 until len) {
1500            val vd_old = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j-1).U
1501            val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
1502            csBundle(i * len + j).lsrc(0) := src1 + (i / 2).U
1503            csBundle(i * len + j).lsrc(1) := src2 + j.U
1504            csBundle(i * len + j).lsrc(2) := vd_old
1505            csBundle(i * len + j).ldest := vd
1506            csBundle(i * len + j).uopIdx := (i * len + j).U
1507          }
1508      }
1509      def genCsBundle_VEC_RGATHEREI16_SEW64(len:Int): Unit ={
1510        for (i <- 0 until len)
1511          for (j <- 0 until len) {
1512            val vd_old = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j-1).U
1513            val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
1514            csBundle(i * len + j).lsrc(0) := src1 + (i / 4).U
1515            csBundle(i * len + j).lsrc(1) := src2 + j.U
1516            csBundle(i * len + j).lsrc(2) := vd_old
1517            csBundle(i * len + j).ldest := vd
1518            csBundle(i * len + j).uopIdx := (i * len + j).U
1519          }
1520      }
1521      when(!vsewReg.orR){
1522        genCsBundle_VEC_RGATHEREI16_SEW8(1)
1523      }.elsewhen(vsewReg === VSew.e32){
1524        genCsBundle_VEC_RGATHEREI16_SEW32(1)
1525      }.elsewhen(vsewReg === VSew.e64){
1526        genCsBundle_VEC_RGATHEREI16_SEW64(1)
1527      }.otherwise{
1528        genCsBundle_VEC_RGATHEREI16(1)
1529      }
1530      switch(vlmulReg) {
1531        is("b001".U) {
1532          when(!vsewReg.orR) {
1533            genCsBundle_VEC_RGATHEREI16_SEW8(2)
1534          }.elsewhen(vsewReg === VSew.e32){
1535            genCsBundle_VEC_RGATHEREI16_SEW32(2)
1536          }.elsewhen(vsewReg === VSew.e64){
1537            genCsBundle_VEC_RGATHEREI16_SEW64(2)
1538          }.otherwise{
1539            genCsBundle_VEC_RGATHEREI16(2)
1540          }
1541        }
1542        is("b010".U) {
1543          when(!vsewReg.orR) {
1544            genCsBundle_VEC_RGATHEREI16_SEW8(4)
1545          }.elsewhen(vsewReg === VSew.e32){
1546            genCsBundle_VEC_RGATHEREI16_SEW32(4)
1547          }.elsewhen(vsewReg === VSew.e64){
1548            genCsBundle_VEC_RGATHEREI16_SEW64(4)
1549          }.otherwise{
1550            genCsBundle_VEC_RGATHEREI16(4)
1551          }
1552        }
1553        is("b011".U) {
1554          when(vsewReg === VSew.e32){
1555            genCsBundle_VEC_RGATHEREI16_SEW32(8)
1556          }.elsewhen(vsewReg === VSew.e64){
1557            genCsBundle_VEC_RGATHEREI16_SEW64(8)
1558          }.otherwise{
1559            genCsBundle_VEC_RGATHEREI16(8)
1560          }
1561        }
1562      }
1563    }
1564    is(UopSplitType.VEC_COMPRESS) {
1565      def genCsBundle_VEC_COMPRESS(len:Int): Unit = {
1566        for (i <- 0 until len) {
1567          val jlen = if (i == len-1) i+1 else i+2
1568          for (j <- 0 until jlen) {
1569            val vd_old = if(i==j) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j + 1).U
1570            val vd = if(i==len-1) (dest + j.U) else {
1571              if (j == i+1) VECTOR_TMP_REG_LMUL.U else (VECTOR_TMP_REG_LMUL + j + 1).U
1572            }
1573            val src13Type = if (j == i+1) DontCare else SrcType.vp
1574            csBundle(i*(i+3)/2 + j).srcType(0) := src13Type
1575            csBundle(i*(i+3)/2 + j).srcType(1) := SrcType.vp
1576            csBundle(i*(i+3)/2 + j).srcType(2) := src13Type
1577            csBundle(i*(i+3)/2 + j).srcType(3) := SrcType.vp
1578            csBundle(i*(i+3)/2 + j).lsrc(0) := src1
1579            csBundle(i*(i+3)/2 + j).lsrc(1) := src2 + i.U
1580            csBundle(i*(i+3)/2 + j).lsrc(2) := vd_old
1581            csBundle(i*(i+3)/2 + j).lsrc(3) := VECTOR_TMP_REG_LMUL.U
1582            csBundle(i*(i+3)/2 + j).ldest := vd
1583            csBundle(i*(i+3)/2 + j).uopIdx := (i*(i+3)/2 + j).U
1584          }
1585        }
1586      }
1587      switch(vlmulReg) {
1588        is("b001".U ){
1589          genCsBundle_VEC_COMPRESS(2)
1590        }
1591        is("b010".U ){
1592          genCsBundle_VEC_COMPRESS(4)
1593        }
1594        is("b011".U ){
1595          genCsBundle_VEC_COMPRESS(8)
1596        }
1597      }
1598    }
1599    is(UopSplitType.VEC_MVNR) {
1600      for (i <- 0 until MAX_VLMUL) {
1601        csBundle(i).lsrc(0) := src1 + i.U
1602        csBundle(i).lsrc(1) := src2 + i.U
1603        csBundle(i).lsrc(2) := dest + i.U
1604        csBundle(i).ldest := dest + i.U
1605        csBundle(i).uopIdx := i.U
1606      }
1607    }
1608    is(UopSplitType.VEC_US_LDST) {
1609      /*
1610      FMV.D.X
1611       */
1612      csBundle(0).srcType(0) := SrcType.reg
1613      csBundle(0).srcType(1) := SrcType.imm
1614      csBundle(0).lsrc(1) := 0.U
1615      csBundle(0).ldest := FP_TMP_REG_MV.U
1616      csBundle(0).fuType := FuType.i2v.U
1617      csBundle(0).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
1618      csBundle(0).rfWen := false.B
1619      csBundle(0).fpWen := true.B
1620      csBundle(0).vecWen := false.B
1621      csBundle(0).vlsInstr := true.B
1622      //LMUL
1623      for (i <- 0 until MAX_VLMUL) {
1624        csBundle(i + 1).srcType(0) := SrcType.fp
1625        csBundle(i + 1).lsrc(0) := FP_TMP_REG_MV.U
1626        csBundle(i + 1).lsrc(2) := dest + i.U // old vd
1627        csBundle(i + 1).ldest := dest + i.U
1628        csBundle(i + 1).uopIdx := i.U
1629        csBundle(i + 1).vlsInstr := true.B
1630      }
1631      csBundle.head.waitForward := isUsSegment
1632      csBundle(numOfUop - 1.U).blockBackward := isUsSegment
1633    }
1634    is(UopSplitType.VEC_S_LDST) {
1635      /*
1636      FMV.D.X
1637       */
1638      csBundle(0).srcType(0) := SrcType.reg
1639      csBundle(0).srcType(1) := SrcType.imm
1640      csBundle(0).lsrc(1) := 0.U
1641      csBundle(0).ldest := FP_TMP_REG_MV.U
1642      csBundle(0).fuType := FuType.i2v.U
1643      csBundle(0).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
1644      csBundle(0).rfWen := false.B
1645      csBundle(0).fpWen := true.B
1646      csBundle(0).vecWen := false.B
1647      csBundle(0).vlsInstr := true.B
1648
1649      csBundle(1).srcType(0) := SrcType.reg
1650      csBundle(1).srcType(1) := SrcType.imm
1651      csBundle(1).lsrc(0) := latchedInst.lsrc(1)
1652      csBundle(1).lsrc(1) := 0.U
1653      csBundle(1).ldest := VECTOR_TMP_REG_LMUL.U
1654      csBundle(1).fuType := FuType.i2v.U
1655      csBundle(1).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
1656      csBundle(1).rfWen := false.B
1657      csBundle(1).fpWen := true.B
1658      csBundle(1).vecWen := false.B
1659      csBundle(1).vlsInstr := true.B
1660
1661      //LMUL
1662      for (i <- 0 until MAX_VLMUL) {
1663        csBundle(i + 2).srcType(0) := SrcType.fp
1664        csBundle(i + 2).srcType(1) := SrcType.fp
1665        csBundle(i + 2).lsrc(0) := FP_TMP_REG_MV.U
1666        csBundle(i + 2).lsrc(1) := VECTOR_TMP_REG_LMUL.U
1667        csBundle(i + 2).lsrc(2) := dest + i.U // old vd
1668        csBundle(i + 2).ldest := dest + i.U
1669        csBundle(i + 2).uopIdx := i.U
1670        csBundle(i + 2).vlsInstr := true.B
1671      }
1672      csBundle.head.waitForward := isSdSegment
1673      csBundle(numOfUop - 1.U).blockBackward := isSdSegment
1674    }
1675    is(UopSplitType.VEC_I_LDST) {
1676      def genCsBundle_SEGMENT_INDEXED_LOADSTORE(emul:Int): Unit ={
1677        for (i <- 0 until MAX_VLMUL) {
1678          val src0Type = SrcType.fp
1679          val src1Type = if (i < emul) SrcType.vp else SrcType.no
1680          // lsrc0 is useless after uop 0, but we use it to ensure the correctness of the uop dependency
1681          val lsrc0 = FP_TMP_REG_MV.U
1682          val oldVd = dest + i.U
1683          csBundle(i + 1).srcType(0) := src0Type
1684          csBundle(i + 1).lsrc(0) := lsrc0
1685          csBundle(i + 1).srcType(1) := src1Type
1686          csBundle(i + 1).lsrc(1) := src2 + i.U
1687          csBundle(i + 1).srcType(2) := SrcType.vp
1688          csBundle(i + 1).lsrc(2) := oldVd
1689          csBundle(i + 1).ldest := dest + i.U
1690          csBundle(i + 1).uopIdx := i.U
1691          csBundle(i + 1).vlsInstr := true.B
1692        }
1693      }
1694
1695      val vlmul = vlmulReg
1696      val vsew = Cat(0.U(1.W), vsewReg)
1697      val veew = Cat(0.U(1.W), width)
1698      val vemul: UInt = veew.asUInt + 1.U + vlmul.asUInt + ~vsew.asUInt
1699      val simple_lmul = MuxLookup(vlmul, 0.U(2.W))(Array(
1700        "b001".U -> 1.U,
1701        "b010".U -> 2.U,
1702        "b011".U -> 3.U
1703      ))
1704      val simple_emul = MuxLookup(vemul, 0.U(2.W))(Array(
1705        "b001".U -> 1.U,
1706        "b010".U -> 2.U,
1707        "b011".U -> 3.U
1708      ))
1709      csBundle(0).srcType(0) := SrcType.reg
1710      csBundle(0).srcType(1) := SrcType.imm
1711      csBundle(0).lsrc(1) := 0.U
1712      csBundle(0).ldest := FP_TMP_REG_MV.U
1713      csBundle(0).fuType := FuType.i2v.U
1714      csBundle(0).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
1715      csBundle(0).rfWen := false.B
1716      csBundle(0).fpWen := true.B
1717      csBundle(0).vecWen := false.B
1718      csBundle(0).vlsInstr := true.B
1719
1720      //LMUL
1721      when(nf === 0.U) {
1722        for (i <- 0 until MAX_VLMUL) {
1723          indexedLSRegOffset(i).src := Cat(simple_emul, simple_lmul)
1724          val offsetVs2 = indexedLSRegOffset(i).outOffsetVs2
1725          val offsetVd = indexedLSRegOffset(i).outOffsetVd
1726          csBundle(i + 1).srcType(0) := SrcType.fp
1727          csBundle(i + 1).lsrc(0) := FP_TMP_REG_MV.U
1728          csBundle(i + 1).lsrc(1) := Mux1H(UIntToOH(offsetVs2, MAX_VLMUL), (0 until MAX_VLMUL).map(j => src2 + j.U))
1729          csBundle(i + 1).srcType(2) := SrcType.vp
1730          // lsrc2 is old vd
1731          csBundle(i + 1).lsrc(2) := Mux1H(UIntToOH(offsetVd, MAX_VLMUL), (0 until MAX_VLMUL).map(j => dest + j.U))
1732          csBundle(i + 1).ldest := Mux1H(UIntToOH(offsetVd, MAX_VLMUL), (0 until MAX_VLMUL).map(j => dest + j.U))
1733          csBundle(i + 1).uopIdx := i.U
1734          csBundle(i + 1).vlsInstr := true.B
1735        }
1736      }.otherwise{
1737        // nf > 1, is segment indexed load/store
1738        genCsBundle_SEGMENT_INDEXED_LOADSTORE(1)
1739        switch(vemul) {
1740          is("b001".U ){
1741            genCsBundle_SEGMENT_INDEXED_LOADSTORE(2)
1742          }
1743          is("b010".U ){
1744            genCsBundle_SEGMENT_INDEXED_LOADSTORE(4)
1745          }
1746          is("b011".U ){
1747            genCsBundle_SEGMENT_INDEXED_LOADSTORE(8)
1748          }
1749        }
1750      }
1751      csBundle.head.waitForward := isIxSegment
1752      csBundle(numOfUop - 1.U).blockBackward := isIxSegment
1753    }
1754  }
1755
1756  //readyFromRename Counter
1757  val readyCounter = PriorityMuxDefault(outReadys.map(x => !x).zip((0 until RenameWidth).map(_.U)), RenameWidth.U)
1758
1759  // The left uops of the complex inst in ComplexDecoder can be send out this cycle
1760  val thisAllOut = uopRes <= readyCounter
1761
1762  switch(state) {
1763    is(s_idle) {
1764      when (inValid) {
1765        stateNext := s_active
1766        uopResNext := inUopInfo.numOfUop
1767      }
1768    }
1769    is(s_active) {
1770      when (thisAllOut) {
1771        when (inValid) {
1772          stateNext := s_active
1773          uopResNext := inUopInfo.numOfUop
1774        }.otherwise {
1775          stateNext := s_idle
1776          uopResNext := 0.U
1777        }
1778      }.otherwise {
1779        stateNext := s_active
1780        uopResNext := uopRes - readyCounter
1781      }
1782    }
1783  }
1784
1785  state := Mux(io.redirect, s_idle, stateNext)
1786  uopRes := Mux(io.redirect, 0.U, uopResNext)
1787
1788  val complexNum = Mux(uopRes > readyCounter, readyCounter, uopRes)
1789
1790  for(i <- 0 until RenameWidth) {
1791    outValids(i) := complexNum > i.U
1792    outDecodedInsts(i) := Mux((i.U + numOfUop - uopRes) < maxUopSize.U, csBundle(i.U + numOfUop - uopRes), csBundle(maxUopSize - 1))
1793  }
1794
1795  outComplexNum := Mux(state === s_active, complexNum, 0.U)
1796  inReady := state === s_idle || state === s_active && thisAllOut
1797
1798//  val validSimple = Wire(Vec(DecodeWidth, Bool()))
1799//  validSimple.zip(io.validFromIBuf.zip(io.isComplex)).map{ case (dst, (src1, src2)) => dst := src1 && !src2 }
1800//  val notInf = Wire(Vec(DecodeWidth, Bool()))
1801//  notInf.drop(1).zip(io.validFromIBuf.drop(1).zip(validSimple.drop(1))).map{ case (dst, (src1, src2)) => dst := !src1 || src2 }
1802//  notInf(0) := !io.validFromIBuf(0) || validSimple(0) || (io.isComplex(0) && io.in0pc === io.simple.decodedInst.pc)
1803//  val notInfVec = Wire(Vec(DecodeWidth, Bool()))
1804//  notInfVec.zipWithIndex.map{ case (dst, i) => dst := Cat(notInf.take(i + 1)).andR}
1805//
1806//  complexNum := Mux(io.validFromIBuf(0) && readyCounter.orR ,
1807//    Mux(uopRes0 > readyCounter, readyCounter, uopRes0),
1808//    0.U)
1809//  validToRename.zipWithIndex.foreach{
1810//    case(dst, i) =>
1811//      val validFix = Mux(complexNum.orR, validSimple((i+1).U - complexNum), validSimple(i))
1812//      dst := MuxCase(false.B, Seq(
1813//        (io.validFromIBuf(0) && readyCounter.orR && uopRes0 > readyCounter) -> Mux(readyCounter > i.U, true.B, false.B),
1814//        (io.validFromIBuf(0) && readyCounter.orR && !(uopRes0 > readyCounter)) -> Mux(complexNum > i.U, true.B, validFix && notInfVec(i.U - complexNum) && io.readyFromRename(i)),
1815//      ).toSeq)
1816//  }
1817//
1818//  readyToIBuf.zipWithIndex.foreach {
1819//    case (dst, i) =>
1820//      val readyToIBuf0 = Mux(io.isComplex(0), io.in0pc === io.simple.decodedInst.pc, true.B)
1821//      dst := MuxCase(true.B, Seq(
1822//        (io.validFromIBuf(0) && uopRes0 > readyCounter || !readyCounter.orR) -> false.B,
1823//        (io.validFromIBuf(0) && !(uopRes0 > readyCounter) && readyCounter.orR) -> (if (i==0) readyToIBuf0 else Mux(RenameWidth.U - complexNum >= i.U, notInfVec(i) && validSimple(i) && io.readyFromRename(i), false.B))
1824//      ).toSeq)
1825//  }
1826//
1827//  io.deq.decodedInsts := decodedInsts
1828//  io.deq.complexNum := complexNum
1829//  io.deq.validToRename := validToRename
1830//  io.deq.readyToIBuf := readyToIBuf
1831}
1832