xref: /XiangShan/src/main/scala/xiangshan/backend/decode/DecodeUnitComp.scala (revision c3f1642540a337cd2431926bddabf6267b2c2774)
1/***************************************************************************************
2  * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
3  * Copyright (c) 2020-2021 Peng Cheng Laboratory
4  *
5  * XiangShan is licensed under Mulan PSL v2.
6  * You can use this software according to the terms and conditions of the Mulan PSL v2.
7  * You may obtain a copy of Mulan PSL v2 at:
8  *          http://license.coscl.org.cn/MulanPSL2
9  *
10  * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11  * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12  * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13  *
14  * See the Mulan PSL v2 for more details.
15  ***************************************************************************************/
16
17package xiangshan.backend.decode
18
19import org.chipsalliance.cde.config.Parameters
20import chisel3._
21import chisel3.util._
22import freechips.rocketchip.rocket.Instructions
23import freechips.rocketchip.util.uintToBitPat
24import utils._
25import utility._
26import xiangshan.ExceptionNO.illegalInstr
27import xiangshan._
28import xiangshan.backend.fu.fpu.FPU
29import xiangshan.backend.fu.FuType
30import freechips.rocketchip.rocket.Instructions._
31import xiangshan.backend.Bundles.{DecodedInst, StaticInst}
32import xiangshan.backend.decode.isa.bitfield.XSInstBitFields
33import xiangshan.backend.fu.vector.Bundles.{VSew, VType, VLmul}
34import yunsuan.VpermType
35import scala.collection.Seq
36import chisel3.util.experimental.decode.{QMCMinimizer, TruthTable, decoder}
37
/**
  * Combinational lookup table for one uop slot of an indexed vector load/store.
  *
  * The uop position (`uopIdx`) is fixed at elaboration time; the table is then
  * minimised with QMC into a decoder keyed by the run-time `src` value.
  *
  * `src` layout (7 bits, MSB first): `{emul[1:0], lmul[1:0], nf[2:0]}`.
  * `emul` and `lmul` are log2 encodings (the table is built with `1 << emul` /
  * `1 << lmul`), and `nf` is the number of fields minus one.
  *
  * Decoder output layout (7 bits, MSB first):
  * `{isFirstUopInVd, offsetVs2[2:0], offsetVd[2:0]}`.
  * NOTE(review): judging by the names, `offsetVs2` / `offsetVd` are register
  * offsets added to the vs2 and vd base registers by the consumer, which is
  * outside this class - confirm there.
  *
  * @param uopIdx position of the uop this table instance describes (>= 0)
  */
class indexedLSUopTable(uopIdx:Int) extends Module {
  val src = IO(Input(UInt(7.W)))
  val outOffsetVs2 = IO(Output(UInt(3.W)))
  val outOffsetVd = IO(Output(UInt(3.W)))
  val outIsFirstUopInVd = IO(Output(Bool()))

  /**
    * Computes the table entry for one (lmul, emul, nfields) combination.
    *
    * Each field is split into `1 << max(lmul, emul)` uops, fields are laid out
    * back to back, and each field owns `1 << lmul` consecutive vd registers.
    *
    * @param lmul    log2 of the data register group size (0..3)
    * @param emul    log2 of the index register group size (0..3)
    * @param nfields number of fields (1..8)
    * @param uopIdx  position of the uop inside the whole instruction
    * @return `(offsetVs2, offsetVd, isFirstUopInVd)`; `(0, 0, 1)` when the
    *         combination is not legal or `uopIdx` is outside the uops the
    *         combination generates
    */
  def genCsBundle_VEC_INDEXED_LDST(lmul:Int, emul:Int, nfields:Int, uopIdx:Int): (Int, Int, Int) = {
    val vdRegsPerField = 1 << lmul
    val uopsPerField = 1 << (lmul max emul)
    // The RVV constraint is (register-group size) * NFIELDS <= 8, so the real
    // register count (1 << lmul) must be compared, not its log2 encoding.
    // Using the log2 value would let reserved combinations through and yield
    // vd offsets >= 8 that spill into the offsetVs2 bits of the packed entry.
    val legal = vdRegsPerField * nfields <= 8
    val inRange = uopIdx >= 0 && uopIdx < nfields * uopsPerField
    if (!legal || !inRange) {
      (0, 0, 1)
    } else {
      val field = uopIdx / uopsPerField // which segment field this uop belongs to
      val i = uopIdx % uopsPerField     // uop position inside that field
      if (lmul < emul) {
        // lmul < emul: uop count depends on emul * nf; several uops share one
        // vd register, and only the first of them is flagged.
        val uopsPerVd = 1 << (emul - lmul)
        (i, i / uopsPerVd + field * vdRegsPerField, if (i % uopsPerVd == 0) 1 else 0)
      } else {
        // lmul >= emul: uop count depends on lmul * nf; several uops share one
        // vs2 register, and every uop is the first (only) one for its vd.
        val uopsPerVs2 = 1 << (lmul - emul)
        (i / uopsPerVs2, i + field * vdRegsPerField, 1)
      }
    }
  }

  // indexed load/store: one entry per (emul, lmul, nf) combination, i.e. all
  // 4 * 4 * 8 = 128 values of the 7-bit `src`.
  val combVemulNf : Seq[(Int, Int, Int, Int, Int, Int)] = for {
    emul <- 0 until 4
    lmul <- 0 until 4
    nf <- 0 until 8
  } yield {
    val (offsetVs2, offsetVd, isFirstUopInVd) = genCsBundle_VEC_INDEXED_LDST(lmul, emul, nf + 1, uopIdx)
    (emul, lmul, nf, isFirstUopInVd, offsetVs2, offsetVd)
  }

  val out = decoder(QMCMinimizer, src, TruthTable(combVemulNf.map {
    case (emul, lmul, nf, isFirstUopInVd, offsetVs2, offsetVd) =>
      // Elaboration-time guard: both offsets must fit their 3-bit output fields.
      require(offsetVs2 < 8 && offsetVd < 8,
        s"indexedLSUopTable($uopIdx): offsets ($offsetVs2, $offsetVd) do not fit in 3 bits")
      val key = (emul << 5) | (lmul << 3) | nf
      val value = (isFirstUopInVd << 6) | (offsetVs2 << 3) | offsetVd
      (BitPat(key.U(7.W)), BitPat(value.U(7.W)))
  }, BitPat.N(7)))

  outOffsetVs2 := out(5, 3)
  outOffsetVd := out(2, 0)
  outIsFirstUopInVd := out(6).asBool
}
86
/**
  * Elaboration-time constants mixed into the vector uop-splitting logic.
  */
trait VectorConstants {
  /** Loop bound used when expanding one vector instruction into per-register uops. */
  val MAX_VLMUL: Int = 8

  /** Logical register index used as the temporary for int-to-fp moves. */
  val FP_TMP_REG_MV: Int = 32

  /** First logical index of the vector temporaries: 33~47, i.e. 15 registers. */
  val VECTOR_TMP_REG_LMUL: Int = 33

  /** Number of uop slots for which an indexed load/store lookup table is built. */
  val MAX_INDEXED_LS_UOPNUM: Int = 64
}
93
/**
  * Port bundle of the complex-instruction decoder.
  *
  *  - `redirect`   : single-bit input
  *  - `csrCtrl`    : custom CSR control inputs
  *  - `in`         : ready/valid input carrying the simple-decoder result plus
  *                   its uop information
  *  - `out`        : `RenameWidth` ready/valid output ports of decoded uops
  *  - `complexNum` : 3-bit output
  *                   NOTE(review): presumably the number of uops offered on
  *                   `out` this cycle - confirm against the logic that drives it.
  */
class DecodeUnitCompIO(implicit p: Parameters) extends XSBundle {
  val redirect = Input(Bool())
  val csrCtrl = Input(new CustomCSRCtrlIO)
  // When the first inst in decode vector is complex inst, pass it in
  val in = Flipped(Decoupled(new Bundle {
    val simpleDecodedInst = new DecodedInst
    val uopInfo = new UopInfo
  }))
  val out = new Bundle {
    val complexDecodedInsts = Vec(RenameWidth, Decoupled(new DecodedInst))
  }
  val complexNum = Output(UInt(3.W))
}
107
108/**
109  * @author zly
110  */
111class DecodeUnitComp()(implicit p : Parameters) extends XSModule with DecodeUnitConstants with VectorConstants {
112  val io = IO(new DecodeUnitCompIO)
113
114  // alias
115  private val inReady = io.in.ready
116  private val inValid = io.in.valid
117  private val inDecodedInst = WireInit(io.in.bits.simpleDecodedInst)
118  private val inInstFields = io.in.bits.simpleDecodedInst.instr.asTypeOf(new XSInstBitFields)
119  private val inUopInfo = io.in.bits.uopInfo
120  private val outValids = io.out.complexDecodedInsts.map(_.valid)
121  private val outReadys = io.out.complexDecodedInsts.map(_.ready)
122  private val outDecodedInsts = io.out.complexDecodedInsts.map(_.bits)
123  private val outComplexNum = io.complexNum
124
125  val maxUopSize = MaxUopSize
126  when (io.in.fire && io.in.bits.simpleDecodedInst.isVset) {
127    when(inInstFields.RD === 0.U && inInstFields.RS1 === 0.U) {
128      inDecodedInst.fuOpType := VSETOpType.keepVl(io.in.bits.simpleDecodedInst.fuOpType)
129    }.elsewhen(inInstFields.RS1 === 0.U) {
130      inDecodedInst.fuOpType := VSETOpType.setVlmax(io.in.bits.simpleDecodedInst.fuOpType)
131    }
132  }
133
134  val latchedInst = RegEnable(inDecodedInst, inValid && inReady)
135  val latchedUopInfo = RegEnable(inUopInfo, inValid && inReady)
136  //input bits
137  private val instFields: XSInstBitFields = latchedInst.instr.asTypeOf(new XSInstBitFields)
138
139  val src1 = Cat(0.U(1.W), instFields.RS1)
140  val src2 = Cat(0.U(1.W), instFields.RS2)
141  val dest = Cat(0.U(1.W), instFields.RD)
142
143  val nf    = instFields.NF
144  val width = instFields.WIDTH(1, 0)
145
146  //output of DecodeUnit
147  val numOfUop = Wire(UInt(log2Up(maxUopSize).W))
148  val numOfWB = Wire(UInt(log2Up(maxUopSize).W))
149  val lmul = Wire(UInt(4.W))
150  val isVsetSimple = Wire(Bool())
151
152  val indexedLSRegOffset = Seq.tabulate(MAX_INDEXED_LS_UOPNUM)(i => Module(new indexedLSUopTable(i)))
153  indexedLSRegOffset.map(_.src := 0.U)
154
155  //pre decode
156  lmul := latchedUopInfo.lmul
157  isVsetSimple := latchedInst.isVset
158  val vlmulReg = latchedInst.vpu.vlmul
159  val vsewReg = latchedInst.vpu.vsew
160
161  //Type of uop Div
162  val typeOfSplit = latchedInst.uopSplitType
163  val src1Type = latchedInst.srcType(0)
164  val src1IsImm = src1Type === SrcType.imm
165
166  numOfUop := latchedUopInfo.numOfUop
167  numOfWB := latchedUopInfo.numOfWB
168
169  //uops dispatch
170  val s_idle :: s_active :: Nil = Enum(2)
171  val state = RegInit(s_idle)
172  val stateNext = WireDefault(state)
173  val numDecodedUop = RegInit(0.U(log2Up(maxUopSize).W))
174  val uopRes = RegInit(0.U(log2Up(maxUopSize).W))
175  val uopResNext = WireInit(uopRes)
176
177  //uop div up to maxUopSize
178  val csBundle = Wire(Vec(maxUopSize, new DecodedInst))
179  csBundle.foreach { case dst =>
180    dst := latchedInst
181    dst.numUops := latchedUopInfo.numOfUop
182    dst.numWB := latchedUopInfo.numOfWB
183    dst.firstUop := false.B
184    dst.lastUop := false.B
185  }
186
187  csBundle(0).firstUop := true.B
188  csBundle(numOfUop - 1.U).lastUop := true.B
189
190  switch(typeOfSplit) {
191    is(UopSplitType.VSET) {
192      // In simple decoder, rfWen and vecWen are not set
193      when(isVsetSimple) {
194        // Default
195        // uop0 set rd, never flushPipe
196        csBundle(0).fuType := FuType.vsetiwi.U
197        csBundle(0).flushPipe := false.B
198        csBundle(0).rfWen := true.B
199        // uop1 set vl, vsetvl will flushPipe
200        csBundle(1).ldest := VCONFIG_IDX.U
201        csBundle(1).vecWen := true.B
202        when(VSETOpType.isVsetvli(latchedInst.fuOpType) && dest === 0.U && src1 === 0.U) {
203          csBundle(1).fuType := FuType.vsetfwf.U
204          csBundle(1).srcType(0) := SrcType.vp
205          csBundle(1).lsrc(0) := VCONFIG_IDX.U
206        }.elsewhen(VSETOpType.isVsetvl(latchedInst.fuOpType) && dest === 0.U && src1 === 0.U) {
207          // uop0: mv vtype gpr to vector region
208          csBundle(0).srcType(0) := SrcType.xp
209          csBundle(0).srcType(1) := SrcType.no
210          csBundle(0).lsrc(1) := 0.U
211          csBundle(0).ldest := FP_TMP_REG_MV.U
212          csBundle(0).fuType := FuType.i2f.U
213          csBundle(0).fpWen := true.B
214          csBundle(0).fpu.isAddSub := false.B
215          csBundle(0).fpu.typeTagIn := FPU.D
216          csBundle(0).fpu.typeTagOut := FPU.D
217          csBundle(0).fpu.fromInt := true.B
218          csBundle(0).fpu.wflags := false.B
219          csBundle(0).fpu.fpWen := true.B
220          csBundle(0).fpu.div := false.B
221          csBundle(0).fpu.sqrt := false.B
222          csBundle(0).fpu.fcvt := false.B
223          csBundle(0).flushPipe := false.B
224          // uop1: uvsetvcfg_vv
225          csBundle(1).fuType := FuType.vsetfwf.U
226          // vl
227          csBundle(1).srcType(0) := SrcType.vp
228          csBundle(1).lsrc(0) := VCONFIG_IDX.U
229          // vtype
230          csBundle(1).srcType(1) := SrcType.fp
231          csBundle(1).lsrc(1) := FP_TMP_REG_MV.U
232          csBundle(1).vecWen := true.B
233          csBundle(1).ldest := VCONFIG_IDX.U
234        }
235      }
236    }
237    is(UopSplitType.VEC_VVV) {
238      for (i <- 0 until MAX_VLMUL) {
239        csBundle(i).lsrc(0) := src1 + i.U
240        csBundle(i).lsrc(1) := src2 + i.U
241        csBundle(i).lsrc(2) := dest + i.U
242        csBundle(i).ldest := dest + i.U
243        csBundle(i).uopIdx := i.U
244      }
245    }
246    is(UopSplitType.VEC_VFV) {
247      for (i <- 0 until MAX_VLMUL) {
248        csBundle(i).lsrc(1) := src2 + i.U
249        csBundle(i).lsrc(2) := dest + i.U
250        csBundle(i).ldest := dest + i.U
251        csBundle(i).uopIdx := i.U
252      }
253    }
254    is(UopSplitType.VEC_EXT2) {
255      for (i <- 0 until MAX_VLMUL / 2) {
256        csBundle(2 * i).lsrc(1) := src2 + i.U
257        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
258        csBundle(2 * i).ldest := dest + (2 * i).U
259        csBundle(2 * i).uopIdx := (2 * i).U
260        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
261        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
262        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
263        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
264      }
265    }
266    is(UopSplitType.VEC_EXT4) {
267      for (i <- 0 until MAX_VLMUL / 4) {
268        csBundle(4 * i).lsrc(1) := src2 + i.U
269        csBundle(4 * i).lsrc(2) := dest + (4 * i).U
270        csBundle(4 * i).ldest := dest + (4 * i).U
271        csBundle(4 * i).uopIdx := (4 * i).U
272        csBundle(4 * i + 1).lsrc(1) := src2 + i.U
273        csBundle(4 * i + 1).lsrc(2) := dest + (4 * i + 1).U
274        csBundle(4 * i + 1).ldest := dest + (4 * i + 1).U
275        csBundle(4 * i + 1).uopIdx := (4 * i + 1).U
276        csBundle(4 * i + 2).lsrc(1) := src2 + i.U
277        csBundle(4 * i + 2).lsrc(2) := dest + (4 * i + 2).U
278        csBundle(4 * i + 2).ldest := dest + (4 * i + 2).U
279        csBundle(4 * i + 2).uopIdx := (4 * i + 2).U
280        csBundle(4 * i + 3).lsrc(1) := src2 + i.U
281        csBundle(4 * i + 3).lsrc(2) := dest + (4 * i + 3).U
282        csBundle(4 * i + 3).ldest := dest + (4 * i + 3).U
283        csBundle(4 * i + 3).uopIdx := (4 * i + 3).U
284      }
285    }
286    is(UopSplitType.VEC_EXT8) {
287      for (i <- 0 until MAX_VLMUL) {
288        csBundle(i).lsrc(1) := src2
289        csBundle(i).lsrc(2) := dest + i.U
290        csBundle(i).ldest := dest + i.U
291        csBundle(i).uopIdx := i.U
292      }
293    }
294    is(UopSplitType.VEC_0XV) {
295      /*
296      FMV.D.X
297       */
298      csBundle(0).srcType(0) := SrcType.reg
299      csBundle(0).srcType(1) := SrcType.imm
300      csBundle(0).lsrc(1) := 0.U
301      csBundle(0).ldest := FP_TMP_REG_MV.U
302      csBundle(0).fuType := FuType.i2f.U
303      csBundle(0).rfWen := false.B
304      csBundle(0).fpWen := true.B
305      csBundle(0).vecWen := false.B
306      csBundle(0).fpu.isAddSub := false.B
307      csBundle(0).fpu.typeTagIn := FPU.D
308      csBundle(0).fpu.typeTagOut := FPU.D
309      csBundle(0).fpu.fromInt := true.B
310      csBundle(0).fpu.wflags := false.B
311      csBundle(0).fpu.fpWen := true.B
312      csBundle(0).fpu.div := false.B
313      csBundle(0).fpu.sqrt := false.B
314      csBundle(0).fpu.fcvt := false.B
315      /*
316      vfmv.s.f
317       */
318      csBundle(1).srcType(0) := SrcType.fp
319      csBundle(1).srcType(1) := SrcType.vp
320      csBundle(1).srcType(2) := SrcType.vp
321      csBundle(1).lsrc(0) := FP_TMP_REG_MV.U
322      csBundle(1).lsrc(1) := 0.U
323      csBundle(1).lsrc(2) := dest
324      csBundle(1).ldest := dest
325      csBundle(1).fuType := FuType.vppu.U
326      csBundle(1).fuOpType := VpermType.dummy
327      csBundle(1).rfWen := false.B
328      csBundle(1).fpWen := false.B
329      csBundle(1).vecWen := true.B
330    }
331    is(UopSplitType.VEC_VXV) {
332      /*
333      i to vector move
334       */
335      csBundle(0).srcType(0) := SrcType.reg
336      csBundle(0).srcType(1) := SrcType.imm
337      csBundle(0).lsrc(1) := 0.U
338      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
339      csBundle(0).fuType := FuType.i2v.U
340      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg)
341      csBundle(0).vecWen := true.B
342      /*
343      LMUL
344       */
345      for (i <- 0 until MAX_VLMUL) {
346        csBundle(i + 1).srcType(0) := SrcType.vp
347        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
348        csBundle(i + 1).lsrc(1) := src2 + i.U
349        csBundle(i + 1).lsrc(2) := dest + i.U
350        csBundle(i + 1).ldest := dest + i.U
351        csBundle(i + 1).uopIdx := i.U
352      }
353    }
354    is(UopSplitType.VEC_VVW) {
355      for (i <- 0 until MAX_VLMUL / 2) {
356        csBundle(2 * i).lsrc(0) := src1 + i.U
357        csBundle(2 * i).lsrc(1) := src2 + i.U
358        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
359        csBundle(2 * i).ldest := dest + (2 * i).U
360        csBundle(2 * i).uopIdx := (2 * i).U
361        csBundle(2 * i + 1).lsrc(0) := src1 + i.U
362        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
363        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
364        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
365        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
366      }
367    }
368    is(UopSplitType.VEC_VFW) {
369      for (i <- 0 until MAX_VLMUL / 2) {
370        csBundle(2 * i).lsrc(0) := src1
371        csBundle(2 * i).lsrc(1) := src2 + i.U
372        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
373        csBundle(2 * i).ldest := dest + (2 * i).U
374        csBundle(2 * i).uopIdx := (2 * i).U
375        csBundle(2 * i + 1).lsrc(0) := src1
376        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
377        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
378        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
379        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
380      }
381    }
382    is(UopSplitType.VEC_WVW) {
383      for (i <- 0 until MAX_VLMUL / 2) {
384        csBundle(2 * i).lsrc(0) := src1 + i.U
385        csBundle(2 * i).lsrc(1) := src2 + (2 * i).U
386        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
387        csBundle(2 * i).ldest := dest + (2 * i).U
388        csBundle(2 * i).uopIdx := (2 * i).U
389        csBundle(2 * i + 1).lsrc(0) := src1 + i.U
390        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i + 1).U
391        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
392        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
393        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
394      }
395    }
396    is(UopSplitType.VEC_VXW) {
397      /*
398      i to vector move
399       */
400      csBundle(0).srcType(0) := SrcType.reg
401      csBundle(0).srcType(1) := SrcType.imm
402      csBundle(0).lsrc(1) := 0.U
403      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
404      csBundle(0).fuType := FuType.i2v.U
405      csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg)
406      csBundle(0).vecWen := true.B
407
408      for (i <- 0 until MAX_VLMUL / 2) {
409        csBundle(2 * i + 1).srcType(0) := SrcType.vp
410        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
411        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
412        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
413        csBundle(2 * i + 1).ldest := dest + (2 * i).U
414        csBundle(2 * i + 1).uopIdx := (2 * i).U
415        csBundle(2 * i + 2).srcType(0) := SrcType.vp
416        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
417        csBundle(2 * i + 2).lsrc(1) := src2 + i.U
418        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
419        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
420        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
421      }
422    }
423    is(UopSplitType.VEC_WXW) {
424      /*
425      i to vector move
426       */
427      csBundle(0).srcType(0) := SrcType.reg
428      csBundle(0).srcType(1) := SrcType.imm
429      csBundle(0).lsrc(1) := 0.U
430      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
431      csBundle(0).fuType := FuType.i2v.U
432      csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg)
433      csBundle(0).vecWen := true.B
434
435      for (i <- 0 until MAX_VLMUL / 2) {
436        csBundle(2 * i + 1).srcType(0) := SrcType.vp
437        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
438        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U
439        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
440        csBundle(2 * i + 1).ldest := dest + (2 * i).U
441        csBundle(2 * i + 1).uopIdx := (2 * i).U
442        csBundle(2 * i + 2).srcType(0) := SrcType.vp
443        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
444        csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U
445        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
446        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
447        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
448      }
449    }
450    is(UopSplitType.VEC_WVV) {
451      for (i <- 0 until MAX_VLMUL / 2) {
452
453        csBundle(2 * i).lsrc(0) := src1 + i.U
454        csBundle(2 * i).lsrc(1) := src2 + (2 * i).U
455        csBundle(2 * i).lsrc(2) := dest + i.U
456        csBundle(2 * i).ldest := dest + i.U
457        csBundle(2 * i).uopIdx := (2 * i).U
458        csBundle(2 * i + 1).lsrc(0) := src1 + i.U
459        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i + 1).U
460        csBundle(2 * i + 1).lsrc(2) := dest + i.U
461        csBundle(2 * i + 1).ldest := dest + i.U
462        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
463      }
464    }
465    is(UopSplitType.VEC_WFW) {
466      for (i <- 0 until MAX_VLMUL / 2) {
467        csBundle(2 * i).lsrc(0) := src1
468        csBundle(2 * i).lsrc(1) := src2 + (2 * i).U
469        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
470        csBundle(2 * i).ldest := dest + (2 * i).U
471        csBundle(2 * i).uopIdx := (2 * i).U
472        csBundle(2 * i + 1).lsrc(0) := src1
473        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i + 1).U
474        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
475        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
476        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
477      }
478    }
479    is(UopSplitType.VEC_WXV) {
480      /*
481      i to vector move
482       */
483      csBundle(0).srcType(0) := SrcType.reg
484      csBundle(0).srcType(1) := SrcType.imm
485      csBundle(0).lsrc(1) := 0.U
486      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
487      csBundle(0).fuType := FuType.i2v.U
488      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg)
489      csBundle(0).vecWen := true.B
490
491      for (i <- 0 until MAX_VLMUL / 2) {
492        csBundle(2 * i + 1).srcType(0) := SrcType.vp
493        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
494        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U
495        csBundle(2 * i + 1).lsrc(2) := dest + i.U
496        csBundle(2 * i + 1).ldest := dest + i.U
497        csBundle(2 * i + 1).uopIdx := (2 * i).U
498        csBundle(2 * i + 2).srcType(0) := SrcType.vp
499        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
500        csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U
501        csBundle(2 * i + 2).lsrc(2) := dest + i.U
502        csBundle(2 * i + 2).ldest := dest + i.U
503        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
504      }
505    }
506    is(UopSplitType.VEC_VVM) {
507      csBundle(0).lsrc(2) := dest
508      csBundle(0).ldest := dest
509      csBundle(0).uopIdx := 0.U
510      for (i <- 1 until MAX_VLMUL) {
511        csBundle(i).lsrc(0) := src1 + i.U
512        csBundle(i).lsrc(1) := src2 + i.U
513        csBundle(i).lsrc(2) := dest
514        csBundle(i).ldest := dest
515        csBundle(i).uopIdx := i.U
516      }
517    }
518    is(UopSplitType.VEC_VFM) {
519      csBundle(0).lsrc(2) := dest
520      csBundle(0).ldest := dest
521      csBundle(0).uopIdx := 0.U
522      for (i <- 1 until MAX_VLMUL) {
523        csBundle(i).lsrc(0) := src1
524        csBundle(i).lsrc(1) := src2 + i.U
525        csBundle(i).lsrc(2) := dest
526        csBundle(i).ldest := dest
527        csBundle(i).uopIdx := i.U
528      }
529      csBundle(numOfUop - 1.U).ldest := dest
530    }
531    is(UopSplitType.VEC_VXM) {
532      /*
533      i to vector move
534       */
535      csBundle(0).srcType(0) := SrcType.reg
536      csBundle(0).srcType(1) := SrcType.imm
537      csBundle(0).lsrc(1) := 0.U
538      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
539      csBundle(0).fuType := FuType.i2v.U
540      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg)
541      csBundle(0).vecWen := true.B
542      //LMUL
543      csBundle(1).srcType(0) := SrcType.vp
544      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
545      csBundle(1).lsrc(2) := dest
546      csBundle(1).ldest := dest
547      csBundle(1).uopIdx := 0.U
548      for (i <- 1 until MAX_VLMUL) {
549        csBundle(i + 1).srcType(0) := SrcType.vp
550        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
551        csBundle(i + 1).lsrc(1) := src2 + i.U
552        csBundle(i + 1).lsrc(2) := dest
553        csBundle(i + 1).ldest := dest
554        csBundle(i + 1).uopIdx := i.U
555      }
556      csBundle(numOfUop - 1.U).ldest := dest
557    }
558    is(UopSplitType.VEC_SLIDE1UP) {
559      /*
560      i to vector move
561       */
562      csBundle(0).srcType(0) := SrcType.reg
563      csBundle(0).srcType(1) := SrcType.imm
564      csBundle(0).lsrc(1) := 0.U
565      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
566      csBundle(0).fuType := FuType.i2v.U
567      csBundle(0).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), vsewReg)
568      csBundle(0).vecWen := true.B
569      //LMUL
570      csBundle(1).srcType(0) := SrcType.vp
571      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
572      csBundle(1).lsrc(2) := dest
573      csBundle(1).ldest := dest
574      csBundle(1).uopIdx := 0.U
575      for (i <- 1 until MAX_VLMUL) {
576        csBundle(i + 1).srcType(0) := SrcType.vp
577        csBundle(i + 1).lsrc(0) := src2 + (i - 1).U
578        csBundle(i + 1).lsrc(1) := src2 + i.U
579        csBundle(i + 1).lsrc(2) := dest + i.U
580        csBundle(i + 1).ldest := dest + i.U
581        csBundle(i + 1).uopIdx := i.U
582      }
583    }
584    is(UopSplitType.VEC_FSLIDE1UP) {
585      //LMUL
586      csBundle(0).srcType(0) := SrcType.fp
587      csBundle(0).lsrc(0) := src1
588      csBundle(0).lsrc(1) := src2
589      csBundle(0).lsrc(2) := dest
590      csBundle(0).ldest := dest
591      csBundle(0).uopIdx := 0.U
592      for (i <- 1 until MAX_VLMUL) {
593        csBundle(i).srcType(0) := SrcType.vp
594        csBundle(i).lsrc(0) := src2 + (i - 1).U
595        csBundle(i).lsrc(1) := src2 + i.U
596        csBundle(i).lsrc(2) := dest + i.U
597        csBundle(i).ldest := dest + i.U
598        csBundle(i).uopIdx := i.U
599      }
600    }
601    is(UopSplitType.VEC_SLIDE1DOWN) { // lmul+lmul = 16
602      /*
603      i to vector move
604       */
605      csBundle(0).srcType(0) := SrcType.reg
606      csBundle(0).srcType(1) := SrcType.imm
607      csBundle(0).lsrc(1) := 0.U
608      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
609      csBundle(0).fuType := FuType.i2v.U
610      csBundle(0).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), vsewReg)
611      csBundle(0).vecWen := true.B
612      //LMUL
613      for (i <- 0 until MAX_VLMUL) {
614        csBundle(2 * i + 1).srcType(0) := SrcType.vp
615        csBundle(2 * i + 1).srcType(1) := SrcType.vp
616        csBundle(2 * i + 1).lsrc(0) := src2 + (i + 1).U
617        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
618        csBundle(2 * i + 1).lsrc(2) := dest + i.U
619        csBundle(2 * i + 1).ldest := VECTOR_TMP_REG_LMUL.U + 1.U
620        csBundle(2 * i + 1).uopIdx := (2 * i).U
621        if (2 * i + 2 < MAX_VLMUL * 2) {
622          csBundle(2 * i + 2).srcType(0) := SrcType.vp
623          csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
624          // csBundle(2 * i + 2).lsrc(1) := src2 + i.U         // DontCare
625          csBundle(2 * i + 2).lsrc(2) := VECTOR_TMP_REG_LMUL.U + 1.U
626          csBundle(2 * i + 2).ldest := dest + i.U
627          csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
628        }
629      }
630      csBundle(numOfUop - 1.U).srcType(0) := SrcType.vp
631      csBundle(numOfUop - 1.U).lsrc(0) := VECTOR_TMP_REG_LMUL.U
632      csBundle(numOfUop - 1.U).ldest := dest + lmul - 1.U
633    }
634    is(UopSplitType.VEC_FSLIDE1DOWN) {
635      //LMUL
636      for (i <- 0 until MAX_VLMUL) {
637        csBundle(2 * i).srcType(0) := SrcType.vp
638        csBundle(2 * i).srcType(1) := SrcType.vp
639        csBundle(2 * i).lsrc(0) := src2 + (i + 1).U
640        csBundle(2 * i).lsrc(1) := src2 + i.U
641        csBundle(2 * i).lsrc(2) := dest + i.U
642        csBundle(2 * i).ldest := VECTOR_TMP_REG_LMUL.U
643        csBundle(2 * i).uopIdx := (2 * i).U
644        csBundle(2 * i + 1).srcType(0) := SrcType.fp
645        csBundle(2 * i + 1).lsrc(0) := src1
646        csBundle(2 * i + 1).lsrc(2) := VECTOR_TMP_REG_LMUL.U
647        csBundle(2 * i + 1).ldest := dest + i.U
648        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
649      }
650      csBundle(numOfUop - 1.U).srcType(0) := SrcType.fp
651      csBundle(numOfUop - 1.U).lsrc(0) := src1
652      csBundle(numOfUop - 1.U).ldest := dest + lmul - 1.U
653    }
654    is(UopSplitType.VEC_VRED) {
655      when(vlmulReg === "b001".U) {
656        csBundle(0).srcType(2) := SrcType.DC
657        csBundle(0).lsrc(0) := src2 + 1.U
658        csBundle(0).lsrc(1) := src2
659        csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
660        csBundle(0).uopIdx := 0.U
661      }
662      when(vlmulReg === "b010".U) {
663        csBundle(0).srcType(2) := SrcType.DC
664        csBundle(0).lsrc(0) := src2 + 1.U
665        csBundle(0).lsrc(1) := src2
666        csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
667        csBundle(0).uopIdx := 0.U
668
669        csBundle(1).srcType(2) := SrcType.DC
670        csBundle(1).lsrc(0) := src2 + 3.U
671        csBundle(1).lsrc(1) := src2 + 2.U
672        csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
673        csBundle(1).uopIdx := 1.U
674
675        csBundle(2).srcType(2) := SrcType.DC
676        csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
677        csBundle(2).lsrc(1) := VECTOR_TMP_REG_LMUL.U
678        csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
679        csBundle(2).uopIdx := 2.U
680      }
681      when(vlmulReg === "b011".U) {
682        for (i <- 0 until MAX_VLMUL) {
683          if (i < MAX_VLMUL - MAX_VLMUL / 2) {
684            csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U
685            csBundle(i).lsrc(1) := src2 + (i * 2).U
686            csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
687          } else if (i < MAX_VLMUL - MAX_VLMUL / 4) {
688            csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + (i - MAX_VLMUL / 2) * 2 + 1).U
689            csBundle(i).lsrc(1) := (VECTOR_TMP_REG_LMUL + (i - MAX_VLMUL / 2) * 2).U
690            csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
691          } else if (i < MAX_VLMUL - MAX_VLMUL / 8) {
692            csBundle(6).lsrc(0) := (VECTOR_TMP_REG_LMUL + 5).U
693            csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
694            csBundle(6).ldest := (VECTOR_TMP_REG_LMUL + 6).U
695          }
696          csBundle(i).srcType(2) := SrcType.DC
697          csBundle(i).uopIdx := i.U
698        }
699      }
700      when(vlmulReg.orR) {
701        csBundle(numOfUop - 1.U).srcType(2) := SrcType.vp
702        csBundle(numOfUop - 1.U).lsrc(0) := src1
703        csBundle(numOfUop - 1.U).lsrc(1) := VECTOR_TMP_REG_LMUL.U + numOfUop - 2.U
704        csBundle(numOfUop - 1.U).lsrc(2) := dest
705        csBundle(numOfUop - 1.U).ldest := dest
706        csBundle(numOfUop - 1.U).uopIdx := numOfUop - 1.U
707      }
708    }
709    is(UopSplitType.VEC_VFRED) {
710      val vlmul = vlmulReg
711      val vsew = vsewReg
712      when(vlmul === VLmul.m8){
713        for (i <- 0 until 4) {
714          csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U
715          csBundle(i).lsrc(1) := src2 + (i * 2).U
716          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
717          csBundle(i).uopIdx := i.U
718        }
719        for (i <- 4 until 6) {
720          csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + (i - 4) * 2 + 1).U
721          csBundle(i).lsrc(1) := (VECTOR_TMP_REG_LMUL + (i - 4) * 2).U
722          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
723          csBundle(i).uopIdx := i.U
724        }
725        csBundle(6).lsrc(0) := (VECTOR_TMP_REG_LMUL + 5).U
726        csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
727        csBundle(6).ldest := (VECTOR_TMP_REG_LMUL + 6).U
728        csBundle(6).uopIdx := 6.U
729        when(vsew === VSew.e64) {
730          csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U
731          csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U
732          csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U
733          csBundle(7).vpu.fpu.isFoldTo1_2 := true.B
734          csBundle(7).uopIdx := 7.U
735          csBundle(8).lsrc(0) := src1
736          csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U
737          csBundle(8).ldest := dest
738          csBundle(8).uopIdx := 8.U
739        }
740        when(vsew === VSew.e32) {
741          csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U
742          csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U
743          csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U
744          csBundle(7).vpu.fpu.isFoldTo1_2 := true.B
745          csBundle(7).uopIdx := 7.U
746          csBundle(8).lsrc(0) := (VECTOR_TMP_REG_LMUL + 7).U
747          csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U
748          csBundle(8).ldest := (VECTOR_TMP_REG_LMUL + 8).U
749          csBundle(8).vpu.fpu.isFoldTo1_4 := true.B
750          csBundle(8).uopIdx := 8.U
751          csBundle(9).lsrc(0) := src1
752          csBundle(9).lsrc(1) := (VECTOR_TMP_REG_LMUL + 8).U
753          csBundle(9).ldest := dest
754          csBundle(9).uopIdx := 9.U
755        }
756        when(vsew === VSew.e16) {
757          csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U
758          csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U
759          csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U
760          csBundle(7).vpu.fpu.isFoldTo1_2 := true.B
761          csBundle(7).uopIdx := 7.U
762          csBundle(8).lsrc(0) := (VECTOR_TMP_REG_LMUL + 7).U
763          csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U
764          csBundle(8).ldest := (VECTOR_TMP_REG_LMUL + 8).U
765          csBundle(8).vpu.fpu.isFoldTo1_4 := true.B
766          csBundle(8).uopIdx := 8.U
767          csBundle(9).lsrc(0) := (VECTOR_TMP_REG_LMUL + 8).U
768          csBundle(9).lsrc(1) := (VECTOR_TMP_REG_LMUL + 8).U
769          csBundle(9).ldest := (VECTOR_TMP_REG_LMUL + 9).U
770          csBundle(9).vpu.fpu.isFoldTo1_8 := true.B
771          csBundle(9).uopIdx := 9.U
772          csBundle(10).lsrc(0) := src1
773          csBundle(10).lsrc(1) := (VECTOR_TMP_REG_LMUL + 9).U
774          csBundle(10).ldest := dest
775          csBundle(10).uopIdx := 10.U
776        }
777      }
778      when(vlmul === VLmul.m4) {
779        for (i <- 0 until 2) {
780          csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U
781          csBundle(i).lsrc(1) := src2 + (i * 2).U
782          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
783          csBundle(i).uopIdx := i.U
784        }
785        csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
786        csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
787        csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
788        csBundle(2).uopIdx := 2.U
789        when(vsew === VSew.e64) {
790          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
791          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
792          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
793          csBundle(3).vpu.fpu.isFoldTo1_2 := true.B
794          csBundle(3).uopIdx := 3.U
795          csBundle(4).lsrc(0) := src1
796          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
797          csBundle(4).ldest := dest
798          csBundle(4).uopIdx := 4.U
799        }
800        when(vsew === VSew.e32) {
801          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
802          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
803          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
804          csBundle(3).vpu.fpu.isFoldTo1_2 := true.B
805          csBundle(3).uopIdx := 3.U
806          csBundle(4).lsrc(0) := (VECTOR_TMP_REG_LMUL + 3).U
807          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
808          csBundle(4).ldest := (VECTOR_TMP_REG_LMUL + 4).U
809          csBundle(4).vpu.fpu.isFoldTo1_4 := true.B
810          csBundle(4).uopIdx := 4.U
811          csBundle(5).lsrc(0) := src1
812          csBundle(5).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
813          csBundle(5).ldest := dest
814          csBundle(5).uopIdx := 5.U
815        }
816        when(vsew === VSew.e16) {
817          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
818          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
819          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
820          csBundle(3).vpu.fpu.isFoldTo1_2 := true.B
821          csBundle(3).uopIdx := 3.U
822          csBundle(4).lsrc(0) := (VECTOR_TMP_REG_LMUL + 3).U
823          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
824          csBundle(4).ldest := (VECTOR_TMP_REG_LMUL + 4).U
825          csBundle(4).vpu.fpu.isFoldTo1_4 := true.B
826          csBundle(4).uopIdx := 4.U
827          csBundle(5).lsrc(0) := (VECTOR_TMP_REG_LMUL + 4).U
828          csBundle(5).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
829          csBundle(5).ldest := (VECTOR_TMP_REG_LMUL + 5).U
830          csBundle(5).vpu.fpu.isFoldTo1_8 := true.B
831          csBundle(5).uopIdx := 5.U
832          csBundle(6).lsrc(0) := src1
833          csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 5).U
834          csBundle(6).ldest := dest
835          csBundle(6).uopIdx := 6.U
836        }
837      }
838      when(vlmul === VLmul.m2) {
839        csBundle(0).lsrc(0) := src2 + 1.U
840        csBundle(0).lsrc(1) := src2 + 0.U
841        csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
842        csBundle(0).uopIdx := 0.U
843        when(vsew === VSew.e64) {
844          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
845          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
846          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
847          csBundle(1).vpu.fpu.isFoldTo1_2 := true.B
848          csBundle(1).uopIdx := 1.U
849          csBundle(2).lsrc(0) := src1
850          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
851          csBundle(2).ldest := dest
852          csBundle(2).uopIdx := 2.U
853        }
854        when(vsew === VSew.e32) {
855          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
856          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
857          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
858          csBundle(1).vpu.fpu.isFoldTo1_2 := true.B
859          csBundle(1).uopIdx := 1.U
860          csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
861          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
862          csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
863          csBundle(2).vpu.fpu.isFoldTo1_4 := true.B
864          csBundle(2).uopIdx := 2.U
865          csBundle(3).lsrc(0) := src1
866          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
867          csBundle(3).ldest := dest
868          csBundle(3).uopIdx := 3.U
869        }
870        when(vsew === VSew.e16) {
871          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
872          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
873          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
874          csBundle(1).vpu.fpu.isFoldTo1_2 := true.B
875          csBundle(1).uopIdx := 1.U
876          csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
877          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
878          csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
879          csBundle(2).vpu.fpu.isFoldTo1_4 := true.B
880          csBundle(2).uopIdx := 2.U
881          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
882          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
883          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
884          csBundle(3).vpu.fpu.isFoldTo1_8 := true.B
885          csBundle(3).uopIdx := 3.U
886          csBundle(4).lsrc(0) := src1
887          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
888          csBundle(4).ldest := dest
889          csBundle(4).uopIdx := 4.U
890        }
891      }
892      when(vlmul === VLmul.m1) {
893        when(vsew === VSew.e64) {
894          csBundle(0).lsrc(0) := src2
895          csBundle(0).lsrc(1) := src2
896          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
897          csBundle(0).vpu.fpu.isFoldTo1_2 := true.B
898          csBundle(0).uopIdx := 0.U
899          csBundle(1).lsrc(0) := src1
900          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
901          csBundle(1).ldest := dest
902          csBundle(1).uopIdx := 1.U
903        }
904        when(vsew === VSew.e32) {
905          csBundle(0).lsrc(0) := src2
906          csBundle(0).lsrc(1) := src2
907          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
908          csBundle(0).vpu.fpu.isFoldTo1_2 := true.B
909          csBundle(0).uopIdx := 0.U
910          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
911          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
912          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
913          csBundle(1).vpu.fpu.isFoldTo1_4 := true.B
914          csBundle(1).uopIdx := 1.U
915          csBundle(2).lsrc(0) := src1
916          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
917          csBundle(2).ldest := dest
918          csBundle(2).uopIdx := 2.U
919        }
920        when(vsew === VSew.e16) {
921          csBundle(0).lsrc(0) := src2
922          csBundle(0).lsrc(1) := src2
923          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
924          csBundle(0).vpu.fpu.isFoldTo1_2 := true.B
925          csBundle(0).uopIdx := 0.U
926          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
927          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
928          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
929          csBundle(1).vpu.fpu.isFoldTo1_4 := true.B
930          csBundle(1).uopIdx := 1.U
931          csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
932          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
933          csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
934          csBundle(2).vpu.fpu.isFoldTo1_8 := true.B
935          csBundle(2).uopIdx := 2.U
936          csBundle(3).lsrc(0) := src1
937          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
938          csBundle(3).ldest := dest
939          csBundle(3).uopIdx := 3.U
940        }
941      }
942      when(vlmul === VLmul.mf2) {
943        when(vsew === VSew.e32) {
944          csBundle(0).lsrc(0) := src2
945          csBundle(0).lsrc(1) := src2
946          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
947          csBundle(0).vpu.fpu.isFoldTo1_4 := true.B
948          csBundle(0).uopIdx := 0.U
949          csBundle(1).lsrc(0) := src1
950          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
951          csBundle(1).ldest := dest
952          csBundle(1).uopIdx := 1.U
953        }
954        when(vsew === VSew.e16) {
955          csBundle(0).lsrc(0) := src2
956          csBundle(0).lsrc(1) := src2
957          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
958          csBundle(0).vpu.fpu.isFoldTo1_4 := true.B
959          csBundle(0).uopIdx := 0.U
960          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
961          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
962          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
963          csBundle(1).vpu.fpu.isFoldTo1_8 := true.B
964          csBundle(1).uopIdx := 1.U
965          csBundle(2).lsrc(0) := src1
966          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
967          csBundle(2).ldest := dest
968          csBundle(2).uopIdx := 2.U
969        }
970      }
971      when(vlmul === VLmul.mf4) {
972        when(vsew === VSew.e16) {
973          csBundle(0).lsrc(0) := src2
974          csBundle(0).lsrc(1) := src2
975          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
976          csBundle(0).vpu.fpu.isFoldTo1_8 := true.B
977          csBundle(0).uopIdx := 0.U
978          csBundle(1).lsrc(0) := src1
979          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
980          csBundle(1).ldest := dest
981          csBundle(1).uopIdx := 1.U
982        }
983      }
984    }
985
986    is(UopSplitType.VEC_VFREDOSUM) {
987      import yunsuan.VfaluType
988      val vlmul = vlmulReg
989      val vsew = vsewReg
990      val isWiden = latchedInst.fuOpType === VfaluType.vfwredosum
991      when(vlmul === VLmul.m8) {
992        when(vsew === VSew.e64) {
993          val vlmax = 16
994          for (i <- 0 until vlmax) {
995            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
996            csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U)
997            csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
998            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
999            csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B)
1000            csBundle(i).uopIdx := i.U
1001          }
1002        }
1003        when(vsew === VSew.e32) {
1004          val vlmax = 32
1005          for (i <- 0 until vlmax) {
1006            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1007            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1008            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1009            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1010            csBundle(i).vpu.fpu.isFoldTo1_4 := (if (i % 4 == 0) false.B else true.B)
1011            csBundle(i).uopIdx := i.U
1012          }
1013        }
1014        when(vsew === VSew.e16) {
1015          val vlmax = 64
1016          for (i <- 0 until vlmax) {
1017            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1018            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1019            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1020            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1021            csBundle(i).vpu.fpu.isFoldTo1_8 := (if (i % 8 == 0) false.B else true.B)
1022            csBundle(i).uopIdx := i.U
1023          }
1024        }
1025      }
1026      when(vlmul === VLmul.m4) {
1027        when(vsew === VSew.e64) {
1028          val vlmax = 8
1029          for (i <- 0 until vlmax) {
1030            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1031            csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U)
1032            csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1033            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1034            csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B)
1035            csBundle(i).uopIdx := i.U
1036          }
1037        }
1038        when(vsew === VSew.e32) {
1039          val vlmax = 16
1040          for (i <- 0 until vlmax) {
1041            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1042            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1043            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1044            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1045            csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B)
1046            csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B)
1047            csBundle(i).uopIdx := i.U
1048          }
1049        }
1050        when(vsew === VSew.e16) {
1051          val vlmax = 32
1052          for (i <- 0 until vlmax) {
1053            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1054            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1055            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1056            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1057            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1058            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1059            csBundle(i).uopIdx := i.U
1060          }
1061        }
1062      }
1063      when(vlmul === VLmul.m2) {
1064        when(vsew === VSew.e64) {
1065          val vlmax = 4
1066          for (i <- 0 until vlmax) {
1067            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1068            csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U)
1069            csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1070            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1071            csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B)
1072            csBundle(i).uopIdx := i.U
1073          }
1074        }
1075        when(vsew === VSew.e32) {
1076          val vlmax = 8
1077          for (i <- 0 until vlmax) {
1078            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1079            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1080            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1081            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1082            csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B)
1083            csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B)
1084            csBundle(i).uopIdx := i.U
1085          }
1086        }
1087        when(vsew === VSew.e16) {
1088          val vlmax = 16
1089          for (i <- 0 until vlmax) {
1090            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1091            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1092            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1093            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1094            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1095            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1096            csBundle(i).uopIdx := i.U
1097          }
1098        }
1099      }
1100      when(vlmul === VLmul.m1) {
1101        when(vsew === VSew.e64) {
1102          val vlmax = 2
1103          for (i <- 0 until vlmax) {
1104            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1105            csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U)
1106            csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1107            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1108            csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B)
1109            csBundle(i).uopIdx := i.U
1110          }
1111        }
1112        when(vsew === VSew.e32) {
1113          val vlmax = 4
1114          for (i <- 0 until vlmax) {
1115            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1116            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1117            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1118            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1119            csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B)
1120            csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B)
1121            csBundle(i).uopIdx := i.U
1122          }
1123        }
1124        when(vsew === VSew.e16) {
1125          val vlmax = 8
1126          for (i <- 0 until vlmax) {
1127            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1128            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1129            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1130            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1131            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1132            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1133            csBundle(i).uopIdx := i.U
1134          }
1135        }
1136      }
1137      when(vlmul === VLmul.mf2) {
1138        when(vsew === VSew.e32) {
1139          val vlmax = 2
1140          for (i <- 0 until vlmax) {
1141            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1142            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1143            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1144            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1145            csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B)
1146            csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B)
1147            csBundle(i).uopIdx := i.U
1148          }
1149        }
1150        when(vsew === VSew.e16) {
1151          val vlmax = 4
1152          for (i <- 0 until vlmax) {
1153            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1154            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1155            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1156            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1157            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1158            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1159            csBundle(i).uopIdx := i.U
1160          }
1161        }
1162      }
1163      when(vlmul === VLmul.mf4) {
1164        when(vsew === VSew.e16) {
1165          val vlmax = 2
1166          for (i <- 0 until vlmax) {
1167            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1168            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1169            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1170            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1171            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1172            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1173            csBundle(i).uopIdx := i.U
1174          }
1175        }
1176      }
1177    }
1178
1179    is(UopSplitType.VEC_SLIDEUP) {
1180      // i to vector move
1181      csBundle(0).srcType(0) := SrcType.reg
1182      csBundle(0).srcType(1) := SrcType.imm
1183      csBundle(0).lsrc(1) := 0.U
1184      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
1185      csBundle(0).fuType := FuType.i2v.U
1186      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
1187      csBundle(0).vecWen := true.B
1188      // LMUL
1189      for (i <- 0 until MAX_VLMUL)
1190        for (j <- 0 to i) {
1191          val old_vd = if (j == 0) {
1192            dest + i.U
1193          } else (VECTOR_TMP_REG_LMUL + j).U
1194          val vd = if (j == i) {
1195            dest + i.U
1196          } else (VECTOR_TMP_REG_LMUL + j + 1).U
1197          csBundle(i * (i + 1) / 2 + j + 1).srcType(0) := SrcType.vp
1198          csBundle(i * (i + 1) / 2 + j + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
1199          csBundle(i * (i + 1) / 2 + j + 1).lsrc(1) := src2 + j.U
1200          csBundle(i * (i + 1) / 2 + j + 1).lsrc(2) := old_vd
1201          csBundle(i * (i + 1) / 2 + j + 1).ldest := vd
1202          csBundle(i * (i + 1) / 2 + j + 1).uopIdx := (i * (i + 1) / 2 + j).U
1203        }
1204    }
1205
1206    is(UopSplitType.VEC_SLIDEDOWN) {
1207      // i to vector move
1208      csBundle(0).srcType(0) := SrcType.reg
1209      csBundle(0).srcType(1) := SrcType.imm
1210      csBundle(0).lsrc(1) := 0.U
1211      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
1212      csBundle(0).fuType := FuType.i2v.U
1213      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
1214      csBundle(0).vecWen := true.B
1215      // LMUL
1216      for (i <- 0 until MAX_VLMUL)
1217        for (j <- (0 to i).reverse) {
1218          when(i.U < lmul) {
1219            val old_vd = if (j == 0) {
1220              dest + lmul - 1.U - i.U
1221            } else (VECTOR_TMP_REG_LMUL + j).U
1222            val vd = if (j == i) {
1223              dest + lmul - 1.U - i.U
1224            } else (VECTOR_TMP_REG_LMUL + j + 1).U
1225            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).srcType(0) := SrcType.vp
1226            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).lsrc(0) := VECTOR_TMP_REG_LMUL.U
1227            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).lsrc(1) := src2 + lmul - 1.U - j.U
1228            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).lsrc(2) := old_vd
1229            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).ldest := vd
1230            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).uopIdx := numOfUop - (i * (i + 1) / 2 + i - j + 2).U
1231          }
1232        }
1233    }
1234
1235    is(UopSplitType.VEC_M0X) {
1236      // LMUL
1237      for (i <- 0 until MAX_VLMUL) {
1238        val srcType0 = if (i == 0) SrcType.DC else SrcType.vp
1239        val ldest = (VECTOR_TMP_REG_LMUL + i).U
1240        csBundle(i).srcType(0) := srcType0
1241        csBundle(i).srcType(1) := SrcType.vp
1242        csBundle(i).rfWen := false.B
1243        csBundle(i).vecWen := true.B
1244        csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
1245        csBundle(i).lsrc(1) := src2
1246        // csBundle(i).lsrc(2) := dest + i.U  DontCare
1247        csBundle(i).ldest := ldest
1248        csBundle(i).uopIdx := i.U
1249      }
1250      csBundle(lmul - 1.U).vecWen := false.B
1251      csBundle(lmul - 1.U).fpWen := true.B
1252      csBundle(lmul - 1.U).ldest := FP_TMP_REG_MV.U
1253      // FMV_X_D
1254      csBundle(lmul).srcType(0) := SrcType.fp
1255      csBundle(lmul).srcType(1) := SrcType.imm
1256      csBundle(lmul).lsrc(0) := FP_TMP_REG_MV.U
1257      csBundle(lmul).lsrc(1) := 0.U
1258      csBundle(lmul).ldest := dest
1259      csBundle(lmul).fuType := FuType.fmisc.U
1260      csBundle(lmul).rfWen := true.B
1261      csBundle(lmul).fpWen := false.B
1262      csBundle(lmul).vecWen := false.B
1263      csBundle(lmul).fpu.isAddSub := false.B
1264      csBundle(lmul).fpu.typeTagIn := FPU.D
1265      csBundle(lmul).fpu.typeTagOut := FPU.D
1266      csBundle(lmul).fpu.fromInt := false.B
1267      csBundle(lmul).fpu.wflags := false.B
1268      csBundle(lmul).fpu.fpWen := false.B
1269      csBundle(lmul).fpu.div := false.B
1270      csBundle(lmul).fpu.sqrt := false.B
1271      csBundle(lmul).fpu.fcvt := false.B
1272    }
1273
1274    is(UopSplitType.VEC_MVV) {
1275      // LMUL
1276      for (i <- 0 until MAX_VLMUL) {
1277        val srcType0 = if (i == 0) SrcType.DC else SrcType.vp
1278        csBundle(i * 2 + 0).srcType(0) := srcType0
1279        csBundle(i * 2 + 0).srcType(1) := SrcType.vp
1280        csBundle(i * 2 + 0).lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
1281        csBundle(i * 2 + 0).lsrc(1) := src2
1282        csBundle(i * 2 + 0).lsrc(2) := dest + i.U
1283        csBundle(i * 2 + 0).ldest := dest + i.U
1284        csBundle(i * 2 + 0).uopIdx := (i * 2 + 0).U
1285
1286        csBundle(i * 2 + 1).srcType(0) := srcType0
1287        csBundle(i * 2 + 1).srcType(1) := SrcType.vp
1288        csBundle(i * 2 + 1).lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
1289        csBundle(i * 2 + 1).lsrc(1) := src2
1290        // csBundle(i).lsrc(2) := dest + i.U  DontCare
1291        csBundle(i * 2 + 1).ldest := (VECTOR_TMP_REG_LMUL + i).U
1292        csBundle(i * 2 + 1).uopIdx := (i * 2 + 1).U
1293      }
1294    }
1295
1296    is(UopSplitType.VEC_M0X_VFIRST) {
1297      // LMUL
1298      csBundle(0).rfWen := false.B
1299      csBundle(0).fpWen := true.B
1300      csBundle(0).ldest := FP_TMP_REG_MV.U
1301      // FMV_X_D
1302      csBundle(1).srcType(0) := SrcType.fp
1303      csBundle(1).srcType(1) := SrcType.imm
1304      csBundle(1).lsrc(0) := FP_TMP_REG_MV.U
1305      csBundle(1).lsrc(1) := 0.U
1306      csBundle(1).ldest := dest
1307      csBundle(1).fuType := FuType.fmisc.U
1308      csBundle(1).rfWen := true.B
1309      csBundle(1).fpWen := false.B
1310      csBundle(1).vecWen := false.B
1311      csBundle(1).fpu.isAddSub := false.B
1312      csBundle(1).fpu.typeTagIn := FPU.D
1313      csBundle(1).fpu.typeTagOut := FPU.D
1314      csBundle(1).fpu.fromInt := false.B
1315      csBundle(1).fpu.wflags := false.B
1316      csBundle(1).fpu.fpWen := false.B
1317      csBundle(1).fpu.div := false.B
1318      csBundle(1).fpu.sqrt := false.B
1319      csBundle(1).fpu.fcvt := false.B
1320    }
1321    is(UopSplitType.VEC_VWW) {
1322      for (i <- 0 until MAX_VLMUL*2) {
1323        when(i.U < lmul){
1324          csBundle(i).srcType(2) := SrcType.DC
1325          csBundle(i).lsrc(0) := src2 + i.U
1326          csBundle(i).lsrc(1) := src2 + i.U
1327          // csBundle(i).lsrc(2) := dest + (2 * i).U
1328          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
1329          csBundle(i).uopIdx :=  i.U
1330        } otherwise {
1331          csBundle(i).srcType(2) := SrcType.DC
1332          csBundle(i).lsrc(0) := VECTOR_TMP_REG_LMUL.U + Cat((i.U-lmul),0.U(1.W)) + 1.U
1333          csBundle(i).lsrc(1) := VECTOR_TMP_REG_LMUL.U + Cat((i.U-lmul),0.U(1.W))
1334          // csBundle(i).lsrc(2) := dest + (2 * i).U
1335          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
1336          csBundle(i).uopIdx := i.U
1337        }
1338        csBundle(numOfUop-1.U).srcType(2) := SrcType.vp
1339        csBundle(numOfUop-1.U).lsrc(0) := src1
1340        csBundle(numOfUop-1.U).lsrc(2) := dest
1341        csBundle(numOfUop-1.U).ldest := dest
1342      }
1343    }
1344    is(UopSplitType.VEC_RGATHER) {
1345      def genCsBundle_VEC_RGATHER(len:Int): Unit ={
1346        for (i <- 0 until len)
1347          for (j <- 0 until len) {
1348            // csBundle(i * len + j).srcType(0) := SrcType.vp // SrcType.imm
1349            // csBundle(i * len + j).srcType(1) := SrcType.vp
1350            // csBundle(i * len + j).srcType(2) := SrcType.vp
1351            csBundle(i * len + j).lsrc(0) := src1 + i.U
1352            csBundle(i * len + j).lsrc(1) := src2 + j.U
1353            val vd_old = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j - 1).U
1354            csBundle(i * len + j).lsrc(2) := vd_old
1355            val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
1356            csBundle(i * len + j).ldest := vd
1357            csBundle(i * len + j).uopIdx := (i * len + j).U
1358          }
1359      }
1360      switch(vlmulReg) {
1361        is("b001".U ){
1362          genCsBundle_VEC_RGATHER(2)
1363        }
1364        is("b010".U ){
1365          genCsBundle_VEC_RGATHER(4)
1366        }
1367        is("b011".U ){
1368          genCsBundle_VEC_RGATHER(8)
1369        }
1370      }
1371    }
1372    is(UopSplitType.VEC_RGATHER_VX) {
          // VEC_RGATHER_VX split: csBundle(0) is a move into VECTOR_TMP_REG_LMUL (configured below
          // the helper), followed by len * len gather uops placed in csBundle slots 1 .. len*len.
          //
          // genCsBundle_RGATHER_VX(len) fills csBundle(i * len + j + 1) for all i, j in [0, len):
          //   - lsrc(0) reads VECTOR_TMP_REG_LMUL (the destination of csBundle(0)) as a vector
          //     operand, and lsrc(1) = src2 + j;
          //   - for a fixed i the uops chain over j through temporaries that start at
          //     VECTOR_TMP_REG_LMUL + 1: the first uop reads dest + i as old vd, the last writes
          //     dest + i;
          //   - uopIdx is i * len + j, i.e. it does not count the leading move uop.
1373      def genCsBundle_RGATHER_VX(len:Int): Unit ={
1374        for (i <- 0 until len)
1375          for (j <- 0 until len) {
1376            csBundle(i * len + j + 1).srcType(0) := SrcType.vp
1377            // csBundle(i * len + j + 1).srcType(1) := SrcType.vp
1378            // csBundle(i * len + j + 1).srcType(2) := SrcType.vp
1379            csBundle(i * len + j + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
1380            csBundle(i * len + j + 1).lsrc(1) := src2 + j.U
                // Old-vd source: dest + i for the first link, otherwise the previous link's temporary.
1381            val vd_old = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
1382            csBundle(i * len + j + 1).lsrc(2) := vd_old
                // Destination: dest + i for the last link, otherwise temporary VECTOR_TMP_REG_LMUL + j + 1.
1383            val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j + 1).U
1384            csBundle(i * len + j + 1).ldest := vd
1385            csBundle(i * len + j + 1).uopIdx := (i * len + j).U
1386          }
1387      }
1388      // i to vector move
          // csBundle(0): source 0 is an integer register, source 1 is an immediate (lsrc(1) forced
          // to 0); the result is written as a vector register (vecWen) to VECTOR_TMP_REG_LMUL by
          // the i2v function unit.
1389      csBundle(0).srcType(0) := SrcType.reg
1390      csBundle(0).srcType(1) := SrcType.imm
1391      csBundle(0).lsrc(1) := 0.U
1392      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
1393      csBundle(0).fuType := FuType.i2v.U
          // fuOpType = { low 3 bits of the move kind (imm2Vec when src1IsImm, else i2Vec), vsewReg }.
1394      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
1395      csBundle(0).vecWen := true.B
          // vlmulReg encodings b000 / b001 / b010 / b011 select len = 1 / 2 / 4 / 8; any other
          // encoding adds no gather-uop connection in this arm.
1396      switch(vlmulReg) {
1397        is("b000".U ){
1398          genCsBundle_RGATHER_VX(1)
1399        }
1400        is("b001".U ){
1401          genCsBundle_RGATHER_VX(2)
1402        }
1403        is("b010".U ){
1404          genCsBundle_RGATHER_VX(4)
1405        }
1406        is("b011".U ){
1407          genCsBundle_RGATHER_VX(8)
1408        }
1409      }
1410    }
1411    is(UopSplitType.VEC_RGATHEREI16) {
          // VEC_RGATHEREI16 split. Two helpers are defined; the switch at the end of this arm picks
          // one of them from vlmulReg and from whether vsewReg is all-zero.
          //
          // genCsBundle_VEC_RGATHEREI16_SEW8(len): two uops per (i, j) pair, in csBundle slots
          // (i * len + j) * 2 and (i * len + j) * 2 + 1.
          //   - the first uop reads src1 + 2*i, the second reads src1 + 2*i + 1; both read src2 + j;
          //   - the second uop reads as old vd the temporary written by the first
          //     (VECTOR_TMP_REG_LMUL + 2*j);
          //   - across j the pairs are chained: the first uop of pair j reads the temporary written
          //     by the second uop of pair j-1 (or dest + i when j == 0), and the second uop of the
          //     last pair writes dest + i.
          // NOTE(review): presumably used when SEW = 8, so the 16-bit index operand spans twice as
          // many registers as the data - confirm against the vsewReg encoding.
1412      def genCsBundle_VEC_RGATHEREI16_SEW8(len:Int): Unit ={
1413        for (i <- 0 until len)
1414          for (j <- 0 until len) {
1415            val vd_old0 = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j*2-1).U
1416            val vd0 = (VECTOR_TMP_REG_LMUL + j*2 ).U
1417            // csBundle(i * len + j).srcType(0) := SrcType.vp // SrcType.imm
1418            // csBundle(i * len + j).srcType(1) := SrcType.vp
1419            // csBundle(i * len + j).srcType(2) := SrcType.vp
1420            csBundle((i * len + j)*2+0).lsrc(0) := src1 + (i*2+0).U
1421            csBundle((i * len + j)*2+0).lsrc(1) := src2 + j.U
1422            csBundle((i * len + j)*2+0).lsrc(2) := vd_old0
1423            csBundle((i * len + j)*2+0).ldest := vd0
1424            csBundle((i * len + j)*2+0).uopIdx := ((i * len + j)*2+0).U
1425            val vd_old1 = (VECTOR_TMP_REG_LMUL + j*2).U
1426            val vd1 = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j*2+1 ).U
1427            csBundle((i * len + j)*2+1).lsrc(0) := src1 + (i*2+1).U
1428            csBundle((i * len + j)*2+1).lsrc(1) := src2 + j.U
1429            csBundle((i * len + j)*2+1).lsrc(2) := vd_old1
1430            csBundle((i * len + j)*2+1).ldest := vd1
1431            csBundle((i * len + j)*2+1).uopIdx := ((i * len + j)*2+1).U
1432          }
1433      }
          // genCsBundle_VEC_RGATHEREI16(len): one uop per (i, j) pair in csBundle(i * len + j), with
          // lsrc(0) = src1 + i, lsrc(1) = src2 + j, and an old-vd chain over j through temporaries
          // VECTOR_TMP_REG_LMUL + j that starts from and ends in dest + i.
1434      def genCsBundle_VEC_RGATHEREI16(len:Int): Unit ={
1435        for (i <- 0 until len)
1436          for (j <- 0 until len) {
1437            val vd_old = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j-1).U
1438            val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
1439            // csBundle(i * len + j).srcType(0) := SrcType.vp // SrcType.imm
1440            // csBundle(i * len + j).srcType(1) := SrcType.vp
1441            // csBundle(i * len + j).srcType(2) := SrcType.vp
1442            csBundle(i * len + j).lsrc(0) := src1 + i.U
1443            csBundle(i * len + j).lsrc(1) := src2 + j.U
1444            csBundle(i * len + j).lsrc(2) := vd_old
1445            csBundle(i * len + j).ldest := vd
1446            csBundle(i * len + j).uopIdx := (i * len + j).U
1447          }
1448      }
          // Selection: for vlmulReg b000 / b001 / b010 (len 1 / 2 / 4) the SEW8 variant is used when
          // vsewReg has no bit set, otherwise the plain variant. For b011 (len 8) the plain variant
          // is always used. Other vlmulReg encodings add no connection in this arm.
1449      switch(vlmulReg) {
1450        is("b000".U ){
1451          when(!vsewReg.orR){
1452            genCsBundle_VEC_RGATHEREI16_SEW8(1)
1453          } .otherwise{
1454            genCsBundle_VEC_RGATHEREI16(1)
1455          }
1456        }
1457        is("b001".U) {
1458          when(!vsewReg.orR) {
1459            genCsBundle_VEC_RGATHEREI16_SEW8(2)
1460          }.otherwise {
1461            genCsBundle_VEC_RGATHEREI16(2)
1462          }
1463        }
1464        is("b010".U) {
1465          when(!vsewReg.orR) {
1466            genCsBundle_VEC_RGATHEREI16_SEW8(4)
1467          }.otherwise {
1468            genCsBundle_VEC_RGATHEREI16(4)
1469          }
1470        }
1471        is("b011".U) {
1472          genCsBundle_VEC_RGATHEREI16(8)
1473        }
1474      }
1475    }
1476    is(UopSplitType.VEC_COMPRESS) {
          // VEC_COMPRESS split. genCsBundle_VEC_COMPRESS(len) emits a triangular set of uops:
          // for outer index i the inner index j runs over i + 2 values, except for the last i
          // (i == len-1) where it runs over i + 1 values. The csBundle slot i*(i+3)/2 + j is the
          // number of uops emitted for all smaller i, plus j.
          //
          // Per uop:
          //   - lsrc(0) = src1 (same for every uop) and lsrc(1) = src2 + i;
          //   - lsrc(2) (old vd) = dest + i when j == i, otherwise VECTOR_TMP_REG_LMUL + j + 1;
          //   - ldest = dest + j for the last i; otherwise VECTOR_TMP_REG_LMUL when j == i + 1,
          //     else VECTOR_TMP_REG_LMUL + j + 1;
          //   - the extra uop with j == i + 1 has srcType(1) and srcType(2) set to DontCare,
          //     all other uops use SrcType.vp for them.
1477      def genCsBundle_VEC_COMPRESS(len:Int): Unit ={
1478        for (i <- 0 until len){
              // Number of uops generated for this i.
1479          val jlen = if (i == len-1) i+1 else i+2
1480          for (j <- 0 until jlen) {
1481            val vd_old = if(i==j) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j + 1).U
1482            val vd = if(i==len-1) (dest + j.U) else{
1483              if (j == i+1) VECTOR_TMP_REG_LMUL.U else (VECTOR_TMP_REG_LMUL + j + 1).U
1484            }
1485            val src23Type = if (j == i+1) DontCare else SrcType.vp
1486            csBundle(i*(i+3)/2 + j).srcType(0) := SrcType.vp
1487            csBundle(i*(i+3)/2 + j).srcType(1) := src23Type
1488            csBundle(i*(i+3)/2 + j).srcType(2) := src23Type
1489            csBundle(i*(i+3)/2 + j).lsrc(0) := src1
1490            csBundle(i*(i+3)/2 + j).lsrc(1) := src2 + i.U
1491            csBundle(i*(i+3)/2 + j).lsrc(2) := vd_old
1492            // csBundle(i*(i+3)/2 + j).lsrc(3) := VECTOR_TMP_REG_LMUL.U
1493            csBundle(i*(i+3)/2 + j).ldest := vd
1494            csBundle(i*(i+3)/2 + j).uopIdx := (i*(i+3)/2 + j).U
1495          }
1496        }
1497      }
          // Only vlmulReg encodings b001 / b010 / b011 are handled (len = 2 / 4 / 8); any other
          // encoding adds no connection in this arm.
1498      switch(vlmulReg) {
1499        is("b001".U ){
1500          genCsBundle_VEC_COMPRESS(2)
1501        }
1502        is("b010".U ){
1503          genCsBundle_VEC_COMPRESS(4)
1504        }
1505        is("b011".U ){
1506          genCsBundle_VEC_COMPRESS(8)
1507        }
1508      }
1509    }
1510    is(UopSplitType.VEC_MVNR) {
          // VEC_MVNR split: uop i operates on register group member i. Every source (src1, src2,
          // old vd) and the destination are offset by i, and uopIdx = i.
          // All MAX_VLMUL slots are filled unconditionally; this arm does not look at vlmulReg.
1511      for (i <- 0 until MAX_VLMUL) {
1512        csBundle(i).lsrc(0) := src1 + i.U
1513        csBundle(i).lsrc(1) := src2 + i.U
1514        csBundle(i).lsrc(2) := dest + i.U
1515        csBundle(i).ldest := dest + i.U
1516        csBundle(i).uopIdx := i.U
1517      }
1518    }
1519    is(UopSplitType.VEC_US_LDST) {
          // VEC_US_LDST split: csBundle(0) moves an integer register into the FP temporary
          // FP_TMP_REG_MV, then csBundle(1 .. MAX_VLMUL) are the per-register memory uops that
          // read that temporary.
          // NOTE(review): presumably the moved value is the base address - confirm with the LSU.
1520      /*
1521      FMV.D.X
1522       */
          // uop 0: integer source 0, immediate source 1 (lsrc(1) forced to 0), executed by the i2f
          // function unit. Only the FP write enable is set (rfWen and vecWen are cleared).
1523      csBundle(0).srcType(0) := SrcType.reg
1524      csBundle(0).srcType(1) := SrcType.imm
1525      csBundle(0).lsrc(1) := 0.U
1526      csBundle(0).ldest := FP_TMP_REG_MV.U
1527      csBundle(0).fuType := FuType.i2f.U
1528      csBundle(0).rfWen := false.B
1529      csBundle(0).fpWen := true.B
1530      csBundle(0).vecWen := false.B
          // FPU control for the move: double-precision type tags in and out, integer source,
          // no flag update, no add/sub, div, sqrt or fcvt.
1531      csBundle(0).fpu.isAddSub := false.B
1532      csBundle(0).fpu.typeTagIn := FPU.D
1533      csBundle(0).fpu.typeTagOut := FPU.D
1534      csBundle(0).fpu.fromInt := true.B
1535      csBundle(0).fpu.wflags := false.B
1536      csBundle(0).fpu.fpWen := true.B
1537      csBundle(0).fpu.div := false.B
1538      csBundle(0).fpu.sqrt := false.B
1539      csBundle(0).fpu.fcvt := false.B
1540      //LMUL
          // uops 1 .. MAX_VLMUL: source 0 is the FP temporary written by uop 0; old vd and the
          // destination are dest + i; uopIdx = i (the move uop is not counted).
1541      for (i <- 0 until MAX_VLMUL) {
1542        csBundle(i + 1).srcType(0) := SrcType.fp
1543        csBundle(i + 1).lsrc(0) := FP_TMP_REG_MV.U
1544        csBundle(i + 1).lsrc(2) := dest + i.U // old vd
1545        csBundle(i + 1).ldest := dest + i.U
1546        csBundle(i + 1).uopIdx := i.U
1547      }
1548    }
1549    is(UopSplitType.VEC_S_LDST) {
          // VEC_S_LDST split: two leading integer-to-FP moves (csBundle(0) and csBundle(1)),
          // followed by MAX_VLMUL memory uops in csBundle(2 ..) that read both moved values.
          // NOTE(review): presumably the two moved values are the base address and the stride -
          // confirm with the LSU.
1550      /*
1551      FMV.D.X
1552       */
          // uop 0: move into FP_TMP_REG_MV through the i2f function unit. Source 0 is an integer
          // register, source 1 an immediate; only the FP write enable is set.
1553      csBundle(0).srcType(0) := SrcType.reg
1554      csBundle(0).srcType(1) := SrcType.imm
1555      csBundle(0).lsrc(1) := 0.U
1556      csBundle(0).ldest := FP_TMP_REG_MV.U
1557      csBundle(0).fuType := FuType.i2f.U
1558      csBundle(0).rfWen := false.B
1559      csBundle(0).fpWen := true.B
1560      csBundle(0).vecWen := false.B
1561      csBundle(0).fpu.isAddSub := false.B
1562      csBundle(0).fpu.typeTagIn := FPU.D
1563      csBundle(0).fpu.typeTagOut := FPU.D
1564      csBundle(0).fpu.fromInt := true.B
1565      csBundle(0).fpu.wflags := false.B
1566      csBundle(0).fpu.fpWen := true.B
1567      csBundle(0).fpu.div := false.B
1568      csBundle(0).fpu.sqrt := false.B
1569      csBundle(0).fpu.fcvt := false.B
1570
          // uop 1: same kind of move, but its integer source is the instruction's second source
          // register (latchedInst.lsrc(1)) and its destination index is VECTOR_TMP_REG_LMUL.
          // NOTE(review): the destination is written with fpWen although the constant is named as
          // a vector temporary; the consumers below read it with SrcType.fp, so producer and
          // consumer agree - confirm that this index is valid in the FP register file.
1571      csBundle(1).srcType(0) := SrcType.reg
1572      csBundle(1).srcType(1) := SrcType.imm
1573      csBundle(1).lsrc(0) := latchedInst.lsrc(1)
1574      csBundle(1).lsrc(1) := 0.U
1575      csBundle(1).ldest := VECTOR_TMP_REG_LMUL.U
1576      csBundle(1).fuType := FuType.i2f.U
1577      csBundle(1).rfWen := false.B
1578      csBundle(1).fpWen := true.B
1579      csBundle(1).vecWen := false.B
1580      csBundle(1).fpu.isAddSub := false.B
1581      csBundle(1).fpu.typeTagIn := FPU.D
1582      csBundle(1).fpu.typeTagOut := FPU.D
1583      csBundle(1).fpu.fromInt := true.B
1584      csBundle(1).fpu.wflags := false.B
1585      csBundle(1).fpu.fpWen := true.B
1586      csBundle(1).fpu.div := false.B
1587      csBundle(1).fpu.sqrt := false.B
1588      csBundle(1).fpu.fcvt := false.B
1589
1590      //LMUL
          // uops 2 .. MAX_VLMUL + 1: sources 0 and 1 are the two FP temporaries written above;
          // old vd and the destination are dest + i; uopIdx = i (the two moves are not counted).
1591      for (i <- 0 until MAX_VLMUL) {
1592        csBundle(i + 2).srcType(0) := SrcType.fp
1593        csBundle(i + 2).srcType(1) := SrcType.fp
1594        csBundle(i + 2).lsrc(0) := FP_TMP_REG_MV.U
1595        csBundle(i + 2).lsrc(1) := VECTOR_TMP_REG_LMUL.U
1596        csBundle(i + 2).lsrc(2) := dest + i.U // old vd
1597        csBundle(i + 2).ldest := dest + i.U
1598        csBundle(i + 2).uopIdx := i.U
1599      }
1600    }
1601    is(UopSplitType.VEC_I_LDST) {
          // VEC_I_LDST split: csBundle(0) is an integer-to-FP move into FP_TMP_REG_MV, followed by
          // MAX_INDEXED_LS_UOPNUM memory uops whose vs2 / vd register offsets come from the
          // per-uop lookup modules in indexedLSRegOffset.
1602    /*
1603      FMV.D.X
1604       */
1605      val vlmul = vlmulReg
          // vsewReg and width are zero-extended by one bit before the arithmetic below.
1606      val vsew = Cat(0.U(1.W), vsewReg)
1607      val veew = Cat(0.U(1.W), width)
          // veew + 1 + vlmul + ~vsew is veew - vsew + vlmul in two's-complement arithmetic
          // (modulo the adder width).
          // NOTE(review): presumably log2(EMUL) = log2(EEW) - log2(SEW) + log2(LMUL) - confirm the
          // encodings of width and vsewReg.
1608      val vemul: UInt = veew.asUInt + 1.U + vlmul.asUInt + ~vsew.asUInt
          // Both lookups map the encodings b001 / b010 / b011 to 1 / 2 / 3; every other encoding
          // maps to 0.
1609      val simple_lmul = MuxLookup(vlmul, 0.U(2.W), Array(
1610        "b001".U -> 1.U,
1611        "b010".U -> 2.U,
1612        "b011".U -> 3.U
1613      ))
1614      val simple_emul = MuxLookup(vemul, 0.U(2.W), Array(
1615        "b001".U -> 1.U,
1616        "b010".U -> 2.U,
1617        "b011".U -> 3.U
1618      ))
          // uop 0: move into FP_TMP_REG_MV through the i2f function unit. Source 0 is an integer
          // register, source 1 an immediate; only the FP write enable is set.
1619      csBundle(0).srcType(0) := SrcType.reg
1620      csBundle(0).srcType(1) := SrcType.imm
1621      csBundle(0).lsrc(1) := 0.U
1622      csBundle(0).ldest := FP_TMP_REG_MV.U
1623      csBundle(0).fuType := FuType.i2f.U
1624      csBundle(0).rfWen := false.B
1625      csBundle(0).fpWen := true.B
1626      csBundle(0).vecWen := false.B
1627      csBundle(0).fpu.isAddSub := false.B
1628      csBundle(0).fpu.typeTagIn := FPU.D
1629      csBundle(0).fpu.typeTagOut := FPU.D
1630      csBundle(0).fpu.fromInt := true.B
1631      csBundle(0).fpu.wflags := false.B
1632      csBundle(0).fpu.fpWen := true.B
1633      csBundle(0).fpu.div := false.B
1634      csBundle(0).fpu.sqrt := false.B
1635      csBundle(0).fpu.fcvt := false.B
1636
1637      //LMUL
1638      for (i <- 0 until MAX_INDEXED_LS_UOPNUM) {
            // Every lookup module receives the same key {simple_emul, simple_lmul, nf} and returns
            // the register offsets for uop i.
            // NOTE(review): presumably indexedLSRegOffset(i) is an indexedLSUopTable(i) instance
            // (declared outside this section) - confirm.
1639        indexedLSRegOffset(i).src := Cat(simple_emul, simple_lmul, nf)
1640        val offsetVs2 = indexedLSRegOffset(i).outOffsetVs2
1641        val offsetVd = indexedLSRegOffset(i).outOffsetVd
1642        val isFirstUopInVd = indexedLSRegOffset(i).outIsFirstUopInVd
1643        csBundle(i + 1).srcType(0) := SrcType.fp
1644        csBundle(i + 1).lsrc(0) := FP_TMP_REG_MV.U
            // One-hot select of src2 + offsetVs2 among the MAX_VLMUL candidates src2 + j.
1645        csBundle(i + 1).lsrc(1) := Mux1H(UIntToOH(offsetVs2, MAX_VLMUL), (0 until MAX_VLMUL).map(j => src2 + j.U))
1646        /**
1647          * For indexed instructions, VLSU will concatenate all the uops that write the same logic vd register and
1648          * writeback only once for all these uops. However, these uops share the same lsrc(2)/old vd and the same
1649          * ldest/vd that is equal to old vd, which leads to data dependence between the uops. Therefore there will be
1650          * deadlock for indexed instructions with emul > lmul.
1651          *
1652          * Assume N = emul/lmul. To break the deadlock, only the first uop will read old vd as lsrc(2), and the rest
1653          * N-1 uops will read temporary vector register.
1654          */
1655        // csBundle(i + 1).lsrc(2) := Mux1H(UIntToOH(offsetVd, MAX_VLMUL), (0 until MAX_VLMUL).map(j => dest + j.U))
1656        csBundle(i + 1).lsrc(2) := Mux(
1657          isFirstUopInVd,
1658          Mux1H(UIntToOH(offsetVd, MAX_VLMUL), (0 until MAX_VLMUL).map(j => dest + j.U)),
1659          VECTOR_TMP_REG_LMUL.U
1660        )
            // The destination is always dest + offsetVd; uopIdx = i (the move uop is not counted).
1661        csBundle(i + 1).ldest := Mux1H(UIntToOH(offsetVd, MAX_VLMUL), (0 until MAX_VLMUL).map(j => dest + j.U))
1662        csBundle(i + 1).uopIdx := i.U
1663      }
1664    }
1665  }
1666
1667  //readyFromRename Counter
        // Built from outReadys: each negated ready bit is paired with its index and fed to
        // PriorityMuxDefault, with RenameWidth.U as the default.
        // NOTE(review): presumably this yields the index of the first not-ready output, i.e. the
        // number of leading ready slots, or RenameWidth when all are ready - confirm against
        // PriorityMuxDefault.
1668  val readyCounter = PriorityMuxDefault(outReadys.map(x => !x).zip((0 until RenameWidth).map(_.U)), RenameWidth.U)
1669
1670  // The remaining uops of the complex inst in ComplexDecoder can all be sent out this cycle
1671  val thisAllOut = uopRes <= readyCounter
1672
1673  switch(state) {
        // Next-state and next-remaining-count logic. Branches that connect nothing here (idle
        // without inValid) leave stateNext / uopResNext at whatever they were connected to before
        // this switch; those connections are outside this section.
1674    is(s_idle) {
          // Accept a new instruction: go active and load its uop count.
1675      when (inValid) {
1676        stateNext := s_active
1677        uopResNext := inUopInfo.numOfUop
1678      }
1679    }
1680    is(s_active) {
1681      when (thisAllOut) {
            // All remaining uops leave this cycle: either start the next instruction back-to-back
            // or return to idle with nothing left.
1682        when (inValid) {
1683          stateNext := s_active
1684          uopResNext := inUopInfo.numOfUop
1685        }.otherwise {
1686          stateNext := s_idle
1687          uopResNext := 0.U
1688        }
1689      }.otherwise {
            // Only readyCounter uops leave this cycle; stay active with the rest.
1690        stateNext := s_active
1691        uopResNext := uopRes - readyCounter
1692      }
1693    }
1694  }
1695
        // Register updates: a redirect overrides the computed next values and forces idle / 0.
1696  state := Mux(io.redirect, s_idle, stateNext)
1697  uopRes := Mux(io.redirect, 0.U, uopResNext)
1698
1699  val complexNum = Mux(uopRes > readyCounter, readyCounter, uopRes)
        // complexNum (above) is the smaller of uopRes and readyCounter.
1700
1701  for(i <- 0 until RenameWidth) {
          // Output slot i is valid when more than i uops are sent this cycle.
1702    outValids(i) := complexNum > i.U
          // Slot i carries csBundle(i + numOfUop - uopRes); when that index is not below maxUopSize
          // the last entry csBundle(maxUopSize - 1) is selected instead.
          // NOTE(review): this assumes numOfUop is the total uop count of the current instruction,
          // so that numOfUop - uopRes is the number of uops already sent - confirm.
1703    outDecodedInsts(i) := Mux((i.U + numOfUop - uopRes) < maxUopSize.U, csBundle(i.U + numOfUop - uopRes), csBundle(maxUopSize - 1))
1704  }
1705
        // complexNum is reported only in the active state, otherwise 0.
1706  outComplexNum := Mux(state === s_active, complexNum, 0.U)
        // Ready for a new instruction when idle, or when active and all remaining uops leave this
        // cycle (&& binds tighter than ||).
1707  inReady := state === s_idle || state === s_active && thisAllOut
1708
1709//  val validSimple = Wire(Vec(DecodeWidth, Bool()))
1710//  validSimple.zip(io.validFromIBuf.zip(io.isComplex)).map{ case (dst, (src1, src2)) => dst := src1 && !src2 }
1711//  val notInf = Wire(Vec(DecodeWidth, Bool()))
1712//  notInf.drop(1).zip(io.validFromIBuf.drop(1).zip(validSimple.drop(1))).map{ case (dst, (src1, src2)) => dst := !src1 || src2 }
1713//  notInf(0) := !io.validFromIBuf(0) || validSimple(0) || (io.isComplex(0) && io.in0pc === io.simple.decodedInst.pc)
1714//  val notInfVec = Wire(Vec(DecodeWidth, Bool()))
1715//  notInfVec.zipWithIndex.map{ case (dst, i) => dst := Cat(notInf.take(i + 1)).andR}
1716//
1717//  complexNum := Mux(io.validFromIBuf(0) && readyCounter.orR ,
1718//    Mux(uopRes0 > readyCounter, readyCounter, uopRes0),
1719//    0.U)
1720//  validToRename.zipWithIndex.foreach{
1721//    case(dst, i) =>
1722//      val validFix = Mux(complexNum.orR, validSimple((i+1).U - complexNum), validSimple(i))
1723//      dst := MuxCase(false.B, Seq(
1724//        (io.validFromIBuf(0) && readyCounter.orR && uopRes0 > readyCounter) -> Mux(readyCounter > i.U, true.B, false.B),
1725//        (io.validFromIBuf(0) && readyCounter.orR && !(uopRes0 > readyCounter)) -> Mux(complexNum > i.U, true.B, validFix && notInfVec(i.U - complexNum) && io.readyFromRename(i)),
1726//      ).toSeq)
1727//  }
1728//
1729//  readyToIBuf.zipWithIndex.foreach {
1730//    case (dst, i) =>
1731//      val readyToIBuf0 = Mux(io.isComplex(0), io.in0pc === io.simple.decodedInst.pc, true.B)
1732//      dst := MuxCase(true.B, Seq(
1733//        (io.validFromIBuf(0) && uopRes0 > readyCounter || !readyCounter.orR) -> false.B,
1734//        (io.validFromIBuf(0) && !(uopRes0 > readyCounter) && readyCounter.orR) -> (if (i==0) readyToIBuf0 else Mux(RenameWidth.U - complexNum >= i.U, notInfVec(i) && validSimple(i) && io.readyFromRename(i), false.B))
1735//      ).toSeq)
1736//  }
1737//
1738//  io.deq.decodedInsts := decodedInsts
1739//  io.deq.complexNum := complexNum
1740//  io.deq.validToRename := validToRename
1741//  io.deq.readyToIBuf := readyToIBuf
1742}
1743