xref: /XiangShan/src/main/scala/xiangshan/backend/decode/DecodeUnitComp.scala (revision 0c7ebb58175b51109677230e8cbab09e73166956)
1/***************************************************************************************
2  * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
3  * Copyright (c) 2020-2021 Peng Cheng Laboratory
4  *
5  * XiangShan is licensed under Mulan PSL v2.
6  * You can use this software according to the terms and conditions of the Mulan PSL v2.
7  * You may obtain a copy of Mulan PSL v2 at:
8  *          http://license.coscl.org.cn/MulanPSL2
9  *
10  * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11  * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12  * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13  *
14  * See the Mulan PSL v2 for more details.
15  ***************************************************************************************/
16
17package xiangshan.backend.decode
18
19import org.chipsalliance.cde.config.Parameters
20import chisel3._
21import chisel3.util._
22import freechips.rocketchip.rocket.Instructions
23import freechips.rocketchip.util.uintToBitPat
24import utils._
25import utility._
26import xiangshan.ExceptionNO.illegalInstr
27import xiangshan._
28import xiangshan.backend.fu.fpu.FPU
29import xiangshan.backend.fu.FuType
30import freechips.rocketchip.rocket.Instructions._
31import xiangshan.backend.Bundles.{DecodedInst, StaticInst}
32import xiangshan.backend.decode.isa.bitfield.XSInstBitFields
33import xiangshan.backend.fu.vector.Bundles.{VSew, VType, VLmul}
34import yunsuan.VpermType
35import scala.collection.Seq
36import chisel3.util.experimental.decode.{QMCMinimizer, TruthTable, decoder}
37
/**
  * Combinational lookup table for a single uop slot (`uopIdx`) of an indexed
  * vector load/store.
  *
  * The table is computed in Scala at elaboration time for every (emul, lmul, nf)
  * combination and minimized into logic with the QMC minimizer.
  *
  * Input `src` is packed as  { emul[6:5], lmul[4:3], nf[2:0] }.
  * `emul` and `lmul` are used as shift amounts (log2 of the multiplier) and `nf`
  * is the field count minus one.
  *
  * Outputs for this uop slot:
  *  - `outOffsetVs2`      : register offset to add to vs2
  *  - `outOffsetVd`       : register offset to add to vd
  *  - `outIsFirstUopInVd` : set when this uop is the first one touching its vd register
  *
  * @param uopIdx index of the uop slot this table instance describes
  */
class indexedLSUopTable(uopIdx:Int) extends Module {
  val src = IO(Input(UInt(7.W)))
  val outOffsetVs2 = IO(Output(UInt(3.W)))
  val outOffsetVd = IO(Output(UInt(3.W)))
  val outIsFirstUopInVd = IO(Output(Bool()))

  /**
    * Computes (offsetVs2, offsetVd, isFirstUopInVd) for one uop of an indexed load/store.
    *
    * Each field is split into `1 << max(lmul, emul)` uops; field `k` owns the uop
    * indices `k * uopsPerField until (k + 1) * uopsPerField`.
    *
    * @param lmul    log2 of the data register group size
    * @param emul    log2 of the index register group size
    * @param nfields number of fields (nf + 1)
    * @param uopIdx  uop index to look up
    * @return (offsetVs2, offsetVd, isFirstUopInVd as 0/1); (0, 0, 1) when `uopIdx`
    *         does not belong to the instruction or the guard below rejects the combination
    */
  def genCsBundle_VEC_INDEXED_LDST(lmul:Int, emul:Int, nfields:Int, uopIdx:Int): (Int, Int, Int) = {
    val uopsPerField = 1 << (lmul max emul)
    val regsPerField = 1 << lmul
    // NOTE(review): this guard multiplies the log2 value of lmul by nfields; it looks like
    // it may have been intended as (1 << lmul) * nfields <= 8 - confirm. Kept as-is so the
    // generated table is unchanged.
    val inRange = lmul * nfields <= 8 && uopIdx >= 0 && uopIdx < nfields * uopsPerField
    if (!inRange) {
      (0, 0, 1)
    } else {
      val field = uopIdx / uopsPerField   // which field this uop belongs to
      val i     = uopIdx % uopsPerField   // uop position inside the field
      if (lmul < emul) {
        // lmul < emul: uop count depends on emul * nf, several uops share one vd register
        val uopsPerVd = 1 << (emul - lmul)
        (i, i / uopsPerVd + field * regsPerField, if (i % uopsPerVd == 0) 1 else 0)
      } else {
        // lmul >= emul: uop count depends on lmul * nf, several uops share one vs2 register
        val uopsPerVs2 = 1 << (lmul - emul)
        (i / uopsPerVs2, i + field * regsPerField, 1)
      }
    }
  }

  // Indexed load/store: one row per (emul, lmul, nf) encoding, i.e. all 4 * 4 * 8 = 128
  // values of `src`. Row layout: (emul, lmul, nf, isFirstUopInVd, offsetVs2, offsetVd).
  val combVemulNf : Seq[(Int, Int, Int, Int, Int, Int)] = for {
    emul <- 0 until 4
    lmul <- 0 until 4
    nf   <- 0 until 8
  } yield {
    val (offsetVs2, offsetVd, isFirstUopInVd) = genCsBundle_VEC_INDEXED_LDST(lmul, emul, nf + 1, uopIdx)
    (emul, lmul, nf, isFirstUopInVd, offsetVs2, offsetVd)
  }

  // Output word layout: { isFirstUopInVd[6], offsetVs2[5:3], offsetVd[2:0] }.
  // NOTE(review): for some accepted combinations (e.g. lmul = 3, nfields = 2) offsetVd can
  // exceed 7 and then overlaps the offsetVs2 bits when OR-ed together; presumably those
  // encodings are never looked up by the decoder - confirm.
  val out = decoder(QMCMinimizer, src, TruthTable(combVemulNf.map {
    case (emul, lmul, nf, isFirstUopInVd, offsetVs2, offsetVd) =>
      (BitPat((emul << 5 | lmul << 3 | nf).U(7.W)), BitPat((isFirstUopInVd << 6 | offsetVs2 << 3 | offsetVd).U(7.W)))
  }, BitPat.N(7)))
  outOffsetVs2 := out(5, 3)
  outOffsetVd := out(2, 0)
  outIsFirstUopInVd := out(6).asBool
}
86
/**
  * Numeric constants shared by the vector uop-splitting logic.
  */
trait VectorConstants {
  /** Upper bound used by the per-uop generation loops of the complex decoder. */
  val MAX_VLMUL: Int = 8

  /** Logical register index used as the temporary fp register when vsetvl moves vtype. */
  val FP_TMP_REG_MV: Int = 32

  /** First temporary vector logical register; 33~47 -> 15 temporaries. */
  val VECTOR_TMP_REG_LMUL: Int = 33

  /** Number of `indexedLSUopTable` instances (uop slots) built for indexed load/store. */
  val MAX_INDEXED_LS_UOPNUM: Int = 64
}
93
/**
  * Port list of the complex-instruction decoder (`DecodeUnitComp`).
  *
  * Field order is kept exactly as declared, because Bundle element order is
  * part of the hardware interface.
  */
class DecodeUnitCompIO(implicit p: Parameters) extends XSBundle {
  // Pipeline redirect indication.
  val redirect = Input(Bool())
  // Custom CSR control inputs.
  val csrCtrl = Input(new CustomCSRCtrlIO)
  // Bypassed vtype; the decoder connects it to the vset uops it generates.
  val vtypeBypass = Input(new VType)
  // The complex instruction is handed in here when it is the first
  // instruction of the decode vector, together with its uop information.
  val in = Flipped(Decoupled(new Bundle {
    val simpleDecodedInst = new DecodedInst
    val uopInfo = new UopInfo
  }))
  // Up to RenameWidth split uops, each with its own ready/valid handshake.
  val out = new Bundle {
    val complexDecodedInsts = Vec(RenameWidth, Decoupled(new DecodedInst))
  }
  // 3-bit uop count; presumably the number of uops presented on `out` - confirm.
  val complexNum = Output(UInt(3.W))
}
108
109/**
110  * @author zly
111  */
112class DecodeUnitComp()(implicit p : Parameters) extends XSModule with DecodeUnitConstants with VectorConstants {
113  val io = IO(new DecodeUnitCompIO)
114
115  // alias
116  private val inReady = io.in.ready
117  private val inValid = io.in.valid
118  private val inDecodedInst = WireInit(io.in.bits.simpleDecodedInst)
119  private val inInstFields = io.in.bits.simpleDecodedInst.instr.asTypeOf(new XSInstBitFields)
120  private val inUopInfo = io.in.bits.uopInfo
121  private val outValids = io.out.complexDecodedInsts.map(_.valid)
122  private val outReadys = io.out.complexDecodedInsts.map(_.ready)
123  private val outDecodedInsts = io.out.complexDecodedInsts.map(_.bits)
124  private val outComplexNum = io.complexNum
125
126  val maxUopSize = MaxUopSize
127  when (io.in.fire && io.in.bits.simpleDecodedInst.isVset) {
128    when(inInstFields.RD === 0.U && inInstFields.RS1 === 0.U) {
129      inDecodedInst.fuOpType := VSETOpType.keepVl(io.in.bits.simpleDecodedInst.fuOpType)
130    }.elsewhen(inInstFields.RS1 === 0.U) {
131      inDecodedInst.fuOpType := VSETOpType.setVlmax(io.in.bits.simpleDecodedInst.fuOpType)
132    }
133  }
134
135  val latchedInst = RegEnable(inDecodedInst, inValid && inReady)
136  val latchedUopInfo = RegEnable(inUopInfo, inValid && inReady)
137  //input bits
138  private val instFields: XSInstBitFields = latchedInst.instr.asTypeOf(new XSInstBitFields)
139
140  val src1 = Cat(0.U(1.W), instFields.RS1)
141  val src2 = Cat(0.U(1.W), instFields.RS2)
142  val dest = Cat(0.U(1.W), instFields.RD)
143
144  val nf    = instFields.NF
145  val width = instFields.WIDTH(1, 0)
146
147  //output of DecodeUnit
148  val numOfUop = Wire(UInt(log2Up(maxUopSize).W))
149  val numOfWB = Wire(UInt(log2Up(maxUopSize).W))
150  val lmul = Wire(UInt(4.W))
151  val isVsetSimple = Wire(Bool())
152
153  val indexedLSRegOffset = Seq.tabulate(MAX_INDEXED_LS_UOPNUM)(i => Module(new indexedLSUopTable(i)))
154  indexedLSRegOffset.map(_.src := 0.U)
155
156  //pre decode
157  lmul := latchedUopInfo.lmul
158  isVsetSimple := latchedInst.isVset
159  val vlmulReg = latchedInst.vpu.vlmul
160  val vsewReg = latchedInst.vpu.vsew
161
162  //Type of uop Div
163  val typeOfSplit = latchedInst.uopSplitType
164  val src1Type = latchedInst.srcType(0)
165  val src1IsImm = src1Type === SrcType.imm
166  val src1IsFp = src1Type === SrcType.fp
167
168  numOfUop := latchedUopInfo.numOfUop
169  numOfWB := latchedUopInfo.numOfWB
170
171  //uops dispatch
172  val s_idle :: s_active :: Nil = Enum(2)
173  val state = RegInit(s_idle)
174  val stateNext = WireDefault(state)
175  val numDecodedUop = RegInit(0.U(log2Up(maxUopSize).W))
176  val uopRes = RegInit(0.U(log2Up(maxUopSize).W))
177  val uopResNext = WireInit(uopRes)
178
179  //uop div up to maxUopSize
180  val csBundle = Wire(Vec(maxUopSize, new DecodedInst))
181  csBundle.foreach { case dst =>
182    dst := latchedInst
183    dst.numUops := latchedUopInfo.numOfUop
184    dst.numWB := latchedUopInfo.numOfWB
185    dst.firstUop := false.B
186    dst.lastUop := false.B
187    dst.vlsInstr := false.B
188  }
189
190  csBundle(0).firstUop := true.B
191  csBundle(numOfUop - 1.U).lastUop := true.B
192
193  switch(typeOfSplit) {
194    is(UopSplitType.VSET) {
195      // In simple decoder, rfWen and vecWen are not set
196      when(isVsetSimple) {
197        // Default
198        // uop0 set rd, never flushPipe
199        csBundle(0).fuType := FuType.vsetiwi.U
200        csBundle(0).flushPipe := false.B
201        csBundle(0).rfWen := true.B
202        // uop1 set vl, vsetvl will flushPipe
203        csBundle(1).ldest := VCONFIG_IDX.U
204        csBundle(1).vecWen := true.B
205        when(VSETOpType.isVsetvli(latchedInst.fuOpType) && dest === 0.U && src1 === 0.U) {
206          csBundle(1).fuType := FuType.vsetfwf.U
207          csBundle(1).srcType(0) := SrcType.vp
208          csBundle(1).lsrc(0) := VCONFIG_IDX.U
209        }.elsewhen(VSETOpType.isVsetvl(latchedInst.fuOpType) && dest === 0.U && src1 === 0.U) {
210          // uop0: mv vtype gpr to vector region
211          csBundle(0).srcType(0) := SrcType.xp
212          csBundle(0).srcType(1) := SrcType.no
213          csBundle(0).lsrc(1) := 0.U
214          csBundle(0).ldest := FP_TMP_REG_MV.U
215          csBundle(0).fuType := FuType.i2f.U
216          csBundle(0).fpWen := true.B
217          csBundle(0).fpu.isAddSub := false.B
218          csBundle(0).fpu.typeTagIn := FPU.D
219          csBundle(0).fpu.typeTagOut := FPU.D
220          csBundle(0).fpu.fromInt := true.B
221          csBundle(0).fpu.wflags := false.B
222          csBundle(0).fpu.fpWen := true.B
223          csBundle(0).fpu.div := false.B
224          csBundle(0).fpu.sqrt := false.B
225          csBundle(0).fpu.fcvt := false.B
226          csBundle(0).flushPipe := false.B
227          // uop1: uvsetvcfg_vv
228          csBundle(1).fuType := FuType.vsetfwf.U
229          // vl
230          csBundle(1).srcType(0) := SrcType.vp
231          csBundle(1).lsrc(0) := VCONFIG_IDX.U
232          // vtype
233          csBundle(1).srcType(1) := SrcType.fp
234          csBundle(1).lsrc(1) := FP_TMP_REG_MV.U
235          csBundle(1).vecWen := true.B
236          csBundle(1).ldest := VCONFIG_IDX.U
237        }
238        // use bypass vtype from vtypeGen
239        csBundle(0).vpu.connectVType(io.vtypeBypass)
240        csBundle(1).vpu.connectVType(io.vtypeBypass)
241      }
242    }
243    is(UopSplitType.VEC_VVV) {
244      for (i <- 0 until MAX_VLMUL) {
245        csBundle(i).lsrc(0) := src1 + i.U
246        csBundle(i).lsrc(1) := src2 + i.U
247        csBundle(i).lsrc(2) := dest + i.U
248        csBundle(i).ldest := dest + i.U
249        csBundle(i).uopIdx := i.U
250      }
251    }
252    is(UopSplitType.VEC_VFV) {
253      /*
254      i to vector move
255       */
256      csBundle(0).srcType(0) := SrcType.fp
257      csBundle(0).srcType(1) := SrcType.imm
258      csBundle(0).lsrc(1) := 0.U
259      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
260      csBundle(0).fuType := FuType.f2v.U
261      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
262      csBundle(0).vecWen := true.B
263      /*
264      LMUL
265       */
266      for (i <- 0 until MAX_VLMUL) {
267        csBundle(i + 1).srcType(0) := SrcType.vp
268        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
269        csBundle(i + 1).lsrc(1) := src2 + i.U
270        csBundle(i + 1).lsrc(2) := dest + i.U
271        csBundle(i + 1).ldest := dest + i.U
272        csBundle(i + 1).uopIdx := i.U
273      }
274    }
275    is(UopSplitType.VEC_EXT2) {
276      for (i <- 0 until MAX_VLMUL / 2) {
277        csBundle(2 * i).lsrc(1) := src2 + i.U
278        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
279        csBundle(2 * i).ldest := dest + (2 * i).U
280        csBundle(2 * i).uopIdx := (2 * i).U
281        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
282        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
283        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
284        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
285      }
286    }
287    is(UopSplitType.VEC_EXT4) {
288      for (i <- 0 until MAX_VLMUL / 4) {
289        csBundle(4 * i).lsrc(1) := src2 + i.U
290        csBundle(4 * i).lsrc(2) := dest + (4 * i).U
291        csBundle(4 * i).ldest := dest + (4 * i).U
292        csBundle(4 * i).uopIdx := (4 * i).U
293        csBundle(4 * i + 1).lsrc(1) := src2 + i.U
294        csBundle(4 * i + 1).lsrc(2) := dest + (4 * i + 1).U
295        csBundle(4 * i + 1).ldest := dest + (4 * i + 1).U
296        csBundle(4 * i + 1).uopIdx := (4 * i + 1).U
297        csBundle(4 * i + 2).lsrc(1) := src2 + i.U
298        csBundle(4 * i + 2).lsrc(2) := dest + (4 * i + 2).U
299        csBundle(4 * i + 2).ldest := dest + (4 * i + 2).U
300        csBundle(4 * i + 2).uopIdx := (4 * i + 2).U
301        csBundle(4 * i + 3).lsrc(1) := src2 + i.U
302        csBundle(4 * i + 3).lsrc(2) := dest + (4 * i + 3).U
303        csBundle(4 * i + 3).ldest := dest + (4 * i + 3).U
304        csBundle(4 * i + 3).uopIdx := (4 * i + 3).U
305      }
306    }
307    is(UopSplitType.VEC_EXT8) {
308      for (i <- 0 until MAX_VLMUL) {
309        csBundle(i).lsrc(1) := src2
310        csBundle(i).lsrc(2) := dest + i.U
311        csBundle(i).ldest := dest + i.U
312        csBundle(i).uopIdx := i.U
313      }
314    }
315    is(UopSplitType.VEC_0XV) {
316      /*
317      i/f to vector move
318       */
319      csBundle(0).srcType(0) := Mux(src1IsFp, SrcType.fp, SrcType.reg)
320      csBundle(0).srcType(1) := SrcType.imm
321      csBundle(0).lsrc(1) := 0.U
322      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
323      csBundle(0).fuType := Mux(src1IsFp, FuType.f2v.U, FuType.i2v.U)
324      csBundle(0).fuOpType := Cat(Mux(src1IsFp, IF2VectorType.fDup2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
325      csBundle(0).rfWen := false.B
326      csBundle(0).fpWen := false.B
327      csBundle(0).vecWen := true.B
328      /*
329      vmv.s.x
330       */
331      csBundle(1).srcType(0) := SrcType.vp
332      csBundle(1).srcType(1) := SrcType.imm
333      csBundle(1).srcType(2) := SrcType.vp
334      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
335      csBundle(1).lsrc(1) := 0.U
336      csBundle(1).lsrc(2) := dest
337      csBundle(1).ldest := dest
338      csBundle(1).rfWen := false.B
339      csBundle(1).fpWen := false.B
340      csBundle(1).vecWen := true.B
341      csBundle(1).uopIdx := 0.U
342    }
343    is(UopSplitType.VEC_VXV) {
344      /*
345      i to vector move
346       */
347      csBundle(0).srcType(0) := SrcType.reg
348      csBundle(0).srcType(1) := SrcType.imm
349      csBundle(0).lsrc(1) := 0.U
350      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
351      csBundle(0).fuType := FuType.i2v.U
352      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg)
353      csBundle(0).vecWen := true.B
354      /*
355      LMUL
356       */
357      for (i <- 0 until MAX_VLMUL) {
358        csBundle(i + 1).srcType(0) := SrcType.vp
359        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
360        csBundle(i + 1).lsrc(1) := src2 + i.U
361        csBundle(i + 1).lsrc(2) := dest + i.U
362        csBundle(i + 1).ldest := dest + i.U
363        csBundle(i + 1).uopIdx := i.U
364      }
365    }
366    is(UopSplitType.VEC_VVW) {
367      for (i <- 0 until MAX_VLMUL / 2) {
368        csBundle(2 * i).lsrc(0) := src1 + i.U
369        csBundle(2 * i).lsrc(1) := src2 + i.U
370        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
371        csBundle(2 * i).ldest := dest + (2 * i).U
372        csBundle(2 * i).uopIdx := (2 * i).U
373        csBundle(2 * i + 1).lsrc(0) := src1 + i.U
374        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
375        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
376        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
377        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
378      }
379    }
380    is(UopSplitType.VEC_VFW) {
381      /*
382      f to vector move
383       */
384      csBundle(0).srcType(0) := SrcType.fp
385      csBundle(0).srcType(1) := SrcType.imm
386      csBundle(0).lsrc(1) := 0.U
387      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
388      csBundle(0).fuType := FuType.f2v.U
389      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
390      csBundle(0).rfWen := false.B
391      csBundle(0).fpWen := false.B
392      csBundle(0).vecWen := true.B
393
394      for (i <- 0 until MAX_VLMUL / 2) {
395        csBundle(2 * i + 1).srcType(0) := SrcType.vp
396        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
397        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
398        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
399        csBundle(2 * i + 1).ldest := dest + (2 * i).U
400        csBundle(2 * i + 1).uopIdx := (2 * i).U
401        csBundle(2 * i + 2).srcType(0) := SrcType.vp
402        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
403        csBundle(2 * i + 2).lsrc(1) := src2 + i.U
404        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
405        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
406        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
407      }
408    }
409    is(UopSplitType.VEC_WVW) {
410      for (i <- 0 until MAX_VLMUL / 2) {
411        csBundle(2 * i).lsrc(0) := src1 + i.U
412        csBundle(2 * i).lsrc(1) := src2 + (2 * i).U
413        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
414        csBundle(2 * i).ldest := dest + (2 * i).U
415        csBundle(2 * i).uopIdx := (2 * i).U
416        csBundle(2 * i + 1).lsrc(0) := src1 + i.U
417        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i + 1).U
418        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
419        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
420        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
421      }
422    }
423    is(UopSplitType.VEC_VXW) {
424      /*
425      i to vector move
426       */
427      csBundle(0).srcType(0) := SrcType.reg
428      csBundle(0).srcType(1) := SrcType.imm
429      csBundle(0).lsrc(1) := 0.U
430      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
431      csBundle(0).fuType := FuType.i2v.U
432      csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg)
433      csBundle(0).vecWen := true.B
434
435      for (i <- 0 until MAX_VLMUL / 2) {
436        csBundle(2 * i + 1).srcType(0) := SrcType.vp
437        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
438        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
439        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
440        csBundle(2 * i + 1).ldest := dest + (2 * i).U
441        csBundle(2 * i + 1).uopIdx := (2 * i).U
442        csBundle(2 * i + 2).srcType(0) := SrcType.vp
443        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
444        csBundle(2 * i + 2).lsrc(1) := src2 + i.U
445        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
446        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
447        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
448      }
449    }
450    is(UopSplitType.VEC_WXW) {
451      /*
452      i to vector move
453       */
454      csBundle(0).srcType(0) := SrcType.reg
455      csBundle(0).srcType(1) := SrcType.imm
456      csBundle(0).lsrc(1) := 0.U
457      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
458      csBundle(0).fuType := FuType.i2v.U
459      csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg)
460      csBundle(0).vecWen := true.B
461
462      for (i <- 0 until MAX_VLMUL / 2) {
463        csBundle(2 * i + 1).srcType(0) := SrcType.vp
464        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
465        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U
466        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
467        csBundle(2 * i + 1).ldest := dest + (2 * i).U
468        csBundle(2 * i + 1).uopIdx := (2 * i).U
469        csBundle(2 * i + 2).srcType(0) := SrcType.vp
470        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
471        csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U
472        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
473        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
474        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
475      }
476    }
477    is(UopSplitType.VEC_WVV) {
478      for (i <- 0 until MAX_VLMUL / 2) {
479
480        csBundle(2 * i).lsrc(0) := src1 + i.U
481        csBundle(2 * i).lsrc(1) := src2 + (2 * i).U
482        csBundle(2 * i).lsrc(2) := dest + i.U
483        csBundle(2 * i).ldest := dest + i.U
484        csBundle(2 * i).uopIdx := (2 * i).U
485        csBundle(2 * i + 1).lsrc(0) := src1 + i.U
486        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i + 1).U
487        csBundle(2 * i + 1).lsrc(2) := dest + i.U
488        csBundle(2 * i + 1).ldest := dest + i.U
489        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
490      }
491    }
492    is(UopSplitType.VEC_WFW) {
493      /*
494      f to vector move
495       */
496      csBundle(0).srcType(0) := SrcType.fp
497      csBundle(0).srcType(1) := SrcType.imm
498      csBundle(0).lsrc(1) := 0.U
499      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
500      csBundle(0).fuType := FuType.f2v.U
501      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
502      csBundle(0).rfWen := false.B
503      csBundle(0).fpWen := false.B
504      csBundle(0).vecWen := true.B
505
506      for (i <- 0 until MAX_VLMUL / 2) {
507        csBundle(2 * i + 1).srcType(0) := SrcType.vp
508        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
509        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U
510        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
511        csBundle(2 * i + 1).ldest := dest + (2 * i).U
512        csBundle(2 * i + 1).uopIdx := (2 * i).U
513        csBundle(2 * i + 2).srcType(0) := SrcType.vp
514        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
515        csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U
516        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
517        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
518        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
519      }
520    }
521    is(UopSplitType.VEC_WXV) {
522      /*
523      i to vector move
524       */
525      csBundle(0).srcType(0) := SrcType.reg
526      csBundle(0).srcType(1) := SrcType.imm
527      csBundle(0).lsrc(1) := 0.U
528      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
529      csBundle(0).fuType := FuType.i2v.U
530      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg)
531      csBundle(0).vecWen := true.B
532
533      for (i <- 0 until MAX_VLMUL / 2) {
534        csBundle(2 * i + 1).srcType(0) := SrcType.vp
535        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
536        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U
537        csBundle(2 * i + 1).lsrc(2) := dest + i.U
538        csBundle(2 * i + 1).ldest := dest + i.U
539        csBundle(2 * i + 1).uopIdx := (2 * i).U
540        csBundle(2 * i + 2).srcType(0) := SrcType.vp
541        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
542        csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U
543        csBundle(2 * i + 2).lsrc(2) := dest + i.U
544        csBundle(2 * i + 2).ldest := dest + i.U
545        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
546      }
547    }
548    is(UopSplitType.VEC_VVM) {
549      csBundle(0).lsrc(2) := dest
550      csBundle(0).ldest := dest
551      csBundle(0).uopIdx := 0.U
552      for (i <- 1 until MAX_VLMUL) {
553        csBundle(i).lsrc(0) := src1 + i.U
554        csBundle(i).lsrc(1) := src2 + i.U
555        csBundle(i).lsrc(2) := dest
556        csBundle(i).ldest := dest
557        csBundle(i).uopIdx := i.U
558      }
559    }
560    is(UopSplitType.VEC_VFM) {
561      /*
562      f to vector move
563       */
564      csBundle(0).srcType(0) := SrcType.fp
565      csBundle(0).srcType(1) := SrcType.imm
566      csBundle(0).lsrc(1) := 0.U
567      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
568      csBundle(0).fuType := FuType.f2v.U
569      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
570      csBundle(0).rfWen := false.B
571      csBundle(0).fpWen := false.B
572      csBundle(0).vecWen := true.B
573      //LMUL
574      csBundle(1).srcType(0) := SrcType.vp
575      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
576      csBundle(1).lsrc(2) := dest
577      csBundle(1).ldest := dest
578      csBundle(1).uopIdx := 0.U
579      for (i <- 1 until MAX_VLMUL) {
580        csBundle(i + 1).srcType(0) := SrcType.vp
581        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
582        csBundle(i + 1).lsrc(1) := src2 + i.U
583        csBundle(i + 1).lsrc(2) := dest
584        csBundle(i + 1).ldest := dest
585        csBundle(i + 1).uopIdx := i.U
586      }
587      csBundle(numOfUop - 1.U).ldest := dest
588    }
589    is(UopSplitType.VEC_VXM) {
590      /*
591      i to vector move
592       */
593      csBundle(0).srcType(0) := SrcType.reg
594      csBundle(0).srcType(1) := SrcType.imm
595      csBundle(0).lsrc(1) := 0.U
596      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
597      csBundle(0).fuType := FuType.i2v.U
598      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg)
599      csBundle(0).vecWen := true.B
600      //LMUL
601      csBundle(1).srcType(0) := SrcType.vp
602      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
603      csBundle(1).lsrc(2) := dest
604      csBundle(1).ldest := dest
605      csBundle(1).uopIdx := 0.U
606      for (i <- 1 until MAX_VLMUL) {
607        csBundle(i + 1).srcType(0) := SrcType.vp
608        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
609        csBundle(i + 1).lsrc(1) := src2 + i.U
610        csBundle(i + 1).lsrc(2) := dest
611        csBundle(i + 1).ldest := dest
612        csBundle(i + 1).uopIdx := i.U
613      }
614      csBundle(numOfUop - 1.U).ldest := dest
615    }
616    is(UopSplitType.VEC_SLIDE1UP) {
617      /*
618      i to vector move
619       */
620      csBundle(0).srcType(0) := SrcType.reg
621      csBundle(0).srcType(1) := SrcType.imm
622      csBundle(0).lsrc(1) := 0.U
623      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
624      csBundle(0).fuType := FuType.i2v.U
625      csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg)
626      csBundle(0).vecWen := true.B
627      //LMUL
628      csBundle(1).srcType(0) := SrcType.vp
629      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
630      csBundle(1).lsrc(2) := dest
631      csBundle(1).ldest := dest
632      csBundle(1).uopIdx := 0.U
633      for (i <- 1 until MAX_VLMUL) {
634        csBundle(i + 1).srcType(0) := SrcType.vp
635        csBundle(i + 1).lsrc(0) := src2 + (i - 1).U
636        csBundle(i + 1).lsrc(1) := src2 + i.U
637        csBundle(i + 1).lsrc(2) := dest + i.U
638        csBundle(i + 1).ldest := dest + i.U
639        csBundle(i + 1).uopIdx := i.U
640      }
641    }
642    is(UopSplitType.VEC_FSLIDE1UP) {
643      /*
644      i to vector move
645       */
646      csBundle(0).srcType(0) := SrcType.fp
647      csBundle(0).srcType(1) := SrcType.imm
648      csBundle(0).lsrc(1) := 0.U
649      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
650      csBundle(0).fuType := FuType.f2v.U
651      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
652      csBundle(0).rfWen := false.B
653      csBundle(0).fpWen := false.B
654      csBundle(0).vecWen := true.B
655      //LMUL
656      csBundle(1).srcType(0) := SrcType.vp
657      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
658      csBundle(1).lsrc(1) := src2
659      csBundle(1).lsrc(2) := dest
660      csBundle(1).ldest := dest
661      csBundle(1).uopIdx := 0.U
662      for (i <- 1 until MAX_VLMUL) {
663        csBundle(i + 1).srcType(0) := SrcType.vp
664        csBundle(i + 1).lsrc(0) := src2 + (i - 1).U
665        csBundle(i + 1).lsrc(1) := src2 + i.U
666        csBundle(i + 1).lsrc(2) := dest + i.U
667        csBundle(i + 1).ldest := dest + i.U
668        csBundle(i + 1).uopIdx := i.U
669      }
670    }
671    is(UopSplitType.VEC_SLIDE1DOWN) { // lmul+lmul = 16
672      /*
673      i to vector move
674       */
675      csBundle(0).srcType(0) := SrcType.reg
676      csBundle(0).srcType(1) := SrcType.imm
677      csBundle(0).lsrc(1) := 0.U
678      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
679      csBundle(0).fuType := FuType.i2v.U
680      csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg)
681      csBundle(0).vecWen := true.B
682      //LMUL
683      for (i <- 0 until MAX_VLMUL) {
684        csBundle(2 * i + 1).srcType(0) := SrcType.vp
685        csBundle(2 * i + 1).srcType(1) := SrcType.vp
686        csBundle(2 * i + 1).lsrc(0) := src2 + (i + 1).U
687        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
688        csBundle(2 * i + 1).lsrc(2) := dest + i.U
689        csBundle(2 * i + 1).ldest := VECTOR_TMP_REG_LMUL.U + 1.U
690        csBundle(2 * i + 1).uopIdx := (2 * i).U
691        if (2 * i + 2 < MAX_VLMUL * 2) {
692          csBundle(2 * i + 2).srcType(0) := SrcType.vp
693          csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
694          // csBundle(2 * i + 2).lsrc(1) := src2 + i.U         // DontCare
695          csBundle(2 * i + 2).lsrc(2) := VECTOR_TMP_REG_LMUL.U + 1.U
696          csBundle(2 * i + 2).ldest := dest + i.U
697          csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
698        }
699      }
700      csBundle(numOfUop - 1.U).srcType(0) := SrcType.vp
701      csBundle(numOfUop - 1.U).lsrc(0) := VECTOR_TMP_REG_LMUL.U
702      csBundle(numOfUop - 1.U).ldest := dest + lmul - 1.U
703    }
704    is(UopSplitType.VEC_FSLIDE1DOWN) {
705      /*
706      i to vector move
707       */
708      csBundle(0).srcType(0) := SrcType.fp
709      csBundle(0).srcType(1) := SrcType.imm
710      csBundle(0).lsrc(1) := 0.U
711      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
712      csBundle(0).fuType := FuType.f2v.U
713      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
714      csBundle(0).rfWen := false.B
715      csBundle(0).fpWen := false.B
716      csBundle(0).vecWen := true.B
717      //LMUL
718      for (i <- 0 until MAX_VLMUL) {
719        csBundle(2 * i + 1).srcType(0) := SrcType.vp
720        csBundle(2 * i + 1).srcType(1) := SrcType.vp
721        csBundle(2 * i + 1).lsrc(0) := src2 + (i + 1).U
722        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
723        csBundle(2 * i + 1).lsrc(2) := dest + i.U
724        csBundle(2 * i + 1).ldest := VECTOR_TMP_REG_LMUL.U + 1.U
725        csBundle(2 * i + 1).uopIdx := (2 * i).U
726        if (2 * i + 2 < MAX_VLMUL * 2) {
727          csBundle(2 * i + 2).srcType(0) := SrcType.vp
728          csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
729          // csBundle(2 * i + 2).lsrc(1) := src2 + i.U         // DontCare
730          csBundle(2 * i + 2).lsrc(2) := VECTOR_TMP_REG_LMUL.U + 1.U
731          csBundle(2 * i + 2).ldest := dest + i.U
732          csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
733        }
734      }
735      csBundle(numOfUop - 1.U).srcType(0) := SrcType.vp
736      csBundle(numOfUop - 1.U).lsrc(0) := VECTOR_TMP_REG_LMUL.U
737      csBundle(numOfUop - 1.U).ldest := dest + lmul - 1.U
738    }
739    is(UopSplitType.VEC_VRED) {
740      when(vlmulReg === "b001".U) {
741        csBundle(0).srcType(2) := SrcType.DC
742        csBundle(0).lsrc(0) := src2 + 1.U
743        csBundle(0).lsrc(1) := src2
744        csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
745        csBundle(0).uopIdx := 0.U
746      }
747      when(vlmulReg === "b010".U) {
748        csBundle(0).srcType(2) := SrcType.DC
749        csBundle(0).lsrc(0) := src2 + 1.U
750        csBundle(0).lsrc(1) := src2
751        csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
752        csBundle(0).uopIdx := 0.U
753
754        csBundle(1).srcType(2) := SrcType.DC
755        csBundle(1).lsrc(0) := src2 + 3.U
756        csBundle(1).lsrc(1) := src2 + 2.U
757        csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
758        csBundle(1).uopIdx := 1.U
759
760        csBundle(2).srcType(2) := SrcType.DC
761        csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
762        csBundle(2).lsrc(1) := VECTOR_TMP_REG_LMUL.U
763        csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
764        csBundle(2).uopIdx := 2.U
765      }
766      when(vlmulReg === "b011".U) {
767        for (i <- 0 until MAX_VLMUL) {
768          if (i < MAX_VLMUL - MAX_VLMUL / 2) {
769            csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U
770            csBundle(i).lsrc(1) := src2 + (i * 2).U
771            csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
772          } else if (i < MAX_VLMUL - MAX_VLMUL / 4) {
773            csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + (i - MAX_VLMUL / 2) * 2 + 1).U
774            csBundle(i).lsrc(1) := (VECTOR_TMP_REG_LMUL + (i - MAX_VLMUL / 2) * 2).U
775            csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
776          } else if (i < MAX_VLMUL - MAX_VLMUL / 8) {
777            csBundle(6).lsrc(0) := (VECTOR_TMP_REG_LMUL + 5).U
778            csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
779            csBundle(6).ldest := (VECTOR_TMP_REG_LMUL + 6).U
780          }
781          csBundle(i).srcType(2) := SrcType.DC
782          csBundle(i).uopIdx := i.U
783        }
784      }
785      when(vlmulReg(2) === 0.U && vlmulReg(1, 0).orR) {
786        /*
787         * 2 <= vlmul <= 8
788         */
789        csBundle(numOfUop - 1.U).srcType(2) := SrcType.vp
790        csBundle(numOfUop - 1.U).lsrc(0) := src1
791        csBundle(numOfUop - 1.U).lsrc(1) := VECTOR_TMP_REG_LMUL.U + numOfUop - 2.U
792        csBundle(numOfUop - 1.U).lsrc(2) := dest
793        csBundle(numOfUop - 1.U).ldest := dest
794        csBundle(numOfUop - 1.U).uopIdx := numOfUop - 1.U
795      }
796    }
797    is(UopSplitType.VEC_VFRED) {
798      val vlmul = vlmulReg
799      val vsew = vsewReg
800      when(vlmul === VLmul.m8){
801        for (i <- 0 until 4) {
802          csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U
803          csBundle(i).lsrc(1) := src2 + (i * 2).U
804          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
805          csBundle(i).uopIdx := i.U
806        }
807        for (i <- 4 until 6) {
808          csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + (i - 4) * 2 + 1).U
809          csBundle(i).lsrc(1) := (VECTOR_TMP_REG_LMUL + (i - 4) * 2).U
810          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
811          csBundle(i).uopIdx := i.U
812        }
813        csBundle(6).lsrc(0) := (VECTOR_TMP_REG_LMUL + 5).U
814        csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
815        csBundle(6).ldest := (VECTOR_TMP_REG_LMUL + 6).U
816        csBundle(6).uopIdx := 6.U
817        when(vsew === VSew.e64) {
818          csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U
819          csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U
820          csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U
821          csBundle(7).vpu.fpu.isFoldTo1_2 := true.B
822          csBundle(7).uopIdx := 7.U
823          csBundle(8).lsrc(0) := src1
824          csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U
825          csBundle(8).ldest := dest
826          csBundle(8).uopIdx := 8.U
827        }
828        when(vsew === VSew.e32) {
829          csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U
830          csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U
831          csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U
832          csBundle(7).vpu.fpu.isFoldTo1_2 := true.B
833          csBundle(7).uopIdx := 7.U
834          csBundle(8).lsrc(0) := (VECTOR_TMP_REG_LMUL + 7).U
835          csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U
836          csBundle(8).ldest := (VECTOR_TMP_REG_LMUL + 8).U
837          csBundle(8).vpu.fpu.isFoldTo1_4 := true.B
838          csBundle(8).uopIdx := 8.U
839          csBundle(9).lsrc(0) := src1
840          csBundle(9).lsrc(1) := (VECTOR_TMP_REG_LMUL + 8).U
841          csBundle(9).ldest := dest
842          csBundle(9).uopIdx := 9.U
843        }
844        when(vsew === VSew.e16) {
845          csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U
846          csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U
847          csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U
848          csBundle(7).vpu.fpu.isFoldTo1_2 := true.B
849          csBundle(7).uopIdx := 7.U
850          csBundle(8).lsrc(0) := (VECTOR_TMP_REG_LMUL + 7).U
851          csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U
852          csBundle(8).ldest := (VECTOR_TMP_REG_LMUL + 8).U
853          csBundle(8).vpu.fpu.isFoldTo1_4 := true.B
854          csBundle(8).uopIdx := 8.U
855          csBundle(9).lsrc(0) := (VECTOR_TMP_REG_LMUL + 8).U
856          csBundle(9).lsrc(1) := (VECTOR_TMP_REG_LMUL + 8).U
857          csBundle(9).ldest := (VECTOR_TMP_REG_LMUL + 9).U
858          csBundle(9).vpu.fpu.isFoldTo1_8 := true.B
859          csBundle(9).uopIdx := 9.U
860          csBundle(10).lsrc(0) := src1
861          csBundle(10).lsrc(1) := (VECTOR_TMP_REG_LMUL + 9).U
862          csBundle(10).ldest := dest
863          csBundle(10).uopIdx := 10.U
864        }
865      }
866      when(vlmul === VLmul.m4) {
867        for (i <- 0 until 2) {
868          csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U
869          csBundle(i).lsrc(1) := src2 + (i * 2).U
870          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
871          csBundle(i).uopIdx := i.U
872        }
873        csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
874        csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
875        csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
876        csBundle(2).uopIdx := 2.U
877        when(vsew === VSew.e64) {
878          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
879          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
880          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
881          csBundle(3).vpu.fpu.isFoldTo1_2 := true.B
882          csBundle(3).uopIdx := 3.U
883          csBundle(4).lsrc(0) := src1
884          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
885          csBundle(4).ldest := dest
886          csBundle(4).uopIdx := 4.U
887        }
888        when(vsew === VSew.e32) {
889          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
890          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
891          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
892          csBundle(3).vpu.fpu.isFoldTo1_2 := true.B
893          csBundle(3).uopIdx := 3.U
894          csBundle(4).lsrc(0) := (VECTOR_TMP_REG_LMUL + 3).U
895          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
896          csBundle(4).ldest := (VECTOR_TMP_REG_LMUL + 4).U
897          csBundle(4).vpu.fpu.isFoldTo1_4 := true.B
898          csBundle(4).uopIdx := 4.U
899          csBundle(5).lsrc(0) := src1
900          csBundle(5).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
901          csBundle(5).ldest := dest
902          csBundle(5).uopIdx := 5.U
903        }
904        when(vsew === VSew.e16) {
905          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
906          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
907          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
908          csBundle(3).vpu.fpu.isFoldTo1_2 := true.B
909          csBundle(3).uopIdx := 3.U
910          csBundle(4).lsrc(0) := (VECTOR_TMP_REG_LMUL + 3).U
911          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
912          csBundle(4).ldest := (VECTOR_TMP_REG_LMUL + 4).U
913          csBundle(4).vpu.fpu.isFoldTo1_4 := true.B
914          csBundle(4).uopIdx := 4.U
915          csBundle(5).lsrc(0) := (VECTOR_TMP_REG_LMUL + 4).U
916          csBundle(5).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
917          csBundle(5).ldest := (VECTOR_TMP_REG_LMUL + 5).U
918          csBundle(5).vpu.fpu.isFoldTo1_8 := true.B
919          csBundle(5).uopIdx := 5.U
920          csBundle(6).lsrc(0) := src1
921          csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 5).U
922          csBundle(6).ldest := dest
923          csBundle(6).uopIdx := 6.U
924        }
925      }
926      when(vlmul === VLmul.m2) {
927        csBundle(0).lsrc(0) := src2 + 1.U
928        csBundle(0).lsrc(1) := src2 + 0.U
929        csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
930        csBundle(0).uopIdx := 0.U
931        when(vsew === VSew.e64) {
932          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
933          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
934          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
935          csBundle(1).vpu.fpu.isFoldTo1_2 := true.B
936          csBundle(1).uopIdx := 1.U
937          csBundle(2).lsrc(0) := src1
938          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
939          csBundle(2).ldest := dest
940          csBundle(2).uopIdx := 2.U
941        }
942        when(vsew === VSew.e32) {
943          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
944          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
945          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
946          csBundle(1).vpu.fpu.isFoldTo1_2 := true.B
947          csBundle(1).uopIdx := 1.U
948          csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
949          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
950          csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
951          csBundle(2).vpu.fpu.isFoldTo1_4 := true.B
952          csBundle(2).uopIdx := 2.U
953          csBundle(3).lsrc(0) := src1
954          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
955          csBundle(3).ldest := dest
956          csBundle(3).uopIdx := 3.U
957        }
958        when(vsew === VSew.e16) {
959          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
960          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
961          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
962          csBundle(1).vpu.fpu.isFoldTo1_2 := true.B
963          csBundle(1).uopIdx := 1.U
964          csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
965          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
966          csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
967          csBundle(2).vpu.fpu.isFoldTo1_4 := true.B
968          csBundle(2).uopIdx := 2.U
969          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
970          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
971          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
972          csBundle(3).vpu.fpu.isFoldTo1_8 := true.B
973          csBundle(3).uopIdx := 3.U
974          csBundle(4).lsrc(0) := src1
975          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
976          csBundle(4).ldest := dest
977          csBundle(4).uopIdx := 4.U
978        }
979      }
980      when(vlmul === VLmul.m1) {
981        when(vsew === VSew.e64) {
982          csBundle(0).lsrc(0) := src2
983          csBundle(0).lsrc(1) := src2
984          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
985          csBundle(0).vpu.fpu.isFoldTo1_2 := true.B
986          csBundle(0).uopIdx := 0.U
987          csBundle(1).lsrc(0) := src1
988          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
989          csBundle(1).ldest := dest
990          csBundle(1).uopIdx := 1.U
991        }
992        when(vsew === VSew.e32) {
993          csBundle(0).lsrc(0) := src2
994          csBundle(0).lsrc(1) := src2
995          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
996          csBundle(0).vpu.fpu.isFoldTo1_2 := true.B
997          csBundle(0).uopIdx := 0.U
998          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
999          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1000          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
1001          csBundle(1).vpu.fpu.isFoldTo1_4 := true.B
1002          csBundle(1).uopIdx := 1.U
1003          csBundle(2).lsrc(0) := src1
1004          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
1005          csBundle(2).ldest := dest
1006          csBundle(2).uopIdx := 2.U
1007        }
1008        when(vsew === VSew.e16) {
1009          csBundle(0).lsrc(0) := src2
1010          csBundle(0).lsrc(1) := src2
1011          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
1012          csBundle(0).vpu.fpu.isFoldTo1_2 := true.B
1013          csBundle(0).uopIdx := 0.U
1014          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
1015          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1016          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
1017          csBundle(1).vpu.fpu.isFoldTo1_4 := true.B
1018          csBundle(1).uopIdx := 1.U
1019          csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
1020          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
1021          csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
1022          csBundle(2).vpu.fpu.isFoldTo1_8 := true.B
1023          csBundle(2).uopIdx := 2.U
1024          csBundle(3).lsrc(0) := src1
1025          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
1026          csBundle(3).ldest := dest
1027          csBundle(3).uopIdx := 3.U
1028        }
1029      }
1030      when(vlmul === VLmul.mf2) {
1031        when(vsew === VSew.e32) {
1032          csBundle(0).lsrc(0) := src2
1033          csBundle(0).lsrc(1) := src2
1034          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
1035          csBundle(0).vpu.fpu.isFoldTo1_4 := true.B
1036          csBundle(0).uopIdx := 0.U
1037          csBundle(1).lsrc(0) := src1
1038          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1039          csBundle(1).ldest := dest
1040          csBundle(1).uopIdx := 1.U
1041        }
1042        when(vsew === VSew.e16) {
1043          csBundle(0).lsrc(0) := src2
1044          csBundle(0).lsrc(1) := src2
1045          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
1046          csBundle(0).vpu.fpu.isFoldTo1_4 := true.B
1047          csBundle(0).uopIdx := 0.U
1048          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
1049          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1050          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
1051          csBundle(1).vpu.fpu.isFoldTo1_8 := true.B
1052          csBundle(1).uopIdx := 1.U
1053          csBundle(2).lsrc(0) := src1
1054          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
1055          csBundle(2).ldest := dest
1056          csBundle(2).uopIdx := 2.U
1057        }
1058      }
1059      when(vlmul === VLmul.mf4) {
1060        when(vsew === VSew.e16) {
1061          csBundle(0).lsrc(0) := src2
1062          csBundle(0).lsrc(1) := src2
1063          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
1064          csBundle(0).vpu.fpu.isFoldTo1_8 := true.B
1065          csBundle(0).uopIdx := 0.U
1066          csBundle(1).lsrc(0) := src1
1067          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1068          csBundle(1).ldest := dest
1069          csBundle(1).uopIdx := 1.U
1070        }
1071      }
1072    }
1073
1074    is(UopSplitType.VEC_VFREDOSUM) {
1075      import yunsuan.VfaluType
1076      val vlmul = vlmulReg
1077      val vsew = vsewReg
1078      val isWiden = latchedInst.fuOpType === VfaluType.vfwredosum
1079      when(vlmul === VLmul.m8) {
1080        when(vsew === VSew.e64) {
1081          val vlmax = 16
1082          for (i <- 0 until vlmax) {
1083            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1084            csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U)
1085            csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1086            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1087            csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B)
1088            csBundle(i).uopIdx := i.U
1089          }
1090        }
1091        when(vsew === VSew.e32) {
1092          val vlmax = 32
1093          for (i <- 0 until vlmax) {
1094            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1095            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1096            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1097            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1098            csBundle(i).vpu.fpu.isFoldTo1_4 := (if (i % 4 == 0) false.B else true.B)
1099            csBundle(i).uopIdx := i.U
1100          }
1101        }
1102        when(vsew === VSew.e16) {
1103          val vlmax = 64
1104          for (i <- 0 until vlmax) {
1105            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1106            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1107            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1108            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1109            csBundle(i).vpu.fpu.isFoldTo1_8 := (if (i % 8 == 0) false.B else true.B)
1110            csBundle(i).uopIdx := i.U
1111          }
1112        }
1113      }
1114      when(vlmul === VLmul.m4) {
1115        when(vsew === VSew.e64) {
1116          val vlmax = 8
1117          for (i <- 0 until vlmax) {
1118            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1119            csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U)
1120            csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1121            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1122            csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B)
1123            csBundle(i).uopIdx := i.U
1124          }
1125        }
1126        when(vsew === VSew.e32) {
1127          val vlmax = 16
1128          for (i <- 0 until vlmax) {
1129            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1130            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1131            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1132            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1133            csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B)
1134            csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B)
1135            csBundle(i).uopIdx := i.U
1136          }
1137        }
1138        when(vsew === VSew.e16) {
1139          val vlmax = 32
1140          for (i <- 0 until vlmax) {
1141            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1142            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1143            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1144            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1145            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1146            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1147            csBundle(i).uopIdx := i.U
1148          }
1149        }
1150      }
1151      when(vlmul === VLmul.m2) {
1152        when(vsew === VSew.e64) {
1153          val vlmax = 4
1154          for (i <- 0 until vlmax) {
1155            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1156            csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U)
1157            csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1158            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1159            csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B)
1160            csBundle(i).uopIdx := i.U
1161          }
1162        }
1163        when(vsew === VSew.e32) {
1164          val vlmax = 8
1165          for (i <- 0 until vlmax) {
1166            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1167            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1168            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1169            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1170            csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B)
1171            csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B)
1172            csBundle(i).uopIdx := i.U
1173          }
1174        }
1175        when(vsew === VSew.e16) {
1176          val vlmax = 16
1177          for (i <- 0 until vlmax) {
1178            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1179            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1180            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1181            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1182            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1183            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1184            csBundle(i).uopIdx := i.U
1185          }
1186        }
1187      }
1188      when(vlmul === VLmul.m1) {
1189        when(vsew === VSew.e64) {
1190          val vlmax = 2
1191          for (i <- 0 until vlmax) {
1192            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1193            csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U)
1194            csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1195            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1196            csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B)
1197            csBundle(i).uopIdx := i.U
1198          }
1199        }
1200        when(vsew === VSew.e32) {
1201          val vlmax = 4
1202          for (i <- 0 until vlmax) {
1203            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1204            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1205            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1206            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1207            csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B)
1208            csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B)
1209            csBundle(i).uopIdx := i.U
1210          }
1211        }
1212        when(vsew === VSew.e16) {
1213          val vlmax = 8
1214          for (i <- 0 until vlmax) {
1215            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1216            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1217            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1218            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1219            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1220            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1221            csBundle(i).uopIdx := i.U
1222          }
1223        }
1224      }
1225      when(vlmul === VLmul.mf2) {
1226        when(vsew === VSew.e32) {
1227          val vlmax = 2
1228          for (i <- 0 until vlmax) {
1229            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1230            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1231            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1232            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1233            csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B)
1234            csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B)
1235            csBundle(i).uopIdx := i.U
1236          }
1237        }
1238        when(vsew === VSew.e16) {
1239          val vlmax = 4
1240          for (i <- 0 until vlmax) {
1241            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1242            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1243            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1244            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1245            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1246            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1247            csBundle(i).uopIdx := i.U
1248          }
1249        }
1250      }
1251      when(vlmul === VLmul.mf4) {
1252        when(vsew === VSew.e16) {
1253          val vlmax = 2
1254          for (i <- 0 until vlmax) {
1255            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1256            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1257            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1258            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1259            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1260            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1261            csBundle(i).uopIdx := i.U
1262          }
1263        }
1264      }
1265    }
1266
1267    is(UopSplitType.VEC_SLIDEUP) {
1268      // i to vector move
1269      csBundle(0).srcType(0) := SrcType.reg
1270      csBundle(0).srcType(1) := SrcType.imm
1271      csBundle(0).lsrc(1) := 0.U
1272      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
1273      csBundle(0).fuType := FuType.i2v.U
1274      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
1275      csBundle(0).vecWen := true.B
1276      // LMUL
1277      for (i <- 0 until MAX_VLMUL)
1278        for (j <- 0 to i) {
1279          val old_vd = if (j == 0) {
1280            dest + i.U
1281          } else (VECTOR_TMP_REG_LMUL + j).U
1282          val vd = if (j == i) {
1283            dest + i.U
1284          } else (VECTOR_TMP_REG_LMUL + j + 1).U
1285          csBundle(i * (i + 1) / 2 + j + 1).srcType(0) := SrcType.vp
1286          csBundle(i * (i + 1) / 2 + j + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
1287          csBundle(i * (i + 1) / 2 + j + 1).lsrc(1) := src2 + j.U
1288          csBundle(i * (i + 1) / 2 + j + 1).lsrc(2) := old_vd
1289          csBundle(i * (i + 1) / 2 + j + 1).ldest := vd
1290          csBundle(i * (i + 1) / 2 + j + 1).uopIdx := (i * (i + 1) / 2 + j).U
1291        }
1292    }
1293
1294    is(UopSplitType.VEC_SLIDEDOWN) {
1295      // i to vector move
1296      csBundle(0).srcType(0) := SrcType.reg
1297      csBundle(0).srcType(1) := SrcType.imm
1298      csBundle(0).lsrc(1) := 0.U
1299      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
1300      csBundle(0).fuType := FuType.i2v.U
1301      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
1302      csBundle(0).vecWen := true.B
1303      // LMUL
1304      for (i <- 0 until MAX_VLMUL)
1305        for (j <- (0 to i).reverse) {
1306          when(i.U < lmul) {
1307            val old_vd = if (j == 0) {
1308              dest + lmul - 1.U - i.U
1309            } else (VECTOR_TMP_REG_LMUL + j).U
1310            val vd = if (j == i) {
1311              dest + lmul - 1.U - i.U
1312            } else (VECTOR_TMP_REG_LMUL + j + 1).U
1313            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).srcType(0) := SrcType.vp
1314            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).lsrc(0) := VECTOR_TMP_REG_LMUL.U
1315            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).lsrc(1) := src2 + lmul - 1.U - j.U
1316            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).lsrc(2) := old_vd
1317            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).ldest := vd
1318            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).uopIdx := numOfUop - (i * (i + 1) / 2 + i - j + 2).U
1319          }
1320        }
1321    }
1322
1323    is(UopSplitType.VEC_M0X) {
1324      // LMUL
1325      for (i <- 0 until MAX_VLMUL) {
1326        val srcType0 = if (i == 0) SrcType.DC else SrcType.vp
1327        val ldest = (VECTOR_TMP_REG_LMUL + i).U
1328        csBundle(i).srcType(0) := srcType0
1329        csBundle(i).srcType(1) := SrcType.vp
1330        csBundle(i).rfWen := false.B
1331        csBundle(i).fpWen := false.B
1332        csBundle(i).vecWen := true.B
1333        csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
1334        csBundle(i).lsrc(1) := src2
1335        // csBundle(i).lsrc(2) := dest + i.U  DontCare
1336        csBundle(i).ldest := ldest
1337        csBundle(i).uopIdx := i.U
1338      }
1339      csBundle(lmul - 1.U).rfWen := true.B
1340      csBundle(lmul - 1.U).fpWen := false.B
1341      csBundle(lmul - 1.U).vecWen := false.B
1342      csBundle(lmul - 1.U).ldest := dest
1343    }
1344
1345    is(UopSplitType.VEC_MVV) {
1346      // LMUL
1347      for (i <- 0 until MAX_VLMUL) {
1348        val srcType0 = if (i == 0) SrcType.DC else SrcType.vp
1349        csBundle(i * 2 + 0).srcType(0) := srcType0
1350        csBundle(i * 2 + 0).srcType(1) := SrcType.vp
1351        csBundle(i * 2 + 0).lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
1352        csBundle(i * 2 + 0).lsrc(1) := src2
1353        csBundle(i * 2 + 0).lsrc(2) := dest + i.U
1354        csBundle(i * 2 + 0).ldest := dest + i.U
1355        csBundle(i * 2 + 0).uopIdx := (i * 2 + 0).U
1356
1357        csBundle(i * 2 + 1).srcType(0) := srcType0
1358        csBundle(i * 2 + 1).srcType(1) := SrcType.vp
1359        csBundle(i * 2 + 1).lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
1360        csBundle(i * 2 + 1).lsrc(1) := src2
1361        // csBundle(i).lsrc(2) := dest + i.U  DontCare
1362        csBundle(i * 2 + 1).ldest := (VECTOR_TMP_REG_LMUL + i).U
1363        csBundle(i * 2 + 1).uopIdx := (i * 2 + 1).U
1364      }
1365    }
1366
1367    is(UopSplitType.VEC_M0X_VFIRST) {
1368      // LMUL
1369      csBundle(0).rfWen := true.B
1370      csBundle(0).fpWen := false.B
1371      csBundle(0).vecWen := false.B
1372      csBundle(0).ldest := dest
1373    }
1374    is(UopSplitType.VEC_VWW) {
          // Two groups of uops, each writing temporary VECTOR_TMP_REG_LMUL + i:
          //   - slots with i < lmul read src2 + i on both lsrc(0) and lsrc(1);
          //   - the remaining slots read two adjacent temporaries, 2*(i - lmul) + 1 and 2*(i - lmul).
          // The slot at numOfUop - 1 is then redirected to read src1 and the old dest, and to write dest.
1375      for (i <- 0 until MAX_VLMUL*2) {
1376        when(i.U < lmul){
1377          csBundle(i).srcType(2) := SrcType.DC
1378          csBundle(i).lsrc(0) := src2 + i.U
1379          csBundle(i).lsrc(1) := src2 + i.U
1380          // csBundle(i).lsrc(2) := dest + (2 * i).U
1381          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
1382          csBundle(i).uopIdx :=  i.U
1383        } otherwise {
1384          csBundle(i).srcType(2) := SrcType.DC
              // Cat(x, 0.U(1.W)) appends a zero bit, i.e. doubles (i - lmul).
1385          csBundle(i).lsrc(0) := VECTOR_TMP_REG_LMUL.U + Cat((i.U-lmul),0.U(1.W)) + 1.U
1386          csBundle(i).lsrc(1) := VECTOR_TMP_REG_LMUL.U + Cat((i.U-lmul),0.U(1.W))
1387          // csBundle(i).lsrc(2) := dest + (2 * i).U
1388          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
1389          csBundle(i).uopIdx := i.U
1390        }
            // Overrides for the final uop. They must come after the per-slot connections above,
            // because a later connection to the same field replaces an earlier one.
            // NOTE(review): these four connections do not depend on i, yet they are re-issued on
            // every iteration; hoisting them below the loop looks equivalent — confirm before changing.
1391        csBundle(numOfUop-1.U).srcType(2) := SrcType.vp
1392        csBundle(numOfUop-1.U).lsrc(0) := src1
1393        csBundle(numOfUop-1.U).lsrc(2) := dest
1394        csBundle(numOfUop-1.U).ldest := dest
1395      }
1396    }
1397    is(UopSplitType.VEC_RGATHER) {
          // Builds a len x len grid of uops at slot i * len + j.
          // Uop (i, j) reads src1 + i and src2 + j. Along j the partial result is chained:
          // the first uop reads the old dest + i, each later one reads the temporary written
          // by its predecessor, and the last uop of the row writes dest + i.
1398      def genCsBundle_VEC_RGATHER(len:Int): Unit ={
1399        for (i <- 0 until len)
1400          for (j <- 0 until len) {
1401            // csBundle(i * len + j).srcType(0) := SrcType.vp // SrcType.imm
1402            // csBundle(i * len + j).srcType(1) := SrcType.vp
1403            // csBundle(i * len + j).srcType(2) := SrcType.vp
1404            csBundle(i * len + j).lsrc(0) := src1 + i.U
1405            csBundle(i * len + j).lsrc(1) := src2 + j.U
1406            val vd_old = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j - 1).U
1407            csBundle(i * len + j).lsrc(2) := vd_old
1408            val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
1409            csBundle(i * len + j).ldest := vd
1410            csBundle(i * len + j).uopIdx := (i * len + j).U
1411          }
1412      }
          // Grid size is picked from vlmulReg: b001 -> 2, b010 -> 4, b011 -> 8.
          // Any other vlmulReg value matches no case, so nothing is connected in this block.
1413      switch(vlmulReg) {
1414        is("b001".U ){
1415          genCsBundle_VEC_RGATHER(2)
1416        }
1417        is("b010".U ){
1418          genCsBundle_VEC_RGATHER(4)
1419        }
1420        is("b011".U ){
1421          genCsBundle_VEC_RGATHER(8)
1422        }
1423      }
1424    }
1425    is(UopSplitType.VEC_RGATHER_VX) {
          // Slot 0 is a move into the temporary VECTOR_TMP_REG_LMUL; the gather uops follow,
          // shifted by one slot, and all read that temporary as lsrc(0).
          // The helper lays out a len x len grid. Uop (i, j) reads src2 + j, and the partial
          // result is chained along j through temporaries VECTOR_TMP_REG_LMUL + 1 onwards
          // (temporary 0 is already taken by the move), ending in dest + i.
1426      def genCsBundle_RGATHER_VX(len:Int): Unit ={
1427        for (i <- 0 until len)
1428          for (j <- 0 until len) {
1429            csBundle(i * len + j + 1).srcType(0) := SrcType.vp
1430            // csBundle(i * len + j + 1).srcType(1) := SrcType.vp
1431            // csBundle(i * len + j + 1).srcType(2) := SrcType.vp
1432            csBundle(i * len + j + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
1433            csBundle(i * len + j + 1).lsrc(1) := src2 + j.U
1434            val vd_old = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
1435            csBundle(i * len + j + 1).lsrc(2) := vd_old
1436            val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j + 1).U
1437            csBundle(i * len + j + 1).ldest := vd
                // uopIdx is counted from the first gather uop, so it does not include slot 0.
1438            csBundle(i * len + j + 1).uopIdx := (i * len + j).U
1439          }
1440      }
1441      // i to vector move
          // fuOpType is the low three bits of the imm2Vec or i2Vec code (chosen by src1IsImm)
          // concatenated with vsewReg.
1442      csBundle(0).srcType(0) := SrcType.reg
1443      csBundle(0).srcType(1) := SrcType.imm
1444      csBundle(0).lsrc(1) := 0.U
1445      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
1446      csBundle(0).fuType := FuType.i2v.U
1447      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
1448      csBundle(0).vecWen := true.B
          // Grid size from vlmulReg: b000 -> 1, b001 -> 2, b010 -> 4, b011 -> 8.
1449      switch(vlmulReg) {
1450        is("b000".U ){
1451          genCsBundle_RGATHER_VX(1)
1452        }
1453        is("b001".U ){
1454          genCsBundle_RGATHER_VX(2)
1455        }
1456        is("b010".U ){
1457          genCsBundle_RGATHER_VX(4)
1458        }
1459        is("b011".U ){
1460          genCsBundle_RGATHER_VX(8)
1461        }
1462      }
1463    }
1464    is(UopSplitType.VEC_RGATHEREI16) {
          // Variant used when vsewReg is all zeros: every (i, j) grid cell becomes TWO uops,
          // at slots 2*(i*len + j) and 2*(i*len + j) + 1. They read src1 + 2*i and src1 + 2*i + 1
          // respectively, and both read src2 + j. The partial result is chained through the
          // temporaries VECTOR_TMP_REG_LMUL + 2*j and + 2*j + 1, and the very last uop of a
          // row writes dest + i.
1465      def genCsBundle_VEC_RGATHEREI16_SEW8(len:Int): Unit ={
1466        for (i <- 0 until len)
1467          for (j <- 0 until len) {
1468            val vd_old0 = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j*2-1).U
1469            val vd0 = (VECTOR_TMP_REG_LMUL + j*2 ).U
1470            // csBundle(i * len + j).srcType(0) := SrcType.vp // SrcType.imm
1471            // csBundle(i * len + j).srcType(1) := SrcType.vp
1472            // csBundle(i * len + j).srcType(2) := SrcType.vp
1473            csBundle((i * len + j)*2+0).lsrc(0) := src1 + (i*2+0).U
1474            csBundle((i * len + j)*2+0).lsrc(1) := src2 + j.U
1475            csBundle((i * len + j)*2+0).lsrc(2) := vd_old0
1476            csBundle((i * len + j)*2+0).ldest := vd0
1477            csBundle((i * len + j)*2+0).uopIdx := ((i * len + j)*2+0).U
1478            val vd_old1 = (VECTOR_TMP_REG_LMUL + j*2).U
1479            val vd1 = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j*2+1 ).U
1480            csBundle((i * len + j)*2+1).lsrc(0) := src1 + (i*2+1).U
1481            csBundle((i * len + j)*2+1).lsrc(1) := src2 + j.U
1482            csBundle((i * len + j)*2+1).lsrc(2) := vd_old1
1483            csBundle((i * len + j)*2+1).ldest := vd1
1484            csBundle((i * len + j)*2+1).uopIdx := ((i * len + j)*2+1).U
1485          }
1486      }
          // Variant for every other vsewReg value: one uop per (i, j) cell at slot i*len + j,
          // reading src1 + i and src2 + j, chained along j through VECTOR_TMP_REG_LMUL + j
          // and ending in dest + i.
1487      def genCsBundle_VEC_RGATHEREI16(len:Int): Unit ={
1488        for (i <- 0 until len)
1489          for (j <- 0 until len) {
1490            val vd_old = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j-1).U
1491            val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
1492            // csBundle(i * len + j).srcType(0) := SrcType.vp // SrcType.imm
1493            // csBundle(i * len + j).srcType(1) := SrcType.vp
1494            // csBundle(i * len + j).srcType(2) := SrcType.vp
1495            csBundle(i * len + j).lsrc(0) := src1 + i.U
1496            csBundle(i * len + j).lsrc(1) := src2 + j.U
1497            csBundle(i * len + j).lsrc(2) := vd_old
1498            csBundle(i * len + j).ldest := vd
1499            csBundle(i * len + j).uopIdx := (i * len + j).U
1500          }
1501      }
          // vlmulReg selects the grid size (b000 -> 1, b001 -> 2, b010 -> 4, b011 -> 8).
          // For the first three sizes !vsewReg.orR picks the two-uops-per-cell variant;
          // for b011 only the one-uop-per-cell variant is generated, whatever vsewReg is.
1502      switch(vlmulReg) {
1503        is("b000".U ){
1504          when(!vsewReg.orR){
1505            genCsBundle_VEC_RGATHEREI16_SEW8(1)
1506          } .otherwise{
1507            genCsBundle_VEC_RGATHEREI16(1)
1508          }
1509        }
1510        is("b001".U) {
1511          when(!vsewReg.orR) {
1512            genCsBundle_VEC_RGATHEREI16_SEW8(2)
1513          }.otherwise {
1514            genCsBundle_VEC_RGATHEREI16(2)
1515          }
1516        }
1517        is("b010".U) {
1518          when(!vsewReg.orR) {
1519            genCsBundle_VEC_RGATHEREI16_SEW8(4)
1520          }.otherwise {
1521            genCsBundle_VEC_RGATHEREI16(4)
1522          }
1523        }
1524        is("b011".U) {
1525          genCsBundle_VEC_RGATHEREI16(8)
1526        }
1527      }
1528    }
1529    is(UopSplitType.VEC_COMPRESS) {
          // Lays out a triangular schedule of uops. Row i (which reads src2 + i) has i + 2 uops,
          // except the last row, which has i + 1. Row i therefore starts at slot i*(i+3)/2,
          // the sum of the lengths of the rows before it.
1530      def genCsBundle_VEC_COMPRESS(len:Int): Unit = {
1531        for (i <- 0 until len) {
1532          val jlen = if (i == len-1) i+1 else i+2
1533          for (j <- 0 until jlen) {
                // Old-vd source: the real dest + i on the diagonal (j == i), otherwise
                // temporary VECTOR_TMP_REG_LMUL + j + 1.
1534            val vd_old = if(i==j) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j + 1).U
                // Destination: the last row writes the real dest + j. In earlier rows the extra
                // uop (j == i + 1) writes temporary 0 and the others write temporary j + 1.
1535            val vd = if(i==len-1) (dest + j.U) else {
1536              if (j == i+1) VECTOR_TMP_REG_LMUL.U else (VECTOR_TMP_REG_LMUL + j + 1).U
1537            }
                // The extra uop of a row leaves the types of sources 0 and 2 as DontCare.
1538            val src13Type = if (j == i+1) DontCare else SrcType.vp
1539            csBundle(i*(i+3)/2 + j).srcType(0) := src13Type
1540            csBundle(i*(i+3)/2 + j).srcType(1) := SrcType.vp
1541            csBundle(i*(i+3)/2 + j).srcType(2) := src13Type
1542            csBundle(i*(i+3)/2 + j).srcType(3) := SrcType.vp
1543            csBundle(i*(i+3)/2 + j).lsrc(0) := src1
1544            csBundle(i*(i+3)/2 + j).lsrc(1) := src2 + i.U
1545            csBundle(i*(i+3)/2 + j).lsrc(2) := vd_old
                // Every uop reads temporary 0 as its fourth source.
1546            csBundle(i*(i+3)/2 + j).lsrc(3) := VECTOR_TMP_REG_LMUL.U
1547            csBundle(i*(i+3)/2 + j).ldest := vd
1548            csBundle(i*(i+3)/2 + j).uopIdx := (i*(i+3)/2 + j).U
1549          }
1550        }
1551      }
          // Row count from vlmulReg: b001 -> 2, b010 -> 4, b011 -> 8; other values match no case.
1552      switch(vlmulReg) {
1553        is("b001".U ){
1554          genCsBundle_VEC_COMPRESS(2)
1555        }
1556        is("b010".U ){
1557          genCsBundle_VEC_COMPRESS(4)
1558        }
1559        is("b011".U ){
1560          genCsBundle_VEC_COMPRESS(8)
1561        }
1562      }
1563    }
1564    is(UopSplitType.VEC_MVNR) {
          // One uop per register for all MAX_VLMUL slots: slot i reads src1 + i, src2 + i and
          // the old dest + i, and writes dest + i.
1565      for (i <- 0 until MAX_VLMUL) {
1566        csBundle(i).lsrc(0) := src1 + i.U
1567        csBundle(i).lsrc(1) := src2 + i.U
1568        csBundle(i).lsrc(2) := dest + i.U
1569        csBundle(i).ldest := dest + i.U
1570        csBundle(i).uopIdx := i.U
1571      }
1572    }
1573    is(UopSplitType.VEC_US_LDST) {
1574      /*
1575      FMV.D.X
1576       */
          // Slot 0: an i2f move whose result lands in the FP temporary FP_TMP_REG_MV. It reads a
          // register on source 0 (presumably the base address — confirm) and an immediate 0 on
          // source 1, and only the FP write-enable is set.
1577      csBundle(0).srcType(0) := SrcType.reg
1578      csBundle(0).srcType(1) := SrcType.imm
1579      csBundle(0).lsrc(1) := 0.U
1580      csBundle(0).ldest := FP_TMP_REG_MV.U
1581      csBundle(0).fuType := FuType.i2f.U
1582      csBundle(0).rfWen := false.B
1583      csBundle(0).fpWen := true.B
1584      csBundle(0).vecWen := false.B
1585      csBundle(0).fpu.isAddSub := false.B
1586      csBundle(0).fpu.typeTagIn := FPU.D
1587      csBundle(0).fpu.typeTagOut := FPU.D
1588      csBundle(0).fpu.fromInt := true.B
1589      csBundle(0).fpu.wflags := false.B
1590      csBundle(0).fpu.fpWen := true.B
1591      csBundle(0).fpu.div := false.B
1592      csBundle(0).fpu.sqrt := false.B
1593      csBundle(0).fpu.fcvt := false.B
1594      csBundle(0).vlsInstr := true.B
1595      //LMUL
          // Slots 1..MAX_VLMUL: each reads the FP temporary as source 0 and the old dest + i as
          // lsrc(2), and writes dest + i. uopIdx restarts at 0, so it does not count the move.
1596      for (i <- 0 until MAX_VLMUL) {
1597        csBundle(i + 1).srcType(0) := SrcType.fp
1598        csBundle(i + 1).lsrc(0) := FP_TMP_REG_MV.U
1599        csBundle(i + 1).lsrc(2) := dest + i.U // old vd
1600        csBundle(i + 1).ldest := dest + i.U
1601        csBundle(i + 1).uopIdx := i.U
1602        csBundle(i + 1).vlsInstr := true.B
1603      }
1604    }
1605    is(UopSplitType.VEC_S_LDST) {
1606      /*
1607      FMV.D.X
1608       */
          // Slot 0: i2f move of source 0 into the FP temporary FP_TMP_REG_MV.
1609      csBundle(0).srcType(0) := SrcType.reg
1610      csBundle(0).srcType(1) := SrcType.imm
1611      csBundle(0).lsrc(1) := 0.U
1612      csBundle(0).ldest := FP_TMP_REG_MV.U
1613      csBundle(0).fuType := FuType.i2f.U
1614      csBundle(0).rfWen := false.B
1615      csBundle(0).fpWen := true.B
1616      csBundle(0).vecWen := false.B
1617      csBundle(0).fpu.isAddSub := false.B
1618      csBundle(0).fpu.typeTagIn := FPU.D
1619      csBundle(0).fpu.typeTagOut := FPU.D
1620      csBundle(0).fpu.fromInt := true.B
1621      csBundle(0).fpu.wflags := false.B
1622      csBundle(0).fpu.fpWen := true.B
1623      csBundle(0).fpu.div := false.B
1624      csBundle(0).fpu.sqrt := false.B
1625      csBundle(0).fpu.fcvt := false.B
1626      csBundle(0).vlsInstr := true.B
1627
          // Slot 1: a second i2f move, this time of the instruction's second source register
          // (latchedInst.lsrc(1), presumably the stride — confirm). Its destination index is
          // VECTOR_TMP_REG_LMUL, but fpWen is the enable that is set, so the write goes to
          // the FP side.
1628      csBundle(1).srcType(0) := SrcType.reg
1629      csBundle(1).srcType(1) := SrcType.imm
1630      csBundle(1).lsrc(0) := latchedInst.lsrc(1)
1631      csBundle(1).lsrc(1) := 0.U
1632      csBundle(1).ldest := VECTOR_TMP_REG_LMUL.U
1633      csBundle(1).fuType := FuType.i2f.U
1634      csBundle(1).rfWen := false.B
1635      csBundle(1).fpWen := true.B
1636      csBundle(1).vecWen := false.B
1637      csBundle(1).fpu.isAddSub := false.B
1638      csBundle(1).fpu.typeTagIn := FPU.D
1639      csBundle(1).fpu.typeTagOut := FPU.D
1640      csBundle(1).fpu.fromInt := true.B
1641      csBundle(1).fpu.wflags := false.B
1642      csBundle(1).fpu.fpWen := true.B
1643      csBundle(1).fpu.div := false.B
1644      csBundle(1).fpu.sqrt := false.B
1645      csBundle(1).fpu.fcvt := false.B
1646      csBundle(1).vlsInstr := true.B
1647
1648      //LMUL
          // Slots 2..MAX_VLMUL+1: each reads both moved values as FP sources 0 and 1 and the
          // old dest + i as lsrc(2), and writes dest + i. uopIdx restarts at 0 after the moves.
1649      for (i <- 0 until MAX_VLMUL) {
1650        csBundle(i + 2).srcType(0) := SrcType.fp
1651        csBundle(i + 2).srcType(1) := SrcType.fp
1652        csBundle(i + 2).lsrc(0) := FP_TMP_REG_MV.U
1653        csBundle(i + 2).lsrc(1) := VECTOR_TMP_REG_LMUL.U
1654        csBundle(i + 2).lsrc(2) := dest + i.U // old vd
1655        csBundle(i + 2).ldest := dest + i.U
1656        csBundle(i + 2).uopIdx := i.U
1657        csBundle(i + 2).vlsInstr := true.B
1658      }
1659    }
1660    is(UopSplitType.VEC_I_LDST) {
1661    /*
1662      FMV.D.X
1663       */
1664      val vlmul = vlmulReg
1665      val vsew = Cat(0.U(1.W), vsewReg)
1666      val veew = Cat(0.U(1.W), width)
          // ~vsew + 1 is the two's-complement negation of vsew, so this sum is
          // veew - vsew + vlmul, wrapped to the width of the addition.
1667      val vemul: UInt = veew.asUInt + 1.U + vlmul.asUInt + ~vsew.asUInt
          // Compress the lmul / emul codes to two bits: b001 -> 1, b010 -> 2, b011 -> 3,
          // and every other code -> 0.
1668      val simple_lmul = MuxLookup(vlmul, 0.U(2.W), Array(
1669        "b001".U -> 1.U,
1670        "b010".U -> 2.U,
1671        "b011".U -> 3.U
1672      ))
1673      val simple_emul = MuxLookup(vemul, 0.U(2.W), Array(
1674        "b001".U -> 1.U,
1675        "b010".U -> 2.U,
1676        "b011".U -> 3.U
1677      ))
          // Slot 0: i2f move of source 0 into the FP temporary FP_TMP_REG_MV.
1678      csBundle(0).srcType(0) := SrcType.reg
1679      csBundle(0).srcType(1) := SrcType.imm
1680      csBundle(0).lsrc(1) := 0.U
1681      csBundle(0).ldest := FP_TMP_REG_MV.U
1682      csBundle(0).fuType := FuType.i2f.U
1683      csBundle(0).rfWen := false.B
1684      csBundle(0).fpWen := true.B
1685      csBundle(0).vecWen := false.B
1686      csBundle(0).fpu.isAddSub := false.B
1687      csBundle(0).fpu.typeTagIn := FPU.D
1688      csBundle(0).fpu.typeTagOut := FPU.D
1689      csBundle(0).fpu.fromInt := true.B
1690      csBundle(0).fpu.wflags := false.B
1691      csBundle(0).fpu.fpWen := true.B
1692      csBundle(0).fpu.div := false.B
1693      csBundle(0).fpu.sqrt := false.B
1694      csBundle(0).fpu.fcvt := false.B
1695      csBundle(0).vlsInstr := true.B
1696
1697      //LMUL
          // One lookup table per uop: it is fed {simple_emul, simple_lmul, nf} and returns the
          // register offsets to add to src2 and dest for that uop, plus a flag saying whether
          // the uop is the first one targeting its destination register.
1698      for (i <- 0 until MAX_INDEXED_LS_UOPNUM) {
1699        indexedLSRegOffset(i).src := Cat(simple_emul, simple_lmul, nf)
1700        val offsetVs2 = indexedLSRegOffset(i).outOffsetVs2
1701        val offsetVd = indexedLSRegOffset(i).outOffsetVd
1702        val isFirstUopInVd = indexedLSRegOffset(i).outIsFirstUopInVd
1703        csBundle(i + 1).srcType(0) := SrcType.fp
1704        csBundle(i + 1).lsrc(0) := FP_TMP_REG_MV.U
            // One-hot select of src2 + offsetVs2 among the MAX_VLMUL candidates.
1705        csBundle(i + 1).lsrc(1) := Mux1H(UIntToOH(offsetVs2, MAX_VLMUL), (0 until MAX_VLMUL).map(j => src2 + j.U))
1706        /**
1707          * For indexed instructions, VLSU will concatenate all the uops that write the same logic vd register and
1708          * writeback only once for all these uops. However, these uops share the same lsrc(2)/old vd and the same
1709          * ldest/vd that is equal to old vd, which leads to data dependence between the uops. Therefore there will be
1710          * deadlock for indexed instructions with emul > lmul.
1711          *
1712          * Assume N = emul/lmul. To break the deadlock, only the first uop will read old vd as lsrc(2), and the rest
1713          * N-1 uops will read temporary vector register.
1714          */
1715        // csBundle(i + 1).lsrc(2) := Mux1H(UIntToOH(offsetVd, MAX_VLMUL), (0 until MAX_VLMUL).map(j => dest + j.U))
1716        csBundle(i + 1).lsrc(2) := Mux(
1717          isFirstUopInVd,
1718          Mux1H(UIntToOH(offsetVd, MAX_VLMUL), (0 until MAX_VLMUL).map(j => dest + j.U)),
1719          VECTOR_TMP_REG_LMUL.U
1720        )
            // The destination is always dest + offsetVd; uopIdx does not count the slot-0 move.
1721        csBundle(i + 1).ldest := Mux1H(UIntToOH(offsetVd, MAX_VLMUL), (0 until MAX_VLMUL).map(j => dest + j.U))
1722        csBundle(i + 1).uopIdx := i.U
1723        csBundle(i + 1).vlsInstr := true.B
1724      }
1725    }
1726  }
1727
1728  // Ready counter derived from the rename-side ready signals.
      // Candidates are (!outReadys(i) -> i) with default RenameWidth; presumably
      // PriorityMuxDefault returns the value of the first true condition, which would make this
      // the number of leading ready outputs — confirm against the helper's definition.
1729  val readyCounter = PriorityMuxDefault(outReadys.map(x => !x).zip((0 until RenameWidth).map(_.U)), RenameWidth.U)
1730
1731  // The remaining uops of the complex inst in ComplexDecoder can all be sent out this cycle
1732  val thisAllOut = uopRes <= readyCounter
1733
1734  switch(state) {
        // Next-state logic for the two-state splitter; uopRes counts the uops still to be sent.
        // Idle: a valid input starts a split and loads the uop count of the incoming instruction.
1735    is(s_idle) {
1736      when (inValid) {
1737        stateNext := s_active
1738        uopResNext := inUopInfo.numOfUop
1739      }
1740    }
        // Active: when everything left fits this cycle, either start directly on the next valid
        // input or fall back to idle; otherwise stay active and subtract what was sent.
1741    is(s_active) {
1742      when (thisAllOut) {
1743        when (inValid) {
1744          stateNext := s_active
1745          uopResNext := inUopInfo.numOfUop
1746        }.otherwise {
1747          stateNext := s_idle
1748          uopResNext := 0.U
1749        }
1750      }.otherwise {
1751        stateNext := s_active
1752        uopResNext := uopRes - readyCounter
1753      }
1754    }
1755  }
1756
1757  state := Mux(io.redirect, s_idle, stateNext)
      // A redirect overrides the computed next state: back to idle with no uops outstanding.
1758  uopRes := Mux(io.redirect, 0.U, uopResNext)
1759
      // Number of uops issued this cycle: the smaller of uopRes and readyCounter.
1760  val complexNum = Mux(uopRes > readyCounter, readyCounter, uopRes)
1761
1762  for(i <- 0 until RenameWidth) {
        // Output i is valid when it falls within the uops issued this cycle.
1763    outValids(i) := complexNum > i.U
        // Output i carries slot i + (numOfUop - uopRes), i.e. it continues after the uops that
        // are no longer outstanding. An index at or beyond maxUopSize falls back to the last slot.
1764    outDecodedInsts(i) := Mux((i.U + numOfUop - uopRes) < maxUopSize.U, csBundle(i.U + numOfUop - uopRes), csBundle(maxUopSize - 1))
1765  }
1766
      // The issued count is reported only while active.
1767  outComplexNum := Mux(state === s_active, complexNum, 0.U)
      // A new instruction is accepted when idle, or when active and the current one finishes
      // this cycle. (=== binds tighter than &&, which binds tighter than ||.)
1768  inReady := state === s_idle || state === s_active && thisAllOut
1769
1770//  val validSimple = Wire(Vec(DecodeWidth, Bool()))
1771//  validSimple.zip(io.validFromIBuf.zip(io.isComplex)).map{ case (dst, (src1, src2)) => dst := src1 && !src2 }
1772//  val notInf = Wire(Vec(DecodeWidth, Bool()))
1773//  notInf.drop(1).zip(io.validFromIBuf.drop(1).zip(validSimple.drop(1))).map{ case (dst, (src1, src2)) => dst := !src1 || src2 }
1774//  notInf(0) := !io.validFromIBuf(0) || validSimple(0) || (io.isComplex(0) && io.in0pc === io.simple.decodedInst.pc)
1775//  val notInfVec = Wire(Vec(DecodeWidth, Bool()))
1776//  notInfVec.zipWithIndex.map{ case (dst, i) => dst := Cat(notInf.take(i + 1)).andR}
1777//
1778//  complexNum := Mux(io.validFromIBuf(0) && readyCounter.orR ,
1779//    Mux(uopRes0 > readyCounter, readyCounter, uopRes0),
1780//    0.U)
1781//  validToRename.zipWithIndex.foreach{
1782//    case(dst, i) =>
1783//      val validFix = Mux(complexNum.orR, validSimple((i+1).U - complexNum), validSimple(i))
1784//      dst := MuxCase(false.B, Seq(
1785//        (io.validFromIBuf(0) && readyCounter.orR && uopRes0 > readyCounter) -> Mux(readyCounter > i.U, true.B, false.B),
1786//        (io.validFromIBuf(0) && readyCounter.orR && !(uopRes0 > readyCounter)) -> Mux(complexNum > i.U, true.B, validFix && notInfVec(i.U - complexNum) && io.readyFromRename(i)),
1787//      ).toSeq)
1788//  }
1789//
1790//  readyToIBuf.zipWithIndex.foreach {
1791//    case (dst, i) =>
1792//      val readyToIBuf0 = Mux(io.isComplex(0), io.in0pc === io.simple.decodedInst.pc, true.B)
1793//      dst := MuxCase(true.B, Seq(
1794//        (io.validFromIBuf(0) && uopRes0 > readyCounter || !readyCounter.orR) -> false.B,
1795//        (io.validFromIBuf(0) && !(uopRes0 > readyCounter) && readyCounter.orR) -> (if (i==0) readyToIBuf0 else Mux(RenameWidth.U - complexNum >= i.U, notInfVec(i) && validSimple(i) && io.readyFromRename(i), false.B))
1796//      ).toSeq)
1797//  }
1798//
1799//  io.deq.decodedInsts := decodedInsts
1800//  io.deq.complexNum := complexNum
1801//  io.deq.validToRename := validToRename
1802//  io.deq.readyToIBuf := readyToIBuf
1803}
1804