xref: /XiangShan/src/main/scala/xiangshan/backend/decode/DecodeUnitComp.scala (revision f7af4c746b893ede5aa64c681f8da182c602efe0)
1/***************************************************************************************
2  * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
3  * Copyright (c) 2020-2021 Peng Cheng Laboratory
4  *
5  * XiangShan is licensed under Mulan PSL v2.
6  * You can use this software according to the terms and conditions of the Mulan PSL v2.
7  * You may obtain a copy of Mulan PSL v2 at:
8  *          http://license.coscl.org.cn/MulanPSL2
9  *
10  * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11  * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12  * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13  *
14  * See the Mulan PSL v2 for more details.
15  ***************************************************************************************/
16
17package xiangshan.backend.decode
18
19import org.chipsalliance.cde.config.Parameters
20import chisel3._
21import chisel3.util._
22import freechips.rocketchip.rocket.Instructions
23import freechips.rocketchip.util.uintToBitPat
24import utils._
25import utility._
26import xiangshan.ExceptionNO.illegalInstr
27import xiangshan._
28import xiangshan.backend.fu.fpu.FPU
29import xiangshan.backend.fu.FuType
30import freechips.rocketchip.rocket.Instructions._
31import xiangshan.backend.Bundles.{DecodedInst, StaticInst}
32import xiangshan.backend.decode.isa.bitfield.XSInstBitFields
33import xiangshan.backend.fu.vector.Bundles.{VSew, VType, VLmul}
34import yunsuan.VpermType
35import scala.collection.Seq
36import chisel3.util.experimental.decode.{QMCMinimizer, TruthTable, decoder}
37
/**
  * Elaboration-time lookup table for one uop slot of an indexed vector load/store.
  *
  * For the fixed slot `uopIdx`, every (emul, lmul, nf) combination is evaluated in Scala and
  * the results are minimised into a combinational decoder.
  *
  * `src` is 7 bits wide: `emul` in bits [6:5], `lmul` in bits [4:3], `nf` in bits [2:0].
  * `emul` and `lmul` are log2 exponents (a register group spans `1 << x` registers) and
  * `nf` is the number of fields minus one.
  *
  * @param uopIdx index of the uop slot described by this table instance
  */
class indexedLSUopTable(uopIdx:Int) extends Module {
  val src = IO(Input(UInt(7.W)))
  val outOffsetVs2 = IO(Output(UInt(3.W)))
  val outOffsetVd = IO(Output(UInt(3.W)))
  val outIsFirstUopInVd = IO(Output(Bool()))

  /**
    * Computes the table entry for one (lmul, emul, nfields) combination and one uop slot.
    *
    * Uops are laid out field by field; each field owns `1 << max(lmul, emul)` consecutive
    * uop indices.
    *
    * @param lmul    log2 of the register-group size used for vd (expected range 0..3)
    * @param emul    log2 of the register-group size used for vs2 (expected range 0..3)
    * @param nfields number of fields (1..8)
    * @param uopIdx  uop slot to look up
    * @return `(offsetVs2, offsetVd, isFirstUopInVd)`; `(0, 0, 1)` when the combination is
    *         not allowed or the slot lies beyond the last uop of the instruction
    */
  def genCsBundle_VEC_INDEXED_LDST(lmul:Int, emul:Int, nfields:Int, uopIdx:Int): (Int, Int, Int) = {
    val uopsPerField = 1 << math.max(lmul, emul)
    val field = uopIdx / uopsPerField     // which field this uop belongs to
    val i = uopIdx % uopsPerField         // position of the uop inside that field
    val vdBase = field * (1 << lmul)      // first vd register offset of that field

    // lmul is a log2 value, so the register-group size is (1 << lmul). The group size times
    // the field count must stay within 8 registers; this also guarantees that every offset
    // below fits in its 3-bit output field.
    val groupFits = (1 << lmul) * nfields <= 8

    if (uopIdx < 0 || !groupFits || field >= nfields) {
      (0, 0, 1)
    } else if (lmul < emul) {
      // lmul < emul: uop count is set by emul * nf; several uops share one vd register
      val uopsPerVd = 1 << (emul - lmul)
      (i, i / uopsPerVd + vdBase, if (i % uopsPerVd == 0) 1 else 0)
    } else {
      // lmul >= emul: uop count is set by lmul * nf; several uops share one vs2 register
      val uopsPerVs2 = 1 << (lmul - emul)
      (i / uopsPerVs2, i + vdBase, 1)
    }
  }

  // indexed load/store: one row per (emul, lmul, nf), in the same order as the src encoding
  val combVemulNf: Seq[(Int, Int, Int, Int, Int, Int)] = for {
    emul <- 0 until 4
    lmul <- 0 until 4
    nf <- 0 until 8
  } yield {
    val (offsetVs2, offsetVd, isFirstUopInVd) = genCsBundle_VEC_INDEXED_LDST(lmul, emul, nf + 1, uopIdx)
    (emul, lmul, nf, isFirstUopInVd, offsetVs2, offsetVd)
  }

  // Packed decoder output: isFirstUopInVd in bit 6, offsetVs2 in bits [5:3], offsetVd in bits [2:0].
  val out = decoder(QMCMinimizer, src, TruthTable(combVemulNf.map {
    case (emul, lmul, nf, isFirstUopInVd, offsetVs2, offsetVd) =>
      val key = (emul << 5) | (lmul << 3) | nf
      val value = (isFirstUopInVd << 6) | (offsetVs2 << 3) | offsetVd
      (BitPat(key.U(7.W)), BitPat(value.U(7.W)))
  }, BitPat.N(7)))
  outOffsetVs2 := out(5, 3)
  outOffsetVd := out(2, 0)
  outIsFirstUopInVd := out(6).asBool
}
86
/** Numeric constants shared by the vector uop-splitting logic in this file. */
trait VectorConstants {
  /** Upper bound used when unrolling per-register uop assignments. */
  val MAX_VLMUL: Int = 8

  /** Logical register index used as the floating-point temporary for integer-to-fp moves. */
  val FP_TMP_REG_MV: Int = 32

  /** First logical index of the vector temporaries: 33 to 47, i.e. 15 registers. */
  val VECTOR_TMP_REG_LMUL: Int = 33

  /** Number of uop slots instantiated for indexed load/store lookup tables. */
  val MAX_INDEXED_LS_UOPNUM: Int = 64
}
93
/**
  * Port list of [[DecodeUnitComp]].
  *
  * One decoded instruction together with its uop information enters through `in`; up to
  * `RenameWidth` decoded uops leave through `out.complexDecodedInsts`, each with its own
  * ready/valid handshake.
  */
class DecodeUnitCompIO(implicit p: Parameters) extends XSBundle {
  // Redirect indication (single bit).
  val redirect = Input(Bool())

  // Custom CSR control signals.
  val csrCtrl = Input(new CustomCSRCtrlIO)

  // Input channel: when the first instruction in the decode vector is a complex
  // instruction, it is passed in here along with its uop information.
  val in = Flipped(DecoupledIO(new Bundle {
    val simpleDecodedInst = new DecodedInst
    val uopInfo = new UopInfo
  }))

  // Output channels, one per rename slot.
  val out = new Bundle {
    val complexDecodedInsts = Vec(RenameWidth, DecoupledIO(new DecodedInst))
  }

  // 3-bit count reported alongside the outputs.
  // NOTE(review): presumably the number of uops presented this cycle; confirm against the driver.
  val complexNum = Output(UInt(3.W))
}
107
108/**
109  * @author zly
110  */
111class DecodeUnitComp()(implicit p : Parameters) extends XSModule with DecodeUnitConstants with VectorConstants {
112  val io = IO(new DecodeUnitCompIO)
113
114  // alias
115  private val inReady = io.in.ready
116  private val inValid = io.in.valid
117  private val inDecodedInst = WireInit(io.in.bits.simpleDecodedInst)
118  private val inUopInfo = io.in.bits.uopInfo
119  private val outValids = io.out.complexDecodedInsts.map(_.valid)
120  private val outReadys = io.out.complexDecodedInsts.map(_.ready)
121  private val outDecodedInsts = io.out.complexDecodedInsts.map(_.bits)
122  private val outComplexNum = io.complexNum
123
124  val maxUopSize = MaxUopSize
125  val latchedInst = RegEnable(inDecodedInst, inValid && inReady)
126  val latchedUopInfo = RegEnable(inUopInfo, inValid && inReady)
127  //input bits
128  private val instFields: XSInstBitFields = latchedInst.instr.asTypeOf(new XSInstBitFields)
129
130  val src1 = Cat(0.U(1.W), instFields.RS1)
131  val src2 = Cat(0.U(1.W), instFields.RS2)
132  val dest = Cat(0.U(1.W), instFields.RD)
133
134  val nf    = instFields.NF
135  val width = instFields.WIDTH(1, 0)
136
137  //output of DecodeUnit
138  val numOfUop = Wire(UInt(log2Up(maxUopSize).W))
139  val numOfWB = Wire(UInt(log2Up(maxUopSize).W))
140  val lmul = Wire(UInt(4.W))
141  val isVsetSimple = Wire(Bool())
142
143  val indexedLSRegOffset = Seq.tabulate(MAX_INDEXED_LS_UOPNUM)(i => Module(new indexedLSUopTable(i)))
144  indexedLSRegOffset.map(_.src := 0.U)
145
146  //pre decode
147  lmul := latchedUopInfo.lmul
148  isVsetSimple := latchedInst.isVset
149  val vlmulReg = latchedInst.vpu.vlmul
150  val vsewReg = latchedInst.vpu.vsew
151  when(isVsetSimple) {
152    when(dest === 0.U && src1 === 0.U) {
153      latchedInst.fuOpType := VSETOpType.keepVl(inDecodedInst.fuOpType)
154    }.elsewhen(src1 === 0.U) {
155      latchedInst.fuOpType := VSETOpType.setVlmax(inDecodedInst.fuOpType)
156    }
157    when(inDecodedInst.vpu.vill) {
158      latchedInst.exceptionVec(ExceptionNO.illegalInstr) := true.B
159    }
160  }
161  //Type of uop Div
162  val typeOfSplit = latchedInst.uopSplitType
163  val src1Type = latchedInst.srcType(0)
164  val src1IsImm = src1Type === SrcType.imm
165
166  numOfUop := latchedUopInfo.numOfUop
167  numOfWB := latchedUopInfo.numOfWB
168
169  //uops dispatch
170  val s_idle :: s_active :: Nil = Enum(2)
171  val state = RegInit(s_idle)
172  val stateNext = WireDefault(state)
173  val numDecodedUop = RegInit(0.U(log2Up(maxUopSize).W))
174  val uopRes = RegInit(0.U(log2Up(maxUopSize).W))
175  val uopResNext = WireInit(uopRes)
176
177  //uop div up to maxUopSize
178  val csBundle = Wire(Vec(maxUopSize, new DecodedInst))
179  csBundle.foreach { case dst =>
180    dst := latchedInst
181    dst.numUops := latchedUopInfo.numOfUop
182    dst.numWB := latchedUopInfo.numOfWB
183    dst.firstUop := false.B
184    dst.lastUop := false.B
185  }
186
187  csBundle(0).firstUop := true.B
188  csBundle(numOfUop - 1.U).lastUop := true.B
189
190  switch(typeOfSplit) {
191    is(UopSplitType.VSET) {
192      when(isVsetSimple) {
193        when(dest =/= 0.U) {
194          csBundle(0).fuType := FuType.vsetiwi.U
195          csBundle(0).fuOpType := VSETOpType.switchDest(latchedInst.fuOpType)
196          csBundle(0).flushPipe := false.B
197          csBundle(0).rfWen := true.B
198          csBundle(0).vecWen := false.B
199          csBundle(1).ldest := VCONFIG_IDX.U
200          csBundle(1).rfWen := false.B
201          csBundle(1).vecWen := true.B
202        }.elsewhen(src1 =/= 0.U) {
203          csBundle(0).ldest := VCONFIG_IDX.U
204        }.elsewhen(VSETOpType.isVsetvli(latchedInst.fuOpType)) {
205          csBundle(0).fuType := FuType.vsetfwf.U
206          csBundle(0).srcType(0) := SrcType.vp
207          csBundle(0).lsrc(0) := VCONFIG_IDX.U
208        }.elsewhen(VSETOpType.isVsetvl(latchedInst.fuOpType)) {
209          csBundle(0).srcType(0) := SrcType.reg
210          csBundle(0).srcType(1) := SrcType.imm
211          csBundle(0).lsrc(1) := 0.U
212          csBundle(0).ldest := FP_TMP_REG_MV.U
213          csBundle(0).fuType := FuType.i2f.U
214          csBundle(0).rfWen := false.B
215          csBundle(0).fpWen := true.B
216          csBundle(0).vecWen := false.B
217          csBundle(0).fpu.isAddSub := false.B
218          csBundle(0).fpu.typeTagIn := FPU.D
219          csBundle(0).fpu.typeTagOut := FPU.D
220          csBundle(0).fpu.fromInt := true.B
221          csBundle(0).fpu.wflags := false.B
222          csBundle(0).fpu.fpWen := true.B
223          csBundle(0).fpu.div := false.B
224          csBundle(0).fpu.sqrt := false.B
225          csBundle(0).fpu.fcvt := false.B
226          csBundle(0).flushPipe := false.B
227          csBundle(1).fuType := FuType.vsetfwf.U
228          csBundle(1).srcType(0) := SrcType.vp
229          csBundle(1).lsrc(0) := VCONFIG_IDX.U
230          csBundle(1).srcType(1) := SrcType.fp
231          csBundle(1).lsrc(1) := FP_TMP_REG_MV.U
232          csBundle(1).ldest := VCONFIG_IDX.U
233        }
234      }
235    }
236    is(UopSplitType.VEC_VVV) {
237      for (i <- 0 until MAX_VLMUL) {
238        csBundle(i).lsrc(0) := src1 + i.U
239        csBundle(i).lsrc(1) := src2 + i.U
240        csBundle(i).lsrc(2) := dest + i.U
241        csBundle(i).ldest := dest + i.U
242        csBundle(i).uopIdx := i.U
243      }
244    }
245    is(UopSplitType.VEC_VFV) {
246      for (i <- 0 until MAX_VLMUL) {
247        csBundle(i).lsrc(1) := src2 + i.U
248        csBundle(i).lsrc(2) := dest + i.U
249        csBundle(i).ldest := dest + i.U
250        csBundle(i).uopIdx := i.U
251      }
252    }
253    is(UopSplitType.VEC_EXT2) {
254      for (i <- 0 until MAX_VLMUL / 2) {
255        csBundle(2 * i).lsrc(1) := src2 + i.U
256        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
257        csBundle(2 * i).ldest := dest + (2 * i).U
258        csBundle(2 * i).uopIdx := (2 * i).U
259        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
260        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
261        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
262        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
263      }
264    }
265    is(UopSplitType.VEC_EXT4) {
266      for (i <- 0 until MAX_VLMUL / 4) {
267        csBundle(4 * i).lsrc(1) := src2 + i.U
268        csBundle(4 * i).lsrc(2) := dest + (4 * i).U
269        csBundle(4 * i).ldest := dest + (4 * i).U
270        csBundle(4 * i).uopIdx := (4 * i).U
271        csBundle(4 * i + 1).lsrc(1) := src2 + i.U
272        csBundle(4 * i + 1).lsrc(2) := dest + (4 * i + 1).U
273        csBundle(4 * i + 1).ldest := dest + (4 * i + 1).U
274        csBundle(4 * i + 1).uopIdx := (4 * i + 1).U
275        csBundle(4 * i + 2).lsrc(1) := src2 + i.U
276        csBundle(4 * i + 2).lsrc(2) := dest + (4 * i + 2).U
277        csBundle(4 * i + 2).ldest := dest + (4 * i + 2).U
278        csBundle(4 * i + 2).uopIdx := (4 * i + 2).U
279        csBundle(4 * i + 3).lsrc(1) := src2 + i.U
280        csBundle(4 * i + 3).lsrc(2) := dest + (4 * i + 3).U
281        csBundle(4 * i + 3).ldest := dest + (4 * i + 3).U
282        csBundle(4 * i + 3).uopIdx := (4 * i + 3).U
283      }
284    }
285    is(UopSplitType.VEC_EXT8) {
286      for (i <- 0 until MAX_VLMUL) {
287        csBundle(i).lsrc(1) := src2
288        csBundle(i).lsrc(2) := dest + i.U
289        csBundle(i).ldest := dest + i.U
290        csBundle(i).uopIdx := i.U
291      }
292    }
293    is(UopSplitType.VEC_0XV) {
294      /*
295      FMV.D.X
296       */
297      csBundle(0).srcType(0) := SrcType.reg
298      csBundle(0).srcType(1) := SrcType.imm
299      csBundle(0).lsrc(1) := 0.U
300      csBundle(0).ldest := FP_TMP_REG_MV.U
301      csBundle(0).fuType := FuType.i2f.U
302      csBundle(0).rfWen := false.B
303      csBundle(0).fpWen := true.B
304      csBundle(0).vecWen := false.B
305      csBundle(0).fpu.isAddSub := false.B
306      csBundle(0).fpu.typeTagIn := FPU.D
307      csBundle(0).fpu.typeTagOut := FPU.D
308      csBundle(0).fpu.fromInt := true.B
309      csBundle(0).fpu.wflags := false.B
310      csBundle(0).fpu.fpWen := true.B
311      csBundle(0).fpu.div := false.B
312      csBundle(0).fpu.sqrt := false.B
313      csBundle(0).fpu.fcvt := false.B
314      /*
315      vfmv.s.f
316       */
317      csBundle(1).srcType(0) := SrcType.fp
318      csBundle(1).srcType(1) := SrcType.vp
319      csBundle(1).srcType(2) := SrcType.vp
320      csBundle(1).lsrc(0) := FP_TMP_REG_MV.U
321      csBundle(1).lsrc(1) := 0.U
322      csBundle(1).lsrc(2) := dest
323      csBundle(1).ldest := dest
324      csBundle(1).fuType := FuType.vppu.U
325      csBundle(1).fuOpType := VpermType.dummy
326      csBundle(1).rfWen := false.B
327      csBundle(1).fpWen := false.B
328      csBundle(1).vecWen := true.B
329    }
330    is(UopSplitType.VEC_VXV) {
331      /*
332      i to vector move
333       */
334      csBundle(0).srcType(0) := SrcType.reg
335      csBundle(0).srcType(1) := SrcType.imm
336      csBundle(0).lsrc(1) := 0.U
337      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
338      csBundle(0).fuType := FuType.i2v.U
339      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2vector(2, 0), IF2VectorType.i2vector(2, 0)), vsewReg)
340      csBundle(0).vecWen := true.B
341      /*
342      LMUL
343       */
344      for (i <- 0 until MAX_VLMUL) {
345        csBundle(i + 1).srcType(0) := SrcType.vp
346        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
347        csBundle(i + 1).lsrc(1) := src2 + i.U
348        csBundle(i + 1).lsrc(2) := dest + i.U
349        csBundle(i + 1).ldest := dest + i.U
350        csBundle(i + 1).uopIdx := i.U
351      }
352    }
353    is(UopSplitType.VEC_VVW) {
354      for (i <- 0 until MAX_VLMUL / 2) {
355        csBundle(2 * i).lsrc(0) := src1 + i.U
356        csBundle(2 * i).lsrc(1) := src2 + i.U
357        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
358        csBundle(2 * i).ldest := dest + (2 * i).U
359        csBundle(2 * i).uopIdx := (2 * i).U
360        csBundle(2 * i + 1).lsrc(0) := src1 + i.U
361        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
362        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
363        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
364        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
365      }
366    }
367    is(UopSplitType.VEC_VFW) {
368      for (i <- 0 until MAX_VLMUL / 2) {
369        csBundle(2 * i).lsrc(0) := src1
370        csBundle(2 * i).lsrc(1) := src2 + i.U
371        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
372        csBundle(2 * i).ldest := dest + (2 * i).U
373        csBundle(2 * i).uopIdx := (2 * i).U
374        csBundle(2 * i + 1).lsrc(0) := src1
375        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
376        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
377        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
378        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
379      }
380    }
381    is(UopSplitType.VEC_WVW) {
382      for (i <- 0 until MAX_VLMUL / 2) {
383        csBundle(2 * i).lsrc(0) := src1 + i.U
384        csBundle(2 * i).lsrc(1) := src2 + (2 * i).U
385        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
386        csBundle(2 * i).ldest := dest + (2 * i).U
387        csBundle(2 * i).uopIdx := (2 * i).U
388        csBundle(2 * i + 1).lsrc(0) := src1 + i.U
389        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i + 1).U
390        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
391        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
392        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
393      }
394    }
395    is(UopSplitType.VEC_VXW) {
396      /*
397      i to vector move
398       */
399      csBundle(0).srcType(0) := SrcType.reg
400      csBundle(0).srcType(1) := SrcType.imm
401      csBundle(0).lsrc(1) := 0.U
402      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
403      csBundle(0).fuType := FuType.i2v.U
404      csBundle(0).fuOpType := vsewReg
405      csBundle(0).vecWen := true.B
406
407      for (i <- 0 until MAX_VLMUL / 2) {
408        csBundle(2 * i + 1).srcType(0) := SrcType.vp
409        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
410        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
411        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
412        csBundle(2 * i + 1).ldest := dest + (2 * i).U
413        csBundle(2 * i + 1).uopIdx := (2 * i).U
414        csBundle(2 * i + 2).srcType(0) := SrcType.vp
415        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
416        csBundle(2 * i + 2).lsrc(1) := src2 + i.U
417        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
418        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
419        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
420      }
421    }
422    is(UopSplitType.VEC_WXW) {
423      /*
424      i to vector move
425       */
426      csBundle(0).srcType(0) := SrcType.reg
427      csBundle(0).srcType(1) := SrcType.imm
428      csBundle(0).lsrc(1) := 0.U
429      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
430      csBundle(0).fuType := FuType.i2v.U
431      csBundle(0).fuOpType := vsewReg
432      csBundle(0).vecWen := true.B
433
434      for (i <- 0 until MAX_VLMUL / 2) {
435        csBundle(2 * i + 1).srcType(0) := SrcType.vp
436        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
437        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U
438        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
439        csBundle(2 * i + 1).ldest := dest + (2 * i).U
440        csBundle(2 * i + 1).uopIdx := (2 * i).U
441        csBundle(2 * i + 2).srcType(0) := SrcType.vp
442        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
443        csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U
444        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
445        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
446        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
447      }
448    }
449    is(UopSplitType.VEC_WVV) {
450      for (i <- 0 until MAX_VLMUL / 2) {
451
452        csBundle(2 * i).lsrc(0) := src1 + i.U
453        csBundle(2 * i).lsrc(1) := src2 + (2 * i).U
454        csBundle(2 * i).lsrc(2) := dest + i.U
455        csBundle(2 * i).ldest := dest + i.U
456        csBundle(2 * i).uopIdx := (2 * i).U
457        csBundle(2 * i + 1).lsrc(0) := src1 + i.U
458        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i + 1).U
459        csBundle(2 * i + 1).lsrc(2) := dest + i.U
460        csBundle(2 * i + 1).ldest := dest + i.U
461        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
462      }
463    }
464    is(UopSplitType.VEC_WFW) {
465      for (i <- 0 until MAX_VLMUL / 2) {
466        csBundle(2 * i).lsrc(0) := src1
467        csBundle(2 * i).lsrc(1) := src2 + (2 * i).U
468        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
469        csBundle(2 * i).ldest := dest + (2 * i).U
470        csBundle(2 * i).uopIdx := (2 * i).U
471        csBundle(2 * i + 1).lsrc(0) := src1
472        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i + 1).U
473        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
474        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
475        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
476      }
477    }
478    is(UopSplitType.VEC_WXV) {
479      /*
480      i to vector move
481       */
482      csBundle(0).srcType(0) := SrcType.reg
483      csBundle(0).srcType(1) := SrcType.imm
484      csBundle(0).lsrc(1) := 0.U
485      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
486      csBundle(0).fuType := FuType.i2v.U
487      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2vector(2, 0), IF2VectorType.i2vector(2, 0)), vsewReg)
488      csBundle(0).vecWen := true.B
489
490      for (i <- 0 until MAX_VLMUL / 2) {
491        csBundle(2 * i + 1).srcType(0) := SrcType.vp
492        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
493        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U
494        csBundle(2 * i + 1).lsrc(2) := dest + i.U
495        csBundle(2 * i + 1).ldest := dest + i.U
496        csBundle(2 * i + 1).uopIdx := (2 * i).U
497        csBundle(2 * i + 2).srcType(0) := SrcType.vp
498        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
499        csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U
500        csBundle(2 * i + 2).lsrc(2) := dest + i.U
501        csBundle(2 * i + 2).ldest := dest + i.U
502        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
503      }
504    }
505    is(UopSplitType.VEC_VVM) {
506      csBundle(0).lsrc(2) := dest
507      csBundle(0).ldest := dest
508      csBundle(0).uopIdx := 0.U
509      for (i <- 1 until MAX_VLMUL) {
510        csBundle(i).lsrc(0) := src1 + i.U
511        csBundle(i).lsrc(1) := src2 + i.U
512        csBundle(i).lsrc(2) := dest
513        csBundle(i).ldest := dest
514        csBundle(i).uopIdx := i.U
515      }
516    }
517    is(UopSplitType.VEC_VFM) {
518      csBundle(0).lsrc(2) := dest
519      csBundle(0).ldest := dest
520      csBundle(0).uopIdx := 0.U
521      for (i <- 1 until MAX_VLMUL) {
522        csBundle(i).lsrc(0) := src1
523        csBundle(i).lsrc(1) := src2 + i.U
524        csBundle(i).lsrc(2) := dest
525        csBundle(i).ldest := dest
526        csBundle(i).uopIdx := i.U
527      }
528      csBundle(numOfUop - 1.U).ldest := dest
529    }
530    is(UopSplitType.VEC_VXM) {
531      /*
532      i to vector move
533       */
534      csBundle(0).srcType(0) := SrcType.reg
535      csBundle(0).srcType(1) := SrcType.imm
536      csBundle(0).lsrc(1) := 0.U
537      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
538      csBundle(0).fuType := FuType.i2v.U
539      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2vector(2, 0), IF2VectorType.i2vector(2, 0)), vsewReg)
540      csBundle(0).vecWen := true.B
541      //LMUL
542      csBundle(1).srcType(0) := SrcType.vp
543      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
544      csBundle(1).lsrc(2) := dest
545      csBundle(1).ldest := dest
546      csBundle(1).uopIdx := 0.U
547      for (i <- 1 until MAX_VLMUL) {
548        csBundle(i + 1).srcType(0) := SrcType.vp
549        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
550        csBundle(i + 1).lsrc(1) := src2 + i.U
551        csBundle(i + 1).lsrc(2) := dest
552        csBundle(i + 1).ldest := dest
553        csBundle(i + 1).uopIdx := i.U
554      }
555      csBundle(numOfUop - 1.U).ldest := dest
556    }
557    is(UopSplitType.VEC_SLIDE1UP) {
558      /*
559      i to vector move
560       */
561      csBundle(0).srcType(0) := SrcType.reg
562      csBundle(0).srcType(1) := SrcType.imm
563      csBundle(0).lsrc(1) := 0.U
564      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
565      csBundle(0).fuType := FuType.i2v.U
566      csBundle(0).fuOpType := vsewReg
567      csBundle(0).vecWen := true.B
568      //LMUL
569      csBundle(1).srcType(0) := SrcType.vp
570      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
571      csBundle(1).lsrc(2) := dest
572      csBundle(1).ldest := dest
573      csBundle(1).uopIdx := 0.U
574      for (i <- 1 until MAX_VLMUL) {
575        csBundle(i + 1).srcType(0) := SrcType.vp
576        csBundle(i + 1).lsrc(0) := src2 + (i - 1).U
577        csBundle(i + 1).lsrc(1) := src2 + i.U
578        csBundle(i + 1).lsrc(2) := dest + i.U
579        csBundle(i + 1).ldest := dest + i.U
580        csBundle(i + 1).uopIdx := i.U
581      }
582    }
583    is(UopSplitType.VEC_FSLIDE1UP) {
584      //LMUL
585      csBundle(0).srcType(0) := SrcType.fp
586      csBundle(0).lsrc(0) := src1
587      csBundle(0).lsrc(1) := src2
588      csBundle(0).lsrc(2) := dest
589      csBundle(0).ldest := dest
590      csBundle(0).uopIdx := 0.U
591      for (i <- 1 until MAX_VLMUL) {
592        csBundle(i).srcType(0) := SrcType.vp
593        csBundle(i).lsrc(0) := src2 + (i - 1).U
594        csBundle(i).lsrc(1) := src2 + i.U
595        csBundle(i).lsrc(2) := dest + i.U
596        csBundle(i).ldest := dest + i.U
597        csBundle(i).uopIdx := i.U
598      }
599    }
600    is(UopSplitType.VEC_SLIDE1DOWN) { // lmul+lmul = 16
601      /*
602      i to vector move
603       */
604      csBundle(0).srcType(0) := SrcType.reg
605      csBundle(0).srcType(1) := SrcType.imm
606      csBundle(0).lsrc(1) := 0.U
607      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
608      csBundle(0).fuType := FuType.i2v.U
609      csBundle(0).fuOpType := vsewReg
610      csBundle(0).vecWen := true.B
611      //LMUL
612      for (i <- 0 until MAX_VLMUL) {
613        csBundle(2 * i + 1).srcType(0) := SrcType.vp
614        csBundle(2 * i + 1).srcType(1) := SrcType.vp
615        csBundle(2 * i + 1).lsrc(0) := src2 + (i + 1).U
616        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
617        csBundle(2 * i + 1).lsrc(2) := dest + i.U
618        csBundle(2 * i + 1).ldest := VECTOR_TMP_REG_LMUL.U + 1.U
619        csBundle(2 * i + 1).uopIdx := (2 * i).U
620        if (2 * i + 2 < MAX_VLMUL * 2) {
621          csBundle(2 * i + 2).srcType(0) := SrcType.vp
622          csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
623          // csBundle(2 * i + 2).lsrc(1) := src2 + i.U         // DontCare
624          csBundle(2 * i + 2).lsrc(2) := VECTOR_TMP_REG_LMUL.U + 1.U
625          csBundle(2 * i + 2).ldest := dest + i.U
626          csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
627        }
628      }
629      csBundle(numOfUop - 1.U).srcType(0) := SrcType.vp
630      csBundle(numOfUop - 1.U).lsrc(0) := VECTOR_TMP_REG_LMUL.U
631      csBundle(numOfUop - 1.U).ldest := dest + lmul - 1.U
632    }
633    is(UopSplitType.VEC_FSLIDE1DOWN) {
634      //LMUL
635      for (i <- 0 until MAX_VLMUL) {
636        csBundle(2 * i).srcType(0) := SrcType.vp
637        csBundle(2 * i).srcType(1) := SrcType.vp
638        csBundle(2 * i).lsrc(0) := src2 + (i + 1).U
639        csBundle(2 * i).lsrc(1) := src2 + i.U
640        csBundle(2 * i).lsrc(2) := dest + i.U
641        csBundle(2 * i).ldest := VECTOR_TMP_REG_LMUL.U
642        csBundle(2 * i).uopIdx := (2 * i).U
643        csBundle(2 * i + 1).srcType(0) := SrcType.fp
644        csBundle(2 * i + 1).lsrc(0) := src1
645        csBundle(2 * i + 1).lsrc(2) := VECTOR_TMP_REG_LMUL.U
646        csBundle(2 * i + 1).ldest := dest + i.U
647        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
648      }
649      csBundle(numOfUop - 1.U).srcType(0) := SrcType.fp
650      csBundle(numOfUop - 1.U).lsrc(0) := src1
651      csBundle(numOfUop - 1.U).ldest := dest + lmul - 1.U
652    }
653    is(UopSplitType.VEC_VRED) {
654      when(vlmulReg === "b001".U) {
655        csBundle(0).srcType(2) := SrcType.DC
656        csBundle(0).lsrc(0) := src2 + 1.U
657        csBundle(0).lsrc(1) := src2
658        csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
659        csBundle(0).uopIdx := 0.U
660      }
661      when(vlmulReg === "b010".U) {
662        csBundle(0).srcType(2) := SrcType.DC
663        csBundle(0).lsrc(0) := src2 + 1.U
664        csBundle(0).lsrc(1) := src2
665        csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
666        csBundle(0).uopIdx := 0.U
667
668        csBundle(1).srcType(2) := SrcType.DC
669        csBundle(1).lsrc(0) := src2 + 3.U
670        csBundle(1).lsrc(1) := src2 + 2.U
671        csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
672        csBundle(1).uopIdx := 1.U
673
674        csBundle(2).srcType(2) := SrcType.DC
675        csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
676        csBundle(2).lsrc(1) := VECTOR_TMP_REG_LMUL.U
677        csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
678        csBundle(2).uopIdx := 2.U
679      }
680      when(vlmulReg === "b011".U) {
681        for (i <- 0 until MAX_VLMUL) {
682          if (i < MAX_VLMUL - MAX_VLMUL / 2) {
683            csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U
684            csBundle(i).lsrc(1) := src2 + (i * 2).U
685            csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
686          } else if (i < MAX_VLMUL - MAX_VLMUL / 4) {
687            csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + (i - MAX_VLMUL / 2) * 2 + 1).U
688            csBundle(i).lsrc(1) := (VECTOR_TMP_REG_LMUL + (i - MAX_VLMUL / 2) * 2).U
689            csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
690          } else if (i < MAX_VLMUL - MAX_VLMUL / 8) {
691            csBundle(6).lsrc(0) := (VECTOR_TMP_REG_LMUL + 5).U
692            csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
693            csBundle(6).ldest := (VECTOR_TMP_REG_LMUL + 6).U
694          }
695          csBundle(i).srcType(2) := SrcType.DC
696          csBundle(i).uopIdx := i.U
697        }
698      }
699      when(vlmulReg.orR) {
700        csBundle(numOfUop - 1.U).srcType(2) := SrcType.vp
701        csBundle(numOfUop - 1.U).lsrc(0) := src1
702        csBundle(numOfUop - 1.U).lsrc(1) := VECTOR_TMP_REG_LMUL.U + numOfUop - 2.U
703        csBundle(numOfUop - 1.U).lsrc(2) := dest
704        csBundle(numOfUop - 1.U).ldest := dest
705        csBundle(numOfUop - 1.U).uopIdx := numOfUop - 1.U
706      }
707    }
708    is(UopSplitType.VEC_VFRED) {
709      val vlmul = vlmulReg
710      val vsew = vsewReg
711      when(vlmul === VLmul.m8){
712        for (i <- 0 until 4) {
713          csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U
714          csBundle(i).lsrc(1) := src2 + (i * 2).U
715          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
716          csBundle(i).uopIdx := i.U
717        }
718        for (i <- 4 until 6) {
719          csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + (i - 4) * 2 + 1).U
720          csBundle(i).lsrc(1) := (VECTOR_TMP_REG_LMUL + (i - 4) * 2).U
721          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
722          csBundle(i).uopIdx := i.U
723        }
724        csBundle(6).lsrc(0) := (VECTOR_TMP_REG_LMUL + 5).U
725        csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
726        csBundle(6).ldest := (VECTOR_TMP_REG_LMUL + 6).U
727        csBundle(6).uopIdx := 6.U
728        when(vsew === VSew.e64) {
729          csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U
730          csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U
731          csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U
732          csBundle(7).vpu.fpu.isFoldTo1_2 := true.B
733          csBundle(7).uopIdx := 7.U
734          csBundle(8).lsrc(0) := src1
735          csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U
736          csBundle(8).ldest := dest
737          csBundle(8).uopIdx := 8.U
738        }
739        when(vsew === VSew.e32) {
740          csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U
741          csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U
742          csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U
743          csBundle(7).vpu.fpu.isFoldTo1_2 := true.B
744          csBundle(7).uopIdx := 7.U
745          csBundle(8).lsrc(0) := (VECTOR_TMP_REG_LMUL + 7).U
746          csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U
747          csBundle(8).ldest := (VECTOR_TMP_REG_LMUL + 8).U
748          csBundle(8).vpu.fpu.isFoldTo1_4 := true.B
749          csBundle(8).uopIdx := 8.U
750          csBundle(9).lsrc(0) := src1
751          csBundle(9).lsrc(1) := (VECTOR_TMP_REG_LMUL + 8).U
752          csBundle(9).ldest := dest
753          csBundle(9).uopIdx := 9.U
754        }
755        when(vsew === VSew.e16) {
756          csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U
757          csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U
758          csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U
759          csBundle(7).vpu.fpu.isFoldTo1_2 := true.B
760          csBundle(7).uopIdx := 7.U
761          csBundle(8).lsrc(0) := (VECTOR_TMP_REG_LMUL + 7).U
762          csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U
763          csBundle(8).ldest := (VECTOR_TMP_REG_LMUL + 8).U
764          csBundle(8).vpu.fpu.isFoldTo1_4 := true.B
765          csBundle(8).uopIdx := 8.U
766          csBundle(9).lsrc(0) := (VECTOR_TMP_REG_LMUL + 8).U
767          csBundle(9).lsrc(1) := (VECTOR_TMP_REG_LMUL + 8).U
768          csBundle(9).ldest := (VECTOR_TMP_REG_LMUL + 9).U
769          csBundle(9).vpu.fpu.isFoldTo1_8 := true.B
770          csBundle(9).uopIdx := 9.U
771          csBundle(10).lsrc(0) := src1
772          csBundle(10).lsrc(1) := (VECTOR_TMP_REG_LMUL + 9).U
773          csBundle(10).ldest := dest
774          csBundle(10).uopIdx := 10.U
775        }
776      }
777      when(vlmul === VLmul.m4) {
778        for (i <- 0 until 2) {
779          csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U
780          csBundle(i).lsrc(1) := src2 + (i * 2).U
781          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
782          csBundle(i).uopIdx := i.U
783        }
784        csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
785        csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
786        csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
787        csBundle(2).uopIdx := 2.U
788        when(vsew === VSew.e64) {
789          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
790          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
791          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
792          csBundle(3).vpu.fpu.isFoldTo1_2 := true.B
793          csBundle(3).uopIdx := 3.U
794          csBundle(4).lsrc(0) := src1
795          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
796          csBundle(4).ldest := dest
797          csBundle(4).uopIdx := 4.U
798        }
799        when(vsew === VSew.e32) {
800          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
801          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
802          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
803          csBundle(3).vpu.fpu.isFoldTo1_2 := true.B
804          csBundle(3).uopIdx := 3.U
805          csBundle(4).lsrc(0) := (VECTOR_TMP_REG_LMUL + 3).U
806          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
807          csBundle(4).ldest := (VECTOR_TMP_REG_LMUL + 4).U
808          csBundle(4).vpu.fpu.isFoldTo1_4 := true.B
809          csBundle(4).uopIdx := 4.U
810          csBundle(5).lsrc(0) := src1
811          csBundle(5).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
812          csBundle(5).ldest := dest
813          csBundle(5).uopIdx := 5.U
814        }
815        when(vsew === VSew.e16) {
816          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
817          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
818          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
819          csBundle(3).vpu.fpu.isFoldTo1_2 := true.B
820          csBundle(3).uopIdx := 3.U
821          csBundle(4).lsrc(0) := (VECTOR_TMP_REG_LMUL + 3).U
822          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
823          csBundle(4).ldest := (VECTOR_TMP_REG_LMUL + 4).U
824          csBundle(4).vpu.fpu.isFoldTo1_4 := true.B
825          csBundle(4).uopIdx := 4.U
826          csBundle(5).lsrc(0) := (VECTOR_TMP_REG_LMUL + 4).U
827          csBundle(5).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
828          csBundle(5).ldest := (VECTOR_TMP_REG_LMUL + 5).U
829          csBundle(5).vpu.fpu.isFoldTo1_8 := true.B
830          csBundle(5).uopIdx := 5.U
831          csBundle(6).lsrc(0) := src1
832          csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 5).U
833          csBundle(6).ldest := dest
834          csBundle(6).uopIdx := 6.U
835        }
836      }
837      when(vlmul === VLmul.m2) {
838        csBundle(0).lsrc(0) := src2 + 1.U
839        csBundle(0).lsrc(1) := src2 + 0.U
840        csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
841        csBundle(0).uopIdx := 0.U
842        when(vsew === VSew.e64) {
843          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
844          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
845          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
846          csBundle(1).vpu.fpu.isFoldTo1_2 := true.B
847          csBundle(1).uopIdx := 1.U
848          csBundle(2).lsrc(0) := src1
849          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
850          csBundle(2).ldest := dest
851          csBundle(2).uopIdx := 2.U
852        }
853        when(vsew === VSew.e32) {
854          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
855          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
856          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
857          csBundle(1).vpu.fpu.isFoldTo1_2 := true.B
858          csBundle(1).uopIdx := 1.U
859          csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
860          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
861          csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
862          csBundle(2).vpu.fpu.isFoldTo1_4 := true.B
863          csBundle(2).uopIdx := 2.U
864          csBundle(3).lsrc(0) := src1
865          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
866          csBundle(3).ldest := dest
867          csBundle(3).uopIdx := 3.U
868        }
869        when(vsew === VSew.e16) {
870          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
871          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
872          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
873          csBundle(1).vpu.fpu.isFoldTo1_2 := true.B
874          csBundle(1).uopIdx := 1.U
875          csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
876          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
877          csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
878          csBundle(2).vpu.fpu.isFoldTo1_4 := true.B
879          csBundle(2).uopIdx := 2.U
880          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
881          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
882          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
883          csBundle(3).vpu.fpu.isFoldTo1_8 := true.B
884          csBundle(3).uopIdx := 3.U
885          csBundle(4).lsrc(0) := src1
886          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
887          csBundle(4).ldest := dest
888          csBundle(4).uopIdx := 4.U
889        }
890      }
891      when(vlmul === VLmul.m1) {
892        when(vsew === VSew.e64) {
893          csBundle(0).lsrc(0) := src2
894          csBundle(0).lsrc(1) := src2
895          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
896          csBundle(0).vpu.fpu.isFoldTo1_2 := true.B
897          csBundle(0).uopIdx := 0.U
898          csBundle(1).lsrc(0) := src1
899          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
900          csBundle(1).ldest := dest
901          csBundle(1).uopIdx := 1.U
902        }
903        when(vsew === VSew.e32) {
904          csBundle(0).lsrc(0) := src2
905          csBundle(0).lsrc(1) := src2
906          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
907          csBundle(0).vpu.fpu.isFoldTo1_2 := true.B
908          csBundle(0).uopIdx := 0.U
909          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
910          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
911          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
912          csBundle(1).vpu.fpu.isFoldTo1_4 := true.B
913          csBundle(1).uopIdx := 1.U
914          csBundle(2).lsrc(0) := src1
915          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
916          csBundle(2).ldest := dest
917          csBundle(2).uopIdx := 2.U
918        }
919        when(vsew === VSew.e16) {
920          csBundle(0).lsrc(0) := src2
921          csBundle(0).lsrc(1) := src2
922          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
923          csBundle(0).vpu.fpu.isFoldTo1_2 := true.B
924          csBundle(0).uopIdx := 0.U
925          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
926          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
927          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
928          csBundle(1).vpu.fpu.isFoldTo1_4 := true.B
929          csBundle(1).uopIdx := 1.U
930          csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
931          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
932          csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
933          csBundle(2).vpu.fpu.isFoldTo1_8 := true.B
934          csBundle(2).uopIdx := 2.U
935          csBundle(3).lsrc(0) := src1
936          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
937          csBundle(3).ldest := dest
938          csBundle(3).uopIdx := 3.U
939        }
940      }
941      when(vlmul === VLmul.mf2) {
942        when(vsew === VSew.e32) {
943          csBundle(0).lsrc(0) := src2
944          csBundle(0).lsrc(1) := src2
945          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
946          csBundle(0).vpu.fpu.isFoldTo1_4 := true.B
947          csBundle(0).uopIdx := 0.U
948          csBundle(1).lsrc(0) := src1
949          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
950          csBundle(1).ldest := dest
951          csBundle(1).uopIdx := 1.U
952        }
953        when(vsew === VSew.e16) {
954          csBundle(0).lsrc(0) := src2
955          csBundle(0).lsrc(1) := src2
956          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
957          csBundle(0).vpu.fpu.isFoldTo1_4 := true.B
958          csBundle(0).uopIdx := 0.U
959          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
960          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
961          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
962          csBundle(1).vpu.fpu.isFoldTo1_8 := true.B
963          csBundle(1).uopIdx := 1.U
964          csBundle(2).lsrc(0) := src1
965          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
966          csBundle(2).ldest := dest
967          csBundle(2).uopIdx := 2.U
968        }
969      }
970      when(vlmul === VLmul.mf4) {
971        when(vsew === VSew.e16) {
972          csBundle(0).lsrc(0) := src2
973          csBundle(0).lsrc(1) := src2
974          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
975          csBundle(0).vpu.fpu.isFoldTo1_8 := true.B
976          csBundle(0).uopIdx := 0.U
977          csBundle(1).lsrc(0) := src1
978          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
979          csBundle(1).ldest := dest
980          csBundle(1).uopIdx := 1.U
981        }
982      }
983    }
984
985    is(UopSplitType.VEC_VFREDOSUM) {
986      import yunsuan.VfaluType
987      val vlmul = vlmulReg
988      val vsew = vsewReg
989      val isWiden = latchedInst.fuOpType === VfaluType.vfwredosum
990      when(vlmul === VLmul.m8) {
991        when(vsew === VSew.e64) {
992          val vlmax = 16
993          for (i <- 0 until vlmax) {
994            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
995            csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U)
996            csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
997            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
998            csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B)
999            csBundle(i).uopIdx := i.U
1000          }
1001        }
1002        when(vsew === VSew.e32) {
1003          val vlmax = 32
1004          for (i <- 0 until vlmax) {
1005            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1006            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1007            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1008            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1009            csBundle(i).vpu.fpu.isFoldTo1_4 := (if (i % 4 == 0) false.B else true.B)
1010            csBundle(i).uopIdx := i.U
1011          }
1012        }
1013        when(vsew === VSew.e16) {
1014          val vlmax = 64
1015          for (i <- 0 until vlmax) {
1016            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1017            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1018            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1019            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1020            csBundle(i).vpu.fpu.isFoldTo1_8 := (if (i % 8 == 0) false.B else true.B)
1021            csBundle(i).uopIdx := i.U
1022          }
1023        }
1024      }
1025      when(vlmul === VLmul.m4) {
1026        when(vsew === VSew.e64) {
1027          val vlmax = 8
1028          for (i <- 0 until vlmax) {
1029            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1030            csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U)
1031            csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1032            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1033            csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B)
1034            csBundle(i).uopIdx := i.U
1035          }
1036        }
1037        when(vsew === VSew.e32) {
1038          val vlmax = 16
1039          for (i <- 0 until vlmax) {
1040            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1041            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1042            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1043            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1044            csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B)
1045            csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B)
1046            csBundle(i).uopIdx := i.U
1047          }
1048        }
1049        when(vsew === VSew.e16) {
1050          val vlmax = 32
1051          for (i <- 0 until vlmax) {
1052            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1053            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1054            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1055            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1056            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1057            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1058            csBundle(i).uopIdx := i.U
1059          }
1060        }
1061      }
1062      when(vlmul === VLmul.m2) {
1063        when(vsew === VSew.e64) {
1064          val vlmax = 4
1065          for (i <- 0 until vlmax) {
1066            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1067            csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U)
1068            csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1069            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1070            csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B)
1071            csBundle(i).uopIdx := i.U
1072          }
1073        }
1074        when(vsew === VSew.e32) {
1075          val vlmax = 8
1076          for (i <- 0 until vlmax) {
1077            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1078            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1079            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1080            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1081            csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B)
1082            csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B)
1083            csBundle(i).uopIdx := i.U
1084          }
1085        }
1086        when(vsew === VSew.e16) {
1087          val vlmax = 16
1088          for (i <- 0 until vlmax) {
1089            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1090            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1091            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1092            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1093            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1094            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1095            csBundle(i).uopIdx := i.U
1096          }
1097        }
1098      }
1099      when(vlmul === VLmul.m1) {
1100        when(vsew === VSew.e64) {
1101          val vlmax = 2
1102          for (i <- 0 until vlmax) {
1103            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1104            csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U)
1105            csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1106            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1107            csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B)
1108            csBundle(i).uopIdx := i.U
1109          }
1110        }
1111        when(vsew === VSew.e32) {
1112          val vlmax = 4
1113          for (i <- 0 until vlmax) {
1114            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1115            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1116            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1117            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1118            csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B)
1119            csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B)
1120            csBundle(i).uopIdx := i.U
1121          }
1122        }
1123        when(vsew === VSew.e16) {
1124          val vlmax = 8
1125          for (i <- 0 until vlmax) {
1126            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1127            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1128            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1129            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1130            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1131            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1132            csBundle(i).uopIdx := i.U
1133          }
1134        }
1135      }
1136      when(vlmul === VLmul.mf2) {
1137        when(vsew === VSew.e32) {
1138          val vlmax = 2
1139          for (i <- 0 until vlmax) {
1140            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1141            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1142            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1143            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1144            csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B)
1145            csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B)
1146            csBundle(i).uopIdx := i.U
1147          }
1148        }
1149        when(vsew === VSew.e16) {
1150          val vlmax = 4
1151          for (i <- 0 until vlmax) {
1152            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1153            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1154            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1155            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1156            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1157            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1158            csBundle(i).uopIdx := i.U
1159          }
1160        }
1161      }
1162      when(vlmul === VLmul.mf4) {
1163        when(vsew === VSew.e16) {
1164          val vlmax = 2
1165          for (i <- 0 until vlmax) {
1166            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1167            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1168            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1169            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1170            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1171            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1172            csBundle(i).uopIdx := i.U
1173          }
1174        }
1175      }
1176    }
1177
1178    is(UopSplitType.VEC_SLIDEUP) {
1179      // i to vector move
1180      csBundle(0).srcType(0) := SrcType.reg
1181      csBundle(0).srcType(1) := SrcType.imm
1182      csBundle(0).lsrc(1) := 0.U
1183      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
1184      csBundle(0).fuType := FuType.i2v.U
1185      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.permImm2vector(2, 0), IF2VectorType.i2vector(2, 0)), vsewReg)
1186      csBundle(0).vecWen := true.B
1187      // LMUL
1188      for (i <- 0 until MAX_VLMUL)
1189        for (j <- 0 to i) {
1190          val old_vd = if (j == 0) {
1191            dest + i.U
1192          } else (VECTOR_TMP_REG_LMUL + j).U
1193          val vd = if (j == i) {
1194            dest + i.U
1195          } else (VECTOR_TMP_REG_LMUL + j + 1).U
1196          csBundle(i * (i + 1) / 2 + j + 1).srcType(0) := SrcType.vp
1197          csBundle(i * (i + 1) / 2 + j + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
1198          csBundle(i * (i + 1) / 2 + j + 1).lsrc(1) := src2 + j.U
1199          csBundle(i * (i + 1) / 2 + j + 1).lsrc(2) := old_vd
1200          csBundle(i * (i + 1) / 2 + j + 1).ldest := vd
1201          csBundle(i * (i + 1) / 2 + j + 1).uopIdx := (i * (i + 1) / 2 + j).U
1202        }
1203    }
1204
1205    is(UopSplitType.VEC_SLIDEDOWN) {
1206      // i to vector move
1207      csBundle(0).srcType(0) := SrcType.reg
1208      csBundle(0).srcType(1) := SrcType.imm
1209      csBundle(0).lsrc(1) := 0.U
1210      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
1211      csBundle(0).fuType := FuType.i2v.U
1212      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.permImm2vector(2, 0), IF2VectorType.i2vector(2, 0)), vsewReg)
1213      csBundle(0).vecWen := true.B
1214      // LMUL
1215      for (i <- 0 until MAX_VLMUL)
1216        for (j <- (0 to i).reverse) {
1217          when(i.U < lmul) {
1218            val old_vd = if (j == 0) {
1219              dest + lmul - 1.U - i.U
1220            } else (VECTOR_TMP_REG_LMUL + j).U
1221            val vd = if (j == i) {
1222              dest + lmul - 1.U - i.U
1223            } else (VECTOR_TMP_REG_LMUL + j + 1).U
1224            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).srcType(0) := SrcType.vp
1225            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).lsrc(0) := VECTOR_TMP_REG_LMUL.U
1226            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).lsrc(1) := src2 + lmul - 1.U - j.U
1227            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).lsrc(2) := old_vd
1228            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).ldest := vd
1229            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).uopIdx := numOfUop - (i * (i + 1) / 2 + i - j + 2).U
1230          }
1231        }
1232    }
1233
1234    is(UopSplitType.VEC_M0X) {
1235      // LMUL
1236      for (i <- 0 until MAX_VLMUL) {
1237        val srcType0 = if (i == 0) SrcType.DC else SrcType.vp
1238        val ldest = (VECTOR_TMP_REG_LMUL + i).U
1239        csBundle(i).srcType(0) := srcType0
1240        csBundle(i).srcType(1) := SrcType.vp
1241        csBundle(i).rfWen := false.B
1242        csBundle(i).vecWen := true.B
1243        csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
1244        csBundle(i).lsrc(1) := src2
1245        // csBundle(i).lsrc(2) := dest + i.U  DontCare
1246        csBundle(i).ldest := ldest
1247        csBundle(i).uopIdx := i.U
1248      }
1249      csBundle(lmul - 1.U).vecWen := false.B
1250      csBundle(lmul - 1.U).fpWen := true.B
1251      csBundle(lmul - 1.U).ldest := FP_TMP_REG_MV.U
1252      // FMV_X_D
1253      csBundle(lmul).srcType(0) := SrcType.fp
1254      csBundle(lmul).srcType(1) := SrcType.imm
1255      csBundle(lmul).lsrc(0) := FP_TMP_REG_MV.U
1256      csBundle(lmul).lsrc(1) := 0.U
1257      csBundle(lmul).ldest := dest
1258      csBundle(lmul).fuType := FuType.fmisc.U
1259      csBundle(lmul).rfWen := true.B
1260      csBundle(lmul).fpWen := false.B
1261      csBundle(lmul).vecWen := false.B
1262      csBundle(lmul).fpu.isAddSub := false.B
1263      csBundle(lmul).fpu.typeTagIn := FPU.D
1264      csBundle(lmul).fpu.typeTagOut := FPU.D
1265      csBundle(lmul).fpu.fromInt := false.B
1266      csBundle(lmul).fpu.wflags := false.B
1267      csBundle(lmul).fpu.fpWen := false.B
1268      csBundle(lmul).fpu.div := false.B
1269      csBundle(lmul).fpu.sqrt := false.B
1270      csBundle(lmul).fpu.fcvt := false.B
1271    }
1272
1273    is(UopSplitType.VEC_MVV) {
1274      // LMUL
1275      for (i <- 0 until MAX_VLMUL) {
1276        val srcType0 = if (i == 0) SrcType.DC else SrcType.vp
1277        csBundle(i * 2 + 0).srcType(0) := srcType0
1278        csBundle(i * 2 + 0).srcType(1) := SrcType.vp
1279        csBundle(i * 2 + 0).lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
1280        csBundle(i * 2 + 0).lsrc(1) := src2
1281        csBundle(i * 2 + 0).lsrc(2) := dest + i.U
1282        csBundle(i * 2 + 0).ldest := dest + i.U
1283        csBundle(i * 2 + 0).uopIdx := (i * 2 + 0).U
1284
1285        csBundle(i * 2 + 1).srcType(0) := srcType0
1286        csBundle(i * 2 + 1).srcType(1) := SrcType.vp
1287        csBundle(i * 2 + 1).lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
1288        csBundle(i * 2 + 1).lsrc(1) := src2
1289        // csBundle(i).lsrc(2) := dest + i.U  DontCare
1290        csBundle(i * 2 + 1).ldest := (VECTOR_TMP_REG_LMUL + i).U
1291        csBundle(i * 2 + 1).uopIdx := (i * 2 + 1).U
1292      }
1293    }
1294
1295    is(UopSplitType.VEC_M0X_VFIRST) {
1296      // LMUL
1297      csBundle(0).rfWen := false.B
1298      csBundle(0).fpWen := true.B
1299      csBundle(0).ldest := FP_TMP_REG_MV.U
1300      // FMV_X_D
1301      csBundle(1).srcType(0) := SrcType.fp
1302      csBundle(1).srcType(1) := SrcType.imm
1303      csBundle(1).lsrc(0) := FP_TMP_REG_MV.U
1304      csBundle(1).lsrc(1) := 0.U
1305      csBundle(1).ldest := dest
1306      csBundle(1).fuType := FuType.fmisc.U
1307      csBundle(1).rfWen := true.B
1308      csBundle(1).fpWen := false.B
1309      csBundle(1).vecWen := false.B
1310      csBundle(1).fpu.isAddSub := false.B
1311      csBundle(1).fpu.typeTagIn := FPU.D
1312      csBundle(1).fpu.typeTagOut := FPU.D
1313      csBundle(1).fpu.fromInt := false.B
1314      csBundle(1).fpu.wflags := false.B
1315      csBundle(1).fpu.fpWen := false.B
1316      csBundle(1).fpu.div := false.B
1317      csBundle(1).fpu.sqrt := false.B
1318      csBundle(1).fpu.fcvt := false.B
1319    }
1320    is(UopSplitType.VEC_VWW) {
1321      for (i <- 0 until MAX_VLMUL*2) {
1322        when(i.U < lmul){
1323          csBundle(i).srcType(2) := SrcType.DC
1324          csBundle(i).lsrc(0) := src2 + i.U
1325          csBundle(i).lsrc(1) := src2 + i.U
1326          // csBundle(i).lsrc(2) := dest + (2 * i).U
1327          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
1328          csBundle(i).uopIdx :=  i.U
1329        } otherwise {
1330          csBundle(i).srcType(2) := SrcType.DC
1331          csBundle(i).lsrc(0) := VECTOR_TMP_REG_LMUL.U + Cat((i.U-lmul),0.U(1.W)) + 1.U
1332          csBundle(i).lsrc(1) := VECTOR_TMP_REG_LMUL.U + Cat((i.U-lmul),0.U(1.W))
1333          // csBundle(i).lsrc(2) := dest + (2 * i).U
1334          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
1335          csBundle(i).uopIdx := i.U
1336        }
1337        csBundle(numOfUop-1.U).srcType(2) := SrcType.vp
1338        csBundle(numOfUop-1.U).lsrc(0) := src1
1339        csBundle(numOfUop-1.U).lsrc(2) := dest
1340        csBundle(numOfUop-1.U).ldest := dest
1341      }
1342    }
1343    is(UopSplitType.VEC_RGATHER) {
1344      def genCsBundle_VEC_RGATHER(len:Int): Unit ={
1345        for (i <- 0 until len)
1346          for (j <- 0 until len) {
1347            // csBundle(i * len + j).srcType(0) := SrcType.vp // SrcType.imm
1348            // csBundle(i * len + j).srcType(1) := SrcType.vp
1349            // csBundle(i * len + j).srcType(2) := SrcType.vp
1350            csBundle(i * len + j).lsrc(0) := src1 + i.U
1351            csBundle(i * len + j).lsrc(1) := src2 + j.U
1352            val vd_old = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j - 1).U
1353            csBundle(i * len + j).lsrc(2) := vd_old
1354            val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
1355            csBundle(i * len + j).ldest := vd
1356            csBundle(i * len + j).uopIdx := (i * len + j).U
1357          }
1358      }
1359      switch(vlmulReg) {
1360        is("b001".U ){
1361          genCsBundle_VEC_RGATHER(2)
1362        }
1363        is("b010".U ){
1364          genCsBundle_VEC_RGATHER(4)
1365        }
1366        is("b011".U ){
1367          genCsBundle_VEC_RGATHER(8)
1368        }
1369      }
1370    }
is(UopSplitType.VEC_RGATHER_VX) {
  // Uop 0: integer-to-vector move into VECTOR_TMP_REG_LMUL, issued to the i2v function unit.
  // The upper fuOpType bits pick the immediate or register flavour of the move, the lower bits
  // carry vsewReg.
  val moveKind = Mux(src1IsImm, IF2VectorType.permImm2vector(2, 0), IF2VectorType.i2vector(2, 0))
  val moveUop = csBundle(0)
  moveUop.srcType(0) := SrcType.reg
  moveUop.srcType(1) := SrcType.imm
  moveUop.lsrc(1) := 0.U
  moveUop.ldest := VECTOR_TMP_REG_LMUL.U
  moveUop.fuType := FuType.i2v.U
  moveUop.fuOpType := Cat(moveKind, vsewReg)
  moveUop.vecWen := true.B

  /**
    * Fills csBundle(1 .. len * len) with the gather uops that follow the move uop.
    *
    * Uop (i, j) sits at csBundle(i * len + j + 1) but keeps uopIdx i * len + j, i.e. the move uop
    * is not counted. All uops read the moved value from VECTOR_TMP_REG_LMUL as source 0 and
    * `src2 + j` as source 1. The chain for destination `dest + i` runs through the temporaries
    * `VECTOR_TMP_REG_LMUL + 1 ..`, one above the register that holds the moved value.
    *
    * @param len number of registers per operand group
    */
  def genCsBundle_RGATHER_VX(len:Int): Unit = {
    for (i <- 0 until len; j <- 0 until len) {
      val uop = csBundle(i * len + j + 1)
      def groupVd = dest + i.U
      val oldVd = if (j == 0) groupVd else (VECTOR_TMP_REG_LMUL + j).U
      val newVd = if (j == len - 1) groupVd else (VECTOR_TMP_REG_LMUL + j + 1).U
      uop.srcType(0) := SrcType.vp
      uop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
      uop.lsrc(1) := src2 + j.U
      uop.lsrc(2) := oldVd
      uop.ldest := newVd
      uop.uopIdx := (i * len + j).U
    }
  }

  // vlmulReg encodings 0..3 select groups of 1, 2, 4 and 8 registers.
  switch(vlmulReg) {
    is("b000".U) { genCsBundle_RGATHER_VX(1) }
    is("b001".U) { genCsBundle_RGATHER_VX(2) }
    is("b010".U) { genCsBundle_RGATHER_VX(4) }
    is("b011".U) { genCsBundle_RGATHER_VX(8) }
  }
}
is(UopSplitType.VEC_RGATHEREI16) {
  /**
    * Variant used when vsewReg is zero: every (i, j) pair expands to two uops, one per half of
    * the doubled src1 group (`src1 + 2 * i` and `src1 + 2 * i + 1`).
    *
    * The 2 * len uops that target `dest + i` form one chain. At chain position `step` the uop
    * reads the previous temporary (`dest + i` at position 0) as old vd and writes
    * `VECTOR_TMP_REG_LMUL + step` (`dest + i` at the final position).
    *
    * @param len number of registers in the src2/dest groups
    */
  def genCsBundle_VEC_RGATHEREI16_SEW8(len:Int): Unit = {
    for (i <- 0 until len; j <- 0 until len; half <- 0 until 2) {
      val step = j * 2 + half
      val slot = (i * len + j) * 2 + half
      val uop = csBundle(slot)
      def groupVd = dest + i.U
      val oldVd = if (step == 0) groupVd else (VECTOR_TMP_REG_LMUL + step - 1).U
      val newVd = if (step == 2 * len - 1) groupVd else (VECTOR_TMP_REG_LMUL + step).U
      uop.lsrc(0) := src1 + (i * 2 + half).U
      uop.lsrc(1) := src2 + j.U
      uop.lsrc(2) := oldVd
      uop.ldest := newVd
      uop.uopIdx := slot.U
    }
  }

  /**
    * Variant used for every non-zero vsewReg: one uop per (i, j) pair at csBundle(i * len + j),
    * reading `src1 + i` and `src2 + j`, chained per destination register through
    * `VECTOR_TMP_REG_LMUL + j`.
    *
    * NOTE(review): `src1 + i` is used regardless of the element width; confirm this is the
    * intended index register when the index group is smaller than the data group.
    *
    * @param len number of registers per operand group
    */
  def genCsBundle_VEC_RGATHEREI16(len:Int): Unit = {
    for (i <- 0 until len; j <- 0 until len) {
      val slot = i * len + j
      val uop = csBundle(slot)
      def groupVd = dest + i.U
      val oldVd = if (j == 0) groupVd else (VECTOR_TMP_REG_LMUL + j - 1).U
      val newVd = if (j == len - 1) groupVd else (VECTOR_TMP_REG_LMUL + j).U
      uop.lsrc(0) := src1 + i.U
      uop.lsrc(1) := src2 + j.U
      uop.lsrc(2) := oldVd
      uop.ldest := newVd
      uop.uopIdx := slot.U
    }
  }

  // True when all bits of vsewReg are zero; selects the doubled-index variant.
  val isSew8 = !vsewReg.orR

  // Chooses between the two variants for a given group size.
  def genBySew(len: Int): Unit = {
    when(isSew8) {
      genCsBundle_VEC_RGATHEREI16_SEW8(len)
    }.otherwise {
      genCsBundle_VEC_RGATHEREI16(len)
    }
  }

  switch(vlmulReg) {
    is("b000".U) { genBySew(1) }
    is("b001".U) { genBySew(2) }
    is("b010".U) { genBySew(4) }
    // The 8-register group only ever uses the plain variant.
    is("b011".U) { genCsBundle_VEC_RGATHEREI16(8) }
  }
}
is(UopSplitType.VEC_COMPRESS) {
  /**
    * Fills csBundle with the compress uops for a group of `len` registers.
    *
    * Round i handles `src2 + i` and emits i + 2 uops (i + 1 in the last round). Rounds are packed
    * back to back, so round i starts at csBundle(i * (i + 3) / 2). Within a round:
    *  - uop j == i takes `dest + i` as old vd, every other uop takes `VECTOR_TMP_REG_LMUL + j + 1`;
    *  - in the last round uop j writes `dest + j`; before that it writes
    *    `VECTOR_TMP_REG_LMUL + j + 1`, except the extra uop j == i + 1, which writes
    *    `VECTOR_TMP_REG_LMUL` and leaves srcType(1)/srcType(2) as DontCare.
    *
    * @param len number of registers in the group
    */
  def genCsBundle_VEC_COMPRESS(len:Int): Unit = {
    for (i <- 0 until len) {
      val isLastRound = i == len - 1
      val roundBase = i * (i + 3) / 2
      val uopsInRound = if (isLastRound) i + 1 else i + 2
      for (j <- 0 until uopsInRound) {
        val slot = roundBase + j
        val uop = csBundle(slot)
        val isExtraUop = j == i + 1
        val oldVd = if (i == j) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j + 1).U
        val newVd =
          if (isLastRound) (dest + j.U)
          else if (isExtraUop) VECTOR_TMP_REG_LMUL.U
          else (VECTOR_TMP_REG_LMUL + j + 1).U
        val src23Type = if (isExtraUop) DontCare else SrcType.vp
        uop.srcType(0) := SrcType.vp
        uop.srcType(1) := src23Type
        uop.srcType(2) := src23Type
        uop.lsrc(0) := src1
        uop.lsrc(1) := src2 + i.U
        uop.lsrc(2) := oldVd
        uop.ldest := newVd
        uop.uopIdx := slot.U
      }
    }
  }

  // vlmulReg encodings 1, 2 and 3 select groups of 2, 4 and 8 registers.
  switch(vlmulReg) {
    is("b001".U) { genCsBundle_VEC_COMPRESS(2) }
    is("b010".U) { genCsBundle_VEC_COMPRESS(4) }
    is("b011".U) { genCsBundle_VEC_COMPRESS(8) }
  }
}
is(UopSplitType.VEC_MVNR) {
  // One uop per register for all MAX_VLMUL slots: uop k works on the k-th register of every
  // operand group and uses `dest + k` both as old vd and as destination.
  (0 until MAX_VLMUL).foreach { k =>
    val uop = csBundle(k)
    val vdReg = dest + k.U
    uop.lsrc(0) := src1 + k.U
    uop.lsrc(1) := src2 + k.U
    uop.lsrc(2) := vdReg
    uop.ldest := vdReg
    uop.uopIdx := k.U
  }
}
is(UopSplitType.VEC_US_LDST) {
  // Uop 0 (FMV.D.X): integer-to-FP move through the i2f function unit into the temporary
  // FP register FP_TMP_REG_MV. It writes the FP register file only.
  val moveUop = csBundle(0)
  moveUop.srcType(0) := SrcType.reg
  moveUop.srcType(1) := SrcType.imm
  moveUop.lsrc(1) := 0.U
  moveUop.ldest := FP_TMP_REG_MV.U
  moveUop.fuType := FuType.i2f.U
  moveUop.rfWen := false.B
  moveUop.fpWen := true.B
  moveUop.vecWen := false.B

  val moveFpu = moveUop.fpu
  moveFpu.isAddSub := false.B
  moveFpu.typeTagIn := FPU.D
  moveFpu.typeTagOut := FPU.D
  moveFpu.fromInt := true.B
  moveFpu.wflags := false.B
  moveFpu.fpWen := true.B
  moveFpu.div := false.B
  moveFpu.sqrt := false.B
  moveFpu.fcvt := false.B

  // Uops 1 .. MAX_VLMUL: one per register of the destination group. Each reads the moved value
  // from FP_TMP_REG_MV and uses `dest + k` as both old vd and destination. uopIdx does not count
  // the move uop.
  (0 until MAX_VLMUL).foreach { k =>
    val uop = csBundle(k + 1)
    val vdReg = dest + k.U
    uop.srcType(0) := SrcType.fp
    uop.lsrc(0) := FP_TMP_REG_MV.U
    uop.lsrc(2) := vdReg
    uop.ldest := vdReg
    uop.uopIdx := k.U
  }
}
is(UopSplitType.VEC_S_LDST) {
  /**
    * Configures csBundle(slot) as an FMV.D.X style integer-to-FP move on the i2f function unit
    * that writes only the FP register file.
    *
    * @param slot   index of the uop inside csBundle
    * @param target logical destination register of the move
    */
  def genInt2FpMove(slot: Int, target: UInt): Unit = {
    val uop = csBundle(slot)
    uop.srcType(0) := SrcType.reg
    uop.srcType(1) := SrcType.imm
    uop.lsrc(1) := 0.U
    uop.ldest := target
    uop.fuType := FuType.i2f.U
    uop.rfWen := false.B
    uop.fpWen := true.B
    uop.vecWen := false.B

    val fpuCtrl = uop.fpu
    fpuCtrl.isAddSub := false.B
    fpuCtrl.typeTagIn := FPU.D
    fpuCtrl.typeTagOut := FPU.D
    fpuCtrl.fromInt := true.B
    fpuCtrl.wflags := false.B
    fpuCtrl.fpWen := true.B
    fpuCtrl.div := false.B
    fpuCtrl.sqrt := false.B
    fpuCtrl.fcvt := false.B
  }

  // Uop 0 moves the first integer operand into FP_TMP_REG_MV.
  genInt2FpMove(0, FP_TMP_REG_MV.U)

  // Uop 1 moves the instruction's second source register into the register numbered
  // VECTOR_TMP_REG_LMUL (written through the FP write port, like uop 0).
  genInt2FpMove(1, VECTOR_TMP_REG_LMUL.U)
  csBundle(1).lsrc(0) := latchedInst.lsrc(1)

  // Uops 2 .. MAX_VLMUL + 1: one per register of the destination group. Each reads both moved
  // values as FP sources and uses `dest + k` as old vd and destination. uopIdx does not count
  // the two move uops.
  (0 until MAX_VLMUL).foreach { k =>
    val uop = csBundle(k + 2)
    val vdReg = dest + k.U
    uop.srcType(0) := SrcType.fp
    uop.srcType(1) := SrcType.fp
    uop.lsrc(0) := FP_TMP_REG_MV.U
    uop.lsrc(1) := VECTOR_TMP_REG_LMUL.U
    uop.lsrc(2) := vdReg
    uop.ldest := vdReg
    uop.uopIdx := k.U
  }
}
is(UopSplitType.VEC_I_LDST) {
  // Maps encodings 1, 2, 3 to themselves as a 2-bit value; every other encoding becomes 0.
  def clampToGroupSel(enc: UInt): UInt = MuxLookup(enc, 0.U(2.W), Array(
    "b001".U -> 1.U,
    "b010".U -> 2.U,
    "b011".U -> 3.U
  ))

  // vemul = veew + vlmul - vsew, with the subtraction written as "+ 1 + ~vsew".
  val vsew = Cat(0.U(1.W), vsewReg)
  val veew = Cat(0.U(1.W), width)
  val vemul: UInt = veew + 1.U + vlmulReg.asUInt + ~vsew
  val lmulSel = clampToGroupSel(vlmulReg)
  val emulSel = clampToGroupSel(vemul)
  // Lookup key shared by all per-uop offset tables.
  val tableKey = Cat(emulSel, lmulSel, nf)

  // Uop 0 (FMV.D.X): integer-to-FP move through the i2f function unit into FP_TMP_REG_MV.
  val moveUop = csBundle(0)
  moveUop.srcType(0) := SrcType.reg
  moveUop.srcType(1) := SrcType.imm
  moveUop.lsrc(1) := 0.U
  moveUop.ldest := FP_TMP_REG_MV.U
  moveUop.fuType := FuType.i2f.U
  moveUop.rfWen := false.B
  moveUop.fpWen := true.B
  moveUop.vecWen := false.B

  val moveFpu = moveUop.fpu
  moveFpu.isAddSub := false.B
  moveFpu.typeTagIn := FPU.D
  moveFpu.typeTagOut := FPU.D
  moveFpu.fromInt := true.B
  moveFpu.wflags := false.B
  moveFpu.fpWen := true.B
  moveFpu.div := false.B
  moveFpu.sqrt := false.B
  moveFpu.fcvt := false.B

  // Uops 1 .. MAX_INDEXED_LS_UOPNUM: register offsets come from the per-uop lookup tables.
  (0 until MAX_INDEXED_LS_UOPNUM).foreach { k =>
    val table = indexedLSRegOffset(k)
    table.src := tableKey

    val uop = csBundle(k + 1)
    // Register of the src2 group / dest group selected by the table's offsets.
    val indexReg = Mux1H(UIntToOH(table.outOffsetVs2, MAX_VLMUL), (0 until MAX_VLMUL).map(j => src2 + j.U))
    val vdReg = Mux1H(UIntToOH(table.outOffsetVd, MAX_VLMUL), (0 until MAX_VLMUL).map(j => dest + j.U))

    uop.srcType(0) := SrcType.fp
    uop.lsrc(0) := FP_TMP_REG_MV.U
    uop.lsrc(1) := indexReg
    /**
      * VLSU merges all uops that write the same logical vd and writes back once for the whole
      * group. If each of those uops also read that vd as old vd (lsrc(2)), they would depend on
      * one another and an indexed access with emul > lmul would deadlock.
      *
      * With N = emul / lmul uops per vd, only the first uop reads the real old vd; the other
      * N - 1 uops read the temporary vector register instead.
      */
    uop.lsrc(2) := Mux(table.outIsFirstUopInVd, vdReg, VECTOR_TMP_REG_LMUL.U)
    uop.ldest := vdReg
    uop.uopIdx := k.U
  }
}
1664  }
1665
// Index of the first rename port that is not ready, or RenameWidth when none is flagged.
val readyCounter = {
  val notReady = outReadys.map(!_)
  val portIndices = (0 until RenameWidth).map(_.U)
  PriorityMuxDefault(notReady.zip(portIndices), RenameWidth.U)
}

// True when every remaining uop of the current complex instruction can leave this cycle.
val thisAllOut = readyCounter >= uopRes
1671
// Next-state logic. Branches that assign nothing keep whatever stateNext / uopResNext were
// given before this switch.
switch(state) {
  is(s_idle) {
    // A new complex instruction arrives: start issuing its uops.
    when (inValid) {
      stateNext := s_active
      uopResNext := inUopInfo.numOfUop
    }
  }
  is(s_active) {
    when (!thisAllOut) {
      // Some uops are left over: stay active with the remaining count.
      stateNext := s_active
      uopResNext := uopRes - readyCounter
    }.elsewhen (inValid) {
      // Current instruction drains this cycle and the next one is already waiting.
      stateNext := s_active
      uopResNext := inUopInfo.numOfUop
    }.otherwise {
      // Current instruction drains and nothing follows.
      stateNext := s_idle
      uopResNext := 0.U
    }
  }
}
1694
// Register update: a redirect overrides the computed next state and drops all pending uops.
state := stateNext
uopRes := uopResNext
when (io.redirect) {
  state := s_idle
  uopRes := 0.U
}

// Number of uops offered this cycle: the smaller of the remaining uops and readyCounter.
val complexNum = Mux(readyCounter < uopRes, readyCounter, uopRes)

(0 until RenameWidth).foreach { port =>
  // csBundle entry shown on this port; selectors at or past maxUopSize fall back to the last entry.
  val uopSel = port.U + numOfUop - uopRes
  outValids(port) := port.U < complexNum
  outDecodedInsts(port) := Mux(uopSel < maxUopSize.U, csBundle(uopSel), csBundle(maxUopSize - 1))
}

// The uop count is only reported while active.
outComplexNum := Mux(state === s_active, complexNum, 0.U)
// A new instruction is accepted when idle, or when the current one finishes this cycle.
inReady := (state === s_idle) || ((state === s_active) && thisAllOut)
1707
1708//  val validSimple = Wire(Vec(DecodeWidth, Bool()))
1709//  validSimple.zip(io.validFromIBuf.zip(io.isComplex)).map{ case (dst, (src1, src2)) => dst := src1 && !src2 }
1710//  val notInf = Wire(Vec(DecodeWidth, Bool()))
1711//  notInf.drop(1).zip(io.validFromIBuf.drop(1).zip(validSimple.drop(1))).map{ case (dst, (src1, src2)) => dst := !src1 || src2 }
1712//  notInf(0) := !io.validFromIBuf(0) || validSimple(0) || (io.isComplex(0) && io.in0pc === io.simple.decodedInst.pc)
1713//  val notInfVec = Wire(Vec(DecodeWidth, Bool()))
1714//  notInfVec.zipWithIndex.map{ case (dst, i) => dst := Cat(notInf.take(i + 1)).andR}
1715//
1716//  complexNum := Mux(io.validFromIBuf(0) && readyCounter.orR ,
1717//    Mux(uopRes0 > readyCounter, readyCounter, uopRes0),
1718//    0.U)
1719//  validToRename.zipWithIndex.foreach{
1720//    case(dst, i) =>
1721//      val validFix = Mux(complexNum.orR, validSimple((i+1).U - complexNum), validSimple(i))
1722//      dst := MuxCase(false.B, Seq(
1723//        (io.validFromIBuf(0) && readyCounter.orR && uopRes0 > readyCounter) -> Mux(readyCounter > i.U, true.B, false.B),
1724//        (io.validFromIBuf(0) && readyCounter.orR && !(uopRes0 > readyCounter)) -> Mux(complexNum > i.U, true.B, validFix && notInfVec(i.U - complexNum) && io.readyFromRename(i)),
1725//      ).toSeq)
1726//  }
1727//
1728//  readyToIBuf.zipWithIndex.foreach {
1729//    case (dst, i) =>
1730//      val readyToIBuf0 = Mux(io.isComplex(0), io.in0pc === io.simple.decodedInst.pc, true.B)
1731//      dst := MuxCase(true.B, Seq(
1732//        (io.validFromIBuf(0) && uopRes0 > readyCounter || !readyCounter.orR) -> false.B,
1733//        (io.validFromIBuf(0) && !(uopRes0 > readyCounter) && readyCounter.orR) -> (if (i==0) readyToIBuf0 else Mux(RenameWidth.U - complexNum >= i.U, notInfVec(i) && validSimple(i) && io.readyFromRename(i), false.B))
1734//      ).toSeq)
1735//  }
1736//
1737//  io.deq.decodedInsts := decodedInsts
1738//  io.deq.complexNum := complexNum
1739//  io.deq.validToRename := validToRename
1740//  io.deq.readyToIBuf := readyToIBuf
1741}
1742