xref: /XiangShan/src/main/scala/xiangshan/backend/decode/DecodeUnitComp.scala (revision e4d4d30585412eb8ac83b5c75599a348356342a2)
1/***************************************************************************************
2  * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
3  * Copyright (c) 2020-2021 Peng Cheng Laboratory
4  *
5  * XiangShan is licensed under Mulan PSL v2.
6  * You can use this software according to the terms and conditions of the Mulan PSL v2.
7  * You may obtain a copy of Mulan PSL v2 at:
8  *          http://license.coscl.org.cn/MulanPSL2
9  *
10  * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11  * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12  * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13  *
14  * See the Mulan PSL v2 for more details.
15  ***************************************************************************************/
16
17package xiangshan.backend.decode
18
19import org.chipsalliance.cde.config.Parameters
20import chisel3._
21import chisel3.util._
22import freechips.rocketchip.rocket.Instructions
23import freechips.rocketchip.util.uintToBitPat
24import utils._
25import utility._
26import xiangshan.ExceptionNO.illegalInstr
27import xiangshan._
28import xiangshan.backend.fu.fpu.FPU
29import xiangshan.backend.fu.FuType
30import freechips.rocketchip.rocket.Instructions._
31import xiangshan.backend.Bundles.{DecodedInst, StaticInst}
32import xiangshan.backend.decode.isa.bitfield.XSInstBitFields
33import xiangshan.backend.fu.vector.Bundles.{VSew, VType, VLmul}
34import yunsuan.VpermType
35import scala.collection.Seq
36import chisel3.util.experimental.decode.{QMCMinimizer, TruthTable, decoder}
37
/**
  * Combinational lookup table for a single uop slot of a vector indexed load/store.
  *
  * One instance is elaborated per uop position (`uopIdx`). For that fixed position the
  * register offsets are computed in Scala at elaboration time for every
  * (emul, lmul, nf) combination and minimised into a decoder, so at run time the module
  * only decodes `src`.
  *
  * Input encoding of `src` (7 bits): `emul` in bits [6:5], `lmul` in bits [4:3],
  * `nf` in bits [2:0]. `emul` and `lmul` are used as shift amounts (`1 << emul`),
  * i.e. they are treated as log2 exponents in the range 0..3.
  *
  * Decoder output (7 bits): bit 6 = isFirstUopInVd, bits [5:3] = offsetVs2,
  * bits [2:0] = offsetVd.
  *
  * @param uopIdx position of the uop this table describes
  */
class indexedLSUopTable(uopIdx:Int) extends Module {
  val src = IO(Input(UInt(7.W)))
  val outOffsetVs2 = IO(Output(UInt(3.W)))
  val outOffsetVd = IO(Output(UInt(3.W)))
  val outIsFirstUopInVd = IO(Output(Bool()))

  /**
    * Computes the register offsets of uop number `uopIdx` of an indexed load/store.
    *
    * @param lmul    log2 exponent used for the data register group
    * @param emul    log2 exponent used for the index register group
    * @param nfields number of fields (callers pass `nf + 1`)
    * @param uopIdx  uop position to look up
    * @return `(offsetVs2, offsetVd, isFirstUopInVd)`; `(0, 0, 1)` when the guard fails or
    *         `uopIdx` lies outside the uops generated for this configuration
    */
  def genCsBundle_VEC_INDEXED_LDST(lmul:Int, emul:Int, nfields:Int, uopIdx:Int): (Int, Int, Int) = {
    // Each field is split into 1 << max(emul, lmul) uops.
    val uopsPerField = if (lmul < emul) 1 << emul else 1 << lmul
    // NOTE(review): `lmul` is a log2 exponent everywhere else in this function
    // (`1 << lmul`), yet this guard multiplies it by `nfields` directly. Presumably
    // `(1 << lmul) * nfields <= 8` was intended; the original behaviour is kept here.
    // Confirm before changing, since it alters the generated table.
    val isCovered = lmul * nfields <= 8 && uopIdx >= 0 && uopIdx < nfields * uopsPerField
    if (!isCovered) {
      (0, 0, 1)
    } else {
      val field = uopIdx / uopsPerField   // which field (k) this uop belongs to
      val idxInField = uopIdx % uopsPerField
      if (lmul < emul) {
        // lmul < emul: uop count depends on emul * nf; several index registers feed one vd.
        val ratio = 1 << (emul - lmul)
        (idxInField, idxInField / ratio + field * (1 << lmul), if (idxInField % ratio == 0) 1 else 0)
      } else {
        // lmul >= emul: uop count depends on lmul * nf; every uop starts a new vd.
        val ratio = 1 << (lmul - emul)
        (idxInField / ratio, idxInField + field * (1 << lmul), 1)
      }
    }
  }

  // Indexed load/store truth-table rows for this uop position, one per (emul, lmul, nf):
  // (emul, lmul, nf, isFirstUopInVd, offsetVs2, offsetVd)
  val combVemulNf : Seq[(Int, Int, Int, Int, Int, Int)] =
    for {
      emul <- 0 until 4
      lmul <- 0 until 4
      nf <- 0 until 8
    } yield {
      val (offsetVs2, offsetVd, isFirstUopInVd) = genCsBundle_VEC_INDEXED_LDST(lmul, emul, nf + 1, uopIdx)
      (emul, lmul, nf, isFirstUopInVd, offsetVs2, offsetVd)
    }

  // Pack each row into input/output bit patterns and let the minimiser build the logic.
  val out = decoder(QMCMinimizer, src, TruthTable(combVemulNf.map {
    case (emul, lmul, nf, isFirstUopInVd, offsetVs2, offsetVd) =>
      val inputBits = emul << 5 | lmul << 3 | nf
      val outputBits = isFirstUopInVd << 6 | offsetVs2 << 3 | offsetVd
      (BitPat(inputBits.U(7.W)), BitPat(outputBits.U(7.W)))
  }, BitPat.N(7)))

  outOffsetVs2 := out(5, 3)
  outOffsetVd := out(2, 0)
  outIsFirstUopInVd := out(6).asBool
}
86
/**
  * Numeric constants shared by the vector uop-splitting logic.
  */
trait VectorConstants {
  // Upper bound used for the per-register loops when splitting a vector instruction.
  val MAX_VLMUL: Int = 8

  // Logical register index used as the temporary destination/source of int-to-fp moves.
  val FP_TMP_REG_MV: Int = 32

  // First logical index of the temporary vector registers: 33..47, i.e. 15 registers.
  val VECTOR_TMP_REG_LMUL: Int = 33

  // Number of indexed load/store uop tables instantiated by the decoder.
  val MAX_INDEXED_LS_UOPNUM: Int = 64
}
93
/**
  * IO bundle of [[DecodeUnitComp]], the decoder stage that expands one complex
  * instruction into several uops.
  */
class DecodeUnitCompIO(implicit p: Parameters) extends XSBundle {
  // Redirect indication (single bit).
  val redirect = Input(Bool())
  // Custom CSR control inputs.
  val csrCtrl = Input(new CustomCSRCtrlIO)

  // Complex instruction handed over by the decode stage when the first instruction of
  // the decode vector is complex: the simple-decoder result plus its uop-split info.
  val in = Flipped(DecoupledIO(new Bundle {
    val simpleDecodedInst = new DecodedInst
    val uopInfo = new UopInfo
  }))

  // Expanded uops, one decoupled port per rename slot.
  val out = new Bundle {
    val complexDecodedInsts = Vec(RenameWidth, DecoupledIO(new DecodedInst))
  }

  // 3-bit count output; presumably the number of complex uops presented on `out`
  // (TODO confirm against the driving logic in DecodeUnitComp).
  val complexNum = Output(UInt(3.W))
}
107
108/**
109  * @author zly
110  */
111class DecodeUnitComp()(implicit p : Parameters) extends XSModule with DecodeUnitConstants with VectorConstants {
112  val io = IO(new DecodeUnitCompIO)
113
114  // alias
115  private val inReady = io.in.ready
116  private val inValid = io.in.valid
117  private val inDecodedInst = WireInit(io.in.bits.simpleDecodedInst)
118  private val inInstFields = io.in.bits.simpleDecodedInst.instr.asTypeOf(new XSInstBitFields)
119  private val inUopInfo = io.in.bits.uopInfo
120  private val outValids = io.out.complexDecodedInsts.map(_.valid)
121  private val outReadys = io.out.complexDecodedInsts.map(_.ready)
122  private val outDecodedInsts = io.out.complexDecodedInsts.map(_.bits)
123  private val outComplexNum = io.complexNum
124
125  val maxUopSize = MaxUopSize
126  when (io.in.fire && io.in.bits.simpleDecodedInst.isVset) {
127    when(inInstFields.RD === 0.U && inInstFields.RS1 === 0.U) {
128      inDecodedInst.fuOpType := VSETOpType.keepVl(io.in.bits.simpleDecodedInst.fuOpType)
129    }.elsewhen(inInstFields.RS1 === 0.U) {
130      inDecodedInst.fuOpType := VSETOpType.setVlmax(io.in.bits.simpleDecodedInst.fuOpType)
131    }
132  }
133
134  val latchedInst = RegEnable(inDecodedInst, inValid && inReady)
135  val latchedUopInfo = RegEnable(inUopInfo, inValid && inReady)
136  //input bits
137  private val instFields: XSInstBitFields = latchedInst.instr.asTypeOf(new XSInstBitFields)
138
139  val src1 = Cat(0.U(1.W), instFields.RS1)
140  val src2 = Cat(0.U(1.W), instFields.RS2)
141  val dest = Cat(0.U(1.W), instFields.RD)
142
143  val nf    = instFields.NF
144  val width = instFields.WIDTH(1, 0)
145
146  //output of DecodeUnit
147  val numOfUop = Wire(UInt(log2Up(maxUopSize).W))
148  val numOfWB = Wire(UInt(log2Up(maxUopSize).W))
149  val lmul = Wire(UInt(4.W))
150  val isVsetSimple = Wire(Bool())
151
152  val indexedLSRegOffset = Seq.tabulate(MAX_INDEXED_LS_UOPNUM)(i => Module(new indexedLSUopTable(i)))
153  indexedLSRegOffset.map(_.src := 0.U)
154
155  //pre decode
156  lmul := latchedUopInfo.lmul
157  isVsetSimple := latchedInst.isVset
158  val vlmulReg = latchedInst.vpu.vlmul
159  val vsewReg = latchedInst.vpu.vsew
160
161  //Type of uop Div
162  val typeOfSplit = latchedInst.uopSplitType
163  val src1Type = latchedInst.srcType(0)
164  val src1IsImm = src1Type === SrcType.imm
165
166  numOfUop := latchedUopInfo.numOfUop
167  numOfWB := latchedUopInfo.numOfWB
168
169  //uops dispatch
170  val s_idle :: s_active :: Nil = Enum(2)
171  val state = RegInit(s_idle)
172  val stateNext = WireDefault(state)
173  val numDecodedUop = RegInit(0.U(log2Up(maxUopSize).W))
174  val uopRes = RegInit(0.U(log2Up(maxUopSize).W))
175  val uopResNext = WireInit(uopRes)
176
177  //uop div up to maxUopSize
178  val csBundle = Wire(Vec(maxUopSize, new DecodedInst))
179  csBundle.foreach { case dst =>
180    dst := latchedInst
181    dst.numUops := latchedUopInfo.numOfUop
182    dst.numWB := latchedUopInfo.numOfWB
183    dst.firstUop := false.B
184    dst.lastUop := false.B
185  }
186
187  csBundle(0).firstUop := true.B
188  csBundle(numOfUop - 1.U).lastUop := true.B
189
190  switch(typeOfSplit) {
191    is(UopSplitType.VSET) {
192      // In simple decoder, rfWen and vecWen are not set
193      when(isVsetSimple) {
194        // Default
195        // uop0 set rd, never flushPipe
196        csBundle(0).fuType := FuType.vsetiwi.U
197        csBundle(0).flushPipe := false.B
198        csBundle(0).rfWen := true.B
199        // uop1 set vl, vsetvl will flushPipe
200        csBundle(1).ldest := VCONFIG_IDX.U
201        csBundle(1).vecWen := true.B
202        when(VSETOpType.isVsetvli(latchedInst.fuOpType) && dest === 0.U && src1 === 0.U) {
203          csBundle(1).fuType := FuType.vsetfwf.U
204          csBundle(1).srcType(0) := SrcType.vp
205          csBundle(1).lsrc(0) := VCONFIG_IDX.U
206        }.elsewhen(VSETOpType.isVsetvl(latchedInst.fuOpType) && dest === 0.U && src1 === 0.U) {
207          // uop0: mv vtype gpr to vector region
208          csBundle(0).srcType(0) := SrcType.xp
209          csBundle(0).srcType(1) := SrcType.no
210          csBundle(0).lsrc(1) := 0.U
211          csBundle(0).ldest := FP_TMP_REG_MV.U
212          csBundle(0).fuType := FuType.i2f.U
213          csBundle(0).fpWen := true.B
214          csBundle(0).fpu.isAddSub := false.B
215          csBundle(0).fpu.typeTagIn := FPU.D
216          csBundle(0).fpu.typeTagOut := FPU.D
217          csBundle(0).fpu.fromInt := true.B
218          csBundle(0).fpu.wflags := false.B
219          csBundle(0).fpu.fpWen := true.B
220          csBundle(0).fpu.div := false.B
221          csBundle(0).fpu.sqrt := false.B
222          csBundle(0).fpu.fcvt := false.B
223          csBundle(0).flushPipe := false.B
224          // uop1: uvsetvcfg_vv
225          csBundle(1).fuType := FuType.vsetfwf.U
226          // vl
227          csBundle(1).srcType(0) := SrcType.vp
228          csBundle(1).lsrc(0) := VCONFIG_IDX.U
229          // vtype
230          csBundle(1).srcType(1) := SrcType.fp
231          csBundle(1).lsrc(1) := FP_TMP_REG_MV.U
232          csBundle(1).vecWen := true.B
233          csBundle(1).ldest := VCONFIG_IDX.U
234        }
235      }
236    }
237    is(UopSplitType.VEC_VVV) {
238      for (i <- 0 until MAX_VLMUL) {
239        csBundle(i).lsrc(0) := src1 + i.U
240        csBundle(i).lsrc(1) := src2 + i.U
241        csBundle(i).lsrc(2) := dest + i.U
242        csBundle(i).ldest := dest + i.U
243        csBundle(i).uopIdx := i.U
244      }
245    }
246    is(UopSplitType.VEC_VFV) {
247      for (i <- 0 until MAX_VLMUL) {
248        csBundle(i).lsrc(1) := src2 + i.U
249        csBundle(i).lsrc(2) := dest + i.U
250        csBundle(i).ldest := dest + i.U
251        csBundle(i).uopIdx := i.U
252      }
253    }
254    is(UopSplitType.VEC_EXT2) {
255      for (i <- 0 until MAX_VLMUL / 2) {
256        csBundle(2 * i).lsrc(1) := src2 + i.U
257        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
258        csBundle(2 * i).ldest := dest + (2 * i).U
259        csBundle(2 * i).uopIdx := (2 * i).U
260        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
261        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
262        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
263        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
264      }
265    }
266    is(UopSplitType.VEC_EXT4) {
267      for (i <- 0 until MAX_VLMUL / 4) {
268        csBundle(4 * i).lsrc(1) := src2 + i.U
269        csBundle(4 * i).lsrc(2) := dest + (4 * i).U
270        csBundle(4 * i).ldest := dest + (4 * i).U
271        csBundle(4 * i).uopIdx := (4 * i).U
272        csBundle(4 * i + 1).lsrc(1) := src2 + i.U
273        csBundle(4 * i + 1).lsrc(2) := dest + (4 * i + 1).U
274        csBundle(4 * i + 1).ldest := dest + (4 * i + 1).U
275        csBundle(4 * i + 1).uopIdx := (4 * i + 1).U
276        csBundle(4 * i + 2).lsrc(1) := src2 + i.U
277        csBundle(4 * i + 2).lsrc(2) := dest + (4 * i + 2).U
278        csBundle(4 * i + 2).ldest := dest + (4 * i + 2).U
279        csBundle(4 * i + 2).uopIdx := (4 * i + 2).U
280        csBundle(4 * i + 3).lsrc(1) := src2 + i.U
281        csBundle(4 * i + 3).lsrc(2) := dest + (4 * i + 3).U
282        csBundle(4 * i + 3).ldest := dest + (4 * i + 3).U
283        csBundle(4 * i + 3).uopIdx := (4 * i + 3).U
284      }
285    }
286    is(UopSplitType.VEC_EXT8) {
287      for (i <- 0 until MAX_VLMUL) {
288        csBundle(i).lsrc(1) := src2
289        csBundle(i).lsrc(2) := dest + i.U
290        csBundle(i).ldest := dest + i.U
291        csBundle(i).uopIdx := i.U
292      }
293    }
294    is(UopSplitType.VEC_0XV) {
295      /*
296      FMV.D.X
297       */
298      csBundle(0).srcType(0) := SrcType.reg
299      csBundle(0).srcType(1) := SrcType.imm
300      csBundle(0).lsrc(1) := 0.U
301      csBundle(0).ldest := FP_TMP_REG_MV.U
302      csBundle(0).fuType := FuType.i2f.U
303      csBundle(0).rfWen := false.B
304      csBundle(0).fpWen := true.B
305      csBundle(0).vecWen := false.B
306      csBundle(0).fpu.isAddSub := false.B
307      csBundle(0).fpu.typeTagIn := FPU.D
308      csBundle(0).fpu.typeTagOut := FPU.D
309      csBundle(0).fpu.fromInt := true.B
310      csBundle(0).fpu.wflags := false.B
311      csBundle(0).fpu.fpWen := true.B
312      csBundle(0).fpu.div := false.B
313      csBundle(0).fpu.sqrt := false.B
314      csBundle(0).fpu.fcvt := false.B
315      /*
316      vfmv.s.f
317       */
318      csBundle(1).srcType(0) := SrcType.fp
319      csBundle(1).srcType(1) := SrcType.vp
320      csBundle(1).srcType(2) := SrcType.vp
321      csBundle(1).lsrc(0) := FP_TMP_REG_MV.U
322      csBundle(1).lsrc(1) := 0.U
323      csBundle(1).lsrc(2) := dest
324      csBundle(1).ldest := dest
325      csBundle(1).fuType := FuType.vppu.U
326      csBundle(1).fuOpType := VpermType.dummy
327      csBundle(1).rfWen := false.B
328      csBundle(1).fpWen := false.B
329      csBundle(1).vecWen := true.B
330    }
331    is(UopSplitType.VEC_VXV) {
332      /*
333      i to vector move
334       */
335      csBundle(0).srcType(0) := SrcType.reg
336      csBundle(0).srcType(1) := SrcType.imm
337      csBundle(0).lsrc(1) := 0.U
338      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
339      csBundle(0).fuType := FuType.i2v.U
340      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg)
341      csBundle(0).vecWen := true.B
342      /*
343      LMUL
344       */
345      for (i <- 0 until MAX_VLMUL) {
346        csBundle(i + 1).srcType(0) := SrcType.vp
347        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
348        csBundle(i + 1).lsrc(1) := src2 + i.U
349        csBundle(i + 1).lsrc(2) := dest + i.U
350        csBundle(i + 1).ldest := dest + i.U
351        csBundle(i + 1).uopIdx := i.U
352      }
353    }
354    is(UopSplitType.VEC_VVW) {
355      for (i <- 0 until MAX_VLMUL / 2) {
356        csBundle(2 * i).lsrc(0) := src1 + i.U
357        csBundle(2 * i).lsrc(1) := src2 + i.U
358        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
359        csBundle(2 * i).ldest := dest + (2 * i).U
360        csBundle(2 * i).uopIdx := (2 * i).U
361        csBundle(2 * i + 1).lsrc(0) := src1 + i.U
362        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
363        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
364        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
365        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
366      }
367    }
368    is(UopSplitType.VEC_VFW) {
369      for (i <- 0 until MAX_VLMUL / 2) {
370        csBundle(2 * i).lsrc(0) := src1
371        csBundle(2 * i).lsrc(1) := src2 + i.U
372        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
373        csBundle(2 * i).ldest := dest + (2 * i).U
374        csBundle(2 * i).uopIdx := (2 * i).U
375        csBundle(2 * i + 1).lsrc(0) := src1
376        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
377        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
378        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
379        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
380      }
381    }
382    is(UopSplitType.VEC_WVW) {
383      for (i <- 0 until MAX_VLMUL / 2) {
384        csBundle(2 * i).lsrc(0) := src1 + i.U
385        csBundle(2 * i).lsrc(1) := src2 + (2 * i).U
386        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
387        csBundle(2 * i).ldest := dest + (2 * i).U
388        csBundle(2 * i).uopIdx := (2 * i).U
389        csBundle(2 * i + 1).lsrc(0) := src1 + i.U
390        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i + 1).U
391        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
392        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
393        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
394      }
395    }
396    is(UopSplitType.VEC_VXW) {
397      /*
398      i to vector move
399       */
400      csBundle(0).srcType(0) := SrcType.reg
401      csBundle(0).srcType(1) := SrcType.imm
402      csBundle(0).lsrc(1) := 0.U
403      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
404      csBundle(0).fuType := FuType.i2v.U
405      csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg)
406      csBundle(0).vecWen := true.B
407
408      for (i <- 0 until MAX_VLMUL / 2) {
409        csBundle(2 * i + 1).srcType(0) := SrcType.vp
410        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
411        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
412        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
413        csBundle(2 * i + 1).ldest := dest + (2 * i).U
414        csBundle(2 * i + 1).uopIdx := (2 * i).U
415        csBundle(2 * i + 2).srcType(0) := SrcType.vp
416        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
417        csBundle(2 * i + 2).lsrc(1) := src2 + i.U
418        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
419        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
420        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
421      }
422    }
423    is(UopSplitType.VEC_WXW) {
424      /*
425      i to vector move
426       */
427      csBundle(0).srcType(0) := SrcType.reg
428      csBundle(0).srcType(1) := SrcType.imm
429      csBundle(0).lsrc(1) := 0.U
430      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
431      csBundle(0).fuType := FuType.i2v.U
432      csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg)
433      csBundle(0).vecWen := true.B
434
435      for (i <- 0 until MAX_VLMUL / 2) {
436        csBundle(2 * i + 1).srcType(0) := SrcType.vp
437        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
438        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U
439        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
440        csBundle(2 * i + 1).ldest := dest + (2 * i).U
441        csBundle(2 * i + 1).uopIdx := (2 * i).U
442        csBundle(2 * i + 2).srcType(0) := SrcType.vp
443        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
444        csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U
445        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
446        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
447        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
448      }
449    }
450    is(UopSplitType.VEC_WVV) {
451      for (i <- 0 until MAX_VLMUL / 2) {
452
453        csBundle(2 * i).lsrc(0) := src1 + i.U
454        csBundle(2 * i).lsrc(1) := src2 + (2 * i).U
455        csBundle(2 * i).lsrc(2) := dest + i.U
456        csBundle(2 * i).ldest := dest + i.U
457        csBundle(2 * i).uopIdx := (2 * i).U
458        csBundle(2 * i + 1).lsrc(0) := src1 + i.U
459        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i + 1).U
460        csBundle(2 * i + 1).lsrc(2) := dest + i.U
461        csBundle(2 * i + 1).ldest := dest + i.U
462        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
463      }
464    }
465    is(UopSplitType.VEC_WFW) {
466      for (i <- 0 until MAX_VLMUL / 2) {
467        csBundle(2 * i).lsrc(0) := src1
468        csBundle(2 * i).lsrc(1) := src2 + (2 * i).U
469        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
470        csBundle(2 * i).ldest := dest + (2 * i).U
471        csBundle(2 * i).uopIdx := (2 * i).U
472        csBundle(2 * i + 1).lsrc(0) := src1
473        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i + 1).U
474        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
475        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
476        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
477      }
478    }
479    is(UopSplitType.VEC_WXV) {
480      /*
481      i to vector move
482       */
483      csBundle(0).srcType(0) := SrcType.reg
484      csBundle(0).srcType(1) := SrcType.imm
485      csBundle(0).lsrc(1) := 0.U
486      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
487      csBundle(0).fuType := FuType.i2v.U
488      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg)
489      csBundle(0).vecWen := true.B
490
491      for (i <- 0 until MAX_VLMUL / 2) {
492        csBundle(2 * i + 1).srcType(0) := SrcType.vp
493        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
494        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U
495        csBundle(2 * i + 1).lsrc(2) := dest + i.U
496        csBundle(2 * i + 1).ldest := dest + i.U
497        csBundle(2 * i + 1).uopIdx := (2 * i).U
498        csBundle(2 * i + 2).srcType(0) := SrcType.vp
499        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
500        csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U
501        csBundle(2 * i + 2).lsrc(2) := dest + i.U
502        csBundle(2 * i + 2).ldest := dest + i.U
503        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
504      }
505    }
506    is(UopSplitType.VEC_VVM) {
507      csBundle(0).lsrc(2) := dest
508      csBundle(0).ldest := dest
509      csBundle(0).uopIdx := 0.U
510      for (i <- 1 until MAX_VLMUL) {
511        csBundle(i).lsrc(0) := src1 + i.U
512        csBundle(i).lsrc(1) := src2 + i.U
513        csBundle(i).lsrc(2) := dest
514        csBundle(i).ldest := dest
515        csBundle(i).uopIdx := i.U
516      }
517    }
518    is(UopSplitType.VEC_VFM) {
519      csBundle(0).lsrc(2) := dest
520      csBundle(0).ldest := dest
521      csBundle(0).uopIdx := 0.U
522      for (i <- 1 until MAX_VLMUL) {
523        csBundle(i).lsrc(0) := src1
524        csBundle(i).lsrc(1) := src2 + i.U
525        csBundle(i).lsrc(2) := dest
526        csBundle(i).ldest := dest
527        csBundle(i).uopIdx := i.U
528      }
529      csBundle(numOfUop - 1.U).ldest := dest
530    }
531    is(UopSplitType.VEC_VXM) {
532      /*
533      i to vector move
534       */
535      csBundle(0).srcType(0) := SrcType.reg
536      csBundle(0).srcType(1) := SrcType.imm
537      csBundle(0).lsrc(1) := 0.U
538      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
539      csBundle(0).fuType := FuType.i2v.U
540      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg)
541      csBundle(0).vecWen := true.B
542      //LMUL
543      csBundle(1).srcType(0) := SrcType.vp
544      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
545      csBundle(1).lsrc(2) := dest
546      csBundle(1).ldest := dest
547      csBundle(1).uopIdx := 0.U
548      for (i <- 1 until MAX_VLMUL) {
549        csBundle(i + 1).srcType(0) := SrcType.vp
550        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
551        csBundle(i + 1).lsrc(1) := src2 + i.U
552        csBundle(i + 1).lsrc(2) := dest
553        csBundle(i + 1).ldest := dest
554        csBundle(i + 1).uopIdx := i.U
555      }
556      csBundle(numOfUop - 1.U).ldest := dest
557    }
558    is(UopSplitType.VEC_SLIDE1UP) {
559      /*
560      i to vector move
561       */
562      csBundle(0).srcType(0) := SrcType.reg
563      csBundle(0).srcType(1) := SrcType.imm
564      csBundle(0).lsrc(1) := 0.U
565      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
566      csBundle(0).fuType := FuType.i2v.U
567      csBundle(0).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), vsewReg)
568      csBundle(0).vecWen := true.B
569      //LMUL
570      csBundle(1).srcType(0) := SrcType.vp
571      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
572      csBundle(1).lsrc(2) := dest
573      csBundle(1).ldest := dest
574      csBundle(1).uopIdx := 0.U
575      for (i <- 1 until MAX_VLMUL) {
576        csBundle(i + 1).srcType(0) := SrcType.vp
577        csBundle(i + 1).lsrc(0) := src2 + (i - 1).U
578        csBundle(i + 1).lsrc(1) := src2 + i.U
579        csBundle(i + 1).lsrc(2) := dest + i.U
580        csBundle(i + 1).ldest := dest + i.U
581        csBundle(i + 1).uopIdx := i.U
582      }
583    }
584    is(UopSplitType.VEC_FSLIDE1UP) {
585      //LMUL
586      csBundle(0).srcType(0) := SrcType.fp
587      csBundle(0).lsrc(0) := src1
588      csBundle(0).lsrc(1) := src2
589      csBundle(0).lsrc(2) := dest
590      csBundle(0).ldest := dest
591      csBundle(0).uopIdx := 0.U
592      for (i <- 1 until MAX_VLMUL) {
593        csBundle(i).srcType(0) := SrcType.vp
594        csBundle(i).lsrc(0) := src2 + (i - 1).U
595        csBundle(i).lsrc(1) := src2 + i.U
596        csBundle(i).lsrc(2) := dest + i.U
597        csBundle(i).ldest := dest + i.U
598        csBundle(i).uopIdx := i.U
599      }
600    }
601    is(UopSplitType.VEC_SLIDE1DOWN) { // lmul+lmul = 16
602      /*
603      i to vector move
604       */
605      csBundle(0).srcType(0) := SrcType.reg
606      csBundle(0).srcType(1) := SrcType.imm
607      csBundle(0).lsrc(1) := 0.U
608      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
609      csBundle(0).fuType := FuType.i2v.U
610      csBundle(0).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), vsewReg)
611      csBundle(0).vecWen := true.B
612      //LMUL
613      for (i <- 0 until MAX_VLMUL) {
614        csBundle(2 * i + 1).srcType(0) := SrcType.vp
615        csBundle(2 * i + 1).srcType(1) := SrcType.vp
616        csBundle(2 * i + 1).lsrc(0) := src2 + (i + 1).U
617        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
618        csBundle(2 * i + 1).lsrc(2) := dest + i.U
619        csBundle(2 * i + 1).ldest := VECTOR_TMP_REG_LMUL.U + 1.U
620        csBundle(2 * i + 1).uopIdx := (2 * i).U
621        if (2 * i + 2 < MAX_VLMUL * 2) {
622          csBundle(2 * i + 2).srcType(0) := SrcType.vp
623          csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
624          // csBundle(2 * i + 2).lsrc(1) := src2 + i.U         // DontCare
625          csBundle(2 * i + 2).lsrc(2) := VECTOR_TMP_REG_LMUL.U + 1.U
626          csBundle(2 * i + 2).ldest := dest + i.U
627          csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
628        }
629      }
630      csBundle(numOfUop - 1.U).srcType(0) := SrcType.vp
631      csBundle(numOfUop - 1.U).lsrc(0) := VECTOR_TMP_REG_LMUL.U
632      csBundle(numOfUop - 1.U).ldest := dest + lmul - 1.U
633    }
634    is(UopSplitType.VEC_FSLIDE1DOWN) {
635      //LMUL
636      for (i <- 0 until MAX_VLMUL) {
637        csBundle(2 * i).srcType(0) := SrcType.vp
638        csBundle(2 * i).srcType(1) := SrcType.vp
639        csBundle(2 * i).lsrc(0) := src2 + (i + 1).U
640        csBundle(2 * i).lsrc(1) := src2 + i.U
641        csBundle(2 * i).lsrc(2) := dest + i.U
642        csBundle(2 * i).ldest := VECTOR_TMP_REG_LMUL.U
643        csBundle(2 * i).uopIdx := (2 * i).U
644        csBundle(2 * i + 1).srcType(0) := SrcType.fp
645        csBundle(2 * i + 1).lsrc(0) := src1
646        csBundle(2 * i + 1).lsrc(2) := VECTOR_TMP_REG_LMUL.U
647        csBundle(2 * i + 1).ldest := dest + i.U
648        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
649      }
650      csBundle(numOfUop - 1.U).srcType(0) := SrcType.fp
651      csBundle(numOfUop - 1.U).lsrc(0) := src1
652      csBundle(numOfUop - 1.U).ldest := dest + lmul - 1.U
653    }
654    is(UopSplitType.VEC_VRED) {
655      when(vlmulReg === "b001".U) {
656        csBundle(0).srcType(2) := SrcType.DC
657        csBundle(0).lsrc(0) := src2 + 1.U
658        csBundle(0).lsrc(1) := src2
659        csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
660        csBundle(0).uopIdx := 0.U
661      }
662      when(vlmulReg === "b010".U) {
663        csBundle(0).srcType(2) := SrcType.DC
664        csBundle(0).lsrc(0) := src2 + 1.U
665        csBundle(0).lsrc(1) := src2
666        csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
667        csBundle(0).uopIdx := 0.U
668
669        csBundle(1).srcType(2) := SrcType.DC
670        csBundle(1).lsrc(0) := src2 + 3.U
671        csBundle(1).lsrc(1) := src2 + 2.U
672        csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
673        csBundle(1).uopIdx := 1.U
674
675        csBundle(2).srcType(2) := SrcType.DC
676        csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
677        csBundle(2).lsrc(1) := VECTOR_TMP_REG_LMUL.U
678        csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
679        csBundle(2).uopIdx := 2.U
680      }
681      when(vlmulReg === "b011".U) {
682        for (i <- 0 until MAX_VLMUL) {
683          if (i < MAX_VLMUL - MAX_VLMUL / 2) {
684            csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U
685            csBundle(i).lsrc(1) := src2 + (i * 2).U
686            csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
687          } else if (i < MAX_VLMUL - MAX_VLMUL / 4) {
688            csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + (i - MAX_VLMUL / 2) * 2 + 1).U
689            csBundle(i).lsrc(1) := (VECTOR_TMP_REG_LMUL + (i - MAX_VLMUL / 2) * 2).U
690            csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
691          } else if (i < MAX_VLMUL - MAX_VLMUL / 8) {
692            csBundle(6).lsrc(0) := (VECTOR_TMP_REG_LMUL + 5).U
693            csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
694            csBundle(6).ldest := (VECTOR_TMP_REG_LMUL + 6).U
695          }
696          csBundle(i).srcType(2) := SrcType.DC
697          csBundle(i).uopIdx := i.U
698        }
699      }
700      when(vlmulReg.orR) {
701        csBundle(numOfUop - 1.U).srcType(2) := SrcType.vp
702        csBundle(numOfUop - 1.U).lsrc(0) := src1
703        csBundle(numOfUop - 1.U).lsrc(1) := VECTOR_TMP_REG_LMUL.U + numOfUop - 2.U
704        csBundle(numOfUop - 1.U).lsrc(2) := dest
705        csBundle(numOfUop - 1.U).ldest := dest
706        csBundle(numOfUop - 1.U).uopIdx := numOfUop - 1.U
707      }
708    }
709    is(UopSplitType.VEC_VFRED) {
710      val vlmul = vlmulReg
711      val vsew = vsewReg
712      when(vlmul === VLmul.m8){
713        for (i <- 0 until 4) {
714          csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U
715          csBundle(i).lsrc(1) := src2 + (i * 2).U
716          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
717          csBundle(i).uopIdx := i.U
718        }
719        for (i <- 4 until 6) {
720          csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + (i - 4) * 2 + 1).U
721          csBundle(i).lsrc(1) := (VECTOR_TMP_REG_LMUL + (i - 4) * 2).U
722          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
723          csBundle(i).uopIdx := i.U
724        }
725        csBundle(6).lsrc(0) := (VECTOR_TMP_REG_LMUL + 5).U
726        csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
727        csBundle(6).ldest := (VECTOR_TMP_REG_LMUL + 6).U
728        csBundle(6).uopIdx := 6.U
729        when(vsew === VSew.e64) {
730          csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U
731          csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U
732          csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U
733          csBundle(7).vpu.fpu.isFoldTo1_2 := true.B
734          csBundle(7).uopIdx := 7.U
735          csBundle(8).lsrc(0) := src1
736          csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U
737          csBundle(8).ldest := dest
738          csBundle(8).uopIdx := 8.U
739        }
740        when(vsew === VSew.e32) {
741          csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U
742          csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U
743          csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U
744          csBundle(7).vpu.fpu.isFoldTo1_2 := true.B
745          csBundle(7).uopIdx := 7.U
746          csBundle(8).lsrc(0) := (VECTOR_TMP_REG_LMUL + 7).U
747          csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U
748          csBundle(8).ldest := (VECTOR_TMP_REG_LMUL + 8).U
749          csBundle(8).vpu.fpu.isFoldTo1_4 := true.B
750          csBundle(8).uopIdx := 8.U
751          csBundle(9).lsrc(0) := src1
752          csBundle(9).lsrc(1) := (VECTOR_TMP_REG_LMUL + 8).U
753          csBundle(9).ldest := dest
754          csBundle(9).uopIdx := 9.U
755        }
756        when(vsew === VSew.e16) {
757          csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U
758          csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U
759          csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U
760          csBundle(7).vpu.fpu.isFoldTo1_2 := true.B
761          csBundle(7).uopIdx := 7.U
762          csBundle(8).lsrc(0) := (VECTOR_TMP_REG_LMUL + 7).U
763          csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U
764          csBundle(8).ldest := (VECTOR_TMP_REG_LMUL + 8).U
765          csBundle(8).vpu.fpu.isFoldTo1_4 := true.B
766          csBundle(8).uopIdx := 8.U
767          csBundle(9).lsrc(0) := (VECTOR_TMP_REG_LMUL + 8).U
768          csBundle(9).lsrc(1) := (VECTOR_TMP_REG_LMUL + 8).U
769          csBundle(9).ldest := (VECTOR_TMP_REG_LMUL + 9).U
770          csBundle(9).vpu.fpu.isFoldTo1_8 := true.B
771          csBundle(9).uopIdx := 9.U
772          csBundle(10).lsrc(0) := src1
773          csBundle(10).lsrc(1) := (VECTOR_TMP_REG_LMUL + 9).U
774          csBundle(10).ldest := dest
775          csBundle(10).uopIdx := 10.U
776        }
777      }
778      when(vlmul === VLmul.m4) {
779        for (i <- 0 until 2) {
780          csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U
781          csBundle(i).lsrc(1) := src2 + (i * 2).U
782          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
783          csBundle(i).uopIdx := i.U
784        }
785        csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
786        csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
787        csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
788        csBundle(2).uopIdx := 2.U
789        when(vsew === VSew.e64) {
790          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
791          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
792          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
793          csBundle(3).vpu.fpu.isFoldTo1_2 := true.B
794          csBundle(3).uopIdx := 3.U
795          csBundle(4).lsrc(0) := src1
796          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
797          csBundle(4).ldest := dest
798          csBundle(4).uopIdx := 4.U
799        }
800        when(vsew === VSew.e32) {
801          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
802          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
803          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
804          csBundle(3).vpu.fpu.isFoldTo1_2 := true.B
805          csBundle(3).uopIdx := 3.U
806          csBundle(4).lsrc(0) := (VECTOR_TMP_REG_LMUL + 3).U
807          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
808          csBundle(4).ldest := (VECTOR_TMP_REG_LMUL + 4).U
809          csBundle(4).vpu.fpu.isFoldTo1_4 := true.B
810          csBundle(4).uopIdx := 4.U
811          csBundle(5).lsrc(0) := src1
812          csBundle(5).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
813          csBundle(5).ldest := dest
814          csBundle(5).uopIdx := 5.U
815        }
816        when(vsew === VSew.e16) {
817          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
818          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
819          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
820          csBundle(3).vpu.fpu.isFoldTo1_2 := true.B
821          csBundle(3).uopIdx := 3.U
822          csBundle(4).lsrc(0) := (VECTOR_TMP_REG_LMUL + 3).U
823          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
824          csBundle(4).ldest := (VECTOR_TMP_REG_LMUL + 4).U
825          csBundle(4).vpu.fpu.isFoldTo1_4 := true.B
826          csBundle(4).uopIdx := 4.U
827          csBundle(5).lsrc(0) := (VECTOR_TMP_REG_LMUL + 4).U
828          csBundle(5).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
829          csBundle(5).ldest := (VECTOR_TMP_REG_LMUL + 5).U
830          csBundle(5).vpu.fpu.isFoldTo1_8 := true.B
831          csBundle(5).uopIdx := 5.U
832          csBundle(6).lsrc(0) := src1
833          csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 5).U
834          csBundle(6).ldest := dest
835          csBundle(6).uopIdx := 6.U
836        }
837      }
838      when(vlmul === VLmul.m2) {
839        csBundle(0).lsrc(0) := src2 + 1.U
840        csBundle(0).lsrc(1) := src2 + 0.U
841        csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
842        csBundle(0).uopIdx := 0.U
843        when(vsew === VSew.e64) {
844          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
845          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
846          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
847          csBundle(1).vpu.fpu.isFoldTo1_2 := true.B
848          csBundle(1).uopIdx := 1.U
849          csBundle(2).lsrc(0) := src1
850          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
851          csBundle(2).ldest := dest
852          csBundle(2).uopIdx := 2.U
853        }
854        when(vsew === VSew.e32) {
855          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
856          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
857          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
858          csBundle(1).vpu.fpu.isFoldTo1_2 := true.B
859          csBundle(1).uopIdx := 1.U
860          csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
861          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
862          csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
863          csBundle(2).vpu.fpu.isFoldTo1_4 := true.B
864          csBundle(2).uopIdx := 2.U
865          csBundle(3).lsrc(0) := src1
866          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
867          csBundle(3).ldest := dest
868          csBundle(3).uopIdx := 3.U
869        }
870        when(vsew === VSew.e16) {
871          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
872          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
873          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
874          csBundle(1).vpu.fpu.isFoldTo1_2 := true.B
875          csBundle(1).uopIdx := 1.U
876          csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
877          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
878          csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
879          csBundle(2).vpu.fpu.isFoldTo1_4 := true.B
880          csBundle(2).uopIdx := 2.U
881          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
882          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
883          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
884          csBundle(3).vpu.fpu.isFoldTo1_8 := true.B
885          csBundle(3).uopIdx := 3.U
886          csBundle(4).lsrc(0) := src1
887          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
888          csBundle(4).ldest := dest
889          csBundle(4).uopIdx := 4.U
890        }
891      }
892      when(vlmul === VLmul.m1) {
893        when(vsew === VSew.e64) {
894          csBundle(0).lsrc(0) := src2
895          csBundle(0).lsrc(1) := src2
896          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
897          csBundle(0).vpu.fpu.isFoldTo1_2 := true.B
898          csBundle(0).uopIdx := 0.U
899          csBundle(1).lsrc(0) := src1
900          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
901          csBundle(1).ldest := dest
902          csBundle(1).uopIdx := 1.U
903        }
904        when(vsew === VSew.e32) {
905          csBundle(0).lsrc(0) := src2
906          csBundle(0).lsrc(1) := src2
907          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
908          csBundle(0).vpu.fpu.isFoldTo1_2 := true.B
909          csBundle(0).uopIdx := 0.U
910          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
911          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
912          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
913          csBundle(1).vpu.fpu.isFoldTo1_4 := true.B
914          csBundle(1).uopIdx := 1.U
915          csBundle(2).lsrc(0) := src1
916          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
917          csBundle(2).ldest := dest
918          csBundle(2).uopIdx := 2.U
919        }
920        when(vsew === VSew.e16) {
921          csBundle(0).lsrc(0) := src2
922          csBundle(0).lsrc(1) := src2
923          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
924          csBundle(0).vpu.fpu.isFoldTo1_2 := true.B
925          csBundle(0).uopIdx := 0.U
926          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
927          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
928          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
929          csBundle(1).vpu.fpu.isFoldTo1_4 := true.B
930          csBundle(1).uopIdx := 1.U
931          csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
932          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
933          csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
934          csBundle(2).vpu.fpu.isFoldTo1_8 := true.B
935          csBundle(2).uopIdx := 2.U
936          csBundle(3).lsrc(0) := src1
937          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
938          csBundle(3).ldest := dest
939          csBundle(3).uopIdx := 3.U
940        }
941      }
942      when(vlmul === VLmul.mf2) {
943        when(vsew === VSew.e32) {
944          csBundle(0).lsrc(0) := src2
945          csBundle(0).lsrc(1) := src2
946          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
947          csBundle(0).vpu.fpu.isFoldTo1_4 := true.B
948          csBundle(0).uopIdx := 0.U
949          csBundle(1).lsrc(0) := src1
950          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
951          csBundle(1).ldest := dest
952          csBundle(1).uopIdx := 1.U
953        }
954        when(vsew === VSew.e16) {
955          csBundle(0).lsrc(0) := src2
956          csBundle(0).lsrc(1) := src2
957          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
958          csBundle(0).vpu.fpu.isFoldTo1_4 := true.B
959          csBundle(0).uopIdx := 0.U
960          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
961          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
962          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
963          csBundle(1).vpu.fpu.isFoldTo1_8 := true.B
964          csBundle(1).uopIdx := 1.U
965          csBundle(2).lsrc(0) := src1
966          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
967          csBundle(2).ldest := dest
968          csBundle(2).uopIdx := 2.U
969        }
970      }
971      when(vlmul === VLmul.mf4) {
972        when(vsew === VSew.e16) {
973          csBundle(0).lsrc(0) := src2
974          csBundle(0).lsrc(1) := src2
975          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
976          csBundle(0).vpu.fpu.isFoldTo1_8 := true.B
977          csBundle(0).uopIdx := 0.U
978          csBundle(1).lsrc(0) := src1
979          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
980          csBundle(1).ldest := dest
981          csBundle(1).uopIdx := 1.U
982        }
983      }
984    }
985
986    is(UopSplitType.VEC_VFREDOSUM) {
987      import yunsuan.VfaluType
988      val vlmul = vlmulReg
989      val vsew = vsewReg
990      val isWiden = latchedInst.fuOpType === VfaluType.vfwredosum
991      when(vlmul === VLmul.m8) {
992        when(vsew === VSew.e64) {
993          val vlmax = 16
994          for (i <- 0 until vlmax) {
995            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
996            csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U)
997            csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
998            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
999            csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B)
1000            csBundle(i).uopIdx := i.U
1001          }
1002        }
1003        when(vsew === VSew.e32) {
1004          val vlmax = 32
1005          for (i <- 0 until vlmax) {
1006            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1007            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1008            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1009            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1010            csBundle(i).vpu.fpu.isFoldTo1_4 := (if (i % 4 == 0) false.B else true.B)
1011            csBundle(i).uopIdx := i.U
1012          }
1013        }
1014        when(vsew === VSew.e16) {
1015          val vlmax = 64
1016          for (i <- 0 until vlmax) {
1017            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1018            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1019            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1020            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1021            csBundle(i).vpu.fpu.isFoldTo1_8 := (if (i % 8 == 0) false.B else true.B)
1022            csBundle(i).uopIdx := i.U
1023          }
1024        }
1025      }
1026      when(vlmul === VLmul.m4) {
1027        when(vsew === VSew.e64) {
1028          val vlmax = 8
1029          for (i <- 0 until vlmax) {
1030            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1031            csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U)
1032            csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1033            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1034            csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B)
1035            csBundle(i).uopIdx := i.U
1036          }
1037        }
1038        when(vsew === VSew.e32) {
1039          val vlmax = 16
1040          for (i <- 0 until vlmax) {
1041            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1042            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1043            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1044            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1045            csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B)
1046            csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B)
1047            csBundle(i).uopIdx := i.U
1048          }
1049        }
1050        when(vsew === VSew.e16) {
1051          val vlmax = 32
1052          for (i <- 0 until vlmax) {
1053            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1054            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1055            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1056            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1057            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1058            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1059            csBundle(i).uopIdx := i.U
1060          }
1061        }
1062      }
1063      when(vlmul === VLmul.m2) {
1064        when(vsew === VSew.e64) {
1065          val vlmax = 4
1066          for (i <- 0 until vlmax) {
1067            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1068            csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U)
1069            csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1070            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1071            csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B)
1072            csBundle(i).uopIdx := i.U
1073          }
1074        }
1075        when(vsew === VSew.e32) {
1076          val vlmax = 8
1077          for (i <- 0 until vlmax) {
1078            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1079            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1080            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1081            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1082            csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B)
1083            csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B)
1084            csBundle(i).uopIdx := i.U
1085          }
1086        }
1087        when(vsew === VSew.e16) {
1088          val vlmax = 16
1089          for (i <- 0 until vlmax) {
1090            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1091            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1092            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1093            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1094            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1095            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1096            csBundle(i).uopIdx := i.U
1097          }
1098        }
1099      }
1100      when(vlmul === VLmul.m1) {
1101        when(vsew === VSew.e64) {
1102          val vlmax = 2
1103          for (i <- 0 until vlmax) {
1104            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1105            csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U)
1106            csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1107            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1108            csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B)
1109            csBundle(i).uopIdx := i.U
1110          }
1111        }
1112        when(vsew === VSew.e32) {
1113          val vlmax = 4
1114          for (i <- 0 until vlmax) {
1115            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1116            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1117            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1118            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1119            csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B)
1120            csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B)
1121            csBundle(i).uopIdx := i.U
1122          }
1123        }
1124        when(vsew === VSew.e16) {
1125          val vlmax = 8
1126          for (i <- 0 until vlmax) {
1127            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1128            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1129            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1130            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1131            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1132            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1133            csBundle(i).uopIdx := i.U
1134          }
1135        }
1136      }
1137      when(vlmul === VLmul.mf2) {
1138        when(vsew === VSew.e32) {
1139          val vlmax = 2
1140          for (i <- 0 until vlmax) {
1141            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1142            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1143            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1144            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1145            csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B)
1146            csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B)
1147            csBundle(i).uopIdx := i.U
1148          }
1149        }
1150        when(vsew === VSew.e16) {
1151          val vlmax = 4
1152          for (i <- 0 until vlmax) {
1153            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1154            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1155            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1156            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1157            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1158            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1159            csBundle(i).uopIdx := i.U
1160          }
1161        }
1162      }
1163      when(vlmul === VLmul.mf4) {
1164        when(vsew === VSew.e16) {
1165          val vlmax = 2
1166          for (i <- 0 until vlmax) {
1167            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1168            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1169            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1170            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1171            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1172            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1173            csBundle(i).uopIdx := i.U
1174          }
1175        }
1176      }
1177    }
1178
1179    is(UopSplitType.VEC_SLIDEUP) {
1180      // i to vector move
1181      csBundle(0).srcType(0) := SrcType.reg
1182      csBundle(0).srcType(1) := SrcType.imm
1183      csBundle(0).lsrc(1) := 0.U
1184      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
1185      csBundle(0).fuType := FuType.i2v.U
1186      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
1187      csBundle(0).vecWen := true.B
1188      // LMUL
1189      for (i <- 0 until MAX_VLMUL)
1190        for (j <- 0 to i) {
1191          val old_vd = if (j == 0) {
1192            dest + i.U
1193          } else (VECTOR_TMP_REG_LMUL + j).U
1194          val vd = if (j == i) {
1195            dest + i.U
1196          } else (VECTOR_TMP_REG_LMUL + j + 1).U
1197          csBundle(i * (i + 1) / 2 + j + 1).srcType(0) := SrcType.vp
1198          csBundle(i * (i + 1) / 2 + j + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
1199          csBundle(i * (i + 1) / 2 + j + 1).lsrc(1) := src2 + j.U
1200          csBundle(i * (i + 1) / 2 + j + 1).lsrc(2) := old_vd
1201          csBundle(i * (i + 1) / 2 + j + 1).ldest := vd
1202          csBundle(i * (i + 1) / 2 + j + 1).uopIdx := (i * (i + 1) / 2 + j).U
1203        }
1204    }
1205
1206    is(UopSplitType.VEC_SLIDEDOWN) {
1207      // i to vector move
1208      csBundle(0).srcType(0) := SrcType.reg
1209      csBundle(0).srcType(1) := SrcType.imm
1210      csBundle(0).lsrc(1) := 0.U
1211      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
1212      csBundle(0).fuType := FuType.i2v.U
1213      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
1214      csBundle(0).vecWen := true.B
1215      // LMUL
1216      for (i <- 0 until MAX_VLMUL)
1217        for (j <- (0 to i).reverse) {
1218          when(i.U < lmul) {
1219            val old_vd = if (j == 0) {
1220              dest + lmul - 1.U - i.U
1221            } else (VECTOR_TMP_REG_LMUL + j).U
1222            val vd = if (j == i) {
1223              dest + lmul - 1.U - i.U
1224            } else (VECTOR_TMP_REG_LMUL + j + 1).U
1225            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).srcType(0) := SrcType.vp
1226            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).lsrc(0) := VECTOR_TMP_REG_LMUL.U
1227            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).lsrc(1) := src2 + lmul - 1.U - j.U
1228            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).lsrc(2) := old_vd
1229            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).ldest := vd
1230            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).uopIdx := numOfUop - (i * (i + 1) / 2 + i - j + 2).U
1231          }
1232        }
1233    }
1234
1235    is(UopSplitType.VEC_M0X) {
1236      // LMUL
1237      for (i <- 0 until MAX_VLMUL) {
1238        val srcType0 = if (i == 0) SrcType.DC else SrcType.vp
1239        val ldest = (VECTOR_TMP_REG_LMUL + i).U
1240        csBundle(i).srcType(0) := srcType0
1241        csBundle(i).srcType(1) := SrcType.vp
1242        csBundle(i).rfWen := false.B
1243        csBundle(i).fpWen := false.B
1244        csBundle(i).vecWen := true.B
1245        csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
1246        csBundle(i).lsrc(1) := src2
1247        // csBundle(i).lsrc(2) := dest + i.U  DontCare
1248        csBundle(i).ldest := ldest
1249        csBundle(i).uopIdx := i.U
1250      }
1251      csBundle(lmul - 1.U).rfWen := true.B
1252      csBundle(lmul - 1.U).fpWen := false.B
1253      csBundle(lmul - 1.U).vecWen := false.B
1254      csBundle(lmul - 1.U).ldest := dest
1255    }
1256
1257    is(UopSplitType.VEC_MVV) {
1258      // LMUL
1259      for (i <- 0 until MAX_VLMUL) {
1260        val srcType0 = if (i == 0) SrcType.DC else SrcType.vp
1261        csBundle(i * 2 + 0).srcType(0) := srcType0
1262        csBundle(i * 2 + 0).srcType(1) := SrcType.vp
1263        csBundle(i * 2 + 0).lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
1264        csBundle(i * 2 + 0).lsrc(1) := src2
1265        csBundle(i * 2 + 0).lsrc(2) := dest + i.U
1266        csBundle(i * 2 + 0).ldest := dest + i.U
1267        csBundle(i * 2 + 0).uopIdx := (i * 2 + 0).U
1268
1269        csBundle(i * 2 + 1).srcType(0) := srcType0
1270        csBundle(i * 2 + 1).srcType(1) := SrcType.vp
1271        csBundle(i * 2 + 1).lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
1272        csBundle(i * 2 + 1).lsrc(1) := src2
1273        // csBundle(i).lsrc(2) := dest + i.U  DontCare
1274        csBundle(i * 2 + 1).ldest := (VECTOR_TMP_REG_LMUL + i).U
1275        csBundle(i * 2 + 1).uopIdx := (i * 2 + 1).U
1276      }
1277    }
1278
1279    is(UopSplitType.VEC_M0X_VFIRST) {
1280      // LMUL
1281      csBundle(0).rfWen := true.B
1282      csBundle(0).fpWen := false.B
1283      csBundle(0).vecWen := false.B
1284      csBundle(0).ldest := dest
1285    }
1286    is(UopSplitType.VEC_VWW) {
1287      for (i <- 0 until MAX_VLMUL*2) {
1288        when(i.U < lmul){
1289          csBundle(i).srcType(2) := SrcType.DC
1290          csBundle(i).lsrc(0) := src2 + i.U
1291          csBundle(i).lsrc(1) := src2 + i.U
1292          // csBundle(i).lsrc(2) := dest + (2 * i).U
1293          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
1294          csBundle(i).uopIdx :=  i.U
1295        } otherwise {
1296          csBundle(i).srcType(2) := SrcType.DC
1297          csBundle(i).lsrc(0) := VECTOR_TMP_REG_LMUL.U + Cat((i.U-lmul),0.U(1.W)) + 1.U
1298          csBundle(i).lsrc(1) := VECTOR_TMP_REG_LMUL.U + Cat((i.U-lmul),0.U(1.W))
1299          // csBundle(i).lsrc(2) := dest + (2 * i).U
1300          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
1301          csBundle(i).uopIdx := i.U
1302        }
1303        csBundle(numOfUop-1.U).srcType(2) := SrcType.vp
1304        csBundle(numOfUop-1.U).lsrc(0) := src1
1305        csBundle(numOfUop-1.U).lsrc(2) := dest
1306        csBundle(numOfUop-1.U).ldest := dest
1307      }
1308    }
1309    is(UopSplitType.VEC_RGATHER) {
1310      def genCsBundle_VEC_RGATHER(len:Int): Unit ={
1311        for (i <- 0 until len)
1312          for (j <- 0 until len) {
1313            // csBundle(i * len + j).srcType(0) := SrcType.vp // SrcType.imm
1314            // csBundle(i * len + j).srcType(1) := SrcType.vp
1315            // csBundle(i * len + j).srcType(2) := SrcType.vp
1316            csBundle(i * len + j).lsrc(0) := src1 + i.U
1317            csBundle(i * len + j).lsrc(1) := src2 + j.U
1318            val vd_old = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j - 1).U
1319            csBundle(i * len + j).lsrc(2) := vd_old
1320            val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
1321            csBundle(i * len + j).ldest := vd
1322            csBundle(i * len + j).uopIdx := (i * len + j).U
1323          }
1324      }
1325      switch(vlmulReg) {
1326        is("b001".U ){
1327          genCsBundle_VEC_RGATHER(2)
1328        }
1329        is("b010".U ){
1330          genCsBundle_VEC_RGATHER(4)
1331        }
1332        is("b011".U ){
1333          genCsBundle_VEC_RGATHER(8)
1334        }
1335      }
1336    }
1337    is(UopSplitType.VEC_RGATHER_VX) {
1338      def genCsBundle_RGATHER_VX(len:Int): Unit ={
1339        for (i <- 0 until len)
1340          for (j <- 0 until len) {
1341            csBundle(i * len + j + 1).srcType(0) := SrcType.vp
1342            // csBundle(i * len + j + 1).srcType(1) := SrcType.vp
1343            // csBundle(i * len + j + 1).srcType(2) := SrcType.vp
1344            csBundle(i * len + j + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
1345            csBundle(i * len + j + 1).lsrc(1) := src2 + j.U
1346            val vd_old = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
1347            csBundle(i * len + j + 1).lsrc(2) := vd_old
1348            val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j + 1).U
1349            csBundle(i * len + j + 1).ldest := vd
1350            csBundle(i * len + j + 1).uopIdx := (i * len + j).U
1351          }
1352      }
1353      // i to vector move
1354      csBundle(0).srcType(0) := SrcType.reg
1355      csBundle(0).srcType(1) := SrcType.imm
1356      csBundle(0).lsrc(1) := 0.U
1357      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
1358      csBundle(0).fuType := FuType.i2v.U
1359      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
1360      csBundle(0).vecWen := true.B
1361      switch(vlmulReg) {
1362        is("b000".U ){
1363          genCsBundle_RGATHER_VX(1)
1364        }
1365        is("b001".U ){
1366          genCsBundle_RGATHER_VX(2)
1367        }
1368        is("b010".U ){
1369          genCsBundle_RGATHER_VX(4)
1370        }
1371        is("b011".U ){
1372          genCsBundle_RGATHER_VX(8)
1373        }
1374      }
1375    }
1376    is(UopSplitType.VEC_RGATHEREI16) {
          // Uop split for UopSplitType.VEC_RGATHEREI16.
          // Every uop written here reads lsrc(0) from the src1 register group, lsrc(1)
          // from the src2 register group and an "old vd" through lsrc(2), and writes
          // ldest. The uops that produce destination register `dest + i` form a chain:
          // the first one reads `dest + i` as old vd, intermediate results are passed
          // through temporaries numbered from VECTOR_TMP_REG_LMUL, and the last one
          // writes `dest + i`.
          //
          // Variant used when vsewReg is all zeros (presumably SEW = 8, going by the
          // helper's name -- confirm against the VSew encoding). Each (i, j) pair emits
          // TWO uops, reading src1 + 2*i and src1 + 2*i + 1 respectively, so
          // 2 * len * len csBundle entries are written.
1377      def genCsBundle_VEC_RGATHEREI16_SEW8(len:Int): Unit ={
1378        for (i <- 0 until len)
1379          for (j <- 0 until len) {
                // First uop of the pair: continues the chain from the previous pair's
                // second uop (temporary j*2-1), or starts it from dest + i when j == 0.
1380            val vd_old0 = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j*2-1).U
1381            val vd0 = (VECTOR_TMP_REG_LMUL + j*2 ).U
1382            // csBundle(i * len + j).srcType(0) := SrcType.vp // SrcType.imm
1383            // csBundle(i * len + j).srcType(1) := SrcType.vp
1384            // csBundle(i * len + j).srcType(2) := SrcType.vp
1385            csBundle((i * len + j)*2+0).lsrc(0) := src1 + (i*2+0).U
1386            csBundle((i * len + j)*2+0).lsrc(1) := src2 + j.U
1387            csBundle((i * len + j)*2+0).lsrc(2) := vd_old0
1388            csBundle((i * len + j)*2+0).ldest := vd0
1389            csBundle((i * len + j)*2+0).uopIdx := ((i * len + j)*2+0).U
                // Second uop of the pair: reads the first uop's result (temporary j*2)
                // as old vd; only the final pair (j == len-1) writes the real dest + i.
1390            val vd_old1 = (VECTOR_TMP_REG_LMUL + j*2).U
1391            val vd1 = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j*2+1 ).U
1392            csBundle((i * len + j)*2+1).lsrc(0) := src1 + (i*2+1).U
1393            csBundle((i * len + j)*2+1).lsrc(1) := src2 + j.U
1394            csBundle((i * len + j)*2+1).lsrc(2) := vd_old1
1395            csBundle((i * len + j)*2+1).ldest := vd1
1396            csBundle((i * len + j)*2+1).uopIdx := ((i * len + j)*2+1).U
1397          }
1398      }
          // Generic variant: one uop per (i, j) pair, len * len csBundle entries in
          // total. Uop (i, j) reads src1 + i and src2 + j; old vd / ldest follow the
          // same chaining scheme with temporaries VECTOR_TMP_REG_LMUL + 0 .. len-2.
1399      def genCsBundle_VEC_RGATHEREI16(len:Int): Unit ={
1400        for (i <- 0 until len)
1401          for (j <- 0 until len) {
1402            val vd_old = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j-1).U
1403            val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
1404            // csBundle(i * len + j).srcType(0) := SrcType.vp // SrcType.imm
1405            // csBundle(i * len + j).srcType(1) := SrcType.vp
1406            // csBundle(i * len + j).srcType(2) := SrcType.vp
1407            csBundle(i * len + j).lsrc(0) := src1 + i.U
1408            csBundle(i * len + j).lsrc(1) := src2 + j.U
1409            csBundle(i * len + j).lsrc(2) := vd_old
1410            csBundle(i * len + j).ldest := vd
1411            csBundle(i * len + j).uopIdx := (i * len + j).U
1412          }
1413      }
          // Pick `len` from vlmulReg (b000 -> 1, b001 -> 2, b010 -> 4, b011 -> 8) and
          // the variant from vsewReg. Other vlmulReg encodings have no case here, so
          // this block adds no assignments for them.
1414      switch(vlmulReg) {
1415        is("b000".U ){
1416          when(!vsewReg.orR){
1417            genCsBundle_VEC_RGATHEREI16_SEW8(1)
1418          } .otherwise{
1419            genCsBundle_VEC_RGATHEREI16(1)
1420          }
1421        }
1422        is("b001".U) {
1423          when(!vsewReg.orR) {
1424            genCsBundle_VEC_RGATHEREI16_SEW8(2)
1425          }.otherwise {
1426            genCsBundle_VEC_RGATHEREI16(2)
1427          }
1428        }
1429        is("b010".U) {
1430          when(!vsewReg.orR) {
1431            genCsBundle_VEC_RGATHEREI16_SEW8(4)
1432          }.otherwise {
1433            genCsBundle_VEC_RGATHEREI16(4)
1434          }
1435        }
1436        is("b011".U) {
              // No SEW8 variant for this encoding: the generic split is used whatever
              // vsewReg holds. NOTE(review): presumably because the doubled index group
              // would exceed the maximum register-group size -- confirm.
1437          genCsBundle_VEC_RGATHEREI16(8)
1438        }
1439      }
1440    }
1441    is(UopSplitType.VEC_COMPRESS) {
          // Uop split for UopSplitType.VEC_COMPRESS.
          // Row i (0 until len) handles source register src2 + i and emits `jlen` uops:
          // i + 2 uops for every row but the last, i + 1 uops for the last row. The
          // base index i*(i+3)/2 equals the number of uops emitted by rows 0 .. i-1
          // (sum of k + 2 for k < i), so the csBundle entries are packed densely.
          // All uops read the same lsrc(0) = src1.
1442      def genCsBundle_VEC_COMPRESS(len:Int): Unit ={
1443        for (i <- 0 until len){
1444          val jlen = if (i == len-1) i+1 else i+2
1445          for (j <- 0 until jlen) {
                // Old vd: the real dest + i on the diagonal (j == i), otherwise the
                // temporary VECTOR_TMP_REG_LMUL + j + 1.
1446            val vd_old = if(i==j) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j + 1).U
                // Destination: only the last row writes the real dest + j. In earlier
                // rows the extra uop (j == i+1) writes VECTOR_TMP_REG_LMUL itself and
                // the others write the temporary VECTOR_TMP_REG_LMUL + j + 1.
1447            val vd = if(i==len-1) (dest + j.U) else{
1448              if (j == i+1) VECTOR_TMP_REG_LMUL.U else (VECTOR_TMP_REG_LMUL + j + 1).U
1449            }
                // The extra uop leaves srcType(1)/srcType(2) as DontCare.
                // NOTE(review): presumably it does not consume those operands -- confirm
                // against the execution unit.
1450            val src23Type = if (j == i+1) DontCare else SrcType.vp
1451            csBundle(i*(i+3)/2 + j).srcType(0) := SrcType.vp
1452            csBundle(i*(i+3)/2 + j).srcType(1) := src23Type
1453            csBundle(i*(i+3)/2 + j).srcType(2) := src23Type
1454            csBundle(i*(i+3)/2 + j).lsrc(0) := src1
1455            csBundle(i*(i+3)/2 + j).lsrc(1) := src2 + i.U
1456            csBundle(i*(i+3)/2 + j).lsrc(2) := vd_old
1457            // csBundle(i*(i+3)/2 + j).lsrc(3) := VECTOR_TMP_REG_LMUL.U
1458            csBundle(i*(i+3)/2 + j).ldest := vd
1459            csBundle(i*(i+3)/2 + j).uopIdx := (i*(i+3)/2 + j).U
1460          }
1461        }
1462      }
          // Only vlmulReg encodings b001 / b010 / b011 (len 2 / 4 / 8) are split here;
          // for any other encoding this block adds no assignments.
1463      switch(vlmulReg) {
1464        is("b001".U ){
1465          genCsBundle_VEC_COMPRESS(2)
1466        }
1467        is("b010".U ){
1468          genCsBundle_VEC_COMPRESS(4)
1469        }
1470        is("b011".U ){
1471          genCsBundle_VEC_COMPRESS(8)
1472        }
1473      }
1474    }
1475    is(UopSplitType.VEC_MVNR) {
          // Uop split for UopSplitType.VEC_MVNR: one uop per register of the group.
          // Uop i reads src1 + i and src2 + i, uses dest + i both as old vd (lsrc(2))
          // and as destination, and carries uopIdx = i. All MAX_VLMUL entries are
          // filled unconditionally; this block does not decide how many are issued.
1476      for (i <- 0 until MAX_VLMUL) {
1477        csBundle(i).lsrc(0) := src1 + i.U
1478        csBundle(i).lsrc(1) := src2 + i.U
1479        csBundle(i).lsrc(2) := dest + i.U
1480        csBundle(i).ldest := dest + i.U
1481        csBundle(i).uopIdx := i.U
1482      }
1483    }
1484    is(UopSplitType.VEC_US_LDST) {
          // Uop split for UopSplitType.VEC_US_LDST: one scalar move uop followed by
          // MAX_VLMUL memory uops (csBundle(1) .. csBundle(MAX_VLMUL)).
1485      /*
1486      FMV.D.X
1487       */
          // csBundle(0): int -> fp move on FuType.i2f. It reads an integer register
          // through source 0 (lsrc(0) is not reassigned in this block), uses an
          // immediate 0 as source 1, and writes the fp temporary FP_TMP_REG_MV.
          // Only fpWen is set.
1488      csBundle(0).srcType(0) := SrcType.reg
1489      csBundle(0).srcType(1) := SrcType.imm
1490      csBundle(0).lsrc(1) := 0.U
1491      csBundle(0).ldest := FP_TMP_REG_MV.U
1492      csBundle(0).fuType := FuType.i2f.U
1493      csBundle(0).rfWen := false.B
1494      csBundle(0).fpWen := true.B
1495      csBundle(0).vecWen := false.B
1496      csBundle(0).fpu.isAddSub := false.B
1497      csBundle(0).fpu.typeTagIn := FPU.D
1498      csBundle(0).fpu.typeTagOut := FPU.D
1499      csBundle(0).fpu.fromInt := true.B
1500      csBundle(0).fpu.wflags := false.B
1501      csBundle(0).fpu.fpWen := true.B
1502      csBundle(0).fpu.div := false.B
1503      csBundle(0).fpu.sqrt := false.B
1504      csBundle(0).fpu.fcvt := false.B
1505      //LMUL
          // Memory uops: each reads the moved scalar from FP_TMP_REG_MV as source 0
          // and uses dest + i as both old vd and destination. uopIdx restarts at 0,
          // i.e. the move uop in csBundle(0) is not counted.
1506      for (i <- 0 until MAX_VLMUL) {
1507        csBundle(i + 1).srcType(0) := SrcType.fp
1508        csBundle(i + 1).lsrc(0) := FP_TMP_REG_MV.U
1509        csBundle(i + 1).lsrc(2) := dest + i.U // old vd
1510        csBundle(i + 1).ldest := dest + i.U
1511        csBundle(i + 1).uopIdx := i.U
1512      }
1513    }
1514    is(UopSplitType.VEC_S_LDST) {
          // Uop split for UopSplitType.VEC_S_LDST: two scalar move uops followed by
          // MAX_VLMUL memory uops (csBundle(2) .. csBundle(MAX_VLMUL + 1)).
1515      /*
1516      FMV.D.X
1517       */
          // csBundle(0): int -> fp move of source 0 into FP_TMP_REG_MV (lsrc(0) is not
          // reassigned in this block).
1518      csBundle(0).srcType(0) := SrcType.reg
1519      csBundle(0).srcType(1) := SrcType.imm
1520      csBundle(0).lsrc(1) := 0.U
1521      csBundle(0).ldest := FP_TMP_REG_MV.U
1522      csBundle(0).fuType := FuType.i2f.U
1523      csBundle(0).rfWen := false.B
1524      csBundle(0).fpWen := true.B
1525      csBundle(0).vecWen := false.B
1526      csBundle(0).fpu.isAddSub := false.B
1527      csBundle(0).fpu.typeTagIn := FPU.D
1528      csBundle(0).fpu.typeTagOut := FPU.D
1529      csBundle(0).fpu.fromInt := true.B
1530      csBundle(0).fpu.wflags := false.B
1531      csBundle(0).fpu.fpWen := true.B
1532      csBundle(0).fpu.div := false.B
1533      csBundle(0).fpu.sqrt := false.B
1534      csBundle(0).fpu.fcvt := false.B
1535
          // csBundle(1): second int -> fp move, this time of the instruction's second
          // source register (latchedInst.lsrc(1)).
          // NOTE(review): the destination index is VECTOR_TMP_REG_LMUL although the
          // uop writes an fp register (fpWen = true, vecWen = false) and the memory
          // uops below read it with SrcType.fp -- confirm this index is intended to be
          // reused in the fp register space.
1536      csBundle(1).srcType(0) := SrcType.reg
1537      csBundle(1).srcType(1) := SrcType.imm
1538      csBundle(1).lsrc(0) := latchedInst.lsrc(1)
1539      csBundle(1).lsrc(1) := 0.U
1540      csBundle(1).ldest := VECTOR_TMP_REG_LMUL.U
1541      csBundle(1).fuType := FuType.i2f.U
1542      csBundle(1).rfWen := false.B
1543      csBundle(1).fpWen := true.B
1544      csBundle(1).vecWen := false.B
1545      csBundle(1).fpu.isAddSub := false.B
1546      csBundle(1).fpu.typeTagIn := FPU.D
1547      csBundle(1).fpu.typeTagOut := FPU.D
1548      csBundle(1).fpu.fromInt := true.B
1549      csBundle(1).fpu.wflags := false.B
1550      csBundle(1).fpu.fpWen := true.B
1551      csBundle(1).fpu.div := false.B
1552      csBundle(1).fpu.sqrt := false.B
1553      csBundle(1).fpu.fcvt := false.B
1554
1555      //LMUL
          // Memory uops: sources 0 and 1 are the two moved scalars, dest + i is both
          // old vd and destination. uopIdx restarts at 0, so the two move uops are
          // not counted.
1556      for (i <- 0 until MAX_VLMUL) {
1557        csBundle(i + 2).srcType(0) := SrcType.fp
1558        csBundle(i + 2).srcType(1) := SrcType.fp
1559        csBundle(i + 2).lsrc(0) := FP_TMP_REG_MV.U
1560        csBundle(i + 2).lsrc(1) := VECTOR_TMP_REG_LMUL.U
1561        csBundle(i + 2).lsrc(2) := dest + i.U // old vd
1562        csBundle(i + 2).ldest := dest + i.U
1563        csBundle(i + 2).uopIdx := i.U
1564      }
1565    }
1566    is(UopSplitType.VEC_I_LDST) {
          // Uop split for UopSplitType.VEC_I_LDST: one scalar move uop followed by
          // MAX_INDEXED_LS_UOPNUM memory uops whose vs2 / vd register offsets come
          // from the per-uop lookup modules in indexedLSRegOffset.
1567    /*
1568      FMV.D.X
1569       */
1570      val vlmul = vlmulReg
1571      val vsew = Cat(0.U(1.W), vsewReg)
1572      val veew = Cat(0.U(1.W), width)
          // `+ 1.U` together with `~vsew` is the two's-complement negation of vsew, so
          // this is veew + vlmul - vsew (modulo the operand width; assumes the operands
          // have equal width -- confirm).
1573      val vemul: UInt = veew.asUInt + 1.U + vlmul.asUInt + ~vsew.asUInt
          // Compress lmul / emul to 2 bits: encodings 1, 2, 3 are kept, every other
          // encoding maps to 0.
1574      val simple_lmul = MuxLookup(vlmul, 0.U(2.W), Array(
1575        "b001".U -> 1.U,
1576        "b010".U -> 2.U,
1577        "b011".U -> 3.U
1578      ))
1579      val simple_emul = MuxLookup(vemul, 0.U(2.W), Array(
1580        "b001".U -> 1.U,
1581        "b010".U -> 2.U,
1582        "b011".U -> 3.U
1583      ))
          // csBundle(0): int -> fp move of source 0 into FP_TMP_REG_MV (lsrc(0) is not
          // reassigned in this block).
1584      csBundle(0).srcType(0) := SrcType.reg
1585      csBundle(0).srcType(1) := SrcType.imm
1586      csBundle(0).lsrc(1) := 0.U
1587      csBundle(0).ldest := FP_TMP_REG_MV.U
1588      csBundle(0).fuType := FuType.i2f.U
1589      csBundle(0).rfWen := false.B
1590      csBundle(0).fpWen := true.B
1591      csBundle(0).vecWen := false.B
1592      csBundle(0).fpu.isAddSub := false.B
1593      csBundle(0).fpu.typeTagIn := FPU.D
1594      csBundle(0).fpu.typeTagOut := FPU.D
1595      csBundle(0).fpu.fromInt := true.B
1596      csBundle(0).fpu.wflags := false.B
1597      csBundle(0).fpu.fpWen := true.B
1598      csBundle(0).fpu.div := false.B
1599      csBundle(0).fpu.sqrt := false.B
1600      csBundle(0).fpu.fcvt := false.B
1601
1602      //LMUL
1603      for (i <- 0 until MAX_INDEXED_LS_UOPNUM) {
            // Lookup key for uop i is {simple_emul, simple_lmul, nf}. The lookup
            // returns the register offset inside the vs2 group, the offset inside the
            // vd group, and whether uop i is the first uop targeting that vd register.
            // (indexedLSRegOffset(i) is presumably an indexedLSUopTable instance for
            // uop i -- its declaration is outside this view.)
1604        indexedLSRegOffset(i).src := Cat(simple_emul, simple_lmul, nf)
1605        val offsetVs2 = indexedLSRegOffset(i).outOffsetVs2
1606        val offsetVd = indexedLSRegOffset(i).outOffsetVd
1607        val isFirstUopInVd = indexedLSRegOffset(i).outIsFirstUopInVd
1608        csBundle(i + 1).srcType(0) := SrcType.fp
1609        csBundle(i + 1).lsrc(0) := FP_TMP_REG_MV.U
            // One-hot select of src2 + offsetVs2 among the MAX_VLMUL candidates.
1610        csBundle(i + 1).lsrc(1) := Mux1H(UIntToOH(offsetVs2, MAX_VLMUL), (0 until MAX_VLMUL).map(j => src2 + j.U))
1611        /**
1612          * For indexed instructions, VLSU will concatenate all the uops that write the same logic vd register and
1613          * writeback only once for all these uops. However, these uops share the same lsrc(2)/old vd and the same
1614          * ldest/vd that is equal to old vd, which leads to data dependence between the uops. Therefore there will be
1615          * deadlock for indexed instructions with emul > lmul.
1616          *
1617          * Assume N = emul/lmul. To break the deadlock, only the first uop will read old vd as lsrc(2), and the rest
1618          * N-1 uops will read temporary vector register.
1619          */
1620        // csBundle(i + 1).lsrc(2) := Mux1H(UIntToOH(offsetVd, MAX_VLMUL), (0 until MAX_VLMUL).map(j => dest + j.U))
1621        csBundle(i + 1).lsrc(2) := Mux(
1622          isFirstUopInVd,
1623          Mux1H(UIntToOH(offsetVd, MAX_VLMUL), (0 until MAX_VLMUL).map(j => dest + j.U)),
1624          VECTOR_TMP_REG_LMUL.U
1625        )
            // Destination is always dest + offsetVd; uopIdx does not count the move uop.
1626        csBundle(i + 1).ldest := Mux1H(UIntToOH(offsetVd, MAX_VLMUL), (0 until MAX_VLMUL).map(j => dest + j.U))
1627        csBundle(i + 1).uopIdx := i.U
1628      }
1629    }
1630  }
1631
1632  // readyFromRename counter: the value paired with the first output port whose ready
      // is low is that port's index, and the default is RenameWidth when every port is
      // ready. (Assumes PriorityMuxDefault returns the value of the first asserted
      // selector and otherwise the default -- it is defined outside this file.)
1633  val readyCounter = PriorityMuxDefault(outReadys.map(x => !x).zip((0 until RenameWidth).map(_.U)), RenameWidth.U)
1634
1635  // The remaining uops of the complex inst in ComplexDecoder can all be sent out this cycle
1636  val thisAllOut = uopRes <= readyCounter
1637
1638  switch(state) {
        // Next-state logic. uopRes holds the number of uops still to be sent;
        // stateNext / uopResNext keep whatever was assigned to them before this
        // switch (outside this view) when no branch below assigns them.
1639    is(s_idle) {
          // Idle: accept a new instruction and load its uop count.
1640      when (inValid) {
1641        stateNext := s_active
1642        uopResNext := inUopInfo.numOfUop
1643      }
1644    }
1645    is(s_active) {
1646      when (thisAllOut) {
            // All remaining uops leave this cycle: either start the next instruction
            // immediately or fall back to idle.
1647        when (inValid) {
1648          stateNext := s_active
1649          uopResNext := inUopInfo.numOfUop
1650        }.otherwise {
1651          stateNext := s_idle
1652          uopResNext := 0.U
1653        }
1654      }.otherwise {
            // Only readyCounter uops leave this cycle; keep the rest for later.
1655        stateNext := s_active
1656        uopResNext := uopRes - readyCounter
1657      }
1658    }
1659  }
1660
      // A redirect overrides the computed next state: back to idle, nothing pending.
1661  state := Mux(io.redirect, s_idle, stateNext)
1662  uopRes := Mux(io.redirect, 0.U, uopResNext)
1663
1664  val complexNum = Mux(uopRes > readyCounter, readyCounter, uopRes)
      // complexNum (above) is the number of uops sent this cycle: the smaller of
      // uopRes and readyCounter.
1665
1666  for(i <- 0 until RenameWidth) {
        // Output slot i is valid while i is below the number of uops sent this cycle.
1667    outValids(i) := complexNum > i.U
        // Slot i carries csBundle entry (i + numOfUop - uopRes); an index at or beyond
        // maxUopSize is clamped to the last entry. NOTE(review): numOfUop - uopRes is
        // presumably the count of uops already sent for the current instruction --
        // numOfUop is defined outside this view.
1668    outDecodedInsts(i) := Mux((i.U + numOfUop - uopRes) < maxUopSize.U, csBundle(i.U + numOfUop - uopRes), csBundle(maxUopSize - 1))
1669  }
1670
      // The uop count is reported only while active. A new instruction is accepted
      // when idle, or when active and the current one finishes this cycle
      // (`&&` binds tighter than `||`).
1671  outComplexNum := Mux(state === s_active, complexNum, 0.U)
1672  inReady := state === s_idle || state === s_active && thisAllOut
1673
1674//  val validSimple = Wire(Vec(DecodeWidth, Bool()))
1675//  validSimple.zip(io.validFromIBuf.zip(io.isComplex)).map{ case (dst, (src1, src2)) => dst := src1 && !src2 }
1676//  val notInf = Wire(Vec(DecodeWidth, Bool()))
1677//  notInf.drop(1).zip(io.validFromIBuf.drop(1).zip(validSimple.drop(1))).map{ case (dst, (src1, src2)) => dst := !src1 || src2 }
1678//  notInf(0) := !io.validFromIBuf(0) || validSimple(0) || (io.isComplex(0) && io.in0pc === io.simple.decodedInst.pc)
1679//  val notInfVec = Wire(Vec(DecodeWidth, Bool()))
1680//  notInfVec.zipWithIndex.map{ case (dst, i) => dst := Cat(notInf.take(i + 1)).andR}
1681//
1682//  complexNum := Mux(io.validFromIBuf(0) && readyCounter.orR ,
1683//    Mux(uopRes0 > readyCounter, readyCounter, uopRes0),
1684//    0.U)
1685//  validToRename.zipWithIndex.foreach{
1686//    case(dst, i) =>
1687//      val validFix = Mux(complexNum.orR, validSimple((i+1).U - complexNum), validSimple(i))
1688//      dst := MuxCase(false.B, Seq(
1689//        (io.validFromIBuf(0) && readyCounter.orR && uopRes0 > readyCounter) -> Mux(readyCounter > i.U, true.B, false.B),
1690//        (io.validFromIBuf(0) && readyCounter.orR && !(uopRes0 > readyCounter)) -> Mux(complexNum > i.U, true.B, validFix && notInfVec(i.U - complexNum) && io.readyFromRename(i)),
1691//      ).toSeq)
1692//  }
1693//
1694//  readyToIBuf.zipWithIndex.foreach {
1695//    case (dst, i) =>
1696//      val readyToIBuf0 = Mux(io.isComplex(0), io.in0pc === io.simple.decodedInst.pc, true.B)
1697//      dst := MuxCase(true.B, Seq(
1698//        (io.validFromIBuf(0) && uopRes0 > readyCounter || !readyCounter.orR) -> false.B,
1699//        (io.validFromIBuf(0) && !(uopRes0 > readyCounter) && readyCounter.orR) -> (if (i==0) readyToIBuf0 else Mux(RenameWidth.U - complexNum >= i.U, notInfVec(i) && validSimple(i) && io.readyFromRename(i), false.B))
1700//      ).toSeq)
1701//  }
1702//
1703//  io.deq.decodedInsts := decodedInsts
1704//  io.deq.complexNum := complexNum
1705//  io.deq.validToRename := validToRename
1706//  io.deq.readyToIBuf := readyToIBuf
1707}
1708