xref: /XiangShan/src/main/scala/xiangshan/backend/fu/wrapper/VFALU.scala (revision 9d7a35d1edfbb626dbeb588fd135303ba5fa3bc9)
1efdf5c1cSxiaofeibao-xjtupackage xiangshan.backend.fu.wrapper
2efdf5c1cSxiaofeibao-xjtu
383ba63b3SXuan Huimport org.chipsalliance.cde.config.Parameters
4efdf5c1cSxiaofeibao-xjtuimport chisel3._
5efdf5c1cSxiaofeibao-xjtuimport chisel3.util._
6bb2f3f51STang Haojinimport utility.XSError
7efdf5c1cSxiaofeibao-xjtuimport xiangshan.backend.fu.FuConfig
8785e3bfdSXuan Huimport xiangshan.backend.fu.vector.Bundles.{VLmul, VSew}
9efdf5c1cSxiaofeibao-xjtuimport xiangshan.backend.fu.vector.utils.VecDataSplitModule
107ee6b881SZiyue Zhangimport xiangshan.backend.fu.vector.{Mgu, Mgtu, VecInfo, VecPipedFuncUnit}
11c33d4a9eSXuan Huimport xiangshan.ExceptionNO
12684d7aceSxiaofeibao-xjtuimport yunsuan.{VfaluType, VfpuType}
13*9d7a35d1Sxiaofeibao-xjtuimport yunsuan.vector.{LZD, VectorFloatAdder}
14cc998cd1Slewislzhimport xiangshan.backend.fu.vector.Bundles.VConfig
15efdf5c1cSxiaofeibao-xjtu
16efdf5c1cSxiaofeibao-xjtuclass VFAlu(cfg: FuConfig)(implicit p: Parameters) extends VecPipedFuncUnit(cfg) {
17efdf5c1cSxiaofeibao-xjtu  XSError(io.in.valid && io.in.bits.ctrl.fuOpType === VfpuType.dummy, "Vfalu OpType not supported")
18efdf5c1cSxiaofeibao-xjtu
19efdf5c1cSxiaofeibao-xjtu  // params alias
202d12882cSxiaofeibao  private val dataWidth = cfg.destDataBits
21efdf5c1cSxiaofeibao-xjtu  private val dataWidthOfDataModule = 64
22efdf5c1cSxiaofeibao-xjtu  private val numVecModule = dataWidth / dataWidthOfDataModule
23efdf5c1cSxiaofeibao-xjtu
24efdf5c1cSxiaofeibao-xjtu  // io alias
25efdf5c1cSxiaofeibao-xjtu  private val opcode  = fuOpType(4,0)
26efdf5c1cSxiaofeibao-xjtu  private val resWiden  = fuOpType(5)
27efdf5c1cSxiaofeibao-xjtu  private val opbWiden  = fuOpType(6)
28efdf5c1cSxiaofeibao-xjtu
29efdf5c1cSxiaofeibao-xjtu  // modules
30efdf5c1cSxiaofeibao-xjtu  private val vfalus = Seq.fill(numVecModule)(Module(new VectorFloatAdder))
31efdf5c1cSxiaofeibao-xjtu  private val vs2Split = Module(new VecDataSplitModule(dataWidth, dataWidthOfDataModule))
32efdf5c1cSxiaofeibao-xjtu  private val vs1Split = Module(new VecDataSplitModule(dataWidth, dataWidthOfDataModule))
33efdf5c1cSxiaofeibao-xjtu  private val oldVdSplit  = Module(new VecDataSplitModule(dataWidth, dataWidthOfDataModule))
34684d7aceSxiaofeibao-xjtu  private val mgu = Module(new Mgu(dataWidth))
357ee6b881SZiyue Zhang  private val mgtu = Module(new Mgtu(dataWidth))
36efdf5c1cSxiaofeibao-xjtu
37efdf5c1cSxiaofeibao-xjtu  /**
38efdf5c1cSxiaofeibao-xjtu    * In connection of [[vs2Split]], [[vs1Split]] and [[oldVdSplit]]
39efdf5c1cSxiaofeibao-xjtu    */
40efdf5c1cSxiaofeibao-xjtu  vs2Split.io.inVecData := vs2
41efdf5c1cSxiaofeibao-xjtu  vs1Split.io.inVecData := vs1
42efdf5c1cSxiaofeibao-xjtu  oldVdSplit.io.inVecData := oldVd
43efdf5c1cSxiaofeibao-xjtu
44efdf5c1cSxiaofeibao-xjtu  /**
45efdf5c1cSxiaofeibao-xjtu    * [[vfalus]]'s in connection
46efdf5c1cSxiaofeibao-xjtu    */
47efdf5c1cSxiaofeibao-xjtu  // Vec(vs2(31,0), vs2(63,32), vs2(95,64), vs2(127,96)) ==>
48efdf5c1cSxiaofeibao-xjtu  // Vec(
49efdf5c1cSxiaofeibao-xjtu  //   Cat(vs2(95,64),  vs2(31,0)),
50efdf5c1cSxiaofeibao-xjtu  //   Cat(vs2(127,96), vs2(63,32)),
51efdf5c1cSxiaofeibao-xjtu  // )
52efdf5c1cSxiaofeibao-xjtu  private val vs2GroupedVec: Vec[UInt] = VecInit(vs2Split.io.outVec32b.zipWithIndex.groupBy(_._2 % 2).map(x => x._1 -> x._2.map(_._1)).values.map(x => Cat(x.reverse)).toSeq)
53efdf5c1cSxiaofeibao-xjtu  private val vs1GroupedVec: Vec[UInt] = VecInit(vs1Split.io.outVec32b.zipWithIndex.groupBy(_._2 % 2).map(x => x._1 -> x._2.map(_._1)).values.map(x => Cat(x.reverse)).toSeq)
54cd1420fbSxiaofeibao-xjtu  private val resultData = Wire(Vec(numVecModule,UInt(dataWidthOfDataModule.W)))
55684d7aceSxiaofeibao-xjtu  private val fflagsData = Wire(Vec(numVecModule,UInt(20.W)))
56582849ffSxiaofeibao-xjtu  private val srcMaskRShiftForReduction = Wire(UInt((8 * numVecModule).W))
57582849ffSxiaofeibao-xjtu  // for reduction
58582849ffSxiaofeibao-xjtu  val isFirstGroupUop = vuopIdx === 0.U ||
59582849ffSxiaofeibao-xjtu    (vuopIdx === 1.U && (vlmul === VLmul.m4 || vlmul === VLmul.m8)) ||
60582849ffSxiaofeibao-xjtu    ((vuopIdx === 2.U || vuopIdx === 3.U) && vlmul === VLmul.m8)
61582849ffSxiaofeibao-xjtu  val maskRshiftWidthForReduction = Wire(UInt(6.W))
6261fc96a9SsinceforYy  maskRshiftWidthForReduction := Mux(fuOpType === VfaluType.vfredosum || fuOpType === VfaluType.vfwredosum,
63b94b1889Sxiaofeibao-xjtu    vuopIdx,
64b94b1889Sxiaofeibao-xjtu    Mux1H(Seq(
65582849ffSxiaofeibao-xjtu      (vsew === VSew.e16) -> (vuopIdx(1, 0) << 4),
66582849ffSxiaofeibao-xjtu      (vsew === VSew.e32) -> (vuopIdx(1, 0) << 3),
67582849ffSxiaofeibao-xjtu      (vsew === VSew.e64) -> (vuopIdx(1, 0) << 2),
68b94b1889Sxiaofeibao-xjtu    ))
69582849ffSxiaofeibao-xjtu  )
70582849ffSxiaofeibao-xjtu  val vlMaskForReduction = (~(Fill(VLEN, 1.U) << vl)).asUInt
71582849ffSxiaofeibao-xjtu  srcMaskRShiftForReduction := ((srcMask & vlMaskForReduction) >> maskRshiftWidthForReduction)(8 * numVecModule - 1, 0)
723bec463eSlewislzh  val existMask = (srcMask & vlMaskForReduction).orR
733bec463eSlewislzh  val existMaskReg = RegEnable(existMask, io.in.fire)
743bec463eSlewislzh
75efdf5c1cSxiaofeibao-xjtu
76582849ffSxiaofeibao-xjtu  def genMaskForReduction(inmask: UInt, sew: UInt, i: Int): UInt = {
77582849ffSxiaofeibao-xjtu    val f64MaskNum = dataWidth / 64 * 2
78582849ffSxiaofeibao-xjtu    val f32MaskNum = dataWidth / 32 * 2
79582849ffSxiaofeibao-xjtu    val f16MaskNum = dataWidth / 16 * 2
80582849ffSxiaofeibao-xjtu    val f64Mask = inmask(f64MaskNum - 1, 0)
81582849ffSxiaofeibao-xjtu    val f32Mask = inmask(f32MaskNum - 1, 0)
82582849ffSxiaofeibao-xjtu    val f16Mask = inmask(f16MaskNum - 1, 0)
83582849ffSxiaofeibao-xjtu    // vs2 reordered, so mask use high bits
8461fc96a9SsinceforYy    val f64FirstFoldMaskUnorder = Mux1H(
85582849ffSxiaofeibao-xjtu      Seq(
86582849ffSxiaofeibao-xjtu        vecCtrl.fpu.isFoldTo1_2 -> Cat(0.U(3.W), f64Mask(0), 0.U(3.W), f64Mask(1)),
87582849ffSxiaofeibao-xjtu      )
88582849ffSxiaofeibao-xjtu    )
8961fc96a9SsinceforYy    val f64FirstFoldMaskOrder = Mux1H(
9061fc96a9SsinceforYy      Seq(
9161fc96a9SsinceforYy        vecCtrl.fpu.isFoldTo1_2 -> Cat(0.U(3.W), f64Mask(1), 0.U(3.W), f64Mask(0))
9261fc96a9SsinceforYy      )
9361fc96a9SsinceforYy    )
9461fc96a9SsinceforYy    val f32FirstFoldMaskUnorder = Mux1H(
95582849ffSxiaofeibao-xjtu      Seq(
96582849ffSxiaofeibao-xjtu        vecCtrl.fpu.isFoldTo1_2 -> Cat(0.U(2.W), f32Mask(1), f32Mask(0), 0.U(2.W), f32Mask(3), f32Mask(2)),
97582849ffSxiaofeibao-xjtu        vecCtrl.fpu.isFoldTo1_4 -> Cat(0.U(3.W), f32Mask(0), 0.U(3.W), f32Mask(1)),
98582849ffSxiaofeibao-xjtu      )
99582849ffSxiaofeibao-xjtu    )
10061fc96a9SsinceforYy    val f32FirstFoldMaskOrder = Mux1H(
10161fc96a9SsinceforYy      Seq(
10261fc96a9SsinceforYy        vecCtrl.fpu.isFoldTo1_2 -> Cat(0.U(2.W), f32Mask(3), f32Mask(2), 0.U(2.W), f32Mask(1), f32Mask(0)),
10361fc96a9SsinceforYy        vecCtrl.fpu.isFoldTo1_4 -> Cat(0.U(3.W), f32Mask(1), 0.U(3.W), f32Mask(0)),
10461fc96a9SsinceforYy      )
10561fc96a9SsinceforYy    )
10661fc96a9SsinceforYy    val f16FirstFoldMaskUnorder = Mux1H(
107582849ffSxiaofeibao-xjtu      Seq(
108800b4e00Sxiaofeibao-xjtu        vecCtrl.fpu.isFoldTo1_2 -> Cat(f16Mask(3,0), f16Mask(7,4)),
109582849ffSxiaofeibao-xjtu        vecCtrl.fpu.isFoldTo1_4 -> Cat(0.U(2.W), f16Mask(1), f16Mask(0), 0.U(2.W), f16Mask(3), f16Mask(2)),
110582849ffSxiaofeibao-xjtu        vecCtrl.fpu.isFoldTo1_8 -> Cat(0.U(3.W), f16Mask(0), 0.U(3.W), f16Mask(1)),
111582849ffSxiaofeibao-xjtu      )
112582849ffSxiaofeibao-xjtu    )
11361fc96a9SsinceforYy    val f16FirstFoldMaskOrder = Mux1H(
11461fc96a9SsinceforYy      Seq(
11561fc96a9SsinceforYy        vecCtrl.fpu.isFoldTo1_2 -> Cat(f16Mask(7,4), f16Mask(3,0)),
11661fc96a9SsinceforYy        vecCtrl.fpu.isFoldTo1_4 -> Cat(0.U(2.W), f16Mask(3), f16Mask(2), 0.U(2.W), f16Mask(1), f16Mask(0)),
11761fc96a9SsinceforYy        vecCtrl.fpu.isFoldTo1_8 -> Cat(0.U(3.W), f16Mask(1), 0.U(3.W), f16Mask(0)),
11861fc96a9SsinceforYy      )
11961fc96a9SsinceforYy    )
120582849ffSxiaofeibao-xjtu    val f64FoldMask = Mux1H(
121582849ffSxiaofeibao-xjtu      Seq(
122582849ffSxiaofeibao-xjtu        vecCtrl.fpu.isFoldTo1_2 -> "b00010001".U,
123582849ffSxiaofeibao-xjtu      )
124582849ffSxiaofeibao-xjtu    )
125582849ffSxiaofeibao-xjtu    val f32FoldMask = Mux1H(
126582849ffSxiaofeibao-xjtu      Seq(
127582849ffSxiaofeibao-xjtu        vecCtrl.fpu.isFoldTo1_2 -> "b00110011".U,
128582849ffSxiaofeibao-xjtu        vecCtrl.fpu.isFoldTo1_4 -> "b00010001".U,
129582849ffSxiaofeibao-xjtu      )
130582849ffSxiaofeibao-xjtu    )
131582849ffSxiaofeibao-xjtu    val f16FoldMask = Mux1H(
132582849ffSxiaofeibao-xjtu      Seq(
133582849ffSxiaofeibao-xjtu        vecCtrl.fpu.isFoldTo1_2 -> "b11111111".U,
134582849ffSxiaofeibao-xjtu        vecCtrl.fpu.isFoldTo1_4 -> "b00110011".U,
135582849ffSxiaofeibao-xjtu        vecCtrl.fpu.isFoldTo1_8 -> "b00010001".U,
136582849ffSxiaofeibao-xjtu      )
137582849ffSxiaofeibao-xjtu    )
138582849ffSxiaofeibao-xjtu    // low 4 bits for vs2(fp_a), high 4 bits for vs1(fp_b),
139582849ffSxiaofeibao-xjtu    val isFold = vecCtrl.fpu.isFoldTo1_2 || vecCtrl.fpu.isFoldTo1_4 || vecCtrl.fpu.isFoldTo1_8
140582849ffSxiaofeibao-xjtu    val f64FirstNotFoldMask = Cat(0.U(3.W), f64Mask(i + 2), 0.U(3.W), f64Mask(i))
141e68f18f0SZiyue Zhang    val f32FirstNotFoldMask = Cat(0.U(2.W), f32Mask(i * 2 + 5, i * 2 + 4), 0.U(2.W), Cat(f32Mask(i * 2 + 1, i * 2)))
142e68f18f0SZiyue Zhang    val f16FirstNotFoldMask = Cat(f16Mask(i * 4 + 11, i * 4 + 8), f16Mask(i * 4 + 3, i * 4))
14361fc96a9SsinceforYy    val f64MaskI = Mux(fuOpType === VfaluType.vfredosum || fuOpType === VfaluType.vfwredosum,
14461fc96a9SsinceforYy      Mux(isFold, f64FirstFoldMaskOrder, f64FirstNotFoldMask),
14561fc96a9SsinceforYy      Mux(isFirstGroupUop,
14661fc96a9SsinceforYy        Mux(isFold, f64FirstFoldMaskUnorder, f64FirstNotFoldMask),
14761fc96a9SsinceforYy        Mux(isFold, f64FoldMask, Fill(8, 1.U))))
14861fc96a9SsinceforYy    val f32MaskI = Mux(fuOpType === VfaluType.vfredosum || fuOpType === VfaluType.vfwredosum,
14961fc96a9SsinceforYy      Mux(isFold, f32FirstFoldMaskOrder, f32FirstNotFoldMask),
15061fc96a9SsinceforYy      Mux(isFirstGroupUop,
15161fc96a9SsinceforYy        Mux(isFold, f32FirstFoldMaskUnorder, f32FirstNotFoldMask),
15261fc96a9SsinceforYy        Mux(isFold, f32FoldMask, Fill(8, 1.U))))
15361fc96a9SsinceforYy    val f16MaskI = Mux(fuOpType === VfaluType.vfredosum || fuOpType === VfaluType.vfwredosum,
15461fc96a9SsinceforYy      Mux(isFold, f16FirstFoldMaskOrder, f16FirstNotFoldMask),
15561fc96a9SsinceforYy      Mux(isFirstGroupUop,
15661fc96a9SsinceforYy        Mux(isFold, f16FirstFoldMaskUnorder, f16FirstNotFoldMask),
15761fc96a9SsinceforYy        Mux(isFold, f16FoldMask, Fill(8, 1.U))))
158582849ffSxiaofeibao-xjtu    val outMask = Mux1H(
159582849ffSxiaofeibao-xjtu      Seq(
160582849ffSxiaofeibao-xjtu        (sew === 3.U) -> f64MaskI,
161582849ffSxiaofeibao-xjtu        (sew === 2.U) -> f32MaskI,
162582849ffSxiaofeibao-xjtu        (sew === 1.U) -> f16MaskI,
163582849ffSxiaofeibao-xjtu      )
164582849ffSxiaofeibao-xjtu    )
165b94b1889Sxiaofeibao-xjtu    Mux(fuOpType === VfaluType.vfredosum || fuOpType === VfaluType.vfwredosum, outMask(0),outMask)
166582849ffSxiaofeibao-xjtu  }
167f06d6d60Sxiaofeibao-xjtu  def genMaskForMerge(inmask:UInt, sew:UInt, i:Int): UInt = {
168f06d6d60Sxiaofeibao-xjtu    val f64MaskNum = dataWidth / 64
169f06d6d60Sxiaofeibao-xjtu    val f32MaskNum = dataWidth / 32
170f06d6d60Sxiaofeibao-xjtu    val f16MaskNum = dataWidth / 16
171f06d6d60Sxiaofeibao-xjtu    val f64Mask = inmask(f64MaskNum-1,0)
172f06d6d60Sxiaofeibao-xjtu    val f32Mask = inmask(f32MaskNum-1,0)
173f06d6d60Sxiaofeibao-xjtu    val f16Mask = inmask(f16MaskNum-1,0)
174f06d6d60Sxiaofeibao-xjtu    val f64MaskI = Cat(0.U(3.W),f64Mask(i))
175f06d6d60Sxiaofeibao-xjtu    val f32MaskI = Cat(0.U(2.W),f32Mask(2*i+1,2*i))
176f06d6d60Sxiaofeibao-xjtu    val f16MaskI = f16Mask(4*i+3,4*i)
177f06d6d60Sxiaofeibao-xjtu    val outMask = Mux1H(
178f06d6d60Sxiaofeibao-xjtu      Seq(
179f06d6d60Sxiaofeibao-xjtu        (sew === 3.U) -> f64MaskI,
180f06d6d60Sxiaofeibao-xjtu        (sew === 2.U) -> f32MaskI,
181f06d6d60Sxiaofeibao-xjtu        (sew === 1.U) -> f16MaskI,
182f06d6d60Sxiaofeibao-xjtu      )
183f06d6d60Sxiaofeibao-xjtu    )
184f06d6d60Sxiaofeibao-xjtu    outMask
185f06d6d60Sxiaofeibao-xjtu  }
186ecc992caSlewislzh  def genMaskForRedFFlag(sew:UInt): UInt = {
187ae44e2b7Slewislzh    val default = "b11111111".U
188aff5ef05Slewislzh    val f64FoldMask = Mux(outVecCtrl.fpu.isFoldTo1_2, "b00000001".U, default)
189aff5ef05Slewislzh    val f32Fold = outVecCtrl.fpu.isFoldTo1_2 || outVecCtrl.fpu.isFoldTo1_4
190ecc992caSlewislzh    val f32FoldMask = Mux1H(
191ecc992caSlewislzh      Seq(
192aff5ef05Slewislzh        outVecCtrl.fpu.isFoldTo1_2 -> "b00000011".U,
193aff5ef05Slewislzh        outVecCtrl.fpu.isFoldTo1_4 -> "b00000001".U,
194ecc992caSlewislzh      )
195ecc992caSlewislzh    )
196aff5ef05Slewislzh    val f16Fold = outVecCtrl.fpu.isFoldTo1_2 || outVecCtrl.fpu.isFoldTo1_4 || outVecCtrl.fpu.isFoldTo1_8
197ecc992caSlewislzh    val f16FoldMask = Mux1H(
198ecc992caSlewislzh      Seq(
199aff5ef05Slewislzh        outVecCtrl.fpu.isFoldTo1_2 -> "b00001111".U,
200aff5ef05Slewislzh        outVecCtrl.fpu.isFoldTo1_4 -> "b00000011".U,
201aff5ef05Slewislzh        outVecCtrl.fpu.isFoldTo1_8 -> "b00000001".U,
202ecc992caSlewislzh      )
203ecc992caSlewislzh    )
204ecc992caSlewislzh    Mux1H(
205ecc992caSlewislzh      Seq(
206ecc992caSlewislzh        (sew === 3.U) -> f64FoldMask,
207ae44e2b7Slewislzh        (sew === 2.U) -> Mux(f32Fold, f32FoldMask, default),
208ae44e2b7Slewislzh        (sew === 1.U) -> Mux(f16Fold, f16FoldMask, default),
209ecc992caSlewislzh      )
210ecc992caSlewislzh    )
211ecc992caSlewislzh  }
2129bb931c8Sxiaofeibao-xjtu  val isScalarMove = (fuOpType === VfaluType.vfmv_f_s) || (fuOpType === VfaluType.vfmv_s_f)
213582849ffSxiaofeibao-xjtu  val srcMaskRShift = Wire(UInt((4 * numVecModule).W))
214582849ffSxiaofeibao-xjtu  val maskRshiftWidth = Wire(UInt(6.W))
215582849ffSxiaofeibao-xjtu  maskRshiftWidth := Mux1H(
216582849ffSxiaofeibao-xjtu    Seq(
217582849ffSxiaofeibao-xjtu      (vsew === VSew.e16) -> (vuopIdx(2,0) << 3),
218582849ffSxiaofeibao-xjtu      (vsew === VSew.e32) -> (vuopIdx(2,0) << 2),
219582849ffSxiaofeibao-xjtu      (vsew === VSew.e64) -> (vuopIdx(2,0) << 1),
220582849ffSxiaofeibao-xjtu    )
221582849ffSxiaofeibao-xjtu  )
222582849ffSxiaofeibao-xjtu  srcMaskRShift := (srcMask >> maskRshiftWidth)(4 * numVecModule - 1, 0)
223bdda74fdSxiaofeibao-xjtu  val fp_aIsFpCanonicalNAN = Wire(Vec(numVecModule,Bool()))
224bdda74fdSxiaofeibao-xjtu  val fp_bIsFpCanonicalNAN = Wire(Vec(numVecModule,Bool()))
22520f53972SsinceforYy  val inIsFold = Wire(UInt(3.W))
22620f53972SsinceforYy  inIsFold := Cat(vecCtrl.fpu.isFoldTo1_8, vecCtrl.fpu.isFoldTo1_4, vecCtrl.fpu.isFoldTo1_2)
227efdf5c1cSxiaofeibao-xjtu  vfalus.zipWithIndex.foreach {
228efdf5c1cSxiaofeibao-xjtu    case (mod, i) =>
229e8e02b74SsinceforYy      mod.io.fire             := io.in.valid
23020f53972SsinceforYy      mod.io.fp_a             := vs2Split.io.outVec64b(i)
23120f53972SsinceforYy      mod.io.fp_b             := vs1Split.io.outVec64b(i)
23220f53972SsinceforYy      mod.io.widen_a          := Cat(vs2Split.io.outVec32b(i+numVecModule), vs2Split.io.outVec32b(i))
23320f53972SsinceforYy      mod.io.widen_b          := Cat(vs1Split.io.outVec32b(i+numVecModule), vs1Split.io.outVec32b(i))
234efdf5c1cSxiaofeibao-xjtu      mod.io.frs1             := 0.U     // already vf -> vv
235efdf5c1cSxiaofeibao-xjtu      mod.io.is_frs1          := false.B // already vf -> vv
2369bb931c8Sxiaofeibao-xjtu      mod.io.mask             := Mux(isScalarMove, !vuopIdx.orR, genMaskForMerge(inmask = srcMaskRShift, sew = vsew, i = i))
237582849ffSxiaofeibao-xjtu      mod.io.maskForReduction := genMaskForReduction(inmask = srcMaskRShiftForReduction, sew = vsew, i = i)
23820f53972SsinceforYy      mod.io.uop_idx          := vuopIdx(0)
239efdf5c1cSxiaofeibao-xjtu      mod.io.is_vec           := true.B // Todo
240c6efb121SZiyue Zhang      mod.io.round_mode       := rm
2413748ec56Sxiaofeibao-xjtu      mod.io.fp_format        := Mux(resWiden, vsew + 1.U, vsew)
24220f53972SsinceforYy      mod.io.opb_widening     := opbWiden
243efdf5c1cSxiaofeibao-xjtu      mod.io.res_widening     := resWiden
244efdf5c1cSxiaofeibao-xjtu      mod.io.op_code          := opcode
24520f53972SsinceforYy      mod.io.is_vfwredosum    := fuOpType === VfaluType.vfwredosum
24620f53972SsinceforYy      mod.io.is_fold          := inIsFold
24720f53972SsinceforYy      mod.io.vs2_fold         := vs2      // for better timing
248cd1420fbSxiaofeibao-xjtu      resultData(i)           := mod.io.fp_result
249684d7aceSxiaofeibao-xjtu      fflagsData(i)           := mod.io.fflags
250bdda74fdSxiaofeibao-xjtu      fp_aIsFpCanonicalNAN(i) := vecCtrl.fpu.isFpToVecInst & (
251bdda74fdSxiaofeibao-xjtu          ((vsew === VSew.e32) & (!vs2Split.io.outVec64b(i).head(32).andR)) |
252bdda74fdSxiaofeibao-xjtu          ((vsew === VSew.e16) & (!vs2Split.io.outVec64b(i).head(48).andR))
253bdda74fdSxiaofeibao-xjtu        )
254bdda74fdSxiaofeibao-xjtu      fp_bIsFpCanonicalNAN(i) := vecCtrl.fpu.isFpToVecInst & (
255bdda74fdSxiaofeibao-xjtu          ((vsew === VSew.e32) & (!vs1Split.io.outVec64b(i).head(32).andR)) |
256bdda74fdSxiaofeibao-xjtu          ((vsew === VSew.e16) & (!vs1Split.io.outVec64b(i).head(48).andR))
257bdda74fdSxiaofeibao-xjtu        )
258bdda74fdSxiaofeibao-xjtu      mod.io.fp_aIsFpCanonicalNAN := fp_aIsFpCanonicalNAN(i)
259bdda74fdSxiaofeibao-xjtu      mod.io.fp_bIsFpCanonicalNAN := fp_bIsFpCanonicalNAN(i)
260efdf5c1cSxiaofeibao-xjtu  }
2612dbaa2e2Sxiaofeibao-xjtu  val outVuopidx = outVecCtrl.vuopIdx(2, 0) // for vfadd max vuopidx=7
2623bec463eSlewislzh  val numOfUopVFRED = Wire(UInt(4.W))
2633bec463eSlewislzh  val numofUopVFREDReg = RegEnable(numOfUopVFRED, io.in.fire)
2643bec463eSlewislzh  val vs1Reg = RegEnable(vs1, io.in.fire)
2650fbf39afSlewislzh  val outIsVfRedUnordered = outCtrl.fuOpType === VfaluType.vfredusum ||
2663bec463eSlewislzh    outCtrl.fuOpType === VfaluType.vfredmax ||
2673bec463eSlewislzh    outCtrl.fuOpType === VfaluType.vfredmin
268477cff67Slewislzh  val outIsVfRedUnComp = outCtrl.fuOpType === VfaluType.vfredmax ||
269477cff67Slewislzh    outCtrl.fuOpType === VfaluType.vfredmin
270477cff67Slewislzh  val outIsVfRedUnSum = outCtrl.fuOpType === VfaluType.vfredusum
2710fbf39afSlewislzh  val outIsVfRedOrdered = outCtrl.fuOpType === VfaluType.vfredosum ||
2720fbf39afSlewislzh    outCtrl.fuOpType === VfaluType.vfwredosum
2730fbf39afSlewislzh
2740fbf39afSlewislzh  val isLastUopRed = outIsVfRedUnordered && outLastUop
2750fbf39afSlewislzh  val resultDataUInt = Mux(isLastUopRed && !existMaskReg, vs1Reg, resultData.asUInt)
276f06d6d60Sxiaofeibao-xjtu  val cmpResultWidth = dataWidth / 16
277f06d6d60Sxiaofeibao-xjtu  val cmpResult = Wire(Vec(cmpResultWidth, Bool()))
278f06d6d60Sxiaofeibao-xjtu  for (i <- 0 until cmpResultWidth) {
279f06d6d60Sxiaofeibao-xjtu    if(i == 0) {
280f06d6d60Sxiaofeibao-xjtu      cmpResult(i) := resultDataUInt(0)
281f06d6d60Sxiaofeibao-xjtu    }
282f06d6d60Sxiaofeibao-xjtu    else if(i < dataWidth / 64) {
283f06d6d60Sxiaofeibao-xjtu      cmpResult(i) := Mux1H(
284f06d6d60Sxiaofeibao-xjtu        Seq(
285f06d6d60Sxiaofeibao-xjtu          (outVecCtrl.vsew === 1.U) -> resultDataUInt(i*16),
286f06d6d60Sxiaofeibao-xjtu          (outVecCtrl.vsew === 2.U) -> resultDataUInt(i*32),
287f06d6d60Sxiaofeibao-xjtu          (outVecCtrl.vsew === 3.U) -> resultDataUInt(i*64)
288f06d6d60Sxiaofeibao-xjtu        )
289f06d6d60Sxiaofeibao-xjtu      )
290f06d6d60Sxiaofeibao-xjtu    }
291f06d6d60Sxiaofeibao-xjtu    else if(i < dataWidth / 32) {
292f06d6d60Sxiaofeibao-xjtu      cmpResult(i) := Mux1H(
293f06d6d60Sxiaofeibao-xjtu        Seq(
294f06d6d60Sxiaofeibao-xjtu          (outVecCtrl.vsew === 1.U) -> resultDataUInt(i * 16),
295f06d6d60Sxiaofeibao-xjtu          (outVecCtrl.vsew === 2.U) -> resultDataUInt(i * 32),
296f06d6d60Sxiaofeibao-xjtu          (outVecCtrl.vsew === 3.U) -> false.B
297f06d6d60Sxiaofeibao-xjtu        )
298f06d6d60Sxiaofeibao-xjtu      )
299f06d6d60Sxiaofeibao-xjtu    }
300f06d6d60Sxiaofeibao-xjtu    else if(i <  dataWidth / 16) {
301f06d6d60Sxiaofeibao-xjtu      cmpResult(i) := Mux(outVecCtrl.vsew === 1.U, resultDataUInt(i*16), false.B)
302f06d6d60Sxiaofeibao-xjtu    }
303f06d6d60Sxiaofeibao-xjtu  }
304cc998cd1Slewislzh  val outCtrl_s0 = ctrlVec.head
305cc998cd1Slewislzh  val outVecCtrl_s0 = ctrlVec.head.vpu.get
306cc998cd1Slewislzh  val outEew_s0 = Mux(resWiden, outVecCtrl_s0.vsew + 1.U, outVecCtrl_s0.vsew)
3071d484543Sxiaofeibao  val outWiden = RegEnable(resWiden, io.in.fire)
3081d484543Sxiaofeibao  val outEew = Mux(outWiden, outVecCtrl.vsew + 1.U, outVecCtrl.vsew)
309cc998cd1Slewislzh  val vlMax_s0 = ((VLEN/8).U >> outEew_s0).asUInt
310bdda74fdSxiaofeibao-xjtu  val vlMax = ((VLEN/8).U >> outEew).asUInt
31177315a6bSxiaofeibao-xjtu  val outVlmulFix = Mux(outWiden, outVecCtrl.vlmul + 1.U, outVecCtrl.vlmul)
3121d484543Sxiaofeibao  val lmulAbs = Mux(outVlmulFix(2), (~outVlmulFix(1,0)).asUInt + 1.U, outVlmulFix(1,0))
313582849ffSxiaofeibao-xjtu  //  vfmv_f_s need vl=1, reduction last uop need vl=1, other uop need vl=vlmax
3143bec463eSlewislzh  numOfUopVFRED := {
315582849ffSxiaofeibao-xjtu    // addTime include add frs1
316e3da8badSTang Haojin    val addTime = MuxLookup(outVecCtrl_s0.vlmul, 1.U(4.W))(Seq(
317582849ffSxiaofeibao-xjtu      VLmul.m2 -> 2.U,
318582849ffSxiaofeibao-xjtu      VLmul.m4 -> 4.U,
319582849ffSxiaofeibao-xjtu      VLmul.m8 -> 8.U,
320582849ffSxiaofeibao-xjtu    ))
321e3da8badSTang Haojin    val foldLastVlmul = MuxLookup(outVecCtrl_s0.vsew, "b000".U)(Seq(
322582849ffSxiaofeibao-xjtu      VSew.e16 -> VLmul.mf8,
323582849ffSxiaofeibao-xjtu      VSew.e32 -> VLmul.mf4,
324582849ffSxiaofeibao-xjtu      VSew.e64 -> VLmul.mf2,
325582849ffSxiaofeibao-xjtu    ))
326582849ffSxiaofeibao-xjtu    // lmul < 1, foldTime = vlmul - foldFastVlmul
327582849ffSxiaofeibao-xjtu    // lmul >= 1, foldTime = 0.U - foldFastVlmul
328cc998cd1Slewislzh    val foldTime = Mux(outVecCtrl_s0.vlmul(2), outVecCtrl_s0.vlmul, 0.U) - foldLastVlmul
329582849ffSxiaofeibao-xjtu    addTime + foldTime
330582849ffSxiaofeibao-xjtu  }
331cc998cd1Slewislzh  val reductionVl = Mux((outVecCtrl_s0.vuopIdx ===  numOfUopVFRED - 1.U) || (outCtrl_s0.fuOpType === VfaluType.vfredosum || outCtrl_s0.fuOpType === VfaluType.vfwredosum), 1.U, vlMax_s0)
332b94b1889Sxiaofeibao-xjtu  val outIsResuction = outCtrl.fuOpType === VfaluType.vfredusum ||
333b94b1889Sxiaofeibao-xjtu    outCtrl.fuOpType === VfaluType.vfredmax ||
334b94b1889Sxiaofeibao-xjtu    outCtrl.fuOpType === VfaluType.vfredmin ||
335b94b1889Sxiaofeibao-xjtu    outCtrl.fuOpType === VfaluType.vfredosum ||
336b94b1889Sxiaofeibao-xjtu    outCtrl.fuOpType === VfaluType.vfwredosum
337cc998cd1Slewislzh  val outIsResuction_s0 = outCtrl_s0.fuOpType === VfaluType.vfredusum ||
338cc998cd1Slewislzh    outCtrl_s0.fuOpType === VfaluType.vfredmax ||
339cc998cd1Slewislzh    outCtrl_s0.fuOpType === VfaluType.vfredmin ||
340cc998cd1Slewislzh    outCtrl_s0.fuOpType === VfaluType.vfredosum ||
341cc998cd1Slewislzh    outCtrl_s0.fuOpType === VfaluType.vfwredosum
342cc998cd1Slewislzh  val outVConfig_s0  = if(!cfg.vconfigWakeUp) outVecCtrl_s0.vconfig else dataVec.head.getSrcVConfig.asTypeOf(new VConfig)
343cc998cd1Slewislzh  val outVl_s0       = outVConfig_s0.vl
344cc998cd1Slewislzh  val outVlFix_s0 = Mux(
345cc998cd1Slewislzh    outVecCtrl_s0.fpu.isFpToVecInst || (outCtrl_s0.fuOpType === VfaluType.vfmv_f_s),
3469bb931c8Sxiaofeibao-xjtu    1.U,
347582849ffSxiaofeibao-xjtu    Mux(
348cc998cd1Slewislzh      outCtrl_s0.fuOpType === VfaluType.vfmv_s_f,
349cc998cd1Slewislzh      outVl_s0.orR,
350cc998cd1Slewislzh      Mux(outIsResuction_s0, reductionVl, outVl_s0)
351582849ffSxiaofeibao-xjtu    )
3529bb931c8Sxiaofeibao-xjtu  )
3537cb9199bSlewislzh  val outVlFix = RegEnable(outVlFix_s0,io.in.fire)
354cc998cd1Slewislzh
355bdda74fdSxiaofeibao-xjtu  val vlMaxAllUop = Wire(outVl.cloneType)
356bdda74fdSxiaofeibao-xjtu  vlMaxAllUop := Mux(outVecCtrl.vlmul(2), vlMax >> lmulAbs, vlMax << lmulAbs).asUInt
357bdda74fdSxiaofeibao-xjtu  val vlMaxThisUop = Mux(outVecCtrl.vlmul(2), vlMax >> lmulAbs, vlMax).asUInt
358bdda74fdSxiaofeibao-xjtu  val vlSetThisUop = Mux(outVlFix > outVuopidx*vlMaxThisUop, outVlFix - outVuopidx*vlMaxThisUop, 0.U)
359614d2bc6SHeiHuDie  val vlThisUop = Wire(UInt(4.W))
360bdda74fdSxiaofeibao-xjtu  vlThisUop := Mux(vlSetThisUop < vlMaxThisUop, vlSetThisUop, vlMaxThisUop)
361bdda74fdSxiaofeibao-xjtu  val vlMaskRShift = Wire(UInt((4 * numVecModule).W))
362bdda74fdSxiaofeibao-xjtu  vlMaskRShift := Fill(4 * numVecModule, 1.U(1.W)) >> ((4 * numVecModule).U - vlThisUop)
363bdda74fdSxiaofeibao-xjtu
3642dbaa2e2Sxiaofeibao-xjtu  val outVuopidxForRed = outVecCtrl.vuopIdx(3, 0) // lmul=8 sew=16, (4+2+1)(vector)+(1+1+1)(fold)+(1)(scala) max vuopIdx=10
3652dbaa2e2Sxiaofeibao-xjtu  val outIsFisrtGroup = outVuopidxForRed === 0.U ||
3662dbaa2e2Sxiaofeibao-xjtu    (outVuopidxForRed === 1.U && (outVlmul === VLmul.m4 || outVlmul === VLmul.m8)) ||
3672dbaa2e2Sxiaofeibao-xjtu    ((outVuopidxForRed === 2.U || outVuopidxForRed === 3.U) && outVlmul === VLmul.m8)
368477cff67Slewislzh  val firstNeedFFlags = outIsFisrtGroup  && outIsVfRedUnComp
369477cff67Slewislzh  val lastNeedFFlags = outVecCtrl.lastUop && outIsVfRedUnComp
370582849ffSxiaofeibao-xjtu  private val needNoMask = outCtrl.fuOpType === VfaluType.vfmerge ||
371582849ffSxiaofeibao-xjtu    outCtrl.fuOpType === VfaluType.vfmv_s_f ||
372582849ffSxiaofeibao-xjtu    outIsResuction ||
373582849ffSxiaofeibao-xjtu    outVecCtrl.fpu.isFpToVecInst
374bdda74fdSxiaofeibao-xjtu  val maskToMgu = Mux(needNoMask, allMaskTrue, outSrcMask)
375efdf5c1cSxiaofeibao-xjtu  val allFFlagsEn = Wire(Vec(4*numVecModule,Bool()))
376f06d6d60Sxiaofeibao-xjtu  val outSrcMaskRShift = Wire(UInt((4*numVecModule).W))
377bdda74fdSxiaofeibao-xjtu  outSrcMaskRShift := (maskToMgu >> (outVecCtrl.vuopIdx(2,0) * vlMax))(4*numVecModule-1,0)
378f06d6d60Sxiaofeibao-xjtu  val f16FFlagsEn = outSrcMaskRShift
379684d7aceSxiaofeibao-xjtu  val f32FFlagsEn = Wire(Vec(numVecModule,UInt(4.W)))
380afd78189Sxiaofeibao-xjtu  val f64FFlagsEn = Wire(Vec(numVecModule, UInt(4.W)))
381afd78189Sxiaofeibao-xjtu  val f16VlMaskEn = vlMaskRShift
382afd78189Sxiaofeibao-xjtu  val f32VlMaskEn = Wire(Vec(numVecModule, UInt(4.W)))
383afd78189Sxiaofeibao-xjtu  val f64VlMaskEn = Wire(Vec(numVecModule, UInt(4.W)))
384684d7aceSxiaofeibao-xjtu  for (i <- 0 until numVecModule){
385582849ffSxiaofeibao-xjtu    f32FFlagsEn(i) := Cat(Fill(2, 0.U), outSrcMaskRShift(2*i+1,2*i))
386582849ffSxiaofeibao-xjtu    f64FFlagsEn(i) := Cat(Fill(3, 0.U), outSrcMaskRShift(i))
387afd78189Sxiaofeibao-xjtu    f32VlMaskEn(i) := Cat(Fill(2, 0.U), vlMaskRShift(2 * i + 1, 2 * i))
388afd78189Sxiaofeibao-xjtu    f64VlMaskEn(i) := Cat(Fill(3, 0.U), vlMaskRShift(i))
389684d7aceSxiaofeibao-xjtu  }
390684d7aceSxiaofeibao-xjtu  val fflagsEn= Mux1H(
391684d7aceSxiaofeibao-xjtu    Seq(
392bdda74fdSxiaofeibao-xjtu      (outEew === 1.U) -> f16FFlagsEn.asUInt,
393bdda74fdSxiaofeibao-xjtu      (outEew === 2.U) -> f32FFlagsEn.asUInt,
394bdda74fdSxiaofeibao-xjtu      (outEew === 3.U) -> f64FFlagsEn.asUInt
395684d7aceSxiaofeibao-xjtu    )
396684d7aceSxiaofeibao-xjtu  )
397afd78189Sxiaofeibao-xjtu  val vlMaskEn = Mux1H(
398afd78189Sxiaofeibao-xjtu    Seq(
399afd78189Sxiaofeibao-xjtu      (outEew === 1.U) -> f16VlMaskEn.asUInt,
400afd78189Sxiaofeibao-xjtu      (outEew === 2.U) -> f32VlMaskEn.asUInt,
401afd78189Sxiaofeibao-xjtu      (outEew === 3.U) -> f64VlMaskEn.asUInt
402afd78189Sxiaofeibao-xjtu    )
403afd78189Sxiaofeibao-xjtu  )
404ae44e2b7Slewislzh  val fflagsRedMask = genMaskForRedFFlag(outVecCtrl.vsew)
405ae44e2b7Slewislzh
406ecc992caSlewislzh  if (backendParams.debugEn){
407ecc992caSlewislzh    dontTouch(allFFlagsEn)
408ae44e2b7Slewislzh    dontTouch(fflagsRedMask)
409ecc992caSlewislzh  }
410*9d7a35d1Sxiaofeibao-xjtu  // use srcMask(XLEN-1, 0) because float format hasn't fp8
411*9d7a35d1Sxiaofeibao-xjtu  val allVmZero = RegEnable(LZD(Reverse(srcMask(XLEN-1, 0))) >= outVl_s0, io.in.fire)
412*9d7a35d1Sxiaofeibao-xjtu  allFFlagsEn := Mux(outIsResuction,
413*9d7a35d1Sxiaofeibao-xjtu    Cat(
414*9d7a35d1Sxiaofeibao-xjtu      Fill(4*numVecModule - 1, firstNeedFFlags || outIsVfRedUnSum && !outVecCtrl.lastUop) & fflagsRedMask(4*numVecModule - 1, 1),
415*9d7a35d1Sxiaofeibao-xjtu      !allVmZero && (lastNeedFFlags || firstNeedFFlags || outIsVfRedOrdered || outIsVfRedUnSum)
416*9d7a35d1Sxiaofeibao-xjtu    ),
417*9d7a35d1Sxiaofeibao-xjtu    fflagsEn & vlMaskEn
418*9d7a35d1Sxiaofeibao-xjtu  ).asTypeOf(allFFlagsEn)
419684d7aceSxiaofeibao-xjtu
420684d7aceSxiaofeibao-xjtu  val allFFlags = fflagsData.asTypeOf(Vec( 4*numVecModule,UInt(5.W)))
421efdf5c1cSxiaofeibao-xjtu  val outFFlags = allFFlagsEn.zip(allFFlags).map{
422efdf5c1cSxiaofeibao-xjtu    case(en,fflags) => Mux(en, fflags, 0.U(5.W))
423efdf5c1cSxiaofeibao-xjtu  }.reduce(_ | _)
424684d7aceSxiaofeibao-xjtu
425684d7aceSxiaofeibao-xjtu
426f06d6d60Sxiaofeibao-xjtu  val cmpResultOldVd = Wire(UInt(cmpResultWidth.W))
427582849ffSxiaofeibao-xjtu  val cmpResultOldVdRshiftWidth = Wire(UInt(6.W))
428582849ffSxiaofeibao-xjtu  cmpResultOldVdRshiftWidth := Mux1H(
429582849ffSxiaofeibao-xjtu    Seq(
430582849ffSxiaofeibao-xjtu      (outVecCtrl.vsew === VSew.e16) -> (outVecCtrl.vuopIdx(2, 0) << 3),
431582849ffSxiaofeibao-xjtu      (outVecCtrl.vsew === VSew.e32) -> (outVecCtrl.vuopIdx(2, 0) << 2),
432582849ffSxiaofeibao-xjtu      (outVecCtrl.vsew === VSew.e64) -> (outVecCtrl.vuopIdx(2, 0) << 1),
433582849ffSxiaofeibao-xjtu    )
434582849ffSxiaofeibao-xjtu  )
435582849ffSxiaofeibao-xjtu  cmpResultOldVd := (outOldVd >> cmpResultOldVdRshiftWidth)(4*numVecModule-1,0)
436f06d6d60Sxiaofeibao-xjtu  val cmpResultForMgu = Wire(Vec(cmpResultWidth, Bool()))
4371cefa917SZiyue Zhang  private val maxVdIdx = 8
4381cefa917SZiyue Zhang  private val elementsInOneUop = Mux1H(
4391cefa917SZiyue Zhang    Seq(
4401cefa917SZiyue Zhang      (outEew === 1.U) -> (cmpResultWidth).U(4.W),
4411cefa917SZiyue Zhang      (outEew === 2.U) -> (cmpResultWidth / 2).U(4.W),
4421cefa917SZiyue Zhang      (outEew === 3.U) -> (cmpResultWidth / 4).U(4.W),
4431cefa917SZiyue Zhang    )
4441cefa917SZiyue Zhang  )
4451cefa917SZiyue Zhang  private val vdIdx = outVecCtrl.vuopIdx(2, 0)
4461cefa917SZiyue Zhang  private val elementsComputed = Mux1H(Seq.tabulate(maxVdIdx)(i => (vdIdx === i.U) -> (elementsInOneUop * i.U)))
447f06d6d60Sxiaofeibao-xjtu  for (i <- 0 until cmpResultWidth) {
4481cefa917SZiyue Zhang    val cmpResultWithVmask = Mux(outSrcMaskRShift(i), cmpResult(i), Mux(outVecCtrl.vma, true.B, cmpResultOldVd(i)))
4497ee6b881SZiyue Zhang    cmpResultForMgu(i) := Mux(elementsComputed +& i.U >= outVl, true.B, cmpResultWithVmask)
450f06d6d60Sxiaofeibao-xjtu  }
// Old-vd selection and tail-agnostic override for vfredosum / vfwredosum.
val outIsFold = Seq(
  outVecCtrl.fpu.isFoldTo1_2,
  outVecCtrl.fpu.isFoldTo1_4,
  outVecCtrl.fpu.isFoldTo1_8
).reduce(_ || _)
// vfredosum: old vd shifted right by one element of the current vsew (16/32/64 bits).
val outOldVdForREDO = Mux1H(
  Seq(VSew.e16 -> 16, VSew.e32 -> 32, VSew.e64 -> 64).map { case (sew, elemBits) =>
    (outVecCtrl.vsew === sew) -> (outOldVd >> elemBits)
  }
)
val outOldVdForWREDO = {
  val sewIsE16 = outVecCtrl.vsew === VSew.e16
  // Not folding: bits [VLEN-17:16] followed by 32 zero bits for e16,
  // bits [VLEN-33:32] followed by 64 zero bits otherwise.
  val whenNotFolding = Mux(
    sewIsE16,
    Cat(outOldVd(VLEN - 1 - 16, 16), 0.U(32.W)),
    Cat(outOldVd(VLEN - 1 - 32, 32), 0.U(64.W))
  )
  // Folding: old vd is used unshifted when the low vuopIdx bits equal 1,
  // otherwise it is shifted right by 16 (e16) or 32 bits.
  val whenFolding = Mux(
    sewIsE16,
    // vuopIdx modulo 8 equals 1
    Mux(outVecCtrl.vuopIdx(2, 0) === 1.U, outOldVd, outOldVd >> 16),
    // vuopIdx modulo 4 equals 1
    Mux(outVecCtrl.vuopIdx(1, 0) === 1.U, outOldVd, outOldVd >> 32)
  )
  Mux(outIsFold, whenFolding, whenNotFolding)
}
val outOldVdForRED = Mux(outCtrl.fuOpType === VfaluType.vfredosum, outOldVdForREDO, outOldVdForWREDO)
val numOfUopVFREDOSUM = {
  // Base count per vsew: 8 (e16), 4 (e32), 2 (e64), 0 for anything else.
  val baseCount = MuxLookup(outVecCtrl.vsew, 0.U)(Seq(
    VSew.e16 -> 8.U,
    VSew.e32 -> 4.U,
    VSew.e64 -> 2.U
  ))
  // vlmul(2) set: shift right by the low two bits of -vlmul; clear: shift left by vlmul(1, 0).
  val scaledDown = baseCount >> ((-outVecCtrl.vlmul)(1, 0))
  val scaledUp = baseCount << outVecCtrl.vlmul(1, 0)
  Mux(outVecCtrl.vlmul(2), scaledDown, scaledUp).asUInt
}
val isLastUopForREDO = outVecCtrl.lastUop
// Use the reduction-specific old vd for a folding vfredosum or any vfwredosum, except on the last uop.
val isOutOldVdForREDO = {
  val foldingRedosum = (outCtrl.fuOpType === VfaluType.vfredosum) && outIsFold
  val isWredosum = outCtrl.fuOpType === VfaluType.vfwredosum
  (foldingRedosum || isWredosum) && !isLastUopForREDO
}
// For vfredosum / vfwredosum, every uop whose index differs from numOfUopVFREDOSUM - 1 gets ta forced low.
val taIsFalseForVFREDO = {
  val isRedosum = (outCtrl.fuOpType === VfaluType.vfredosum) || (outCtrl.fuOpType === VfaluType.vfwredosum)
  val notFinalUopIdx = outVecCtrl.vuopIdx =/= (numOfUopVFREDOSUM - 1.U)
  isRedosum && notFinalUopIdx
}
// outVecCtrl.fpu.isFpToVecInst marks a scalar floating-point instruction rather than a
// vector floating-point instruction.
val notUseVl = outVecCtrl.fpu.isFpToVecInst || (outCtrl.fuOpType === VfaluType.vfmv_f_s)
// vl is in use and equals zero: mgu's valid input is forced low below.
val notModifyVd = !notUseVl && (outVl === 0.U)

// ---- mgu data inputs ----
// Mask destinations feed cmpResultForMgu with the upper dataWidth / 16 * 15 bits zeroed;
// everything else feeds resultDataUInt.
mgu.io.in.vd := Mux(outVecCtrl.isDstMask, Cat(0.U((dataWidth / 16 * 15).W), cmpResultForMgu.asUInt), resultDataUInt)
mgu.io.in.oldVd := Mux(isOutOldVdForREDO, outOldVdForRED, outOldVd)
mgu.io.in.mask := maskToMgu

// ---- mgu control info ----
// ta: forced high for vfmv_f_s, forced low while taIsFalseForVFREDO holds, otherwise vta.
mgu.io.in.info.ta := Mux(outCtrl.fuOpType === VfaluType.vfmv_f_s, true.B, Mux(taIsFalseForVFREDO, false.B, outVecCtrl.vta))
// ma: forced high for vfmv_s_f, otherwise vma.
mgu.io.in.info.ma := Mux(outCtrl.fuOpType === VfaluType.vfmv_s_f, true.B, outVecCtrl.vma)
mgu.io.in.info.vl := outVlFix
mgu.io.in.info.vlmul := outVecCtrl.vlmul
// NOTE(review): valid is taken from io.in.valid while the neighbouring fields use out* signals — confirm this is intended.
mgu.io.in.info.valid := Mux(notModifyVd, false.B, io.in.valid)
// Single driver for vstart: zero for isFpToVecInst, otherwise the instruction's vstart.
// An earlier unconditional `:= outVecCtrl.vstart` was overridden by this connection
// (last connect wins) and has been removed.
mgu.io.in.info.vstart := Mux(outVecCtrl.fpu.isFpToVecInst, 0.U, outVecCtrl.vstart)
// eew and vdIdx are captured from the *_s0 signals when io.in fires.
mgu.io.in.info.eew := RegEnable(outEew_s0, io.in.fire)
mgu.io.in.info.vsew := outVecCtrl.vsew
mgu.io.in.info.vdIdx := RegEnable(Mux(outIsResuction_s0, 0.U, outVecCtrl_s0.vuopIdx), io.in.fire)
mgu.io.in.info.narrow := outVecCtrl.isNarrow
mgu.io.in.info.dstMask := outVecCtrl.isDstMask
mgu.io.in.isIndexedVls := false.B

// ---- mgtu inputs ----
// Mask destinations pass mgu's output on to mgtu; otherwise mgtu sees resultDataUInt.
mgtu.io.in.vd := Mux(outVecCtrl.isDstMask, mgu.io.out.vd, resultDataUInt)
mgtu.io.in.vl := outVl
// Scalar (isFpToVecInst) fclass and compare instructions keep only the low 16 result bits;
// every other instruction keeps all VLEN bits.
val isFclass = outVecCtrl.fpu.isFpToVecInst && (outCtrl.fuOpType === VfaluType.vfclass)
val fpCmpFuOpType = Seq(VfaluType.vfeq, VfaluType.vflt, VfaluType.vfle)
val isCmp = outVecCtrl.fpu.isFpToVecInst && fpCmpFuOpType.map(outCtrl.fuOpType === _).reduce(_ || _)
val resultFpMask = Wire(UInt(VLEN.W))
resultFpMask := Mux(isFclass || isCmp, Fill(16, 1.U(1.W)), Fill(VLEN, 1.U(1.W)))
// When the destination is a mask, the result additionally needs to be masked by mgtu.
io.out.bits.res.data := {
  val selectedVd = Mux(outVecCtrl.isDstMask, mgtu.io.out.vd, mgu.io.out.vd)
  // notModifyVd: return the old vd unchanged.
  Mux(notModifyVd, outOldVd, selectedVd & resultFpMask)
}
// No flags are reported when vd is not modified.
io.out.bits.res.fflags.get := Mux(notModifyVd, 0.U(5.W), outFFlags)
io.out.bits.ctrl.exceptionVec.get(ExceptionNO.illegalInstr) := mgu.io.out.illegal
510684d7aceSxiaofeibao-xjtu
511684d7aceSxiaofeibao-xjtu}
512684d7aceSxiaofeibao-xjtu
/**
 * Module shell around a [[VFMguIO]] port.
 *
 * The body only declares aliases for the input fields and a lane-count constant.
 * NOTE(review): `io.out.vd` is not driven anywhere in this class — presumably an
 * unfinished stub; confirm before instantiating it.
 *
 * @param vlen width in bits of the vector data ports
 */
class VFMgu(vlen: Int)(implicit p: Parameters) extends Module {
  val io = IO(new VFMguIO(vlen))

  // Number of 16-bit chunks in a vlen-bit vector.
  val num16bits = vlen / 16

  // Short aliases for the input ports.
  val vd = io.in.vd
  val oldvd = io.in.oldVd
  val mask = io.in.mask
  // Note: this alias reads info.eew, not info.vsew.
  val vsew = io.in.info.eew

}
523684d7aceSxiaofeibao-xjtu
/**
 * Port bundle used by [[VFMgu]].
 *
 * @param vlen width in bits of the `vd`, `oldVd` and `mask` inputs and of the `vd` output
 */
class VFMguIO(vlen: Int)(implicit p: Parameters) extends Bundle {
  // Inputs: three vlen-bit vectors plus the VecInfo control bundle.
  val in = new Bundle {
    val vd    = Input(UInt(vlen.W))
    val oldVd = Input(UInt(vlen.W))
    val mask  = Input(UInt(vlen.W))
    val info  = Input(new VecInfo)
  }
  // Output: a single vlen-bit vector.
  val out = new Bundle {
    val vd = Output(UInt(vlen.W))
  }
}