package xiangshan.backend.fu.wrapper

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import utility.XSError
import xiangshan.backend.fu.FuConfig
import xiangshan.backend.fu.vector.Bundles.{VLmul, VSew}
import xiangshan.backend.fu.vector.utils.VecDataSplitModule
import xiangshan.backend.fu.vector.{Mgu, Mgtu, VecInfo, VecPipedFuncUnit}
import xiangshan.ExceptionNO
import yunsuan.{VfaluType, VfpuType}
import yunsuan.vector.{LZD, VectorFloatAdder}
import xiangshan.backend.fu.vector.Bundles.VConfig

/**
  * Vector floating-point ALU wrapper.
  *
  * Splits the vector sources into 64-bit slices, feeds each slice to one [[VectorFloatAdder]]
  * instance (`dataWidth / 64` instances in total), then post-processes the concatenated result
  * through [[Mgu]] (and [[Mgtu]] when the destination is a mask) and gathers the per-element fflags.
  *
  * @param cfg functional-unit configuration; `cfg.destDataBits` sets the data width handled here
  */
class VFAlu(cfg: FuConfig)(implicit p: Parameters) extends VecPipedFuncUnit(cfg) {
  XSError(io.in.valid && io.in.bits.ctrl.fuOpType === VfpuType.dummy, "Vfalu OpType not supported")

  // params alias
  private val dataWidth = cfg.destDataBits
  private val dataWidthOfDataModule = 64
  private val numVecModule = dataWidth / dataWidthOfDataModule

  // io alias
  private val opcode = fuOpType(4,0)
  private val resWiden = fuOpType(5)
  private val opbWiden = fuOpType(6)

  // modules
  private val vfalus = Seq.fill(numVecModule)(Module(new VectorFloatAdder))
  private val vs2Split = Module(new VecDataSplitModule(dataWidth, dataWidthOfDataModule))
  private val vs1Split = Module(new VecDataSplitModule(dataWidth, dataWidthOfDataModule))
  private val oldVdSplit = Module(new VecDataSplitModule(dataWidth, dataWidthOfDataModule))
  private val mgu = Module(new Mgu(dataWidth))
  private val mgtu = Module(new Mgtu(dataWidth))

  /**
    * In connection of [[vs2Split]], [[vs1Split]] and [[oldVdSplit]]
    */
  vs2Split.io.inVecData := vs2
  vs1Split.io.inVecData := vs1
  oldVdSplit.io.inVecData := oldVd

  /**
    * [[vfalus]]'s in connection
    */
  // Vec(vs2(31,0), vs2(63,32), vs2(95,64), vs2(127,96)) ==>
  // Vec(
  //   Cat(vs2(95,64),  vs2(31,0)),
  //   Cat(vs2(127,96), vs2(63,32)),
  // )
  // NOTE(review): vs2GroupedVec / vs1GroupedVec are not referenced anywhere else in the visible
  // part of this file; the adders below are fed from outVec64b / outVec32b directly.
  private val vs2GroupedVec: Vec[UInt] = VecInit(vs2Split.io.outVec32b.zipWithIndex.groupBy(_._2 % 2).map(x => x._1 -> x._2.map(_._1)).values.map(x => Cat(x.reverse)).toSeq)
  private val vs1GroupedVec: Vec[UInt] = VecInit(vs1Split.io.outVec32b.zipWithIndex.groupBy(_._2 % 2).map(x => x._1 -> x._2.map(_._1)).values.map(x => Cat(x.reverse)).toSeq)
  private val resultData = Wire(Vec(numVecModule,UInt(dataWidthOfDataModule.W)))
  private val fflagsData = Wire(Vec(numVecModule,UInt(20.W)))
  private val srcMaskRShiftForReduction = Wire(UInt((8 * numVecModule).W))
  // for reduction
  val isFirstGroupUop = vuopIdx === 0.U ||
    (vuopIdx === 1.U && (vlmul === VLmul.m4 || vlmul === VLmul.m8)) ||
    ((vuopIdx === 2.U || vuopIdx === 3.U) && vlmul === VLmul.m8)
  val maskRshiftWidthForReduction = Wire(UInt(6.W))
  // Ordered sums shift the mask by vuopIdx itself; other reductions shift by
  // vuopIdx(1, 0) scaled according to vsew.
  maskRshiftWidthForReduction := Mux(fuOpType === VfaluType.vfredosum || fuOpType === VfaluType.vfwredosum,
    vuopIdx,
    Mux1H(Seq(
      (vsew === VSew.e16) -> (vuopIdx(1, 0) << 4),
      (vsew === VSew.e32) -> (vuopIdx(1, 0) << 3),
      (vsew === VSew.e64) -> (vuopIdx(1, 0) << 2),
    ))
  )
  // Ones in bit positions [vl-1:0], zeros above.
  val vlMaskForReduction = (~(Fill(VLEN, 1.U) << vl)).asUInt
  srcMaskRShiftForReduction := ((srcMask & vlMaskForReduction) >> maskRshiftWidthForReduction)(8 * numVecModule - 1, 0)
  // True when at least one mask bit below vl is set.
  val existMask = (srcMask & vlMaskForReduction).orR
  val existMaskReg = RegEnable(existMask, io.in.fire)

  /**
    * Builds the `maskForReduction` input of the i-th [[VectorFloatAdder]].
    *
    * The selection depends on `sew`, on the fold flags in `vecCtrl.fpu`, on whether the op is an
    * ordered sum (vfredosum / vfwredosum) and on [[isFirstGroupUop]].
    *
    * @param inmask source mask already shifted for the current uop
    * @param sew    element width selector (1: f16, 2: f32, 3: f64)
    * @param i      index of the 64-bit adder instance
    * @return the 8-bit mask (low 4 bits for vs2 / fp_a, high 4 bits for vs1 / fp_b); for ordered
    *         sums only bit 0 of that mask is returned
    */
  def genMaskForReduction(inmask: UInt, sew: UInt, i: Int): UInt = {
    val f64MaskNum = dataWidth / 64 * 2
    val f32MaskNum = dataWidth / 32 * 2
    val f16MaskNum = dataWidth / 16 * 2
    val f64Mask = inmask(f64MaskNum - 1, 0)
    val f32Mask = inmask(f32MaskNum - 1, 0)
    val f16Mask = inmask(f16MaskNum - 1, 0)
    // vs2 reordered, so mask use high bits
    val f64FirstFoldMaskUnorder = Mux1H(
      Seq(
        vecCtrl.fpu.isFoldTo1_2 -> Cat(0.U(3.W), f64Mask(0), 0.U(3.W), f64Mask(1)),
      )
    )
    val f64FirstFoldMaskOrder = Mux1H(
      Seq(
        vecCtrl.fpu.isFoldTo1_2 -> Cat(0.U(3.W), f64Mask(1), 0.U(3.W), f64Mask(0))
      )
    )
    val f32FirstFoldMaskUnorder = Mux1H(
      Seq(
        vecCtrl.fpu.isFoldTo1_2 -> Cat(0.U(2.W), f32Mask(1), f32Mask(0), 0.U(2.W), f32Mask(3), f32Mask(2)),
        vecCtrl.fpu.isFoldTo1_4 -> Cat(0.U(3.W), f32Mask(0), 0.U(3.W), f32Mask(1)),
      )
    )
    val f32FirstFoldMaskOrder = Mux1H(
      Seq(
        vecCtrl.fpu.isFoldTo1_2 -> Cat(0.U(2.W), f32Mask(3), f32Mask(2), 0.U(2.W), f32Mask(1), f32Mask(0)),
        vecCtrl.fpu.isFoldTo1_4 -> Cat(0.U(3.W), f32Mask(1), 0.U(3.W), f32Mask(0)),
      )
    )
    val f16FirstFoldMaskUnorder = Mux1H(
      Seq(
        vecCtrl.fpu.isFoldTo1_2 -> Cat(f16Mask(3,0), f16Mask(7,4)),
        vecCtrl.fpu.isFoldTo1_4 -> Cat(0.U(2.W), f16Mask(1), f16Mask(0), 0.U(2.W), f16Mask(3), f16Mask(2)),
        vecCtrl.fpu.isFoldTo1_8 -> Cat(0.U(3.W), f16Mask(0), 0.U(3.W), f16Mask(1)),
      )
    )
    val f16FirstFoldMaskOrder = Mux1H(
      Seq(
        vecCtrl.fpu.isFoldTo1_2 -> Cat(f16Mask(7,4), f16Mask(3,0)),
        vecCtrl.fpu.isFoldTo1_4 -> Cat(0.U(2.W), f16Mask(3), f16Mask(2), 0.U(2.W), f16Mask(1), f16Mask(0)),
        vecCtrl.fpu.isFoldTo1_8 -> Cat(0.U(3.W), f16Mask(1), 0.U(3.W), f16Mask(0)),
      )
    )
    val f64FoldMask = Mux1H(
      Seq(
        vecCtrl.fpu.isFoldTo1_2 -> "b00010001".U,
      )
    )
    val f32FoldMask = Mux1H(
      Seq(
        vecCtrl.fpu.isFoldTo1_2 -> "b00110011".U,
        vecCtrl.fpu.isFoldTo1_4 -> "b00010001".U,
      )
    )
    val f16FoldMask = Mux1H(
      Seq(
        vecCtrl.fpu.isFoldTo1_2 -> "b11111111".U,
        vecCtrl.fpu.isFoldTo1_4 -> "b00110011".U,
        vecCtrl.fpu.isFoldTo1_8 -> "b00010001".U,
      )
    )
    // low 4 bits for vs2(fp_a), high 4 bits for vs1(fp_b),
    val isFold = vecCtrl.fpu.isFoldTo1_2 || vecCtrl.fpu.isFoldTo1_4 || vecCtrl.fpu.isFoldTo1_8
    val f64FirstNotFoldMask = Cat(0.U(3.W), f64Mask(i + 2), 0.U(3.W), f64Mask(i))
    val f32FirstNotFoldMask = Cat(0.U(2.W), f32Mask(i * 2 + 5, i * 2 + 4), 0.U(2.W), Cat(f32Mask(i * 2 + 1, i * 2)))
    val f16FirstNotFoldMask = Cat(f16Mask(i * 4 + 11, i * 4 + 8), f16Mask(i * 4 + 3, i * 4))
    val f64MaskI = Mux(fuOpType === VfaluType.vfredosum || fuOpType === VfaluType.vfwredosum,
      Mux(isFold, f64FirstFoldMaskOrder, f64FirstNotFoldMask),
      Mux(isFirstGroupUop,
        Mux(isFold, f64FirstFoldMaskUnorder, f64FirstNotFoldMask),
        Mux(isFold, f64FoldMask, Fill(8, 1.U))))
    val f32MaskI = Mux(fuOpType === VfaluType.vfredosum || fuOpType === VfaluType.vfwredosum,
      Mux(isFold, f32FirstFoldMaskOrder, f32FirstNotFoldMask),
      Mux(isFirstGroupUop,
        Mux(isFold, f32FirstFoldMaskUnorder, f32FirstNotFoldMask),
        Mux(isFold, f32FoldMask, Fill(8, 1.U))))
    val f16MaskI = Mux(fuOpType === VfaluType.vfredosum || fuOpType === VfaluType.vfwredosum,
      Mux(isFold, f16FirstFoldMaskOrder, f16FirstNotFoldMask),
      Mux(isFirstGroupUop,
        Mux(isFold, f16FirstFoldMaskUnorder, f16FirstNotFoldMask),
        Mux(isFold, f16FoldMask, Fill(8, 1.U))))
    val outMask = Mux1H(
      Seq(
        (sew === 3.U) -> f64MaskI,
        (sew === 2.U) -> f32MaskI,
        (sew === 1.U) -> f16MaskI,
      )
    )
    Mux(fuOpType === VfaluType.vfredosum || fuOpType === VfaluType.vfwredosum, outMask(0),outMask)
  }

  /**
    * Extracts the slice of `inmask` that belongs to the i-th 64-bit adder.
    *
    * @param inmask source mask already shifted for the current uop
    * @param sew    element width selector (1: f16, 2: f32, 3: f64)
    * @param i      index of the 64-bit adder instance
    * @return 4-bit mask: four bits for f16, two bits (zero-extended) for f32, one bit
    *         (zero-extended) for f64
    */
  def genMaskForMerge(inmask:UInt, sew:UInt, i:Int): UInt = {
    val f64MaskNum = dataWidth / 64
    val f32MaskNum = dataWidth / 32
    val f16MaskNum = dataWidth / 16
    val f64Mask = inmask(f64MaskNum-1,0)
    val f32Mask = inmask(f32MaskNum-1,0)
    val f16Mask = inmask(f16MaskNum-1,0)
    val f64MaskI = Cat(0.U(3.W),f64Mask(i))
    val f32MaskI = Cat(0.U(2.W),f32Mask(2*i+1,2*i))
    val f16MaskI = f16Mask(4*i+3,4*i)
    val outMask = Mux1H(
      Seq(
        (sew === 3.U) -> f64MaskI,
        (sew === 2.U) -> f32MaskI,
        (sew === 1.U) -> f16MaskI,
      )
    )
    outMask
  }

  /**
    * Selects which fflags lanes are considered for reduction uops, based on the fold flags of
    * the output-stage control (`outVecCtrl.fpu`).
    *
    * @param sew element width selector (1: f16, 2: f32, 3: f64)
    * @return 8-bit lane mask; all ones when no fold flag applies for the given `sew`
    */
  def genMaskForRedFFlag(sew:UInt): UInt = {
    val default = "b11111111".U
    val f64FoldMask = Mux(outVecCtrl.fpu.isFoldTo1_2, "b00000001".U, default)
    val f32Fold = outVecCtrl.fpu.isFoldTo1_2 || outVecCtrl.fpu.isFoldTo1_4
    val f32FoldMask = Mux1H(
      Seq(
        outVecCtrl.fpu.isFoldTo1_2 -> "b00000011".U,
        outVecCtrl.fpu.isFoldTo1_4 -> "b00000001".U,
      )
    )
    val f16Fold = outVecCtrl.fpu.isFoldTo1_2 || outVecCtrl.fpu.isFoldTo1_4 || outVecCtrl.fpu.isFoldTo1_8
    val f16FoldMask = Mux1H(
      Seq(
        outVecCtrl.fpu.isFoldTo1_2 -> "b00001111".U,
        outVecCtrl.fpu.isFoldTo1_4 -> "b00000011".U,
        outVecCtrl.fpu.isFoldTo1_8 -> "b00000001".U,
      )
    )
    Mux1H(
      Seq(
        (sew === 3.U) -> f64FoldMask,
        (sew === 2.U) -> Mux(f32Fold, f32FoldMask, default),
        (sew === 1.U) -> Mux(f16Fold, f16FoldMask, default),
      )
    )
  }

  val isScalarMove = (fuOpType === VfaluType.vfmv_f_s) || (fuOpType === VfaluType.vfmv_s_f)
  val srcMaskRShift = Wire(UInt((4 * numVecModule).W))
  val maskRshiftWidth = Wire(UInt(6.W))
  maskRshiftWidth := Mux1H(
    Seq(
      (vsew === VSew.e16) -> (vuopIdx(2,0) << 3),
      (vsew === VSew.e32) -> (vuopIdx(2,0) << 2),
      (vsew === VSew.e64) -> (vuopIdx(2,0) << 1),
    )
  )
  srcMaskRShift := (srcMask >> maskRshiftWidth)(4 * numVecModule - 1, 0)
  val fp_aIsFpCanonicalNAN = Wire(Vec(numVecModule,Bool()))
  val fp_bIsFpCanonicalNAN = Wire(Vec(numVecModule,Bool()))
  val inIsFold = Wire(UInt(3.W))
  inIsFold := Cat(vecCtrl.fpu.isFoldTo1_8, vecCtrl.fpu.isFoldTo1_4, vecCtrl.fpu.isFoldTo1_2)
  vfalus.zipWithIndex.foreach {
    case (mod, i) =>
      mod.io.fire := io.in.valid
      mod.io.fp_a := vs2Split.io.outVec64b(i)
      mod.io.fp_b := vs1Split.io.outVec64b(i)
      mod.io.widen_a := Cat(vs2Split.io.outVec32b(i+numVecModule), vs2Split.io.outVec32b(i))
      mod.io.widen_b := Cat(vs1Split.io.outVec32b(i+numVecModule), vs1Split.io.outVec32b(i))
      mod.io.frs1 := 0.U // already vf -> vv
      mod.io.is_frs1 := false.B // already vf -> vv
      mod.io.mask := Mux(isScalarMove, !vuopIdx.orR, genMaskForMerge(inmask = srcMaskRShift, sew = vsew, i = i))
      mod.io.maskForReduction := genMaskForReduction(inmask = srcMaskRShiftForReduction, sew = vsew, i = i)
      mod.io.uop_idx := vuopIdx(0)
      mod.io.is_vec := true.B // Todo
      mod.io.round_mode := rm
      mod.io.fp_format := Mux(resWiden, vsew + 1.U, vsew)
      mod.io.opb_widening := opbWiden
      mod.io.res_widening := resWiden
      mod.io.op_code := opcode
      mod.io.is_vfwredosum := fuOpType === VfaluType.vfwredosum
      mod.io.is_fold := inIsFold
      mod.io.vs2_fold := vs2 // for better timing
      resultData(i) := mod.io.fp_result
      fflagsData(i) := mod.io.fflags
      // For isFpToVecInst ops, flag the operand when the bits above the element
      // (upper 32 bits for e32, upper 48 bits for e16) are not all ones.
      fp_aIsFpCanonicalNAN(i) := vecCtrl.fpu.isFpToVecInst & (
        ((vsew === VSew.e32) & (!vs2Split.io.outVec64b(i).head(32).andR)) |
        ((vsew === VSew.e16) & (!vs2Split.io.outVec64b(i).head(48).andR))
      )
      fp_bIsFpCanonicalNAN(i) := vecCtrl.fpu.isFpToVecInst & (
        ((vsew === VSew.e32) & (!vs1Split.io.outVec64b(i).head(32).andR)) |
        ((vsew === VSew.e16) & (!vs1Split.io.outVec64b(i).head(48).andR))
      )
      mod.io.fp_aIsFpCanonicalNAN := fp_aIsFpCanonicalNAN(i)
      mod.io.fp_bIsFpCanonicalNAN := fp_bIsFpCanonicalNAN(i)
  }
  val outVuopidx = outVecCtrl.vuopIdx(2, 0) // for vfadd max vuopidx=7
  val numOfUopVFRED = Wire(UInt(4.W))
  val numofUopVFREDReg = RegEnable(numOfUopVFRED, io.in.fire)
  val vs1Reg = RegEnable(vs1, io.in.fire)
  val outIsVfRedUnordered = outCtrl.fuOpType === VfaluType.vfredusum ||
    outCtrl.fuOpType === VfaluType.vfredmax ||
    outCtrl.fuOpType === VfaluType.vfredmin
  val outIsVfRedUnComp = outCtrl.fuOpType === VfaluType.vfredmax ||
    outCtrl.fuOpType === VfaluType.vfredmin
  val outIsVfRedUnSum = outCtrl.fuOpType === VfaluType.vfredusum
  val outIsVfRedOrdered = outCtrl.fuOpType === VfaluType.vfredosum ||
    outCtrl.fuOpType === VfaluType.vfwredosum

  val isLastUopRed = outIsVfRedUnordered && outLastUop
  // On the last uop of an unordered reduction with no mask bit set below vl,
  // the registered vs1 is used as the result instead of the adder output.
  val resultDataUInt = Mux(isLastUopRed && !existMaskReg, vs1Reg, resultData.asUInt)
  val cmpResultWidth = dataWidth / 16
  val cmpResult = Wire(Vec(cmpResultWidth, Bool()))
  // Compare results: bit i is taken from bit 0 of element i of the result, where the
  // element stride (16 / 32 / 64) follows outVecCtrl.vsew; lanes beyond the element count are false.
  for (i <- 0 until cmpResultWidth) {
    if(i == 0) {
      cmpResult(i) := resultDataUInt(0)
    }
    else if(i < dataWidth / 64) {
      cmpResult(i) := Mux1H(
        Seq(
          (outVecCtrl.vsew === 1.U) -> resultDataUInt(i*16),
          (outVecCtrl.vsew === 2.U) -> resultDataUInt(i*32),
          (outVecCtrl.vsew === 3.U) -> resultDataUInt(i*64)
        )
      )
    }
    else if(i < dataWidth / 32) {
      cmpResult(i) := Mux1H(
        Seq(
          (outVecCtrl.vsew === 1.U) -> resultDataUInt(i * 16),
          (outVecCtrl.vsew === 2.U) -> resultDataUInt(i * 32),
          (outVecCtrl.vsew === 3.U) -> false.B
        )
      )
    }
    else if(i < dataWidth / 16) {
      cmpResult(i) := Mux(outVecCtrl.vsew === 1.U, resultDataUInt(i*16), false.B)
    }
  }
  val outCtrl_s0 = ctrlVec.head
  val outVecCtrl_s0 = ctrlVec.head.vpu.get
  val outEew_s0 = Mux(resWiden, outVecCtrl_s0.vsew + 1.U, outVecCtrl_s0.vsew)
  val outWiden = RegEnable(resWiden, io.in.fire)
  val outEew = Mux(outWiden, outVecCtrl.vsew + 1.U, outVecCtrl.vsew)
  val vlMax_s0 = ((VLEN/8).U >> outEew_s0).asUInt
  val vlMax = ((VLEN/8).U >> outEew).asUInt
  val outVlmulFix = Mux(outWiden, outVecCtrl.vlmul + 1.U, outVecCtrl.vlmul)
  val lmulAbs = Mux(outVlmulFix(2), (~outVlmulFix(1,0)).asUInt + 1.U, outVlmulFix(1,0))
  // vfmv_f_s need vl=1, reduction last uop need vl=1, other uop need vl=vlmax
  numOfUopVFRED := {
    // addTime include add frs1
    val addTime = MuxLookup(outVecCtrl_s0.vlmul, 1.U(4.W))(Seq(
      VLmul.m2 -> 2.U,
      VLmul.m4 -> 4.U,
      VLmul.m8 -> 8.U,
    ))
    val foldLastVlmul = MuxLookup(outVecCtrl_s0.vsew, "b000".U)(Seq(
      VSew.e16 -> VLmul.mf8,
      VSew.e32 -> VLmul.mf4,
      VSew.e64 -> VLmul.mf2,
    ))
    // lmul < 1,  foldTime = vlmul - foldLastVlmul
    // lmul >= 1, foldTime = 0.U - foldLastVlmul
    val foldTime = Mux(outVecCtrl_s0.vlmul(2), outVecCtrl_s0.vlmul, 0.U) - foldLastVlmul
    addTime + foldTime
  }
  val reductionVl = Mux((outVecCtrl_s0.vuopIdx === numOfUopVFRED - 1.U) || (outCtrl_s0.fuOpType === VfaluType.vfredosum || outCtrl_s0.fuOpType === VfaluType.vfwredosum), 1.U, vlMax_s0)
  val outIsResuction = outCtrl.fuOpType === VfaluType.vfredusum ||
    outCtrl.fuOpType === VfaluType.vfredmax ||
    outCtrl.fuOpType === VfaluType.vfredmin ||
    outCtrl.fuOpType === VfaluType.vfredosum ||
    outCtrl.fuOpType === VfaluType.vfwredosum
  val outIsResuction_s0 = outCtrl_s0.fuOpType === VfaluType.vfredusum ||
    outCtrl_s0.fuOpType === VfaluType.vfredmax ||
    outCtrl_s0.fuOpType === VfaluType.vfredmin ||
    outCtrl_s0.fuOpType === VfaluType.vfredosum ||
    outCtrl_s0.fuOpType === VfaluType.vfwredosum
  val outVConfig_s0 = if(!cfg.vconfigWakeUp) outVecCtrl_s0.vconfig else dataVec.head.getSrcVConfig.asTypeOf(new VConfig)
  val outVl_s0 = outVConfig_s0.vl
  val outVlFix_s0 = Mux(
    outVecCtrl_s0.fpu.isFpToVecInst || (outCtrl_s0.fuOpType === VfaluType.vfmv_f_s),
    1.U,
    Mux(
      outCtrl_s0.fuOpType === VfaluType.vfmv_s_f,
      outVl_s0.orR,
      Mux(outIsResuction_s0, reductionVl, outVl_s0)
    )
  )
  val outVlFix = RegEnable(outVlFix_s0,io.in.fire)

  val vlMaxAllUop = Wire(outVl.cloneType)
  vlMaxAllUop := Mux(outVecCtrl.vlmul(2), vlMax >> lmulAbs, vlMax << lmulAbs).asUInt
  val vlMaxThisUop = Mux(outVecCtrl.vlmul(2), vlMax >> lmulAbs, vlMax).asUInt
  val vlSetThisUop = Mux(outVlFix > outVuopidx*vlMaxThisUop, outVlFix - outVuopidx*vlMaxThisUop, 0.U)
  val vlThisUop = Wire(UInt(4.W))
  vlThisUop := Mux(vlSetThisUop < vlMaxThisUop, vlSetThisUop, vlMaxThisUop)
  val vlMaskRShift = Wire(UInt((4 * numVecModule).W))
  // Ones in the low vlThisUop bit positions.
  vlMaskRShift := Fill(4 * numVecModule, 1.U(1.W)) >> ((4 * numVecModule).U - vlThisUop)

  val outVuopidxForRed = outVecCtrl.vuopIdx(3, 0) // lmul=8 sew=16, (4+2+1)(vector)+(1+1+1)(fold)+(1)(scalar) max vuopIdx=10
  val outIsFisrtGroup = outVuopidxForRed === 0.U ||
    (outVuopidxForRed === 1.U && (outVlmul === VLmul.m4 || outVlmul === VLmul.m8)) ||
    ((outVuopidxForRed === 2.U || outVuopidxForRed === 3.U) && outVlmul === VLmul.m8)
  val firstNeedFFlags = outIsFisrtGroup && outIsVfRedUnComp
  val lastNeedFFlags = outVecCtrl.lastUop && outIsVfRedUnComp
  private val needNoMask = outCtrl.fuOpType === VfaluType.vfmerge ||
    outCtrl.fuOpType === VfaluType.vfmv_s_f ||
    outIsResuction ||
    outVecCtrl.fpu.isFpToVecInst
  val maskToMgu = Mux(needNoMask, allMaskTrue, outSrcMask)
  val allFFlagsEn = Wire(Vec(4*numVecModule,Bool()))
  val outSrcMaskRShift = Wire(UInt((4*numVecModule).W))
  outSrcMaskRShift := (maskToMgu >> (outVecCtrl.vuopIdx(2,0) * vlMax))(4*numVecModule-1,0)
  val f16FFlagsEn = outSrcMaskRShift
  val f32FFlagsEn = Wire(Vec(numVecModule,UInt(4.W)))
  val f64FFlagsEn = Wire(Vec(numVecModule, UInt(4.W)))
  val f16VlMaskEn = vlMaskRShift
  val f32VlMaskEn = Wire(Vec(numVecModule, UInt(4.W)))
  val f64VlMaskEn = Wire(Vec(numVecModule, UInt(4.W)))
  for (i <- 0 until numVecModule){
    f32FFlagsEn(i) := Cat(Fill(2, 0.U), outSrcMaskRShift(2*i+1,2*i))
    f64FFlagsEn(i) := Cat(Fill(3, 0.U), outSrcMaskRShift(i))
    f32VlMaskEn(i) := Cat(Fill(2, 0.U), vlMaskRShift(2 * i + 1, 2 * i))
    f64VlMaskEn(i) := Cat(Fill(3, 0.U), vlMaskRShift(i))
  }
  val fflagsEn = Mux1H(
    Seq(
      (outEew === 1.U) -> f16FFlagsEn.asUInt,
      (outEew === 2.U) -> f32FFlagsEn.asUInt,
      (outEew === 3.U) -> f64FFlagsEn.asUInt
    )
  )
  val vlMaskEn = Mux1H(
    Seq(
      (outEew === 1.U) -> f16VlMaskEn.asUInt,
      (outEew === 2.U) -> f32VlMaskEn.asUInt,
      (outEew === 3.U) -> f64VlMaskEn.asUInt
    )
  )
  val fflagsRedMask = genMaskForRedFFlag(outVecCtrl.vsew)

  if (backendParams.debugEn){
    dontTouch(allFFlagsEn)
    dontTouch(fflagsRedMask)
  }
  // use srcMask(XLEN-1, 0) because float format hasn't fp8
  val allVmZero = RegEnable(LZD(Reverse(srcMask(XLEN-1, 0))) >= outVl_s0, io.in.fire)
  // Reductions: lane 0 is enabled unless allVmZero; the upper lanes are gated by fflagsRedMask.
  // Non-reduction ops: a lane is enabled when both its mask bit and its vl bit are set.
  allFFlagsEn := Mux(outIsResuction,
    Cat(
      Fill(4*numVecModule - 1, firstNeedFFlags || outIsVfRedUnSum && !outVecCtrl.lastUop) & fflagsRedMask(4*numVecModule - 1, 1),
      !allVmZero && (lastNeedFFlags || firstNeedFFlags || outIsVfRedOrdered || outIsVfRedUnSum)
    ),
    fflagsEn & vlMaskEn
  ).asTypeOf(allFFlagsEn)

  // OR together the 5-bit fflags of every enabled lane.
  val allFFlags = fflagsData.asTypeOf(Vec(4*numVecModule,UInt(5.W)))
  val outFFlags = allFFlagsEn.zip(allFFlags).map{
    case(en,fflags) => Mux(en, fflags, 0.U(5.W))
  }.reduce(_ | _)


  val cmpResultOldVd = Wire(UInt(cmpResultWidth.W))
  val cmpResultOldVdRshiftWidth = Wire(UInt(6.W))
  cmpResultOldVdRshiftWidth := Mux1H(
    Seq(
      (outVecCtrl.vsew === VSew.e16) -> (outVecCtrl.vuopIdx(2, 0) << 3),
      (outVecCtrl.vsew === VSew.e32) -> (outVecCtrl.vuopIdx(2, 0) << 2),
      (outVecCtrl.vsew === VSew.e64) -> (outVecCtrl.vuopIdx(2, 0) << 1),
    )
  )
  cmpResultOldVd := (outOldVd >> cmpResultOldVdRshiftWidth)(4*numVecModule-1,0)
  val cmpResultForMgu = Wire(Vec(cmpResultWidth, Bool()))
  private val maxVdIdx = 8
  private val elementsInOneUop = Mux1H(
    Seq(
      (outEew === 1.U) -> (cmpResultWidth).U(4.W),
      (outEew === 2.U) -> (cmpResultWidth / 2).U(4.W),
      (outEew === 3.U) -> (cmpResultWidth / 4).U(4.W),
    )
  )
  private val vdIdx = outVecCtrl.vuopIdx(2, 0)
  private val elementsComputed = Mux1H(Seq.tabulate(maxVdIdx)(i => (vdIdx === i.U) -> (elementsInOneUop * i.U)))
  for (i <- 0 until cmpResultWidth) {
    // Masked-off lanes take 1 when vma is set, otherwise the old vd bit.
    val cmpResultWithVmask = Mux(outSrcMaskRShift(i), cmpResult(i), Mux(outVecCtrl.vma, true.B, cmpResultOldVd(i)))
    // Lanes at or beyond outVl are forced to 1.
    cmpResultForMgu(i) := Mux(elementsComputed +& i.U >= outVl, true.B, cmpResultWithVmask)
  }
  val outIsFold = outVecCtrl.fpu.isFoldTo1_2 || outVecCtrl.fpu.isFoldTo1_4 || outVecCtrl.fpu.isFoldTo1_8
  val outOldVdForREDO = Mux1H(Seq(
    (outVecCtrl.vsew === VSew.e16) -> (outOldVd >> 16),
    (outVecCtrl.vsew === VSew.e32) -> (outOldVd >> 32),
    (outVecCtrl.vsew === VSew.e64) -> (outOldVd >> 64),
  ))
  val outOldVdForWREDO = Mux(
    !outIsFold,
    Mux(outVecCtrl.vsew === VSew.e16, Cat(outOldVd(VLEN-1-16,16), 0.U(32.W)), Cat(outOldVd(VLEN-1-32,32), 0.U(64.W))),
    Mux(outVecCtrl.vsew === VSew.e16,
      // Divide vuopIdx by 8 and the remainder is 1
      Mux(outVecCtrl.vuopIdx(2,0) === 1.U, outOldVd, outOldVd >> 16),
      // Divide vuopIdx by 4 and the remainder is 1
      Mux(outVecCtrl.vuopIdx(1,0) === 1.U, outOldVd, outOldVd >> 32)
    ),
  )
  val outOldVdForRED = Mux(outCtrl.fuOpType === VfaluType.vfredosum, outOldVdForREDO, outOldVdForWREDO)
  val numOfUopVFREDOSUM = {
    val uvlMax = MuxLookup(outVecCtrl.vsew, 0.U)(Seq(
      VSew.e16 -> 8.U,
      VSew.e32 -> 4.U,
      VSew.e64 -> 2.U,
    ))
    val vlMax = Mux(outVecCtrl.vlmul(2), uvlMax >> (-outVecCtrl.vlmul)(1, 0), uvlMax << outVecCtrl.vlmul(1, 0)).asUInt
    vlMax
  }
  val isLastUopForREDO = outVecCtrl.lastUop
  val isOutOldVdForREDO = ((outCtrl.fuOpType === VfaluType.vfredosum && outIsFold) || outCtrl.fuOpType === VfaluType.vfwredosum) && !isLastUopForREDO
  val taIsFalseForVFREDO = ((outCtrl.fuOpType === VfaluType.vfredosum) || (outCtrl.fuOpType === VfaluType.vfwredosum)) && (outVecCtrl.vuopIdx =/= numOfUopVFREDOSUM - 1.U)
  // outVecCtrl.fpu.isFpToVecInst means the instruction is float instruction, not vector float instruction
  val notUseVl = outVecCtrl.fpu.isFpToVecInst || (outCtrl.fuOpType === VfaluType.vfmv_f_s)
  val notModifyVd = !notUseVl && (outVl === 0.U)
  mgu.io.in.vd := Mux(outVecCtrl.isDstMask, Cat(0.U((dataWidth / 16 * 15).W), cmpResultForMgu.asUInt), resultDataUInt)
  mgu.io.in.oldVd := Mux(isOutOldVdForREDO, outOldVdForRED, outOldVd)
  mgu.io.in.mask := maskToMgu
  mgu.io.in.info.ta := Mux(outCtrl.fuOpType === VfaluType.vfmv_f_s, true.B , Mux(taIsFalseForVFREDO, false.B, outVecCtrl.vta))
  mgu.io.in.info.ma := Mux(outCtrl.fuOpType === VfaluType.vfmv_s_f, true.B , outVecCtrl.vma)
  mgu.io.in.info.vl := outVlFix
  mgu.io.in.info.vlmul := outVecCtrl.vlmul
  mgu.io.in.info.valid := Mux(notModifyVd, false.B, io.in.valid)
  // vstart is forced to 0 for isFpToVecInst ops. An earlier unconditional
  // `vstart := outVecCtrl.vstart` connect was dead (overridden by this one) and has been removed.
  mgu.io.in.info.vstart := Mux(outVecCtrl.fpu.isFpToVecInst, 0.U, outVecCtrl.vstart)
  mgu.io.in.info.eew := RegEnable(outEew_s0,io.in.fire)
  mgu.io.in.info.vsew := outVecCtrl.vsew
  mgu.io.in.info.vdIdx := RegEnable(Mux(outIsResuction_s0, 0.U, outVecCtrl_s0.vuopIdx), io.in.fire)
  mgu.io.in.info.narrow := outVecCtrl.isNarrow
  mgu.io.in.info.dstMask := outVecCtrl.isDstMask
  mgu.io.in.isIndexedVls := false.B
  mgtu.io.in.vd := Mux(outVecCtrl.isDstMask, mgu.io.out.vd, resultDataUInt)
  mgtu.io.in.vl := outVl
  val resultFpMask = Wire(UInt(VLEN.W))
  val isFclass = outVecCtrl.fpu.isFpToVecInst && (outCtrl.fuOpType === VfaluType.vfclass)
  val fpCmpFuOpType = Seq(VfaluType.vfeq, VfaluType.vflt, VfaluType.vfle)
  val isCmp = outVecCtrl.fpu.isFpToVecInst && (fpCmpFuOpType.map(_ === outCtrl.fuOpType).reduce(_|_))
  // For isFpToVecInst fclass / compare ops only the low 16 bits of the result are kept.
  resultFpMask := Mux(isFclass || isCmp, Fill(16, 1.U(1.W)), Fill(VLEN, 1.U(1.W)))
  // when dest is mask, the result need to be masked by mgtu
  io.out.bits.res.data := Mux(notModifyVd, outOldVd, Mux(outVecCtrl.isDstMask, mgtu.io.out.vd, mgu.io.out.vd) & resultFpMask)
  io.out.bits.res.fflags.get := Mux(notModifyVd, 0.U(5.W), outFFlags)
  io.out.bits.ctrl.exceptionVec.get(ExceptionNO.illegalInstr) := mgu.io.out.illegal

}

/**
  * Skeleton module around [[VFMguIO]].
  *
  * As written it only aliases its inputs; `io.out.vd` is not driven anywhere in this module.
  *
  * @param vlen width in bits of the vector data ports
  */
class VFMgu(vlen:Int)(implicit p: Parameters) extends Module{
  val io = IO(new VFMguIO(vlen))

  val vd = io.in.vd
  val oldvd = io.in.oldVd
  val mask = io.in.mask
  val vsew = io.in.info.eew
  val num16bits = vlen / 16

}

/**
  * IO bundle of [[VFMgu]]: new and old destination data, a mask and a [[VecInfo]] as inputs,
  * and a single vector data output.
  *
  * @param vlen width in bits of the `vd`, `oldVd` and `mask` ports
  */
class VFMguIO(vlen: Int)(implicit p: Parameters) extends Bundle {
  val in = new Bundle {
    val vd = Input(UInt(vlen.W))
    val oldVd = Input(UInt(vlen.W))
    val mask = Input(UInt(vlen.W))
    val info = Input(new VecInfo)
  }
  val out = new Bundle {
    val vd = Output(UInt(vlen.W))
  }
}