xref: /XiangShan/src/main/scala/xiangshan/backend/fu/wrapper/VFMA.scala (revision 614d2bc6eead7bc6e6e71c4d6dc850d2d5ad3aef)
1efdf5c1cSxiaofeibao-xjtupackage xiangshan.backend.fu.wrapper
2efdf5c1cSxiaofeibao-xjtu
383ba63b3SXuan Huimport org.chipsalliance.cde.config.Parameters
4efdf5c1cSxiaofeibao-xjtuimport chisel3._
5efdf5c1cSxiaofeibao-xjtuimport chisel3.util._
6bb2f3f51STang Haojinimport utility.XSError
7efdf5c1cSxiaofeibao-xjtuimport xiangshan.backend.fu.FuConfig
8efdf5c1cSxiaofeibao-xjtuimport xiangshan.backend.fu.vector.Bundles.VSew
9efdf5c1cSxiaofeibao-xjtuimport xiangshan.backend.fu.vector.utils.VecDataSplitModule
10bdda74fdSxiaofeibao-xjtuimport xiangshan.backend.fu.vector.{Mgu, VecPipedFuncUnit}
11c33d4a9eSXuan Huimport xiangshan.ExceptionNO
12efdf5c1cSxiaofeibao-xjtuimport yunsuan.VfpuType
13bdda74fdSxiaofeibao-xjtuimport yunsuan.VfmaType
14efdf5c1cSxiaofeibao-xjtuimport yunsuan.vector.VectorFloatFMA
15efdf5c1cSxiaofeibao-xjtu
/**
  * Vector floating-point fused multiply-add function unit wrapper.
  *
  * The vs2 / vs1 / old-vd operands are cut into 64-bit slices, each slice is fed to one
  * [[VectorFloatFMA]] instance, the per-element fflags are gated by the source mask and
  * by vl, and the concatenated results are routed through [[Mgu]] before being written
  * to `io.out`.
  *
  * @param cfg function unit configuration; `cfg.destDataBits` sets the datapath width
  * @param p   implicit design parameters
  */
class VFMA(cfg: FuConfig)(implicit p: Parameters) extends VecPipedFuncUnit(cfg) {
  XSError(io.in.valid && io.in.bits.ctrl.fuOpType === VfpuType.dummy, "Vfma OpType not supported")

  // params alias
  private val dataWidth = cfg.destDataBits
  private val dataWidthOfDataModule = 64
  private val numVecModule = dataWidth / dataWidthOfDataModule

  // io alias
  private val opcode  = fuOpType(3,0)
  private val resWiden  = fuOpType(4)

  // modules
  private val vfmas = Seq.fill(numVecModule)(Module(new VectorFloatFMA))
  private val vs2Split = Module(new VecDataSplitModule(dataWidth, dataWidthOfDataModule))
  private val vs1Split = Module(new VecDataSplitModule(dataWidth, dataWidthOfDataModule))
  private val oldVdSplit  = Module(new VecDataSplitModule(dataWidth, dataWidthOfDataModule))
  private val mgu = Module(new Mgu(dataWidth))

  /**
    * In connection of [[vs2Split]], [[vs1Split]] and [[oldVdSplit]]
    */
  vs2Split.io.inVecData := vs2
  vs1Split.io.inVecData := vs1
  oldVdSplit.io.inVecData := oldVd

  /**
    * High for an `isFpToVecInst` operation whose 64-bit source slice does not carry
    * all-ones in the bits above the element: the upper 32 bits for e32, the upper
    * 48 bits for e16. For any other `vsew` the result is low.
    *
    * NOTE(review): presumably this flags a scalar FP operand that is not NaN-boxed so
    * that the FMA treats it as a canonical NaN -- confirm against [[VectorFloatFMA]].
    *
    * @param src one 64-bit slice of a source operand
    */
  private def vfmaSrcNotNanBoxed(src: UInt): Bool = vecCtrl.fpu.isFpToVecInst & (
    ((vsew === VSew.e32) & (!src.head(32).andR)) |
      ((vsew === VSew.e16) & (!src.head(48).andR))
    )

  /**
    * [[vfmas]]'s in connection
    */
  private val resultData = Wire(Vec(numVecModule, UInt(dataWidthOfDataModule.W)))
  // 20 bits per module; reinterpreted below as 4 groups of 5 fflags bits.
  private val fflagsData = Wire(Vec(numVecModule, UInt(20.W)))
  val fp_aIsFpCanonicalNAN = Wire(Vec(numVecModule, Bool()))
  val fp_bIsFpCanonicalNAN = Wire(Vec(numVecModule, Bool()))
  val fp_cIsFpCanonicalNAN = Wire(Vec(numVecModule, Bool()))
  vfmas.zipWithIndex.foreach {
    case (mod, i) =>
      mod.io.fire         := io.in.valid
      mod.io.fp_a         := vs2Split.io.outVec64b(i)
      mod.io.fp_b         := vs1Split.io.outVec64b(i)
      mod.io.fp_c         := oldVdSplit.io.outVec64b(i)
      // Widening sources: module i gets the 32-bit slices i and i + numVecModule.
      mod.io.widen_a      := Cat(vs2Split.io.outVec32b(i+numVecModule), vs2Split.io.outVec32b(i))
      mod.io.widen_b      := Cat(vs1Split.io.outVec32b(i+numVecModule), vs1Split.io.outVec32b(i))
      mod.io.frs1         := 0.U     // already vf -> vv
      mod.io.is_frs1      := false.B // already vf -> vv
      mod.io.uop_idx      := vuopIdx(0)
      mod.io.is_vec       := true.B // Todo
      mod.io.round_mode   := rm
      // A widening op produces results one element-width step above vsew.
      mod.io.fp_format    := Mux(resWiden, vsew + 1.U, vsew)
      mod.io.res_widening := resWiden
      mod.io.op_code      := opcode
      resultData(i) := mod.io.fp_result
      fflagsData(i) := mod.io.fflags
      fp_aIsFpCanonicalNAN(i) := vfmaSrcNotNanBoxed(vs2Split.io.outVec64b(i))
      fp_bIsFpCanonicalNAN(i) := vfmaSrcNotNanBoxed(vs1Split.io.outVec64b(i))
      // The old-vd operand is excluded from the check for vfmul.
      fp_cIsFpCanonicalNAN(i) := !(opcode === VfmaType.vfmul) & vfmaSrcNotNanBoxed(oldVdSplit.io.outVec64b(i))
      mod.io.fp_aIsFpCanonicalNAN := fp_aIsFpCanonicalNAN(i)
      mod.io.fp_bIsFpCanonicalNAN := fp_bIsFpCanonicalNAN(i)
      mod.io.fp_cIsFpCanonicalNAN := fp_cIsFpCanonicalNAN(i)
  }

  val outFuOpType = outCtrl.fuOpType
  val outWiden = outCtrl.fuOpType(4)
  // Effective element width of the result: one step above vsew for widening ops.
  val outEew = Mux(outWiden, outVecCtrl.vsew + 1.U, outVecCtrl.vsew)
  val outVuopidx = outVecCtrl.vuopIdx(2, 0)
  // (VLEN / 8) bytes shifted right by outEew.
  val vlMax = ((VLEN / 8).U >> outEew).asUInt
  val outVlmulFix = Mux(outWiden, outVecCtrl.vlmul + 1.U, outVecCtrl.vlmul)
  // When bit 2 is set, take the two's-complement negation of the low two bits.
  val lmulAbs = Mux(outVlmulFix(2), (~outVlmulFix(1, 0)).asUInt + 1.U, outVlmulFix(1, 0))
  // isFpToVecInst operations are handled with vl forced to 1.
  val outVlFix = Mux(outVecCtrl.fpu.isFpToVecInst, 1.U, outVl)
  val vlMaxAllUop = Wire(outVl.cloneType)
  vlMaxAllUop := Mux(outVecCtrl.vlmul(2), vlMax >> lmulAbs, vlMax << lmulAbs).asUInt
  val vlMaxThisUop = Mux(outVecCtrl.vlmul(2), vlMax >> lmulAbs, vlMax).asUInt
  // Elements still to be processed from this uop onwards, clamped at zero.
  val vlSetThisUop = Mux(outVlFix > outVuopidx * vlMaxThisUop, outVlFix - outVuopidx * vlMaxThisUop, 0.U)
  // NOTE(review): 4 bits hold values up to 15; this assumes one uop never covers more
  // than that many elements (4 * numVecModule lanes are tracked below) -- confirm if
  // the datapath width or VLEN changes.
  val vlThisUop = Wire(UInt(4.W))
  vlThisUop := Mux(vlSetThisUop < vlMaxThisUop, vlSetThisUop, vlMaxThisUop)
  // Low vlThisUop bits set: one enable bit per element lane of this uop.
  val vlMaskRShift = Wire(UInt((4 * numVecModule).W))
  vlMaskRShift := Fill(4 * numVecModule, 1.U(1.W)) >> ((4 * numVecModule).U - vlThisUop)

  private val needNoMask = outVecCtrl.fpu.isFpToVecInst
  val maskToMgu = Mux(needNoMask, allMaskTrue, outSrcMask)
  val allFFlagsEn = Wire(Vec(4 * numVecModule, Bool()))
  // Mask bits belonging to this uop, moved down to bit 0.
  val outSrcMaskRShift = Wire(UInt((4 * numVecModule).W))
  outSrcMaskRShift := (maskToMgu >> (outVuopidx * vlMax))(4 * numVecModule - 1, 0)
  val f16FFlagsEn = outSrcMaskRShift
  val f32FFlagsEn = Wire(Vec(numVecModule, UInt(4.W)))
  val f64FFlagsEn = Wire(Vec(numVecModule, UInt(4.W)))
  val f16VlMaskEn = vlMaskRShift
  val f32VlMaskEn = Wire(Vec(numVecModule, UInt(4.W)))
  val f64VlMaskEn = Wire(Vec(numVecModule, UInt(4.W)))
  // Each module owns a 4-bit enable group: f32 uses the low 2 bits of a group,
  // f64 only bit 0, and the remaining bits are zero-filled.
  for (i <- 0 until numVecModule) {
    f32FFlagsEn(i) := Cat(Fill(2, 0.U), outSrcMaskRShift(2 * i + 1, 2 * i))
    f64FFlagsEn(i) := Cat(Fill(3, 0.U), outSrcMaskRShift(i))
    f32VlMaskEn(i) := Cat(Fill(2, 0.U), vlMaskRShift(2 * i + 1, 2 * i))
    f64VlMaskEn(i) := Cat(Fill(3, 0.U), vlMaskRShift(i))
  }
  // Select the enable layout matching the result element width (1: f16, 2: f32, 3: f64).
  val fflagsEn = Mux1H(
    Seq(
      (outEew === 1.U) -> f16FFlagsEn.asUInt,
      (outEew === 2.U) -> f32FFlagsEn.asUInt,
      (outEew === 3.U) -> f64FFlagsEn.asUInt
    )
  )
  val vlMaskEn = Mux1H(
    Seq(
      (outEew === 1.U) -> f16VlMaskEn.asUInt,
      (outEew === 2.U) -> f32VlMaskEn.asUInt,
      (outEew === 3.U) -> f64VlMaskEn.asUInt
    )
  )
  // A lane contributes fflags only when it is enabled by both the mask and vl.
  allFFlagsEn := (fflagsEn & vlMaskEn).asTypeOf(allFFlagsEn)

  val allFFlags = fflagsData.asTypeOf(Vec(4 * numVecModule, UInt(5.W)))
  // OR together the 5-bit fflags of every enabled lane.
  val outFFlags = allFFlagsEn.zip(allFFlags).map {
    case (en, fflags) => Mux(en, fflags, 0.U(5.W))
  }.reduce(_ | _)
  io.out.bits.res.fflags.get := outFFlags

  val resultDataUInt = resultData.asUInt
  mgu.io.in.vd := resultDataUInt
  mgu.io.in.oldVd := outOldVd
  mgu.io.in.mask := maskToMgu
  mgu.io.in.info.ta := outVecCtrl.vta
  mgu.io.in.info.ma := outVecCtrl.vma
  mgu.io.in.info.vl := outVlFix
  mgu.io.in.info.vlmul := outVecCtrl.vlmul
  mgu.io.in.info.valid := io.out.valid
  mgu.io.in.info.vstart := Mux(outVecCtrl.fpu.isFpToVecInst, 0.U, outVecCtrl.vstart)
  mgu.io.in.info.eew := outEew
  mgu.io.in.info.vsew := outVecCtrl.vsew
  mgu.io.in.info.vdIdx := outVecCtrl.vuopIdx
  mgu.io.in.info.narrow := outVecCtrl.isNarrow
  mgu.io.in.info.dstMask := outVecCtrl.isDstMask
  mgu.io.in.isIndexedVls := false.B
  io.out.bits.res.data := mgu.io.out.vd
  io.out.bits.ctrl.exceptionVec.get(ExceptionNO.illegalInstr) := mgu.io.out.illegal
}
166efdf5c1cSxiaofeibao-xjtu}
167