xref: /XiangShan/src/main/scala/xiangshan/backend/fu/wrapper/VFDivSqrt.scala (revision 614d2bc6eead7bc6e6e71c4d6dc850d2d5ad3aef)
1efdf5c1cSxiaofeibao-xjtupackage xiangshan.backend.fu.wrapper
2efdf5c1cSxiaofeibao-xjtu
383ba63b3SXuan Huimport org.chipsalliance.cde.config.Parameters
4efdf5c1cSxiaofeibao-xjtuimport chisel3._
5efdf5c1cSxiaofeibao-xjtuimport chisel3.util._
6bb2f3f51STang Haojinimport utility.XSError
7efdf5c1cSxiaofeibao-xjtuimport xiangshan.backend.fu.FuConfig
8efdf5c1cSxiaofeibao-xjtuimport xiangshan.backend.fu.vector.Bundles.VSew
9efdf5c1cSxiaofeibao-xjtuimport xiangshan.backend.fu.vector.utils.VecDataSplitModule
10bdda74fdSxiaofeibao-xjtuimport xiangshan.backend.fu.vector.{Mgu, VecNonPipedFuncUnit}
11bdda74fdSxiaofeibao-xjtuimport xiangshan.backend.rob.RobPtr
12c33d4a9eSXuan Huimport xiangshan.ExceptionNO
13efdf5c1cSxiaofeibao-xjtuimport yunsuan.VfpuType
14efdf5c1cSxiaofeibao-xjtuimport yunsuan.vector.VectorFloatDivider
15efdf5c1cSxiaofeibao-xjtu
/**
  * Vector floating-point divide / square-root function unit.
  *
  * The vector operands are cut into 64-bit lanes by [[VecDataSplitModule]] and every
  * lane is handed to its own [[VectorFloatDivider]]. The per-lane results are
  * concatenated and passed through [[Mgu]] before being driven onto `io.out`, while the
  * per-element exception flags are gated by the source mask and by the number of
  * elements that are active in the current uop, then OR-reduced into a single 5-bit
  * `fflags` value.
  *
  * @param cfg configuration of this function unit; `cfg.destDataBits` sets the data width
  * @param p   implicit configuration parameters
  */
class VFDivSqrt(cfg: FuConfig)(implicit p: Parameters) extends VecNonPipedFuncUnit(cfg) {
  XSError(io.in.valid && io.in.bits.ctrl.fuOpType === VfpuType.dummy, "Vfdiv OpType not supported")

  // params alias
  private val dataWidth = cfg.destDataBits
  private val dataWidthOfDataModule = 64
  private val numVecModule = dataWidth / dataWidthOfDataModule
  // Every divider reports 20 flag bits, which are later viewed as four 5-bit groups,
  // so the whole unit has 4 * numVecModule flag slots.
  private val numFFlagsSlots = 4 * numVecModule

  // io alias
  // Bit 0 of the op type is forwarded to the dividers as their `is_sqrt_i` select.
  private val opcode = fuOpType(0)

  // modules
  private val vfdivs = Seq.fill(numVecModule)(Module(new VectorFloatDivider))
  private val vs2Split = Module(new VecDataSplitModule(dataWidth, dataWidthOfDataModule))
  private val vs1Split = Module(new VecDataSplitModule(dataWidth, dataWidthOfDataModule))
  // NOTE(review): only the input of oldVdSplit is driven; none of its outputs is read in this class.
  private val oldVdSplit = Module(new VecDataSplitModule(dataWidth, dataWidthOfDataModule))
  private val mgu = Module(new Mgu(dataWidth))

  /**
    * In connection of [[vs2Split]], [[vs1Split]] and [[oldVdSplit]]
    */
  vs2Split.io.inVecData := vs2
  vs1Split.io.inVecData := vs1
  oldVdSplit.io.inVecData := oldVd

  /**
    * [[vfdivs]]'s in connection
    */
  private val resultData = Wire(Vec(numVecModule, UInt(dataWidthOfDataModule.W)))
  private val fflagsData = Wire(Vec(numVecModule, UInt(20.W)))
  val fp_aIsFpCanonicalNAN = Wire(Vec(numVecModule, Bool()))
  val fp_bIsFpCanonicalNAN = Wire(Vec(numVecModule, Bool()))

  /**
    * True when the bits above the element of a 64-bit lane are not all ones:
    * the upper 32 bits for e32, the upper 48 bits for e16.
    * NOTE(review): presumably this is the NaN-boxing check for scalar FP operands - confirm.
    */
  private def upperBitsNotAllOnes(lane: UInt): Bool =
    ((vsew === VSew.e32) & (!lane.head(32).andR)) |
      ((vsew === VSew.e16) & (!lane.head(48).andR))

  // While a new uop can be accepted, flushes are checked against the incoming robIdx;
  // otherwise they are checked against the robIdx held in outCtrl.
  val thisRobIdx = Wire(new RobPtr)
  when(io.in.ready) {
    thisRobIdx := io.in.bits.ctrl.robIdx
  }.otherwise {
    thisRobIdx := outCtrl.robIdx
  }

  vfdivs.zipWithIndex.foreach { case (mod, i) =>
    val opA = vs2Split.io.outVec64b(i)
    val opB = vs1Split.io.outVec64b(i)

    // handshake / control
    mod.io.start_valid_i  := io.in.valid
    mod.io.finish_ready_i := io.out.ready & io.out.valid
    mod.io.flush_i        := thisRobIdx.needFlush(io.flush)
    mod.io.fp_format_i    := vsew
    mod.io.is_sqrt_i      := opcode
    mod.io.rm_i           := rm
    mod.io.is_vec_i       := true.B // Todo

    // operands
    mod.io.opa_i          := opA
    mod.io.opb_i          := opB
    mod.io.frs2_i         := 0.U     // already vf -> vv
    mod.io.frs1_i         := 0.U     // already vf -> vv
    mod.io.is_frs2_i      := false.B // already vf -> vv
    mod.io.is_frs1_i      := false.B // already vf -> vv

    // The canonical-NaN hints are only raised for FP-to-vector instructions.
    fp_aIsFpCanonicalNAN(i) := vecCtrl.fpu.isFpToVecInst & upperBitsNotAllOnes(opA)
    fp_bIsFpCanonicalNAN(i) := vecCtrl.fpu.isFpToVecInst & upperBitsNotAllOnes(opB)
    mod.io.fp_aIsFpCanonicalNAN := fp_aIsFpCanonicalNAN(i)
    mod.io.fp_bIsFpCanonicalNAN := fp_bIsFpCanonicalNAN(i)

    // results
    resultData(i) := mod.io.fpdiv_res_o
    fflagsData(i) := mod.io.fflags_o
  }

  // The unit accepts / produces data only when every lane divider agrees.
  io.in.ready  := vfdivs.map(_.io.start_ready_o).reduce(_&_)
  io.out.valid := vfdivs.map(_.io.finish_valid_o).reduce(_&_)

  /**
    * Number of active elements in the current uop
    */
  val outEew = outVecCtrl.vsew
  val outVuopidx = outVecCtrl.vuopIdx(2, 0)
  // VLEN / 8 shifted right by the element-width code.
  val vlMax = ((VLEN / 8).U >> outEew).asUInt
  // When vlmul(2) is set the low two bits are two's-complement negated, otherwise used as is.
  val lmulAbs = Mux(outVecCtrl.vlmul(2), (~outVecCtrl.vlmul(1, 0)).asUInt + 1.U, outVecCtrl.vlmul(1, 0))
  // FP-to-vector instructions are treated as having vl = 1.
  val outVlFix = Mux(outVecCtrl.fpu.isFpToVecInst, 1.U, outVl)
  // NOTE(review): vlMaxAllUop is not read anywhere inside this class.
  val vlMaxAllUop = Wire(outVl.cloneType)
  vlMaxAllUop := Mux(outVecCtrl.vlmul(2), vlMax >> lmulAbs, vlMax << lmulAbs).asUInt
  val vlMaxThisUop = Mux(outVecCtrl.vlmul(2), vlMax >> lmulAbs, vlMax).asUInt
  // Elements left for this uop once the ones covered by earlier uops are subtracted (floored at 0).
  val vlSetThisUop = Mux(outVlFix > outVuopidx * vlMaxThisUop, outVlFix - outVuopidx * vlMaxThisUop, 0.U)
  // Wide enough to hold the value numFFlagsSlots itself; this is 4 bits for two lanes.
  val vlThisUop = Wire(UInt(log2Up(numFFlagsSlots + 1).W))
  vlThisUop := Mux(vlSetThisUop < vlMaxThisUop, vlSetThisUop, vlMaxThisUop)
  // All-ones shifted right so that exactly the low `vlThisUop` bits remain set.
  val vlMaskRShift = Wire(UInt(numFFlagsSlots.W))
  vlMaskRShift := Fill(numFFlagsSlots, 1.U(1.W)) >> (numFFlagsSlots.U - vlThisUop)

  /**
    * Per-slot fflags enables: source mask AND vl mask, laid out according to the element width
    */
  private val needNoMask = outVecCtrl.fpu.isFpToVecInst
  val maskToMgu = Mux(needNoMask, allMaskTrue, outSrcMask)
  val allFFlagsEn = Wire(Vec(numFFlagsSlots, Bool()))
  // Mask bits that belong to the current uop, moved down to bit 0.
  val outSrcMaskRShift = Wire(UInt(numFFlagsSlots.W))
  outSrcMaskRShift := (maskToMgu >> (outVuopidx * vlMax))(numFFlagsSlots - 1, 0)
  val f16FFlagsEn = outSrcMaskRShift
  val f32FFlagsEn = Wire(Vec(numVecModule, UInt(4.W)))
  val f64FFlagsEn = Wire(Vec(numVecModule, UInt(4.W)))
  val f16VlMaskEn = vlMaskRShift
  val f32VlMaskEn = Wire(Vec(numVecModule, UInt(4.W)))
  val f64VlMaskEn = Wire(Vec(numVecModule, UInt(4.W)))
  (0 until numVecModule).foreach { i =>
    // Each lane owns four slots: f32 uses the low two of them, f64 only the lowest one.
    f32FFlagsEn(i) := Cat(Fill(2, 0.U), outSrcMaskRShift(2 * i + 1, 2 * i))
    f64FFlagsEn(i) := Cat(Fill(3, 0.U), outSrcMaskRShift(i))
    f32VlMaskEn(i) := Cat(Fill(2, 0.U), vlMaskRShift(2 * i + 1, 2 * i))
    f64VlMaskEn(i) := Cat(Fill(3, 0.U), vlMaskRShift(i))
  }

  /** One-hot selection between the f16 / f32 / f64 layouts, keyed by `outEew` (1 / 2 / 3). */
  private def selectByOutEew(f16: UInt, f32: UInt, f64: UInt): UInt = Mux1H(
    Seq(
      (outEew === 1.U) -> f16,
      (outEew === 2.U) -> f32,
      (outEew === 3.U) -> f64
    )
  )

  val fflagsEn = selectByOutEew(f16FFlagsEn.asUInt, f32FFlagsEn.asUInt, f64FFlagsEn.asUInt)
  val vlMaskEn = selectByOutEew(f16VlMaskEn.asUInt, f32VlMaskEn.asUInt, f64VlMaskEn.asUInt)
  allFFlagsEn := (fflagsEn & vlMaskEn).asTypeOf(allFFlagsEn)

  // View the 20 flag bits of every lane as four 5-bit groups and OR together the enabled ones.
  val allFFlags = fflagsData.asTypeOf(Vec(numFFlagsSlots, UInt(5.W)))
  val outFFlags = allFFlagsEn.zip(allFFlags).map {
    case (en, fflags) => Mux(en, fflags, 0.U(5.W))
  }.reduce(_ | _)
  io.out.bits.res.fflags.get := outFFlags

  /**
    * [[mgu]]'s in connection and the final outputs
    */
  val resultDataUInt = resultData.asUInt
  mgu.io.in.vd           := resultDataUInt
  mgu.io.in.oldVd        := outOldVd
  mgu.io.in.mask         := maskToMgu
  mgu.io.in.info.ta      := outVecCtrl.vta
  mgu.io.in.info.ma      := outVecCtrl.vma
  mgu.io.in.info.vl      := outVlFix
  mgu.io.in.info.vlmul   := outVecCtrl.vlmul
  mgu.io.in.info.valid   := io.out.valid
  // FP-to-vector instructions always start at element 0.
  mgu.io.in.info.vstart  := Mux(outVecCtrl.fpu.isFpToVecInst, 0.U, outVecCtrl.vstart)
  mgu.io.in.info.eew     := outVecCtrl.vsew
  mgu.io.in.info.vsew    := outVecCtrl.vsew
  mgu.io.in.info.vdIdx   := outVecCtrl.vuopIdx
  mgu.io.in.info.narrow  := outVecCtrl.isNarrow
  mgu.io.in.info.dstMask := outVecCtrl.isDstMask
  mgu.io.in.isIndexedVls := false.B
  io.out.bits.res.data := mgu.io.out.vd
  io.out.bits.ctrl.exceptionVec.get(ExceptionNO.illegalInstr) := mgu.io.out.illegal
}
160