// xref: /XiangShan/src/main/scala/xiangshan/backend/fu/Multiplier.scala (revision c49ebec88f6e402aefec681225e3537e2c511430)
/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
*
*
* Acknowledgement
*
* This implementation is inspired by several key papers:
* [1] Andrew D. Booth. "[A signed binary multiplication technique.](https://doi.org/10.1093/qjmam/4.2.236)" The
* Quarterly Journal of Mechanics and Applied Mathematics 4.2: 236-240. 1951.
* [2] Christopher S. Wallace. "[A suggestion for a fast multiplier.](https://doi.org/10.1109/PGEC.1964.263830)" IEEE
* Transactions on Electronic Computers 1: 14-17. 1964.
***************************************************************************************/
25c6d43980SLemover
26cafb3558SLinJiaweipackage xiangshan.backend.fu
27cafb3558SLinJiawei
288891a219SYinan Xuimport org.chipsalliance.cde.config.Parameters
29cafb3558SLinJiaweiimport chisel3._
30cafb3558SLinJiaweiimport chisel3.util._
313c02ee8fSwakafaimport utility._
323b739f49SXuan Huimport utils._
333b739f49SXuan Huimport xiangshan._
347f1506e3SLinJiaweiimport xiangshan.backend.fu.util.{C22, C32, C53}
35cafb3558SLinJiawei
/**
  * Bundle of three single-bit control flags; going by its name, it accompanies a
  * multiply/divide operation.
  *
  * Only the field declarations are visible in this file, so the per-field meanings below
  * are inferred from the names and marked as assumptions to be confirmed.
  */
class MulDivCtrl extends Bundle{
  val sign = Bool() // NOTE(review): presumably selects signed operand interpretation - confirm at the use site
  val isW = Bool() // NOTE(review): presumably marks a 32-bit "word" operation - confirm at the use site
  val isHi = Bool() // return hi bits of result ?
}
41cafb3558SLinJiawei
/**
  * Data path of a pipelined multiplier: radix-4 Booth partial products are reduced column by
  * column with C22/C32/C53 compressor cells until at most two bits remain per column, and the
  * two remaining rows are added to form the product.
  *
  * Pipeline registers (all `RegEnable`, so they only update while their enable is high):
  *  - every partial-product bit is registered with `io.regEnables(0)` before the reduction;
  *  - the outputs of the reduction layer with depth 4 are registered with `io.regEnables(1)`;
  *  - the final `sum + carry` addition is combinational.
  *
  * `b` is sign-extended to `len + 1` bits, and `a` is scanned in overlapping 3-bit windows, so
  * both operands are treated as two's-complement values of `len` bits.
  *
  * NOTE(review): the "+1"/"+2" correction of a negative Booth digit is injected into the NEXT
  * partial product (see `t`). Nothing consumes the correction of the last digit, so the result
  * is only exact when the last Booth digit is not negative (for instance when `a` is already a
  * sign/zero extension of a narrower value). Confirm that every instantiation guarantees this.
  *
  * @param len width in bits of each operand; `io.result` is `2 * len` bits wide
  */
class ArrayMulDataModule(len: Int) extends Module {
  val io = IO(new Bundle() {
    val a, b = Input(UInt(len.W))
    val regEnables = Input(Vec(2, Bool()))
    val result = Output(UInt((2 * len).W))
  })
  val (a, b) = (io.a, io.b)

  // Candidate multiples of b, each len+1 bits wide. The "negative" forms are plain bitwise
  // inversions; the missing +1 (or +2 for the doubled form) is supplied later through `t`.
  val b_sext, bx2, neg_b, neg_bx2 = Wire(UInt((len+1).W))
  b_sext := SignExt(b, len+1)
  bx2 := b_sext << 1
  neg_b := (~b_sext).asUInt
  neg_bx2 := neg_b << 1

  // columns(j) collects every partial-product bit of weight 2^j.
  val columns: Array[Seq[Bool]] = Array.fill(2*len)(Seq())

  // Booth window of the previous iteration; used to derive the correction bits `t`.
  var last_x = WireInit(0.U(3.W))
  for(i <- Range(0, len, 2)){
    // 3-bit Booth window {a(i+1), a(i), a(i-1)}: an implicit 0 below bit 0 for the first
    // window, and the top bit replicated when the window would run past bit len-1.
    val x = if(i==0) Cat(a(1,0), 0.U(1.W)) else if(i+1==len) SignExt(a(i, i-1), 3) else a(i+1, i-1)
    // Window -> multiple of b: 0 (default, windows 0 and 7), +b, +2b, -2b, -b.
    val pp_temp = MuxLookup(x, 0.U)(Seq(
      1.U -> b_sext,
      2.U -> b_sext,
      3.U -> bx2,
      4.U -> neg_bx2,
      5.U -> neg_b,
      6.U -> neg_b
    ))
    // Sign bit of the selected multiple.
    val s = pp_temp(len)
    // Two's-complement correction owed by the previous window: 2 after -2b, 1 after -b.
    val t = MuxLookup(last_x, 0.U(2.W))(Seq(
      4.U -> 2.U(2.W),
      5.U -> 1.U(2.W),
      6.U -> 1.U(2.W)
    ))
    last_x = x
    // Each row is prefixed with inverted-sign / constant bits instead of a full sign extension,
    // and (except for the first row) suffixed with `t`, which sits two bit positions below the
    // row itself, i.e. at the weight of the previous row.
    val (pp, weight) = i match {
      case 0 =>
        (Cat(~s, s, s, pp_temp), 0)
      case n if (n==len-1) || (n==len-2) =>
        (Cat(~s, pp_temp, t), i-2)
      case _ =>
        (Cat(1.U(1.W), ~s, pp_temp, t), i-2)
    }
    // Scatter the row into the columns it overlaps; bits beyond column 2*len-1 are dropped.
    for(j <- weight until math.min(weight + pp.getWidth, columns.length)){
      columns(j) = columns(j) :+ pp(j-weight)
    }
  }

  /**
    * Compresses the bits of one column.
    *
    * @param col bits of this column (must not be empty)
    * @param cin carry bits produced for this column by the previous column in the same layer
    * @return `(sum, cout1, cout2)`: `sum` are the bits that stay in this column, `cout1` is
    *         passed as `cin` to the next column of the same layer, and `cout2` is appended to
    *         the next column's bits for the following layer (this is how `addAll` uses them)
    */
  def addOneColumn(col: Seq[Bool], cin: Seq[Bool]): (Seq[Bool], Seq[Bool], Seq[Bool]) = {
    // An empty column would fall into the splitting case below and recurse without end.
    require(col.nonEmpty, "addOneColumn: cannot compress an empty column")
    col.size match {
      case 1 =>  // nothing to compress: pass the bit and any incoming carries through
        (col ++ cin, Seq.empty[Bool], Seq.empty[Bool])
      case 2 =>
        val c22 = Module(new C22)
        c22.io.in := col
        (c22.io.out(0).asBool +: cin, Seq.empty[Bool], Seq(c22.io.out(1).asBool))
      case 3 =>
        val c32 = Module(new C32)
        c32.io.in := col
        (c32.io.out(0).asBool +: cin, Seq.empty[Bool], Seq(c32.io.out(1).asBool))
      case 4 =>
        val c53 = Module(new C53)
        for((x, y) <- c53.io.in.take(4) zip col){
          x := y
        }
        // The fifth input absorbs the first incoming carry (or a constant 0 if there is none);
        // any remaining incoming carries stay in this column uncompressed.
        c53.io.in.last := (if(cin.nonEmpty) cin.head else 0.U)
        (c53.io.out(0).asBool +: cin.drop(1), Seq(c53.io.out(1).asBool), Seq(c53.io.out(2).asBool))
      case _ =>
        // More than four bits: compress the first four, then recurse on the rest.
        val cin_1 = cin.take(1)
        val cin_2 = cin.drop(1)
        val (s_1, c_1_1, c_1_2) = addOneColumn(col take 4, cin_1)
        val (s_2, c_2_1, c_2_2) = addOneColumn(col drop 4, cin_2)
        (s_1 ++ s_2, c_1_1 ++ c_2_1, c_1_2 ++ c_2_2)
    }
  }

  /** Largest element of `in`; throws on an empty collection. */
  def max(in: Iterable[Int]): Int = in.max

  /**
    * Applies compression layers until every column holds at most two bits.
    *
    * @param cols  bits per column, index = bit weight
    * @param depth index of the current layer, starting at 0
    * @return `(sum, carry)`, two vectors whose addition gives the product
    */
  def addAll(cols: Seq[Seq[Bool]], depth: Int): (UInt, UInt) = {
    if(max(cols.map(_.size)) <= 2){
      // First bit of every column forms `sum`.
      val sum = Cat(cols.map(_(0)).reverse)
      // The low columns holding a single bit have no second row; `carry` starts at the first
      // column that does and is zero-padded below it.
      val k = cols.indexWhere(_.size != 1)
      require(k >= 0, s"ArrayMulDataModule(len=$len): no column holds a second bit to form the carry vector")
      val carry = Cat(cols.drop(k).map(_(1)).reverse)
      (sum, Cat(carry, 0.U(k.W)))
    } else {
      val columns_next = Array.fill(2*len)(Seq[Bool]())
      var cout1, cout2 = Seq[Bool]()
      for( i <- cols.indices){
        val (s, c1, c2) = addOneColumn(cols(i), cout1)
        columns_next(i) = s ++ cout2
        cout1 = c1
        cout2 = c2
      }

      // Second pipeline cut: register the outputs of the layer whose depth is 4.
      val needReg = depth == 4
      val toNextLayer = if(needReg)
        columns_next.map(_.map(x => RegEnable(x, io.regEnables(1))))
      else
        columns_next

      addAll(toNextLayer.toSeq, depth+1)
    }
  }

  // First pipeline cut: register every partial-product bit before the reduction.
  val columns_reg = columns.map(col => col.map(bit => RegEnable(bit, io.regEnables(0))))
  val (sum, carry) = addAll(cols = columns_reg.toSeq, depth = 0)

  io.result := sum + carry
}
162