/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
*
*
* Acknowledgement
*
* This implementation is inspired by several key papers:
* [1] Andrew D. Booth. "[A signed binary multiplication technique.](https://doi.org/10.1093/qjmam/4.2.236)" The
* Quarterly Journal of Mechanics and Applied Mathematics 4.2: 236-240. 1951.
* [2] Christopher S. Wallace. "[A suggestion for a fast multiplier.](https://doi.org/10.1109/PGEC.1964.263830)" IEEE
* Transactions on Electronic Computers 1: 14-17. 1964.
***************************************************************************************/

package xiangshan.backend.fu

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import utility._
import utils._
import xiangshan._
import xiangshan.backend.fu.util.{C22, C32, C53}

/** Control flags for multiply/divide operations.
  *
  * NOTE(review): the consumers of `sign` and `isW` are not visible in this part of the file;
  * their exact meaning should be confirmed against the functional units that read them.
  */
class MulDivCtrl extends Bundle{
  val sign = Bool()
  val isW = Bool()
  val isHi = Bool() // return hi bits of result ?
}

/** Array multiplier data path: radix-4 Booth partial products reduced by a compressor tree.
  *
  * Structure, as built below:
  *  1. `len/2` (rounded up) Booth partial products are generated from `io.a` (the recoded
  *     operand) and `io.b` (the multiplicand) and scattered, bit by bit, into `2*len`
  *     columns according to their weight.
  *  2. Every column bit is registered with `io.regEnables(0)`.
  *  3. `addAll` repeatedly compresses the columns with C22 / C32 / C53 cells until no column
  *     holds more than two bits; the layer produced at recursion depth 4 is registered with
  *     `io.regEnables(1)`.
  *  4. The two remaining rows are added with a single `+` to form `io.result`.
  *
  * @param len width of each operand in bits; the result is `2 * len` bits wide
  */
class ArrayMulDataModule(len: Int) extends Module {
  val io = IO(new Bundle() {
    val a, b = Input(UInt(len.W))
    val regEnables = Input(Vec(2, Bool()))
    val result = Output(UInt((2 * len).W))
  })
  val (a, b) = (io.a, io.b)

  // Multiples of b selectable by a Booth digit, all (len+1) bits wide.
  // neg_b / neg_bx2 are bitwise inverses only; the missing "+1" is injected later through `t`.
  val b_sext, bx2, neg_b, neg_bx2 = Wire(UInt((len+1).W))
  b_sext := SignExt(b, len+1)
  bx2 := b_sext << 1
  neg_b := (~b_sext).asUInt
  neg_bx2 := neg_b << 1

  // columns(j) collects every partial-product bit of weight 2^j.
  val columns: Array[Seq[Bool]] = Array.fill(2*len)(Seq())

  // Booth window of the previous iteration; a `var` because it is rebound on every iteration.
  var last_x = WireInit(0.U(3.W))
  for(i <- Range(0, len, 2)){
    // 3-bit Booth window {a(i+1), a(i), a(i-1)}:
    //  - i == 0: the bit below the LSB is 0
    //  - i + 1 == len: a(i+1) does not exist, so {a(i), a(i-1)} is sign-extended to 3 bits
    val x = if(i==0) Cat(a(1,0), 0.U(1.W)) else if(i+1==len) SignExt(a(i, i-1), 3) else a(i+1, i-1)
    // Windows 0 and 7 are not listed and therefore select the default, 0.
    val pp_temp = MuxLookup(x, 0.U)(Seq(
      1.U -> b_sext,
      2.U -> b_sext,
      3.U -> bx2,
      4.U -> neg_bx2,
      5.U -> neg_b,
      6.U -> neg_b
    ))
    // Top bit of the selected multiple, used for the sign-handling bits prepended below.
    val s = pp_temp(len)
    // Correction for the PREVIOUS digit: if it selected an inverted multiple, add the
    // missing +1 (worth 2 for the doubled multiple) at that digit's weight.
    val t = MuxLookup(last_x, 0.U(2.W))(Seq(
      4.U -> 2.U(2.W),
      5.U -> 1.U(2.W),
      6.U -> 1.U(2.W)
    ))
    last_x = x
    // `weight` is the column index of pp's bit 0. Except for the first digit it is i-2,
    // because the two bits of `t` sit below pp_temp.
    val (pp, weight) = i match {
      case 0 =>
        (Cat(~s, s, s, pp_temp), 0)
      case n if (n==len-1) || (n==len-2) =>
        // last digit: no leading constant 1 is prepended
        (Cat(~s, pp_temp, t), i-2)
      case _ =>
        (Cat(1.U(1.W), ~s, pp_temp, t), i-2)
    }
    // Scatter the bits of pp into their columns; bits beyond column 2*len-1 are dropped.
    for(j <- columns.indices){
      if(j >= weight && j < (weight + pp.getWidth)){
        columns(j) = columns(j) :+ pp(j-weight)
      }
    }
  }

  /** Compresses one column of equally weighted bits with C22 / C32 / C53 cells.
    *
    * @param col the bits of this column; must not be empty
    * @param cin carries produced by the C53 cells of the previous (lower) column in the same
    *            layer; each C53 instantiated here consumes one, the rest are passed on in `sum`
    * @return `(sum, cout1, cout2)`:
    *         `sum` stays in this column for the next layer,
    *         `cout1` is handed to the next column within the same layer (its `cin`),
    *         `cout2` is placed into the next column of the next layer
    */
  def addOneColumn(col: Seq[Bool], cin: Seq[Bool]): (Seq[Bool], Seq[Bool], Seq[Bool]) = {
    // An empty column would otherwise fall into the recursive case and never terminate.
    require(col.nonEmpty, "addOneColumn: column must contain at least one bit")
    var sum = Seq[Bool]()
    var cout1 = Seq[Bool]()
    var cout2 = Seq[Bool]()
    col.size match {
      case 1 => // do nothing
        sum = col ++ cin
      case 2 =>
        val c22 = Module(new C22)
        c22.io.in := col
        sum = c22.io.out(0).asBool +: cin
        cout2 = Seq(c22.io.out(1).asBool)
      case 3 =>
        val c32 = Module(new C32)
        c32.io.in := col
        sum = c32.io.out(0).asBool +: cin
        cout2 = Seq(c32.io.out(1).asBool)
      case 4 =>
        val c53 = Module(new C53)
        for((x, y) <- c53.io.in.take(4) zip col){
          x := y
        }
        // The fifth input takes one incoming carry (0 if there is none).
        c53.io.in.last := (if(cin.nonEmpty) cin.head else 0.U)
        sum = Seq(c53.io.out(0).asBool) ++ (if(cin.nonEmpty) cin.drop(1) else Nil)
        cout1 = Seq(c53.io.out(1).asBool)
        cout2 = Seq(c53.io.out(2).asBool)
      case _ =>
        // More than four bits: handle the first four with one incoming carry, recurse on the rest.
        val cin_1 = if(cin.nonEmpty) Seq(cin.head) else Nil
        val cin_2 = if(cin.nonEmpty) cin.drop(1) else Nil
        val (s_1, c_1_1, c_1_2) = addOneColumn(col take 4, cin_1)
        val (s_2, c_2_1, c_2_2) = addOneColumn(col drop 4, cin_2)
        sum = s_1 ++ s_2
        cout1 = c_1_1 ++ c_2_1
        cout2 = c_1_2 ++ c_2_2
    }
    (sum, cout1, cout2)
  }

  /** Largest element of `in`; throws on an empty collection. */
  def max(in: Iterable[Int]): Int = in.max

  /** Recursively reduces the columns until each holds at most two bits.
    *
    * @param cols  one sequence of bits per column, index 0 being the least significant
    * @param depth number of compression layers already applied; the layer built at
    *              depth 4 is registered with `io.regEnables(1)`
    * @return the two remaining rows `(sum, carry)`; `carry` is padded with zeros below the
    *         first column that holds two bits
    */
  def addAll(cols: Seq[Seq[Bool]], depth: Int): (UInt, UInt) = {
    if(max(cols.map(_.size)) <= 2){
      val sum = Cat(cols.map(_(0)).reverse)
      // k = number of leading (low-order) columns that hold a single bit
      var k = 0
      while(cols(k).size == 1) k = k+1
      val carry = Cat(cols.drop(k).map(_(1)).reverse)
      (sum, Cat(carry, 0.U(k.W)))
    } else {
      val columns_next = Array.fill(2*len)(Seq[Bool]())
      // Carries produced by column i-1; those of the last column are discarded.
      var cout1, cout2 = Seq[Bool]()
      for( i <- cols.indices){
        val (s, c1, c2) = addOneColumn(cols(i), cout1)
        columns_next(i) = s ++ cout2
        cout1 = c1
        cout2 = c2
      }

      val needReg = depth == 4
      val toNextLayer = if(needReg)
        columns_next.map(_.map(x => RegEnable(x, io.regEnables(1))))
      else
        columns_next

      addAll(toNextLayer.toSeq, depth+1)
    }
  }

  // First pipeline stage: register every partial-product bit before compression.
  val columns_reg = columns.map(col => col.map(b => RegEnable(b, io.regEnables(0))))
  val (sum, carry) = addAll(cols = columns_reg.toSeq, depth = 0)

  io.result := sum + carry
}