/***************************************************************************************
 * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
 * Copyright (c) 2020-2021 Peng Cheng Laboratory
 *
 * XiangShan is licensed under Mulan PSL v2.
 * You can use this software according to the terms and conditions of the Mulan PSL v2.
 * You may obtain a copy of Mulan PSL v2 at:
 * http://license.coscl.org.cn/MulanPSL2
 *
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 *
 * See the Mulan PSL v2 for more details.
 ***************************************************************************************/

package xiangshan.backend.decode

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import freechips.rocketchip.rocket.Instructions
import freechips.rocketchip.util.uintToBitPat
import utils._
import utility._
import xiangshan.ExceptionNO.illegalInstr
import xiangshan._
import xiangshan.backend.fu.fpu.FPU
import xiangshan.backend.fu.FuType
import freechips.rocketchip.rocket.Instructions._
import xiangshan.backend.Bundles.{DecodedInst, StaticInst}
import xiangshan.backend.decode.isa.bitfield.XSInstBitFields
import xiangshan.backend.fu.vector.Bundles.{VSew, VType, VLmul}
import yunsuan.VpermType
import scala.collection.Seq
import chisel3.util.experimental.decode.{QMCMinimizer, TruthTable, decoder}

/**
 * Lookup table for a single uop slot (`uopIdx`) of an indexed vector load/store.
 *
 * The table is computed in Scala at elaboration time for every (emul, lmul, nf)
 * combination and turned into combinational logic by the Chisel truth-table decoder.
 *
 * Input `src` is packed as `emul[6:5] | lmul[4:3] | nf[2:0]`, where emul and lmul are
 * log2 exponents (0..3) and nf is the number of fields minus one (0..7).
 *
 * Outputs are unpacked from the 7-bit decoder result:
 *  - `outIsFirstUopInVd` = bit 6
 *  - `outOffsetVs2`      = bits 5:3
 *  - `outOffsetVd`       = bits 2:0
 *
 * @param uopIdx index of the uop slot this table instance describes
 */
class indexedLSUopTable(uopIdx:Int) extends Module {
  val src = IO(Input(UInt(7.W)))
  val outOffsetVs2 = IO(Output(UInt(3.W)))
  val outOffsetVd = IO(Output(UInt(3.W)))
  val outIsFirstUopInVd = IO(Output(Bool()))

  /**
   * Computes the register offsets for one uop of an indexed load/store.
   *
   * @param lmul    log2 of the data register group size
   * @param emul    log2 of the index register group size
   * @param nfields number of segment fields (nf + 1)
   * @param uopIdx  uop slot to look up
   * @return `(offsetVs2, offsetVd, isFirstUopInVd)`; `(0, 0, 1)` when the combination is
   *         rejected by the guard or `uopIdx` lies outside the uops of this configuration
   */
  def genCsBundle_VEC_INDEXED_LDST(lmul:Int, emul:Int, nfields:Int, uopIdx:Int): (Int, Int, Int) = {
    val default = (0, 0, 1)
    // NOTE(review): lmul is treated as a log2 exponent everywhere else in this function
    // (`1 << lmul`), yet this guard multiplies it by nfields directly. For inputs such as
    // lmul = 1, nfields = 8 the returned vd offset can reach 15, which does not fit the
    // 3-bit field it is packed into below. Possibly `(1 << lmul) * nfields` was intended -
    // confirm before changing; behavior is deliberately left as it was.
    if (lmul * nfields > 8) {
      default
    } else {
      val indexGroupIsLarger = lmul < emul
      // lmul < emul: uop count per field depends on emul; otherwise (lmul >= emul) on lmul.
      val uopsPerField = 1 << (if (indexGroupIsLarger) emul else lmul)
      // How many uops share one register of the smaller register group.
      val ratio = 1 << (if (indexGroupIsLarger) emul - lmul else lmul - emul)
      val vdRegsPerField = 1 << lmul

      // Enumerate (field, uop-within-field) in field-major order and take the first match.
      val slots = for {
        k <- 0 until nfields
        i <- 0 until uopsPerField
      } yield (k, i)

      slots.collectFirst {
        case (k, i) if uopIdx == k * uopsPerField + i =>
          if (indexGroupIsLarger) {
            // Several index registers feed one vd register; only the first of them starts it.
            (i, i / ratio + k * vdRegsPerField, if (i % ratio == 0) 1 else 0)
          } else {
            // Several vd registers share one index register; every uop starts a new vd.
            (i / ratio, i + k * vdRegsPerField, 1)
          }
      }.getOrElse(default)
    }
  }

  // Every (emul, lmul, nf) combination together with the offsets of this uop slot,
  // as (emul, lmul, nf, isFirstUopInVd, offsetVs2, offsetVd).
  val combVemulNf : Seq[(Int, Int, Int, Int, Int, Int)] = for {
    emul <- 0 until 4
    lmul <- 0 until 4
    nf <- 0 until 8
  } yield {
    val (offsetVs2, offsetVd, isFirstUopInVd) = genCsBundle_VEC_INDEXED_LDST(lmul, emul, nf + 1, uopIdx)
    (emul, lmul, nf, isFirstUopInVd, offsetVs2, offsetVd)
  }

  // Minimized combinational decoder: packed (emul, lmul, nf) -> packed (isFirst, vs2, vd).
  val out = decoder(QMCMinimizer, src, TruthTable(combVemulNf.map {
    case (emul, lmul, nf, isFirstUopInVd, offsetVs2, offsetVd) =>
      (BitPat((emul << 5 | lmul << 3 | nf).U(7.W)), BitPat((isFirstUopInVd << 6 | offsetVs2 << 3 | offsetVd).U(7.W)))
  }, BitPat.N(7)))
  outOffsetVs2 := out(5, 3)
  outOffsetVd := out(2, 0)
  outIsFirstUopInVd := out(6).asBool
}

/** Constants shared by the vector complex-instruction decoder. */
trait VectorConstants {
  // Upper bound used when unrolling per-register uop loops in DecodeUnitComp.
  val MAX_VLMUL = 8
  // Logical destination used for the int-to-fp move uop that stages a scalar operand.
  val FP_TMP_REG_MV = 32
  // Base logical index of the temporary vector registers.
  val VECTOR_TMP_REG_LMUL = 33 // 33~47  ->  15
  // Number of indexedLSUopTable instances created by DecodeUnitComp.
  val MAX_INDEXED_LS_UOPNUM = 64
}

/** IO bundle of [[DecodeUnitComp]]. */
class DecodeUnitCompIO(implicit p: Parameters) extends XSBundle {
  val redirect = Input(Bool())
  val csrCtrl = Input(new CustomCSRCtrlIO)
  // When the first inst in decode vector is complex inst, pass it in
  val in = Flipped(DecoupledIO(new Bundle {
    val simpleDecodedInst = new DecodedInst
    val uopInfo = new UopInfo
  }))
  // One decoupled port per rename slot for the uops produced from the complex instruction.
  val out = new Bundle {
    val complexDecodedInsts = Vec(RenameWidth, DecoupledIO(new DecodedInst))
  }
  // NOTE(review): presumably the number of complex uops offered this cycle - confirm in DecodeUnitComp.
  val complexNum = Output(UInt(3.W))
}

/**
 *
@author zly 110 */ 111class DecodeUnitComp()(implicit p : Parameters) extends XSModule with DecodeUnitConstants with VectorConstants { 112 val io = IO(new DecodeUnitCompIO) 113 114 // alias 115 private val inReady = io.in.ready 116 private val inValid = io.in.valid 117 private val inDecodedInst = WireInit(io.in.bits.simpleDecodedInst) 118 private val inInstFields = io.in.bits.simpleDecodedInst.instr.asTypeOf(new XSInstBitFields) 119 private val inUopInfo = io.in.bits.uopInfo 120 private val outValids = io.out.complexDecodedInsts.map(_.valid) 121 private val outReadys = io.out.complexDecodedInsts.map(_.ready) 122 private val outDecodedInsts = io.out.complexDecodedInsts.map(_.bits) 123 private val outComplexNum = io.complexNum 124 125 val maxUopSize = MaxUopSize 126 when (io.in.fire && io.in.bits.simpleDecodedInst.isVset) { 127 when(inInstFields.RD === 0.U && inInstFields.RS1 === 0.U) { 128 inDecodedInst.fuOpType := VSETOpType.keepVl(io.in.bits.simpleDecodedInst.fuOpType) 129 }.elsewhen(inInstFields.RS1 === 0.U) { 130 inDecodedInst.fuOpType := VSETOpType.setVlmax(io.in.bits.simpleDecodedInst.fuOpType) 131 } 132 } 133 134 val latchedInst = RegEnable(inDecodedInst, inValid && inReady) 135 val latchedUopInfo = RegEnable(inUopInfo, inValid && inReady) 136 //input bits 137 private val instFields: XSInstBitFields = latchedInst.instr.asTypeOf(new XSInstBitFields) 138 139 val src1 = Cat(0.U(1.W), instFields.RS1) 140 val src2 = Cat(0.U(1.W), instFields.RS2) 141 val dest = Cat(0.U(1.W), instFields.RD) 142 143 val nf = instFields.NF 144 val width = instFields.WIDTH(1, 0) 145 146 //output of DecodeUnit 147 val numOfUop = Wire(UInt(log2Up(maxUopSize).W)) 148 val numOfWB = Wire(UInt(log2Up(maxUopSize).W)) 149 val lmul = Wire(UInt(4.W)) 150 val isVsetSimple = Wire(Bool()) 151 152 val indexedLSRegOffset = Seq.tabulate(MAX_INDEXED_LS_UOPNUM)(i => Module(new indexedLSUopTable(i))) 153 indexedLSRegOffset.map(_.src := 0.U) 154 155 //pre decode 156 lmul := latchedUopInfo.lmul 157 
isVsetSimple := latchedInst.isVset 158 val vlmulReg = latchedInst.vpu.vlmul 159 val vsewReg = latchedInst.vpu.vsew 160 161 //Type of uop Div 162 val typeOfSplit = latchedInst.uopSplitType 163 val src1Type = latchedInst.srcType(0) 164 val src1IsImm = src1Type === SrcType.imm 165 166 numOfUop := latchedUopInfo.numOfUop 167 numOfWB := latchedUopInfo.numOfWB 168 169 //uops dispatch 170 val s_idle :: s_active :: Nil = Enum(2) 171 val state = RegInit(s_idle) 172 val stateNext = WireDefault(state) 173 val numDecodedUop = RegInit(0.U(log2Up(maxUopSize).W)) 174 val uopRes = RegInit(0.U(log2Up(maxUopSize).W)) 175 val uopResNext = WireInit(uopRes) 176 177 //uop div up to maxUopSize 178 val csBundle = Wire(Vec(maxUopSize, new DecodedInst)) 179 csBundle.foreach { case dst => 180 dst := latchedInst 181 dst.numUops := latchedUopInfo.numOfUop 182 dst.numWB := latchedUopInfo.numOfWB 183 dst.firstUop := false.B 184 dst.lastUop := false.B 185 } 186 187 csBundle(0).firstUop := true.B 188 csBundle(numOfUop - 1.U).lastUop := true.B 189 190 switch(typeOfSplit) { 191 is(UopSplitType.VSET) { 192 // In simple decoder, rfWen and vecWen are not set 193 when(isVsetSimple) { 194 // Default 195 // uop0 set rd, never flushPipe 196 csBundle(0).fuType := FuType.vsetiwi.U 197 csBundle(0).flushPipe := false.B 198 csBundle(0).rfWen := true.B 199 // uop1 set vl, vsetvl will flushPipe 200 csBundle(1).ldest := VCONFIG_IDX.U 201 csBundle(1).vecWen := true.B 202 when(VSETOpType.isVsetvli(latchedInst.fuOpType) && dest === 0.U && src1 === 0.U) { 203 csBundle(1).fuType := FuType.vsetfwf.U 204 csBundle(1).srcType(0) := SrcType.vp 205 csBundle(1).lsrc(0) := VCONFIG_IDX.U 206 }.elsewhen(VSETOpType.isVsetvl(latchedInst.fuOpType) && dest === 0.U && src1 === 0.U) { 207 // uop0: mv vtype gpr to vector region 208 csBundle(0).srcType(0) := SrcType.xp 209 csBundle(0).srcType(1) := SrcType.no 210 csBundle(0).lsrc(1) := 0.U 211 csBundle(0).ldest := FP_TMP_REG_MV.U 212 csBundle(0).fuType := FuType.i2f.U 213 
csBundle(0).fpWen := true.B 214 csBundle(0).fpu.isAddSub := false.B 215 csBundle(0).fpu.typeTagIn := FPU.D 216 csBundle(0).fpu.typeTagOut := FPU.D 217 csBundle(0).fpu.fromInt := true.B 218 csBundle(0).fpu.wflags := false.B 219 csBundle(0).fpu.fpWen := true.B 220 csBundle(0).fpu.div := false.B 221 csBundle(0).fpu.sqrt := false.B 222 csBundle(0).fpu.fcvt := false.B 223 csBundle(0).flushPipe := false.B 224 // uop1: uvsetvcfg_vv 225 csBundle(1).fuType := FuType.vsetfwf.U 226 // vl 227 csBundle(1).srcType(0) := SrcType.vp 228 csBundle(1).lsrc(0) := VCONFIG_IDX.U 229 // vtype 230 csBundle(1).srcType(1) := SrcType.fp 231 csBundle(1).lsrc(1) := FP_TMP_REG_MV.U 232 csBundle(1).vecWen := true.B 233 csBundle(1).ldest := VCONFIG_IDX.U 234 } 235 } 236 } 237 is(UopSplitType.VEC_VVV) { 238 for (i <- 0 until MAX_VLMUL) { 239 csBundle(i).lsrc(0) := src1 + i.U 240 csBundle(i).lsrc(1) := src2 + i.U 241 csBundle(i).lsrc(2) := dest + i.U 242 csBundle(i).ldest := dest + i.U 243 csBundle(i).uopIdx := i.U 244 } 245 } 246 is(UopSplitType.VEC_VFV) { 247 for (i <- 0 until MAX_VLMUL) { 248 csBundle(i).lsrc(1) := src2 + i.U 249 csBundle(i).lsrc(2) := dest + i.U 250 csBundle(i).ldest := dest + i.U 251 csBundle(i).uopIdx := i.U 252 } 253 } 254 is(UopSplitType.VEC_EXT2) { 255 for (i <- 0 until MAX_VLMUL / 2) { 256 csBundle(2 * i).lsrc(1) := src2 + i.U 257 csBundle(2 * i).lsrc(2) := dest + (2 * i).U 258 csBundle(2 * i).ldest := dest + (2 * i).U 259 csBundle(2 * i).uopIdx := (2 * i).U 260 csBundle(2 * i + 1).lsrc(1) := src2 + i.U 261 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U 262 csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U 263 csBundle(2 * i + 1).uopIdx := (2 * i + 1).U 264 } 265 } 266 is(UopSplitType.VEC_EXT4) { 267 for (i <- 0 until MAX_VLMUL / 4) { 268 csBundle(4 * i).lsrc(1) := src2 + i.U 269 csBundle(4 * i).lsrc(2) := dest + (4 * i).U 270 csBundle(4 * i).ldest := dest + (4 * i).U 271 csBundle(4 * i).uopIdx := (4 * i).U 272 csBundle(4 * i + 1).lsrc(1) := src2 + i.U 273 csBundle(4 
* i + 1).lsrc(2) := dest + (4 * i + 1).U 274 csBundle(4 * i + 1).ldest := dest + (4 * i + 1).U 275 csBundle(4 * i + 1).uopIdx := (4 * i + 1).U 276 csBundle(4 * i + 2).lsrc(1) := src2 + i.U 277 csBundle(4 * i + 2).lsrc(2) := dest + (4 * i + 2).U 278 csBundle(4 * i + 2).ldest := dest + (4 * i + 2).U 279 csBundle(4 * i + 2).uopIdx := (4 * i + 2).U 280 csBundle(4 * i + 3).lsrc(1) := src2 + i.U 281 csBundle(4 * i + 3).lsrc(2) := dest + (4 * i + 3).U 282 csBundle(4 * i + 3).ldest := dest + (4 * i + 3).U 283 csBundle(4 * i + 3).uopIdx := (4 * i + 3).U 284 } 285 } 286 is(UopSplitType.VEC_EXT8) { 287 for (i <- 0 until MAX_VLMUL) { 288 csBundle(i).lsrc(1) := src2 289 csBundle(i).lsrc(2) := dest + i.U 290 csBundle(i).ldest := dest + i.U 291 csBundle(i).uopIdx := i.U 292 } 293 } 294 is(UopSplitType.VEC_0XV) { 295 /* 296 FMV.D.X 297 */ 298 csBundle(0).srcType(0) := SrcType.reg 299 csBundle(0).srcType(1) := SrcType.imm 300 csBundle(0).lsrc(1) := 0.U 301 csBundle(0).ldest := FP_TMP_REG_MV.U 302 csBundle(0).fuType := FuType.i2f.U 303 csBundle(0).rfWen := false.B 304 csBundle(0).fpWen := true.B 305 csBundle(0).vecWen := false.B 306 csBundle(0).fpu.isAddSub := false.B 307 csBundle(0).fpu.typeTagIn := FPU.D 308 csBundle(0).fpu.typeTagOut := FPU.D 309 csBundle(0).fpu.fromInt := true.B 310 csBundle(0).fpu.wflags := false.B 311 csBundle(0).fpu.fpWen := true.B 312 csBundle(0).fpu.div := false.B 313 csBundle(0).fpu.sqrt := false.B 314 csBundle(0).fpu.fcvt := false.B 315 /* 316 vfmv.s.f 317 */ 318 csBundle(1).srcType(0) := SrcType.fp 319 csBundle(1).srcType(1) := SrcType.vp 320 csBundle(1).srcType(2) := SrcType.vp 321 csBundle(1).lsrc(0) := FP_TMP_REG_MV.U 322 csBundle(1).lsrc(1) := 0.U 323 csBundle(1).lsrc(2) := dest 324 csBundle(1).ldest := dest 325 csBundle(1).fuType := FuType.vppu.U 326 csBundle(1).fuOpType := VpermType.dummy 327 csBundle(1).rfWen := false.B 328 csBundle(1).fpWen := false.B 329 csBundle(1).vecWen := true.B 330 } 331 is(UopSplitType.VEC_VXV) { 332 /* 333 i to vector 
move 334 */ 335 csBundle(0).srcType(0) := SrcType.reg 336 csBundle(0).srcType(1) := SrcType.imm 337 csBundle(0).lsrc(1) := 0.U 338 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 339 csBundle(0).fuType := FuType.i2v.U 340 csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg) 341 csBundle(0).vecWen := true.B 342 /* 343 LMUL 344 */ 345 for (i <- 0 until MAX_VLMUL) { 346 csBundle(i + 1).srcType(0) := SrcType.vp 347 csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 348 csBundle(i + 1).lsrc(1) := src2 + i.U 349 csBundle(i + 1).lsrc(2) := dest + i.U 350 csBundle(i + 1).ldest := dest + i.U 351 csBundle(i + 1).uopIdx := i.U 352 } 353 } 354 is(UopSplitType.VEC_VVW) { 355 for (i <- 0 until MAX_VLMUL / 2) { 356 csBundle(2 * i).lsrc(0) := src1 + i.U 357 csBundle(2 * i).lsrc(1) := src2 + i.U 358 csBundle(2 * i).lsrc(2) := dest + (2 * i).U 359 csBundle(2 * i).ldest := dest + (2 * i).U 360 csBundle(2 * i).uopIdx := (2 * i).U 361 csBundle(2 * i + 1).lsrc(0) := src1 + i.U 362 csBundle(2 * i + 1).lsrc(1) := src2 + i.U 363 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U 364 csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U 365 csBundle(2 * i + 1).uopIdx := (2 * i + 1).U 366 } 367 } 368 is(UopSplitType.VEC_VFW) { 369 for (i <- 0 until MAX_VLMUL / 2) { 370 csBundle(2 * i).lsrc(0) := src1 371 csBundle(2 * i).lsrc(1) := src2 + i.U 372 csBundle(2 * i).lsrc(2) := dest + (2 * i).U 373 csBundle(2 * i).ldest := dest + (2 * i).U 374 csBundle(2 * i).uopIdx := (2 * i).U 375 csBundle(2 * i + 1).lsrc(0) := src1 376 csBundle(2 * i + 1).lsrc(1) := src2 + i.U 377 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U 378 csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U 379 csBundle(2 * i + 1).uopIdx := (2 * i + 1).U 380 } 381 } 382 is(UopSplitType.VEC_WVW) { 383 for (i <- 0 until MAX_VLMUL / 2) { 384 csBundle(2 * i).lsrc(0) := src1 + i.U 385 csBundle(2 * i).lsrc(1) := src2 + (2 * i).U 386 csBundle(2 * i).lsrc(2) := dest + (2 * i).U 387 
csBundle(2 * i).ldest := dest + (2 * i).U 388 csBundle(2 * i).uopIdx := (2 * i).U 389 csBundle(2 * i + 1).lsrc(0) := src1 + i.U 390 csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i + 1).U 391 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U 392 csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U 393 csBundle(2 * i + 1).uopIdx := (2 * i + 1).U 394 } 395 } 396 is(UopSplitType.VEC_VXW) { 397 /* 398 i to vector move 399 */ 400 csBundle(0).srcType(0) := SrcType.reg 401 csBundle(0).srcType(1) := SrcType.imm 402 csBundle(0).lsrc(1) := 0.U 403 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 404 csBundle(0).fuType := FuType.i2v.U 405 csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg) 406 csBundle(0).vecWen := true.B 407 408 for (i <- 0 until MAX_VLMUL / 2) { 409 csBundle(2 * i + 1).srcType(0) := SrcType.vp 410 csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 411 csBundle(2 * i + 1).lsrc(1) := src2 + i.U 412 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U 413 csBundle(2 * i + 1).ldest := dest + (2 * i).U 414 csBundle(2 * i + 1).uopIdx := (2 * i).U 415 csBundle(2 * i + 2).srcType(0) := SrcType.vp 416 csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 417 csBundle(2 * i + 2).lsrc(1) := src2 + i.U 418 csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U 419 csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U 420 csBundle(2 * i + 2).uopIdx := (2 * i + 1).U 421 } 422 } 423 is(UopSplitType.VEC_WXW) { 424 /* 425 i to vector move 426 */ 427 csBundle(0).srcType(0) := SrcType.reg 428 csBundle(0).srcType(1) := SrcType.imm 429 csBundle(0).lsrc(1) := 0.U 430 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 431 csBundle(0).fuType := FuType.i2v.U 432 csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg) 433 csBundle(0).vecWen := true.B 434 435 for (i <- 0 until MAX_VLMUL / 2) { 436 csBundle(2 * i + 1).srcType(0) := SrcType.vp 437 csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 438 csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U 439 csBundle(2 * i + 1).lsrc(2) := dest 
+ (2 * i).U 440 csBundle(2 * i + 1).ldest := dest + (2 * i).U 441 csBundle(2 * i + 1).uopIdx := (2 * i).U 442 csBundle(2 * i + 2).srcType(0) := SrcType.vp 443 csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 444 csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U 445 csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U 446 csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U 447 csBundle(2 * i + 2).uopIdx := (2 * i + 1).U 448 } 449 } 450 is(UopSplitType.VEC_WVV) { 451 for (i <- 0 until MAX_VLMUL / 2) { 452 453 csBundle(2 * i).lsrc(0) := src1 + i.U 454 csBundle(2 * i).lsrc(1) := src2 + (2 * i).U 455 csBundle(2 * i).lsrc(2) := dest + i.U 456 csBundle(2 * i).ldest := dest + i.U 457 csBundle(2 * i).uopIdx := (2 * i).U 458 csBundle(2 * i + 1).lsrc(0) := src1 + i.U 459 csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i + 1).U 460 csBundle(2 * i + 1).lsrc(2) := dest + i.U 461 csBundle(2 * i + 1).ldest := dest + i.U 462 csBundle(2 * i + 1).uopIdx := (2 * i + 1).U 463 } 464 } 465 is(UopSplitType.VEC_WFW) { 466 for (i <- 0 until MAX_VLMUL / 2) { 467 csBundle(2 * i).lsrc(0) := src1 468 csBundle(2 * i).lsrc(1) := src2 + (2 * i).U 469 csBundle(2 * i).lsrc(2) := dest + (2 * i).U 470 csBundle(2 * i).ldest := dest + (2 * i).U 471 csBundle(2 * i).uopIdx := (2 * i).U 472 csBundle(2 * i + 1).lsrc(0) := src1 473 csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i + 1).U 474 csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U 475 csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U 476 csBundle(2 * i + 1).uopIdx := (2 * i + 1).U 477 } 478 } 479 is(UopSplitType.VEC_WXV) { 480 /* 481 i to vector move 482 */ 483 csBundle(0).srcType(0) := SrcType.reg 484 csBundle(0).srcType(1) := SrcType.imm 485 csBundle(0).lsrc(1) := 0.U 486 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 487 csBundle(0).fuType := FuType.i2v.U 488 csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg) 489 csBundle(0).vecWen := true.B 490 491 for (i <- 0 until MAX_VLMUL / 2) { 492 
csBundle(2 * i + 1).srcType(0) := SrcType.vp 493 csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 494 csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U 495 csBundle(2 * i + 1).lsrc(2) := dest + i.U 496 csBundle(2 * i + 1).ldest := dest + i.U 497 csBundle(2 * i + 1).uopIdx := (2 * i).U 498 csBundle(2 * i + 2).srcType(0) := SrcType.vp 499 csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 500 csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U 501 csBundle(2 * i + 2).lsrc(2) := dest + i.U 502 csBundle(2 * i + 2).ldest := dest + i.U 503 csBundle(2 * i + 2).uopIdx := (2 * i + 1).U 504 } 505 } 506 is(UopSplitType.VEC_VVM) { 507 csBundle(0).lsrc(2) := dest 508 csBundle(0).ldest := dest 509 csBundle(0).uopIdx := 0.U 510 for (i <- 1 until MAX_VLMUL) { 511 csBundle(i).lsrc(0) := src1 + i.U 512 csBundle(i).lsrc(1) := src2 + i.U 513 csBundle(i).lsrc(2) := dest 514 csBundle(i).ldest := dest 515 csBundle(i).uopIdx := i.U 516 } 517 } 518 is(UopSplitType.VEC_VFM) { 519 csBundle(0).lsrc(2) := dest 520 csBundle(0).ldest := dest 521 csBundle(0).uopIdx := 0.U 522 for (i <- 1 until MAX_VLMUL) { 523 csBundle(i).lsrc(0) := src1 524 csBundle(i).lsrc(1) := src2 + i.U 525 csBundle(i).lsrc(2) := dest 526 csBundle(i).ldest := dest 527 csBundle(i).uopIdx := i.U 528 } 529 csBundle(numOfUop - 1.U).ldest := dest 530 } 531 is(UopSplitType.VEC_VXM) { 532 /* 533 i to vector move 534 */ 535 csBundle(0).srcType(0) := SrcType.reg 536 csBundle(0).srcType(1) := SrcType.imm 537 csBundle(0).lsrc(1) := 0.U 538 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 539 csBundle(0).fuType := FuType.i2v.U 540 csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg) 541 csBundle(0).vecWen := true.B 542 //LMUL 543 csBundle(1).srcType(0) := SrcType.vp 544 csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 545 csBundle(1).lsrc(2) := dest 546 csBundle(1).ldest := dest 547 csBundle(1).uopIdx := 0.U 548 for (i <- 1 until MAX_VLMUL) { 549 csBundle(i + 1).srcType(0) := 
SrcType.vp 550 csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 551 csBundle(i + 1).lsrc(1) := src2 + i.U 552 csBundle(i + 1).lsrc(2) := dest 553 csBundle(i + 1).ldest := dest 554 csBundle(i + 1).uopIdx := i.U 555 } 556 csBundle(numOfUop - 1.U).ldest := dest 557 } 558 is(UopSplitType.VEC_SLIDE1UP) { 559 /* 560 i to vector move 561 */ 562 csBundle(0).srcType(0) := SrcType.reg 563 csBundle(0).srcType(1) := SrcType.imm 564 csBundle(0).lsrc(1) := 0.U 565 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 566 csBundle(0).fuType := FuType.i2v.U 567 csBundle(0).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), vsewReg) 568 csBundle(0).vecWen := true.B 569 //LMUL 570 csBundle(1).srcType(0) := SrcType.vp 571 csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U 572 csBundle(1).lsrc(2) := dest 573 csBundle(1).ldest := dest 574 csBundle(1).uopIdx := 0.U 575 for (i <- 1 until MAX_VLMUL) { 576 csBundle(i + 1).srcType(0) := SrcType.vp 577 csBundle(i + 1).lsrc(0) := src2 + (i - 1).U 578 csBundle(i + 1).lsrc(1) := src2 + i.U 579 csBundle(i + 1).lsrc(2) := dest + i.U 580 csBundle(i + 1).ldest := dest + i.U 581 csBundle(i + 1).uopIdx := i.U 582 } 583 } 584 is(UopSplitType.VEC_FSLIDE1UP) { 585 //LMUL 586 csBundle(0).srcType(0) := SrcType.fp 587 csBundle(0).lsrc(0) := src1 588 csBundle(0).lsrc(1) := src2 589 csBundle(0).lsrc(2) := dest 590 csBundle(0).ldest := dest 591 csBundle(0).uopIdx := 0.U 592 for (i <- 1 until MAX_VLMUL) { 593 csBundle(i).srcType(0) := SrcType.vp 594 csBundle(i).lsrc(0) := src2 + (i - 1).U 595 csBundle(i).lsrc(1) := src2 + i.U 596 csBundle(i).lsrc(2) := dest + i.U 597 csBundle(i).ldest := dest + i.U 598 csBundle(i).uopIdx := i.U 599 } 600 } 601 is(UopSplitType.VEC_SLIDE1DOWN) { // lmul+lmul = 16 602 /* 603 i to vector move 604 */ 605 csBundle(0).srcType(0) := SrcType.reg 606 csBundle(0).srcType(1) := SrcType.imm 607 csBundle(0).lsrc(1) := 0.U 608 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 609 csBundle(0).fuType := FuType.i2v.U 610 csBundle(0).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), 
vsewReg) 611 csBundle(0).vecWen := true.B 612 //LMUL 613 for (i <- 0 until MAX_VLMUL) { 614 csBundle(2 * i + 1).srcType(0) := SrcType.vp 615 csBundle(2 * i + 1).srcType(1) := SrcType.vp 616 csBundle(2 * i + 1).lsrc(0) := src2 + (i + 1).U 617 csBundle(2 * i + 1).lsrc(1) := src2 + i.U 618 csBundle(2 * i + 1).lsrc(2) := dest + i.U 619 csBundle(2 * i + 1).ldest := VECTOR_TMP_REG_LMUL.U + 1.U 620 csBundle(2 * i + 1).uopIdx := (2 * i).U 621 if (2 * i + 2 < MAX_VLMUL * 2) { 622 csBundle(2 * i + 2).srcType(0) := SrcType.vp 623 csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U 624 // csBundle(2 * i + 2).lsrc(1) := src2 + i.U // DontCare 625 csBundle(2 * i + 2).lsrc(2) := VECTOR_TMP_REG_LMUL.U + 1.U 626 csBundle(2 * i + 2).ldest := dest + i.U 627 csBundle(2 * i + 2).uopIdx := (2 * i + 1).U 628 } 629 } 630 csBundle(numOfUop - 1.U).srcType(0) := SrcType.vp 631 csBundle(numOfUop - 1.U).lsrc(0) := VECTOR_TMP_REG_LMUL.U 632 csBundle(numOfUop - 1.U).ldest := dest + lmul - 1.U 633 } 634 is(UopSplitType.VEC_FSLIDE1DOWN) { 635 //LMUL 636 for (i <- 0 until MAX_VLMUL) { 637 csBundle(2 * i).srcType(0) := SrcType.vp 638 csBundle(2 * i).srcType(1) := SrcType.vp 639 csBundle(2 * i).lsrc(0) := src2 + (i + 1).U 640 csBundle(2 * i).lsrc(1) := src2 + i.U 641 csBundle(2 * i).lsrc(2) := dest + i.U 642 csBundle(2 * i).ldest := VECTOR_TMP_REG_LMUL.U 643 csBundle(2 * i).uopIdx := (2 * i).U 644 csBundle(2 * i + 1).srcType(0) := SrcType.fp 645 csBundle(2 * i + 1).lsrc(0) := src1 646 csBundle(2 * i + 1).lsrc(2) := VECTOR_TMP_REG_LMUL.U 647 csBundle(2 * i + 1).ldest := dest + i.U 648 csBundle(2 * i + 1).uopIdx := (2 * i + 1).U 649 } 650 csBundle(numOfUop - 1.U).srcType(0) := SrcType.fp 651 csBundle(numOfUop - 1.U).lsrc(0) := src1 652 csBundle(numOfUop - 1.U).ldest := dest + lmul - 1.U 653 } 654 is(UopSplitType.VEC_VRED) { 655 when(vlmulReg === "b001".U) { 656 csBundle(0).srcType(2) := SrcType.DC 657 csBundle(0).lsrc(0) := src2 + 1.U 658 csBundle(0).lsrc(1) := src2 659 csBundle(0).ldest := 
VECTOR_TMP_REG_LMUL.U 660 csBundle(0).uopIdx := 0.U 661 } 662 when(vlmulReg === "b010".U) { 663 csBundle(0).srcType(2) := SrcType.DC 664 csBundle(0).lsrc(0) := src2 + 1.U 665 csBundle(0).lsrc(1) := src2 666 csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U 667 csBundle(0).uopIdx := 0.U 668 669 csBundle(1).srcType(2) := SrcType.DC 670 csBundle(1).lsrc(0) := src2 + 3.U 671 csBundle(1).lsrc(1) := src2 + 2.U 672 csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U 673 csBundle(1).uopIdx := 1.U 674 675 csBundle(2).srcType(2) := SrcType.DC 676 csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U 677 csBundle(2).lsrc(1) := VECTOR_TMP_REG_LMUL.U 678 csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U 679 csBundle(2).uopIdx := 2.U 680 } 681 when(vlmulReg === "b011".U) { 682 for (i <- 0 until MAX_VLMUL) { 683 if (i < MAX_VLMUL - MAX_VLMUL / 2) { 684 csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U 685 csBundle(i).lsrc(1) := src2 + (i * 2).U 686 csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U 687 } else if (i < MAX_VLMUL - MAX_VLMUL / 4) { 688 csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + (i - MAX_VLMUL / 2) * 2 + 1).U 689 csBundle(i).lsrc(1) := (VECTOR_TMP_REG_LMUL + (i - MAX_VLMUL / 2) * 2).U 690 csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U 691 } else if (i < MAX_VLMUL - MAX_VLMUL / 8) { 692 csBundle(6).lsrc(0) := (VECTOR_TMP_REG_LMUL + 5).U 693 csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U 694 csBundle(6).ldest := (VECTOR_TMP_REG_LMUL + 6).U 695 } 696 csBundle(i).srcType(2) := SrcType.DC 697 csBundle(i).uopIdx := i.U 698 } 699 } 700 when(vlmulReg.orR) { 701 csBundle(numOfUop - 1.U).srcType(2) := SrcType.vp 702 csBundle(numOfUop - 1.U).lsrc(0) := src1 703 csBundle(numOfUop - 1.U).lsrc(1) := VECTOR_TMP_REG_LMUL.U + numOfUop - 2.U 704 csBundle(numOfUop - 1.U).lsrc(2) := dest 705 csBundle(numOfUop - 1.U).ldest := dest 706 csBundle(numOfUop - 1.U).uopIdx := numOfUop - 1.U 707 } 708 } 709 is(UopSplitType.VEC_VFRED) { 710 val vlmul = vlmulReg 711 val vsew = vsewReg 712 when(vlmul === 
VLmul.m8){ 713 for (i <- 0 until 4) { 714 csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U 715 csBundle(i).lsrc(1) := src2 + (i * 2).U 716 csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U 717 csBundle(i).uopIdx := i.U 718 } 719 for (i <- 4 until 6) { 720 csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + (i - 4) * 2 + 1).U 721 csBundle(i).lsrc(1) := (VECTOR_TMP_REG_LMUL + (i - 4) * 2).U 722 csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U 723 csBundle(i).uopIdx := i.U 724 } 725 csBundle(6).lsrc(0) := (VECTOR_TMP_REG_LMUL + 5).U 726 csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U 727 csBundle(6).ldest := (VECTOR_TMP_REG_LMUL + 6).U 728 csBundle(6).uopIdx := 6.U 729 when(vsew === VSew.e64) { 730 csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U 731 csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U 732 csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U 733 csBundle(7).vpu.fpu.isFoldTo1_2 := true.B 734 csBundle(7).uopIdx := 7.U 735 csBundle(8).lsrc(0) := src1 736 csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U 737 csBundle(8).ldest := dest 738 csBundle(8).uopIdx := 8.U 739 } 740 when(vsew === VSew.e32) { 741 csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U 742 csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U 743 csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U 744 csBundle(7).vpu.fpu.isFoldTo1_2 := true.B 745 csBundle(7).uopIdx := 7.U 746 csBundle(8).lsrc(0) := (VECTOR_TMP_REG_LMUL + 7).U 747 csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U 748 csBundle(8).ldest := (VECTOR_TMP_REG_LMUL + 8).U 749 csBundle(8).vpu.fpu.isFoldTo1_4 := true.B 750 csBundle(8).uopIdx := 8.U 751 csBundle(9).lsrc(0) := src1 752 csBundle(9).lsrc(1) := (VECTOR_TMP_REG_LMUL + 8).U 753 csBundle(9).ldest := dest 754 csBundle(9).uopIdx := 9.U 755 } 756 when(vsew === VSew.e16) { 757 csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U 758 csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U 759 csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U 760 csBundle(7).vpu.fpu.isFoldTo1_2 := true.B 761 
csBundle(7).uopIdx := 7.U 762 csBundle(8).lsrc(0) := (VECTOR_TMP_REG_LMUL + 7).U 763 csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U 764 csBundle(8).ldest := (VECTOR_TMP_REG_LMUL + 8).U 765 csBundle(8).vpu.fpu.isFoldTo1_4 := true.B 766 csBundle(8).uopIdx := 8.U 767 csBundle(9).lsrc(0) := (VECTOR_TMP_REG_LMUL + 8).U 768 csBundle(9).lsrc(1) := (VECTOR_TMP_REG_LMUL + 8).U 769 csBundle(9).ldest := (VECTOR_TMP_REG_LMUL + 9).U 770 csBundle(9).vpu.fpu.isFoldTo1_8 := true.B 771 csBundle(9).uopIdx := 9.U 772 csBundle(10).lsrc(0) := src1 773 csBundle(10).lsrc(1) := (VECTOR_TMP_REG_LMUL + 9).U 774 csBundle(10).ldest := dest 775 csBundle(10).uopIdx := 10.U 776 } 777 } 778 when(vlmul === VLmul.m4) { 779 for (i <- 0 until 2) { 780 csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U 781 csBundle(i).lsrc(1) := src2 + (i * 2).U 782 csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U 783 csBundle(i).uopIdx := i.U 784 } 785 csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U 786 csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U 787 csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U 788 csBundle(2).uopIdx := 2.U 789 when(vsew === VSew.e64) { 790 csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U 791 csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U 792 csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U 793 csBundle(3).vpu.fpu.isFoldTo1_2 := true.B 794 csBundle(3).uopIdx := 3.U 795 csBundle(4).lsrc(0) := src1 796 csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U 797 csBundle(4).ldest := dest 798 csBundle(4).uopIdx := 4.U 799 } 800 when(vsew === VSew.e32) { 801 csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U 802 csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U 803 csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U 804 csBundle(3).vpu.fpu.isFoldTo1_2 := true.B 805 csBundle(3).uopIdx := 3.U 806 csBundle(4).lsrc(0) := (VECTOR_TMP_REG_LMUL + 3).U 807 csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U 808 csBundle(4).ldest := (VECTOR_TMP_REG_LMUL + 4).U 809 csBundle(4).vpu.fpu.isFoldTo1_4 := true.B 
// NOTE(review): the statements up to the first `is(UopSplitType.VEC_VFREDOSUM)` below continue a uop-split case whose header lies above this chunk; they are kept statement-for-statement.
          csBundle(4).uopIdx := 4.U
          csBundle(5).lsrc(0) := src1
          csBundle(5).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
          csBundle(5).ldest := dest
          csBundle(5).uopIdx := 5.U
        }
        when(vsew === VSew.e16) {
          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
          csBundle(3).vpu.fpu.isFoldTo1_2 := true.B
          csBundle(3).uopIdx := 3.U
          csBundle(4).lsrc(0) := (VECTOR_TMP_REG_LMUL + 3).U
          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
          csBundle(4).ldest := (VECTOR_TMP_REG_LMUL + 4).U
          csBundle(4).vpu.fpu.isFoldTo1_4 := true.B
          csBundle(4).uopIdx := 4.U
          csBundle(5).lsrc(0) := (VECTOR_TMP_REG_LMUL + 4).U
          csBundle(5).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
          csBundle(5).ldest := (VECTOR_TMP_REG_LMUL + 5).U
          csBundle(5).vpu.fpu.isFoldTo1_8 := true.B
          csBundle(5).uopIdx := 5.U
          csBundle(6).lsrc(0) := src1
          csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 5).U
          csBundle(6).ldest := dest
          csBundle(6).uopIdx := 6.U
        }
      }
      when(vlmul === VLmul.m2) {
        csBundle(0).lsrc(0) := src2 + 1.U
        csBundle(0).lsrc(1) := src2 + 0.U
        csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
        csBundle(0).uopIdx := 0.U
        when(vsew === VSew.e64) {
          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
          csBundle(1).vpu.fpu.isFoldTo1_2 := true.B
          csBundle(1).uopIdx := 1.U
          csBundle(2).lsrc(0) := src1
          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
          csBundle(2).ldest := dest
          csBundle(2).uopIdx := 2.U
        }
        when(vsew === VSew.e32) {
          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
          csBundle(1).vpu.fpu.isFoldTo1_2 := true.B
          csBundle(1).uopIdx := 1.U
          csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
          csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
          csBundle(2).vpu.fpu.isFoldTo1_4 := true.B
          csBundle(2).uopIdx := 2.U
          csBundle(3).lsrc(0) := src1
          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
          csBundle(3).ldest := dest
          csBundle(3).uopIdx := 3.U
        }
        when(vsew === VSew.e16) {
          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
          csBundle(1).vpu.fpu.isFoldTo1_2 := true.B
          csBundle(1).uopIdx := 1.U
          csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
          csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
          csBundle(2).vpu.fpu.isFoldTo1_4 := true.B
          csBundle(2).uopIdx := 2.U
          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
          csBundle(3).vpu.fpu.isFoldTo1_8 := true.B
          csBundle(3).uopIdx := 3.U
          csBundle(4).lsrc(0) := src1
          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
          csBundle(4).ldest := dest
          csBundle(4).uopIdx := 4.U
        }
      }
      when(vlmul === VLmul.m1) {
        when(vsew === VSew.e64) {
          csBundle(0).lsrc(0) := src2
          csBundle(0).lsrc(1) := src2
          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
          csBundle(0).vpu.fpu.isFoldTo1_2 := true.B
          csBundle(0).uopIdx := 0.U
          csBundle(1).lsrc(0) := src1
          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
          csBundle(1).ldest := dest
          csBundle(1).uopIdx := 1.U
        }
        when(vsew === VSew.e32) {
          csBundle(0).lsrc(0) := src2
          csBundle(0).lsrc(1) := src2
          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
          csBundle(0).vpu.fpu.isFoldTo1_2 := true.B
          csBundle(0).uopIdx := 0.U
          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
          csBundle(1).vpu.fpu.isFoldTo1_4 := true.B
          csBundle(1).uopIdx := 1.U
          csBundle(2).lsrc(0) := src1
          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
          csBundle(2).ldest := dest
          csBundle(2).uopIdx := 2.U
        }
        when(vsew === VSew.e16) {
          csBundle(0).lsrc(0) := src2
          csBundle(0).lsrc(1) := src2
          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
          csBundle(0).vpu.fpu.isFoldTo1_2 := true.B
          csBundle(0).uopIdx := 0.U
          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
          csBundle(1).vpu.fpu.isFoldTo1_4 := true.B
          csBundle(1).uopIdx := 1.U
          csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
          csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
          csBundle(2).vpu.fpu.isFoldTo1_8 := true.B
          csBundle(2).uopIdx := 2.U
          csBundle(3).lsrc(0) := src1
          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
          csBundle(3).ldest := dest
          csBundle(3).uopIdx := 3.U
        }
      }
      when(vlmul === VLmul.mf2) {
        when(vsew === VSew.e32) {
          csBundle(0).lsrc(0) := src2
          csBundle(0).lsrc(1) := src2
          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
          csBundle(0).vpu.fpu.isFoldTo1_4 := true.B
          csBundle(0).uopIdx := 0.U
          csBundle(1).lsrc(0) := src1
          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
          csBundle(1).ldest := dest
          csBundle(1).uopIdx := 1.U
        }
        when(vsew === VSew.e16) {
          csBundle(0).lsrc(0) := src2
          csBundle(0).lsrc(1) := src2
          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
          csBundle(0).vpu.fpu.isFoldTo1_4 := true.B
          csBundle(0).uopIdx := 0.U
          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
          csBundle(1).vpu.fpu.isFoldTo1_8 := true.B
          csBundle(1).uopIdx := 1.U
          csBundle(2).lsrc(0) := src1
          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
          csBundle(2).ldest := dest
          csBundle(2).uopIdx := 2.U
        }
      }
      when(vlmul === VLmul.mf4) {
        when(vsew === VSew.e16) {
          csBundle(0).lsrc(0) := src2
          csBundle(0).lsrc(1) := src2
          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
          csBundle(0).vpu.fpu.isFoldTo1_8 := true.B
          csBundle(0).uopIdx := 0.U
          csBundle(1).lsrc(0) := src1
          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
          csBundle(1).ldest := dest
          csBundle(1).uopIdx := 1.U
        }
      }
    }

    is(UopSplitType.VEC_VFREDOSUM) {
      import yunsuan.VfaluType
      val vlmul = vlmulReg
      val vsew = vsewReg
      val isWiden = latchedInst.fuOpType === VfaluType.vfwredosum

      // Picks the isFoldTo1_<ratio> control bit of uop `idx` (ratio is 2, 4 or 8).
      def foldFlag(idx: Int, ratio: Int) = ratio match {
        case 2 => csBundle(idx).vpu.fpu.isFoldTo1_2
        case 4 => csBundle(idx).vpu.fpu.isFoldTo1_4
        case 8 => csBundle(idx).vpu.fpu.isFoldTo1_8
      }

      /**
       * Emits the connections for uops 0 until `uopNum` of one (vlmul, vsew) combination.
       *
       * @param uopNum     number of uops generated for this combination
       * @param ratio      every `ratio`-th uop (i % ratio == 0) reads `src2 + i / ratio`; all other uops
       *                   read the temporary register and get a fold flag set
       * @param widenAware when true the fold flag is steered by `isWiden` (isFoldTo1_<ratio/2> when widening,
       *                   isFoldTo1_<ratio> otherwise) and uop `i % ratio == 1` reads `src2 + i / ratio`
       *                   as lsrc(2) when widening; when false only isFoldTo1_<ratio> is driven
       */
      def genCsBundle_VEC_VFREDOSUM(uopNum: Int, ratio: Int, widenAware: Boolean): Unit = {
        for (i <- 0 until uopNum) {
          val readsVs2 = i % ratio == 0
          val isLastUop = i == uopNum - 1
          val foldActive = (!readsVs2).B
          csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
          csBundle(i).lsrc(1) := (if (readsVs2) src2 + (i / ratio).U else VECTOR_TMP_REG_LMUL.U)
          csBundle(i).lsrc(2) := (
            if (readsVs2) src2 + (i / ratio).U
            else if (isLastUop) dest
            else if (widenAware && i % ratio == 1) Mux(isWiden, src2 + (i / ratio).U, VECTOR_TMP_REG_LMUL.U)
            else VECTOR_TMP_REG_LMUL.U
          )
          csBundle(i).ldest := (if (isLastUop) dest else VECTOR_TMP_REG_LMUL.U)
          if (widenAware) {
            foldFlag(i, ratio / 2) := isWiden && foldActive
            foldFlag(i, ratio) := !isWiden && foldActive
          } else {
            foldFlag(i, ratio) := foldActive
          }
          csBundle(i).uopIdx := i.U
        }
      }

      // (vlmul, vsew, uopNum, ratio, widenAware) -- one row per combination handled by this case.
      // The e64 rows and all m8 rows are not widen-aware, exactly as in the hand-unrolled original.
      val splitTable = Seq(
        (VLmul.m8,  VSew.e64, 16, 2, false),
        (VLmul.m8,  VSew.e32, 32, 4, false),
        (VLmul.m8,  VSew.e16, 64, 8, false),
        (VLmul.m4,  VSew.e64,  8, 2, false),
        (VLmul.m4,  VSew.e32, 16, 4, true),
        (VLmul.m4,  VSew.e16, 32, 8, true),
        (VLmul.m2,  VSew.e64,  4, 2, false),
        (VLmul.m2,  VSew.e32,  8, 4, true),
        (VLmul.m2,  VSew.e16, 16, 8, true),
        (VLmul.m1,  VSew.e64,  2, 2, false),
        (VLmul.m1,  VSew.e32,  4, 4, true),
        (VLmul.m1,  VSew.e16,  8, 8, true),
        (VLmul.mf2, VSew.e32,  2, 4, true),
        (VLmul.mf2, VSew.e16,  4, 8, true),
        (VLmul.mf4, VSew.e16,  2, 8, true)
      )
      splitTable.foreach { case (lmulSel, sewSel, uopNum, ratio, widenAware) =>
        when((vlmul === lmulSel) && (vsew === sewSel)) {
          genCsBundle_VEC_VFREDOSUM(uopNum, ratio, widenAware)
        }
      }
    }

    is(UopSplitType.VEC_SLIDEUP) {
      // uop 0: i to vector move, result goes to the temporary vector register
      val scalarMove = csBundle(0)
      scalarMove.srcType(0) := SrcType.reg
      scalarMove.srcType(1) := SrcType.imm
      scalarMove.lsrc(1) := 0.U
      scalarMove.ldest := VECTOR_TMP_REG_LMUL.U
      scalarMove.fuType := FuType.i2v.U
      scalarMove.fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
      scalarMove.vecWen := true.B
      // LMUL: destination register i is produced by a chain of i + 1 uops (j = 0 .. i)
      for (i <- 0 until MAX_VLMUL; j <- 0 to i) {
        val slot = i * (i + 1) / 2 + j + 1
        val old_vd = if (j == 0) dest + i.U else (VECTOR_TMP_REG_LMUL + j).U
        val vd = if (j == i) dest + i.U else (VECTOR_TMP_REG_LMUL + j + 1).U
        val slideUop = csBundle(slot)
        slideUop.srcType(0) := SrcType.vp
        slideUop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
        slideUop.lsrc(1) := src2 + j.U
        slideUop.lsrc(2) := old_vd
        slideUop.ldest := vd
        // uopIdx does not count the leading move uop
        slideUop.uopIdx := (slot - 1).U
      }
    }

    is(UopSplitType.VEC_SLIDEDOWN) {
      // uop 0: i to vector move, result goes to the temporary vector register
      val scalarMove = csBundle(0)
      scalarMove.srcType(0) := SrcType.reg
      scalarMove.srcType(1) := SrcType.imm
      scalarMove.lsrc(1) := 0.U
      scalarMove.ldest := VECTOR_TMP_REG_LMUL.U
      scalarMove.fuType := FuType.i2v.U
      scalarMove.fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
      scalarMove.vecWen := true.B
      // LMUL: slots are addressed backwards from numOfUop, so the index is a hardware value
      for (i <- 0 until MAX_VLMUL; j <- (0 to i).reverse) {
        when(i.U < lmul) {
          val backOffset = i * (i + 1) / 2 + i - j + 1
          def slidVd = dest + lmul - 1.U - i.U
          val old_vd = if (j == 0) slidVd else (VECTOR_TMP_REG_LMUL + j).U
          val vd = if (j == i) slidVd else (VECTOR_TMP_REG_LMUL + j + 1).U
          val slideUop = csBundle(numOfUop - backOffset.U)
          slideUop.srcType(0) := SrcType.vp
          slideUop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
          slideUop.lsrc(1) := src2 + lmul - 1.U - j.U
          slideUop.lsrc(2) := old_vd
          slideUop.ldest := vd
          slideUop.uopIdx := numOfUop - (backOffset + 1).U
        }
      }
    }

    is(UopSplitType.VEC_M0X) {
      // LMUL: uop i reads temporary register i - 1 and writes temporary register i
      for (i <- 0 until MAX_VLMUL) {
        val chainUop = csBundle(i)
        chainUop.srcType(0) := (if (i == 0) SrcType.DC else SrcType.vp)
        chainUop.srcType(1) := SrcType.vp
        chainUop.rfWen := false.B
        chainUop.vecWen := true.B
        chainUop.lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
        chainUop.lsrc(1) := src2
        // chainUop.lsrc(2) := dest + i.U  DontCare
        chainUop.ldest := (VECTOR_TMP_REG_LMUL + i).U
        chainUop.uopIdx := i.U
      }
      // Uop lmul - 1 is re-targeted to the FP temporary register (these connects override the loop above)
      val lastChainUop = csBundle(lmul - 1.U)
      lastChainUop.vecWen := false.B
      lastChainUop.fpWen := true.B
      lastChainUop.ldest := FP_TMP_REG_MV.U
      // FMV_X_D: uop lmul reads the FP temporary register and writes dest in the integer file
      val fmvXD = csBundle(lmul)
      fmvXD.srcType(0) := SrcType.fp
      fmvXD.srcType(1) := SrcType.imm
      fmvXD.lsrc(0) := FP_TMP_REG_MV.U
      fmvXD.lsrc(1) := 0.U
      fmvXD.ldest := dest
      fmvXD.fuType := FuType.fmisc.U
      fmvXD.rfWen := true.B
      fmvXD.fpWen := false.B
      fmvXD.vecWen := false.B
      fmvXD.fpu.isAddSub := false.B
      fmvXD.fpu.typeTagIn := FPU.D
      fmvXD.fpu.typeTagOut := FPU.D
      fmvXD.fpu.fromInt := false.B
      fmvXD.fpu.wflags := false.B
      fmvXD.fpu.fpWen := false.B
      fmvXD.fpu.div := false.B
      fmvXD.fpu.sqrt := false.B
      fmvXD.fpu.fcvt := false.B
    }

    is(UopSplitType.VEC_MVV) {
      // LMUL: two uops per register index; the even one writes dest + i, the odd one writes temporary register i
      for (i <- 0 until MAX_VLMUL) {
        val srcType0 = if (i == 0) SrcType.DC else SrcType.vp
        val evenUop = csBundle(i * 2 + 0)
        val oddUop = csBundle(i * 2 + 1)

        evenUop.srcType(0) := srcType0
        evenUop.srcType(1) := SrcType.vp
        evenUop.lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
        evenUop.lsrc(1) := src2
        evenUop.lsrc(2) := dest + i.U
        evenUop.ldest := dest + i.U
        evenUop.uopIdx := (i * 2 + 0).U

        oddUop.srcType(0) := srcType0
        oddUop.srcType(1) := SrcType.vp
        oddUop.lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
        oddUop.lsrc(1) := src2
        // oddUop.lsrc(2) := dest + i.U  DontCare
        oddUop.ldest := (VECTOR_TMP_REG_LMUL + i).U
        oddUop.uopIdx := (i * 2 + 1).U
      }
    }

    is(UopSplitType.VEC_M0X_VFIRST) {
      // LMUL: uop 0 writes the FP temporary register instead of the integer file
      val firstUop = csBundle(0)
      firstUop.rfWen := false.B
      firstUop.fpWen := true.B
      firstUop.ldest := FP_TMP_REG_MV.U
      // FMV_X_D: uop 1 reads the FP temporary register and writes dest in the integer file
      val fmvXD = csBundle(1)
      fmvXD.srcType(0) := SrcType.fp
      fmvXD.srcType(1) := SrcType.imm
      fmvXD.lsrc(0) := FP_TMP_REG_MV.U
      fmvXD.lsrc(1) := 0.U
      fmvXD.ldest := dest
      fmvXD.fuType := FuType.fmisc.U
      fmvXD.rfWen := true.B
      fmvXD.fpWen := false.B
      fmvXD.vecWen := false.B
      fmvXD.fpu.isAddSub := false.B
      fmvXD.fpu.typeTagIn := FPU.D
      fmvXD.fpu.typeTagOut := FPU.D
      fmvXD.fpu.fromInt := false.B
      fmvXD.fpu.wflags := false.B
      fmvXD.fpu.fpWen := false.B
      fmvXD.fpu.div := false.B
      fmvXD.fpu.sqrt := false.B
      fmvXD.fpu.fcvt := false.B
    }
    is(UopSplitType.VEC_VWW) {
      for (i <- 0 until MAX_VLMUL*2) {
        val reduceUop = csBundle(i)
        // Both branches of the original when/otherwise drove these three fields identically.
        reduceUop.srcType(2) := SrcType.DC
        // reduceUop.lsrc(2) := dest + (2 * i).U
        reduceUop.ldest := (VECTOR_TMP_REG_LMUL + i).U
        reduceUop.uopIdx := i.U
        when(i.U < lmul) {
          // first lmul uops: both operands are source register src2 + i
          reduceUop.lsrc(0) := src2 + i.U
          reduceUop.lsrc(1) := src2 + i.U
        }.otherwise {
          // remaining uops: operands are temporaries 2 * (i - lmul) + 1 and 2 * (i - lmul)
          reduceUop.lsrc(0) := VECTOR_TMP_REG_LMUL.U + Cat((i.U-lmul),0.U(1.W)) + 1.U
          reduceUop.lsrc(1) := VECTOR_TMP_REG_LMUL.U + Cat((i.U-lmul),0.U(1.W))
        }
      }
      // Final uop override. These connects were repeated inside every loop iteration; issuing them once
      // after the loop yields the same result under last-connect semantics.
      val finalUop = csBundle(numOfUop-1.U)
      finalUop.srcType(2) := SrcType.vp
      finalUop.lsrc(0) := src1
      finalUop.lsrc(2) := dest
      finalUop.ldest := dest
    }
    is(UopSplitType.VEC_RGATHER) {
      // Emits len * len uops: uop (i, j) combines src1 + i with src2 + j and chains through temporaries over j.
      def genCsBundle_VEC_RGATHER(len:Int): Unit ={
        for (i <- 0 until len; j <- 0 until len) {
          val gatherUop = csBundle(i * len + j)
          // gatherUop.srcType(0) := SrcType.vp // SrcType.imm
          // gatherUop.srcType(1) := SrcType.vp
          // gatherUop.srcType(2) := SrcType.vp
          gatherUop.lsrc(0) := src1 + i.U
          gatherUop.lsrc(1) := src2 + j.U
          val vd_old = if (j == 0) dest + i.U else (VECTOR_TMP_REG_LMUL + j - 1).U
          gatherUop.lsrc(2) := vd_old
          val vd = if (j == len - 1) dest + i.U else (VECTOR_TMP_REG_LMUL + j).U
          gatherUop.ldest := vd
          gatherUop.uopIdx := (i * len + j).U
        }
      }
      switch(vlmulReg) {
        is("b001".U ){
          genCsBundle_VEC_RGATHER(2)
        }
        is("b010".U ){
          genCsBundle_VEC_RGATHER(4)
        }
        is("b011".U ){
          genCsBundle_VEC_RGATHER(8)
        }
      }
    }
    is(UopSplitType.VEC_RGATHER_VX) {
      // Emits len * len uops in slots 1 .. len * len; slot 0 is the move uop configured below.
      def genCsBundle_RGATHER_VX(len:Int): Unit ={
        for (i <- 0 until len; j <- 0 until len) {
          val gatherUop = csBundle(i * len + j + 1)
          gatherUop.srcType(0) := SrcType.vp
          // gatherUop.srcType(1) := SrcType.vp
          // gatherUop.srcType(2) := SrcType.vp
          gatherUop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
          gatherUop.lsrc(1) := src2 + j.U
          val vd_old = if (j == 0) dest + i.U else (VECTOR_TMP_REG_LMUL + j).U
          gatherUop.lsrc(2) := vd_old
          val vd = if (j == len - 1) dest + i.U else (VECTOR_TMP_REG_LMUL + j + 1).U
          gatherUop.ldest := vd
          // uopIdx does not count the leading move uop
          gatherUop.uopIdx := (i * len + j).U
        }
      }
      // i to vector move
      val scalarMove = csBundle(0)
      scalarMove.srcType(0) := SrcType.reg
      scalarMove.srcType(1) := SrcType.imm
      scalarMove.lsrc(1) := 0.U
      scalarMove.ldest := VECTOR_TMP_REG_LMUL.U
      scalarMove.fuType := FuType.i2v.U
      scalarMove.fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
      scalarMove.vecWen := true.B
      switch(vlmulReg) {
        is("b000".U ){
          genCsBundle_RGATHER_VX(1)
        }
        is("b001".U ){
          genCsBundle_RGATHER_VX(2)
        }
        is("b010".U ){
          genCsBundle_RGATHER_VX(4)
        }
        is("b011".U ){
          genCsBundle_RGATHER_VX(8)
        }
      }
    }
    is(UopSplitType.VEC_RGATHEREI16) {
      // Variant selected when vsewReg is all zeros: two uops per (i, j) pair, reading src1 + 2i and src1 + 2i + 1.
      def genCsBundle_VEC_RGATHEREI16_SEW8(len:Int): Unit ={
        for (i <- 0 until len; j <- 0 until len) {
          val base = (i * len + j) * 2
          // gatherUop.srcType(0) := SrcType.vp // SrcType.imm
          // gatherUop.srcType(1) := SrcType.vp
          // gatherUop.srcType(2) := SrcType.vp
          val vd_old0 = if (j == 0) dest + i.U else (VECTOR_TMP_REG_LMUL + j*2-1).U
          val vd0 = (VECTOR_TMP_REG_LMUL + j*2).U
          val lowUop = csBundle(base + 0)
          lowUop.lsrc(0) := src1 + (i*2+0).U
          lowUop.lsrc(1) := src2 + j.U
          lowUop.lsrc(2) := vd_old0
          lowUop.ldest := vd0
          lowUop.uopIdx := (base + 0).U

          val vd_old1 = (VECTOR_TMP_REG_LMUL + j*2).U
          val vd1 = if (j == len - 1) dest + i.U else (VECTOR_TMP_REG_LMUL + j*2+1).U
          val highUop = csBundle(base + 1)
          highUop.lsrc(0) := src1 + (i*2+1).U
          highUop.lsrc(1) := src2 + j.U
          highUop.lsrc(2) := vd_old1
          highUop.ldest := vd1
          highUop.uopIdx := (base + 1).U
        }
      }
      // Default variant: one uop per (i, j) pair.
      def genCsBundle_VEC_RGATHEREI16(len:Int): Unit ={
        for (i <- 0 until len; j <- 0 until len) {
          val vd_old = if (j == 0) dest + i.U else (VECTOR_TMP_REG_LMUL + j-1).U
          val vd = if (j == len - 1) dest + i.U else (VECTOR_TMP_REG_LMUL + j).U
          val gatherUop = csBundle(i * len + j)
          // gatherUop.srcType(0) := SrcType.vp // SrcType.imm
          // gatherUop.srcType(1) := SrcType.vp
          // gatherUop.srcType(2) := SrcType.vp
          gatherUop.lsrc(0) := src1 + i.U
          gatherUop.lsrc(1) := src2 + j.U
          gatherUop.lsrc(2) := vd_old
          gatherUop.ldest := vd
          gatherUop.uopIdx := (i * len + j).U
        }
      }
      switch(vlmulReg) {
        is("b000".U ){
          when(!vsewReg.orR){
            genCsBundle_VEC_RGATHEREI16_SEW8(1)
          }.otherwise{
            genCsBundle_VEC_RGATHEREI16(1)
          }
        }
        is("b001".U) {
          when(!vsewReg.orR) {
            genCsBundle_VEC_RGATHEREI16_SEW8(2)
          }.otherwise {
            genCsBundle_VEC_RGATHEREI16(2)
          }
        }
        is("b010".U) {
          when(!vsewReg.orR) {
            genCsBundle_VEC_RGATHEREI16_SEW8(4)
          }.otherwise {
            genCsBundle_VEC_RGATHEREI16(4)
          }
        }
        is("b011".U) {
          // only the default variant is generated for this vlmul encoding
          genCsBundle_VEC_RGATHEREI16(8)
        }
      }
    }
    is(UopSplitType.VEC_COMPRESS) {
      // Row i occupies slots i*(i+3)/2 .. ; every row but the last carries one extra uop (j == i + 1)
      // whose destination is the temporary register and whose source types 1/2 are DontCare.
      def genCsBundle_VEC_COMPRESS(len:Int): Unit ={
        for (i <- 0 until len) {
          val isLastRow = i == len - 1
          val jlen = if (isLastRow) i + 1 else i + 2
          for (j <- 0 until jlen) {
            val isExtraUop = j == i + 1
            val vd_old = if (i == j) dest + i.U else (VECTOR_TMP_REG_LMUL + j + 1).U
            val vd =
              if (isLastRow) dest + j.U
              else if (isExtraUop) VECTOR_TMP_REG_LMUL.U
              else (VECTOR_TMP_REG_LMUL + j + 1).U
            val src23Type = if (isExtraUop) DontCare else SrcType.vp
            val compressUop = csBundle(i*(i+3)/2 + j)
            compressUop.srcType(0) := SrcType.vp
            compressUop.srcType(1) := src23Type
            compressUop.srcType(2) := src23Type
            compressUop.lsrc(0) := src1
            compressUop.lsrc(1) := src2 + i.U
            compressUop.lsrc(2) := vd_old
            // compressUop.lsrc(3) := VECTOR_TMP_REG_LMUL.U
            compressUop.ldest := vd
            compressUop.uopIdx := (i*(i+3)/2 + j).U
          }
        }
      }
      switch(vlmulReg) {
        is("b001".U ){
          genCsBundle_VEC_COMPRESS(2)
        }
        is("b010".U ){
          genCsBundle_VEC_COMPRESS(4)
        }
        is("b011".U ){
          genCsBundle_VEC_COMPRESS(8)
        }
      }
    }
    is(UopSplitType.VEC_MVNR) {
      // one uop per register index, every register number offset by i
      for (i <- 0 until MAX_VLMUL) {
        val moveUop = csBundle(i)
        moveUop.lsrc(0) := src1 + i.U
        moveUop.lsrc(1) := src2 + i.U
        moveUop.lsrc(2) := dest + i.U
        moveUop.ldest := dest + i.U
        moveUop.uopIdx := i.U
      }
    }
    is(UopSplitType.VEC_US_LDST) {
      /*
      FMV.D.X
       */
      val fmvDX = csBundle(0)
      fmvDX.srcType(0) := SrcType.reg
      fmvDX.srcType(1) := SrcType.imm
      fmvDX.lsrc(1) := 0.U
      fmvDX.ldest := FP_TMP_REG_MV.U
      fmvDX.fuType := FuType.i2f.U
      fmvDX.rfWen := false.B
      fmvDX.fpWen := true.B
      fmvDX.vecWen := false.B
      fmvDX.fpu.isAddSub := false.B
      fmvDX.fpu.typeTagIn := FPU.D
      fmvDX.fpu.typeTagOut := FPU.D
      fmvDX.fpu.fromInt := true.B
      fmvDX.fpu.wflags := false.B
      fmvDX.fpu.fpWen := true.B
      fmvDX.fpu.div := false.B
      fmvDX.fpu.sqrt := false.B
      fmvDX.fpu.fcvt := false.B
      //LMUL
      for (i <- 0 until MAX_VLMUL) {
        val memUop = csBundle(i + 1)
        memUop.srcType(0) := SrcType.fp
        memUop.lsrc(0) := FP_TMP_REG_MV.U
        memUop.lsrc(2) := dest + i.U // old vd
        memUop.ldest := dest + i.U
        memUop.uopIdx := i.U
      }
    }
    is(UopSplitType.VEC_S_LDST) {
      /*
      FMV.D.X
       */
      // Configures uop `idx` as an integer-to-FP move (FuType.i2f) writing FP-file register `target`.
      def configIntToFpMove(idx: Int, target: UInt): Unit = {
        val fmvDX = csBundle(idx)
        fmvDX.srcType(0) := SrcType.reg
        fmvDX.srcType(1) := SrcType.imm
        fmvDX.lsrc(1) := 0.U
        fmvDX.ldest := target
        fmvDX.fuType := FuType.i2f.U
        fmvDX.rfWen := false.B
        fmvDX.fpWen := true.B
        fmvDX.vecWen := false.B
        fmvDX.fpu.isAddSub := false.B
        fmvDX.fpu.typeTagIn := FPU.D
        fmvDX.fpu.typeTagOut := FPU.D
        fmvDX.fpu.fromInt := true.B
        fmvDX.fpu.wflags := false.B
        fmvDX.fpu.fpWen := true.B
        fmvDX.fpu.div := false.B
        fmvDX.fpu.sqrt := false.B
        fmvDX.fpu.fcvt := false.B
      }

      // uop 0 keeps its default lsrc(0); uop 1 reads the instruction's lsrc(1) instead
      configIntToFpMove(0, FP_TMP_REG_MV.U)
      configIntToFpMove(1, VECTOR_TMP_REG_LMUL.U)
      csBundle(1).lsrc(0) := latchedInst.lsrc(1)

      //LMUL
      for (i <- 0 until MAX_VLMUL) {
        val memUop = csBundle(i + 2)
        memUop.srcType(0) := SrcType.fp
        memUop.srcType(1) := SrcType.fp
        memUop.lsrc(0) := FP_TMP_REG_MV.U
        memUop.lsrc(1) := VECTOR_TMP_REG_LMUL.U
        memUop.lsrc(2) := dest + i.U // old vd
        memUop.ldest := dest + i.U
        memUop.uopIdx := i.U
      }
    }
    is(UopSplitType.VEC_I_LDST) {
      /*
      FMV.D.X
       */
      val vlmul = vlmulReg
      val vsew = Cat(0.U(1.W), vsewReg)
      val veew = Cat(0.U(1.W), width)
      val vemul: UInt = veew.asUInt + 1.U + vlmul.asUInt + ~vsew.asUInt
      // Encodings b001/b010/b011 map to 1/2/3; every other encoding maps to 0.
      val simple_lmul = MuxLookup(vlmul, 0.U(2.W), Array(
        "b001".U -> 1.U,
        "b010".U -> 2.U,
        "b011".U -> 3.U
      ))
      val simple_emul = MuxLookup(vemul, 0.U(2.W), Array(
        "b001".U -> 1.U,
        "b010".U -> 2.U,
        "b011".U -> 3.U
      ))
      val fmvDX = csBundle(0)
      fmvDX.srcType(0) := SrcType.reg
      fmvDX.srcType(1) := SrcType.imm
      fmvDX.lsrc(1) := 0.U
      fmvDX.ldest := FP_TMP_REG_MV.U
      fmvDX.fuType := FuType.i2f.U
      fmvDX.rfWen := false.B
      fmvDX.fpWen := true.B
      fmvDX.vecWen := false.B
      fmvDX.fpu.isAddSub := false.B
      fmvDX.fpu.typeTagIn := FPU.D
      fmvDX.fpu.typeTagOut := FPU.D
      fmvDX.fpu.fromInt := true.B
      fmvDX.fpu.wflags := false.B
      fmvDX.fpu.fpWen := true.B
      fmvDX.fpu.div := false.B
      fmvDX.fpu.sqrt := false.B
      fmvDX.fpu.fcvt := false.B

      //LMUL
      // The lookup key {emul, lmul, nf} is the same for every per-uop offset table, so build it once.
      val offsetTableKey = Cat(simple_emul, simple_lmul, nf)
      for (i <- 0 until MAX_INDEXED_LS_UOPNUM) {
        indexedLSRegOffset(i).src := offsetTableKey
        val offsetVs2 = indexedLSRegOffset(i).outOffsetVs2
        val offsetVd = indexedLSRegOffset(i).outOffsetVd
        val isFirstUopInVd = indexedLSRegOffset(i).outIsFirstUopInVd
        // dest + offsetVd, used both as vd and (for the first uop of a vd) as old vd
        val vdReg = Mux1H(UIntToOH(offsetVd, MAX_VLMUL), (0 until MAX_VLMUL).map(j => dest + j.U))
        val memUop = csBundle(i + 1)
        memUop.srcType(0) := SrcType.fp
        memUop.lsrc(0) := FP_TMP_REG_MV.U
        memUop.lsrc(1) := Mux1H(UIntToOH(offsetVs2, MAX_VLMUL), (0 until MAX_VLMUL).map(j => src2 + j.U))
        /**
          * For indexed instructions, VLSU will concatenate all the uops that write the same logic vd register and
          * writeback only once for all these uops. However, these uops share the same lsrc(2)/old vd and the same
          * ldest/vd that is equal to old vd, which leads to data dependence between the uops. Therefore there will be
          * deadlock for indexed instructions with emul > lmul.
          *
          * Assume N = emul/lmul. To break the deadlock, only the first uop will read old vd as lsrc(2), and the rest
          * N-1 uops will read temporary vector register.
          */
        // memUop.lsrc(2) := Mux1H(UIntToOH(offsetVd, MAX_VLMUL), (0 until MAX_VLMUL).map(j => dest + j.U))
        memUop.lsrc(2) := Mux(isFirstUopInVd, vdReg, VECTOR_TMP_REG_LMUL.U)
        memUop.ldest := vdReg
        memUop.uopIdx := i.U
      }
    }
  }

  // readyFromRename counter: index of the first output port whose ready is low, or RenameWidth when all are ready
  val readyCounter = PriorityMuxDefault(outReadys.map(x => !x).zip((0 until RenameWidth).map(_.U)), RenameWidth.U)

  // The remaining uops of the complex inst in ComplexDecoder can all be sent out this cycle
  val thisAllOut = uopRes <= readyCounter

  switch(state) {
    is(s_idle) {
      // a new complex instruction starts with its full uop count outstanding
      when (inValid) {
        stateNext := s_active
        uopResNext := inUopInfo.numOfUop
      }
    }
    is(s_active) {
      when (thisAllOut) {
        when (inValid) {
          // back-to-back: accept the next complex instruction without returning to idle
          stateNext := s_active
          uopResNext := inUopInfo.numOfUop
        }.otherwise {
          stateNext := s_idle
          uopResNext := 0.U
        }
      }.otherwise {
        stateNext := s_active
        uopResNext := uopRes - readyCounter
      }
    }
  }

  // a redirect forces the FSM back to idle with no uops outstanding
  state := Mux(io.redirect, s_idle, stateNext)
  uopRes := Mux(io.redirect, 0.U, uopResNext)

  // number of uops offered this cycle: min(uopRes, readyCounter)
  val complexNum = Mux(uopRes > readyCounter, readyCounter, uopRes)

  for (i <- 0 until RenameWidth) {
    outValids(i) := complexNum > i.U
    // out-of-range selections fall back to the last csBundle entry
    outDecodedInsts(i) := Mux((i.U + numOfUop - uopRes) < maxUopSize.U, csBundle(i.U + numOfUop - uopRes), csBundle(maxUopSize - 1))
  }

  outComplexNum := Mux(state === s_active, complexNum, 0.U)
  inReady := (state === s_idle) || ((state === s_active) && thisAllOut)

//  val validSimple = Wire(Vec(DecodeWidth, Bool()))
//  validSimple.zip(io.validFromIBuf.zip(io.isComplex)).map{ case (dst, (src1, src2)) => dst := src1 && !src2 }
//  val notInf = Wire(Vec(DecodeWidth, Bool()))
//  notInf.drop(1).zip(io.validFromIBuf.drop(1).zip(validSimple.drop(1))).map{ case (dst, (src1, src2)) => dst := !src1 || src2 }
//  notInf(0) := !io.validFromIBuf(0) || validSimple(0) || (io.isComplex(0) && io.in0pc === io.simple.decodedInst.pc)
//  val notInfVec = Wire(Vec(DecodeWidth, Bool()))
//  notInfVec.zipWithIndex.map{ case (dst, i) => dst := Cat(notInf.take(i + 1)).andR}
//
//  complexNum := Mux(io.validFromIBuf(0) && readyCounter.orR ,
//    Mux(uopRes0 > readyCounter, readyCounter, uopRes0),
//    0.U)
//  validToRename.zipWithIndex.foreach{
//    case(dst, i) =>
//      val validFix = Mux(complexNum.orR, validSimple((i+1).U - complexNum), validSimple(i))
//      dst := MuxCase(false.B, Seq(
//        (io.validFromIBuf(0) && readyCounter.orR && uopRes0 > readyCounter) -> Mux(readyCounter > i.U, true.B, false.B),
//        (io.validFromIBuf(0) && readyCounter.orR && !(uopRes0 > readyCounter)) -> Mux(complexNum > i.U, true.B, validFix && notInfVec(i.U - complexNum) && io.readyFromRename(i)),
//      ).toSeq)
//  }
//
//  readyToIBuf.zipWithIndex.foreach {
//    case (dst, i) =>
//      val readyToIBuf0 = Mux(io.isComplex(0), io.in0pc === io.simple.decodedInst.pc, true.B)
//      dst := MuxCase(true.B, Seq(
//        (io.validFromIBuf(0) && uopRes0 > readyCounter || !readyCounter.orR) -> false.B,
//        (io.validFromIBuf(0) && !(uopRes0 > readyCounter) && readyCounter.orR) -> (if (i==0) readyToIBuf0 else Mux(RenameWidth.U - complexNum >= i.U, notInfVec(i) && validSimple(i) && io.readyFromRename(i), false.B))
//      ).toSeq)
//  }
//
//  io.deq.decodedInsts := decodedInsts
//  io.deq.complexNum := complexNum
//  io.deq.validToRename := validToRename
//  io.deq.readyToIBuf := readyToIBuf
}