/***************************************************************************************
 * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
 * Copyright (c) 2020-2021 Peng Cheng Laboratory
 *
 * XiangShan is licensed under Mulan PSL v2.
 * You can use this software according to the terms and conditions of the Mulan PSL v2.
 * You may obtain a copy of Mulan PSL v2 at:
 *          http://license.coscl.org.cn/MulanPSL2
 *
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 *
 * See the Mulan PSL v2 for more details.
 ***************************************************************************************/

package xiangshan.mem

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import utils._
import utility._
import xiangshan._
import xiangshan.backend.rob.RobPtr
import xiangshan.backend.Bundles._
import xiangshan.backend.fu.FuType
import xiangshan.backend.fu.vector.Bundles.VEew

/**
 * Commonly used parameters and functions in the VLSU
 */
trait VLSUConstants {
  val VLEN = 128
  // for packing unit-stride flows
  val AlignedNum = 4 // 1/2/4/8
  def VLENB = VLEN/8
  def vOffsetBits = log2Up(VLENB) // bit width to index an offset inside a vector reg
  lazy val vlmBindexBits = 8 // will be overridden later
  lazy val vsmBindexBits = 8 // will be overridden later

  def alignTypes = 5 // eew/sew = 1/2/4/8 bytes; the last indicates a 128-bit element
  def alignTypeBits = log2Up(alignTypes)
  def maxMUL = 8
  def maxFields = 8
  /**
   * In the most extreme case, e.g. a segment indexed instruction with eew=64, emul=8,
   * sew=8, lmul=1 and nf=8, each field's data register is mapped to 8 index registers,
   * and there are 8 data registers in total, one per field. Therefore an instruction
   * can be split into at most 64 uops.
   */
  def maxUopNum = maxMUL * maxFields // 64
  def maxFlowNum = 16
  def maxElemNum = maxMUL * maxFlowNum // 128
  // def uopIdxBits = log2Up(maxUopNum) // to index a uop inside a robIdx
  def elemIdxBits = log2Up(maxElemNum) + 1 // to index an element within an instruction
  def flowIdxBits = log2Up(maxFlowNum) + 1 // to index a flow within a uop
  def fieldBits = log2Up(maxFields) + 1 // 4 bits to indicate 1~8

  def ewBits = 3 // bit width of EEW/SEW
  def mulBits = 3 // bit width of emul/lmul

  def getSlice(data: UInt, i: Int, alignBits: Int): UInt = {
    require(data.getWidth >= (i+1) * alignBits)
    data((i+1) * alignBits - 1, i * alignBits)
  }
  def getNoAlignedSlice(data: UInt, i: Int, alignBits: Int): UInt = {
    data(i * 8 + alignBits - 1, i * 8)
  }

  def getByte(data: UInt, i: Int = 0) = getSlice(data, i, 8)
  def getHalfWord(data: UInt, i: Int = 0) = getSlice(data, i, 16)
  def getWord(data: UInt, i: Int = 0) = getSlice(data, i, 32)
  def getDoubleWord(data: UInt, i: Int = 0) = getSlice(data, i, 64)
  def getDoubleDoubleWord(data: UInt, i: Int = 0) = getSlice(data, i, 128)
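
  // Worked example of the slice helpers above (illustrative only; values are
  // hypothetical): with data = "h11223344".U(32.W),
  //   getByte(data, 0)               selects bits (7, 0)   => 0x44
  //   getHalfWord(data, 1)           selects bits (31, 16) => 0x1122
  //   getNoAlignedSlice(data, 1, 16) selects bits (23, 8)  => 0x2233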
}

trait HasVLSUParameters extends HasXSParameter with VLSUConstants {
  override val VLEN = coreParams.VLEN
  override lazy val vlmBindexBits = log2Up(coreParams.VlMergeBufferSize)
  override lazy val vsmBindexBits = log2Up(coreParams.VsMergeBufferSize)
  lazy val maxMemByteNum = 16 // maximum bytes for a single memory access
  /**
   * Get the low (alignment-check) bits of an address.
   * @param addr Address to be checked
   * @param width Width for checking alignment
   */
  def getCheckAddrLowBits(addr: UInt, width: Int): UInt = addr(log2Up(width) - 1, 0)
  def getOverflowBit(in: UInt, width: Int): UInt = in(log2Up(width))
  def isUnitStride(instType: UInt) = instType(1, 0) === "b00".U
  def isStrided(instType: UInt) = instType(1, 0) === "b10".U
  def isIndexed(instType: UInt) = instType(0) === "b1".U
  def isNotIndexed(instType: UInt) = instType(0) === "b0".U
  def isSegment(instType: UInt) = instType(2) === "b1".U
  def is128Bit(alignedType: UInt) = alignedType(2) === "b1".U

  def mergeDataWithMask(oldData: UInt, newData: UInt, mask: UInt): Vec[UInt] = {
    require(oldData.getWidth == newData.getWidth)
    require(oldData.getWidth == mask.getWidth * 8)
    VecInit(mask.asBools.zipWithIndex.map { case (en, i) =>
      Mux(en, getByte(newData, i), getByte(oldData, i))
    })
  }
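
  // mergeDataWithMask example (illustrative only; values are hypothetical):
  // with mask = "b0011".U, bytes 0 and 1 come from newData and bytes 2 and 3
  // keep oldData, e.g.
  //   mergeDataWithMask("h11223344".U, "haabbccdd".U, "b0011".U).asUInt => 0x1122ccdd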

  // def asBytes(data: UInt) = {
  //   require(data.getWidth % 8 == 0)
  //   (0 until data.getWidth/8).map(i => getByte(data, i))
  // }

  def mergeDataWithElemIdx(
    oldData: UInt,
    newData: Seq[UInt],
    alignedType: UInt,
    elemIdx: Seq[UInt],
    valids: Seq[Bool]
  ): UInt = {
    require(newData.length == elemIdx.length)
    require(newData.length == valids.length)
    LookupTree(alignedType, List(
      "b00".U -> VecInit(elemIdx.map(e => UIntToOH(e(3, 0)).asBools).transpose.zipWithIndex.map { case (selVec, i) =>
        ParallelPosteriorityMux(
          true.B +: selVec.zip(valids).map(x => x._1 && x._2),
          getByte(oldData, i) +: newData.map(getByte(_))
        )}).asUInt,
      "b01".U -> VecInit(elemIdx.map(e => UIntToOH(e(2, 0)).asBools).transpose.zipWithIndex.map { case (selVec, i) =>
        ParallelPosteriorityMux(
          true.B +: selVec.zip(valids).map(x => x._1 && x._2),
          getHalfWord(oldData, i) +: newData.map(getHalfWord(_))
        )}).asUInt,
      "b10".U -> VecInit(elemIdx.map(e => UIntToOH(e(1, 0)).asBools).transpose.zipWithIndex.map { case (selVec, i) =>
        ParallelPosteriorityMux(
          true.B +: selVec.zip(valids).map(x => x._1 && x._2),
          getWord(oldData, i) +: newData.map(getWord(_))
        )}).asUInt,
      "b11".U -> VecInit(elemIdx.map(e => UIntToOH(e(0)).asBools).transpose.zipWithIndex.map { case (selVec, i) =>
        ParallelPosteriorityMux(
          true.B +: selVec.zip(valids).map(x => x._1 && x._2),
          getDoubleWord(oldData, i) +: newData.map(getDoubleWord(_))
        )}).asUInt
    ))
  }

  def mergeDataWithElemIdx(oldData: UInt, newData: UInt, alignedType: UInt, elemIdx: UInt): UInt = {
    mergeDataWithElemIdx(oldData, Seq(newData), alignedType, Seq(elemIdx), Seq(true.B))
  }
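
  // How the merge above works (explanatory note): alignedType b00/b01/b10/b11
  // views oldData as 16 bytes / 8 half-words / 4 words / 2 double-words. Each
  // valid (newData, elemIdx) pair overwrites the element it indexes; when
  // several pairs hit the same element, the later entry in the sequence wins
  // (ParallelPosteriorityMux gives posterior inputs priority), and oldData is
  // the fallback when no pair hits.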

  /**
   * Merge 128-bit unit-stride data byte by byte.
   */
  object mergeDataByByte{
    def apply(oldData: UInt, newData: UInt, mask: UInt): UInt = {
      val selVec = Seq(mask).map(_.asBools).transpose
      VecInit(selVec.zipWithIndex.map{ case (selV, i) =>
        ParallelPosteriorityMux(
          true.B +: selV.map(x => x),
          getByte(oldData, i) +: Seq(getByte(newData, i))
        )}).asUInt
    }
  }

  /**
   * Merge unit-stride 128-bit data into a 256-bit window.
   * With 3 ports:
   *   port0 is a 6-to-1 multiplexer -> (128'b0, data) or (data, 128'b0) or (data, port2data) or (port2data, data) or (data, port3data) or (port3data, data)
   *   port1 is a 4-to-1 multiplexer -> (128'b0, data) or (data, 128'b0) or (data, port3data) or (port3data, data)
   *   port3 is a 2-to-1 multiplexer -> (128'b0, data) or (data, 128'b0)
   */
  object mergeDataByIndex{
    def apply(data: Seq[UInt], mask: Seq[UInt], index: UInt, valids: Seq[Bool]): (UInt, UInt) = {
      require(data.length == valids.length)
      require(data.length == mask.length)
      val muxLength = data.length
      val selDataMatrix = Wire(Vec(muxLength, Vec(2, UInt((VLEN * 2).W)))) // 3 * 2 * 256
      val selMaskMatrix = Wire(Vec(muxLength, Vec(2, UInt((VLENB * 2).W)))) // 3 * 2 * 16

      if (backendParams.debugEn){
        dontTouch(selDataMatrix)
        dontTouch(selMaskMatrix)
      }

      for(i <- 0 until muxLength){
        if(i == 0){
          selDataMatrix(i)(0) := Cat(0.U(VLEN.W), data(i))
          selDataMatrix(i)(1) := Cat(data(i), 0.U(VLEN.W))
          selMaskMatrix(i)(0) := Cat(0.U(VLENB.W), mask(i))
          selMaskMatrix(i)(1) := Cat(mask(i), 0.U(VLENB.W))
        }
        else{
          selDataMatrix(i)(0) := Cat(data(i), data(0))
          selDataMatrix(i)(1) := Cat(data(0), data(i))
          selMaskMatrix(i)(0) := Cat(mask(i), mask(0))
          selMaskMatrix(i)(1) := Cat(mask(0), mask(i))
        }
      }
      val selIdxVec = (0 until muxLength).map(_.U)
      val selIdx = PriorityMux(valids.reverse, selIdxVec.reverse)

      val selData = Mux(index === 0.U,
        selDataMatrix(selIdx)(0),
        selDataMatrix(selIdx)(1))
      val selMask = Mux(index === 0.U,
        selMaskMatrix(selIdx)(0),
        selMaskMatrix(selIdx)(1))
      (selData, selMask)
    }
  }
  def mergeDataByIndex(data: UInt, mask: UInt, index: UInt): (UInt, UInt) = {
    mergeDataByIndex(Seq(data), Seq(mask), index, Seq(true.B))
  }
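
  // mergeDataByIndex example (illustrative only): with index = 0.U and only
  // valids(0) set, the result is Cat(0.U(128.W), data(0)) -- the 128-bit flow
  // lands in the low half of the 256-bit merge window; index = 1.U places it
  // in the high half instead.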
}
abstract class VLSUModule(implicit p: Parameters) extends XSModule
  with HasVLSUParameters
  with HasCircularQueuePtrHelper
abstract class VLSUBundle(implicit p: Parameters) extends XSBundle
  with HasVLSUParameters

class VLSUBundleWithMicroOp(implicit p: Parameters) extends VLSUBundle {
  val uop = new DynInst
}

class OnlyVecExuOutput(implicit p: Parameters) extends VLSUBundle {
  val isvec = Bool()
  val vecdata = UInt(VLEN.W)
  val mask = UInt(VLENB.W)
  // val rob_idx_valid = Vec(2, Bool())
  // val inner_idx = Vec(2, UInt(3.W))
  // val rob_idx = Vec(2, new RobPtr)
  // val offset = Vec(2, UInt(4.W))
  val reg_offset = UInt(vOffsetBits.W)
  val vecActive = Bool() // 1: vector active element, 0: vector inactive element
  val is_first_ele = Bool()
  val elemIdx = UInt(elemIdxBits.W) // element index
  val elemIdxInsideVd = UInt(elemIdxBits.W) // element index within the scope of vd
  val trigger = TriggerAction()
  val vstart = UInt(elemIdxBits.W)
  val vecTriggerMask = UInt((VLEN/8).W)
  // val uopQueuePtr = new VluopPtr
  // val flowPtr = new VlflowPtr
}

class VecExuOutput(implicit p: Parameters) extends MemExuOutput with HasVLSUParameters {
  val vec = new OnlyVecExuOutput
  val alignedType = UInt(alignTypeBits.W)
  // feedback
  val vecFeedback = Bool()
}

class VecUopBundle(implicit p: Parameters) extends VLSUBundleWithMicroOp {
  val flowMask = UInt(VLENB.W) // each bit for a flow
  val byteMask = UInt(VLENB.W) // each bit for a byte
  val data = UInt(VLEN.W)
  // val fof = Bool() // fof is only used for vector loads
  val excp_eew_index = UInt(elemIdxBits.W)
  // val exceptionVec = ExceptionVec() // uop has exceptionVec
  val baseAddr = UInt(VAddrBits.W)
  val stride = UInt(VLEN.W)
  val flow_counter = UInt(flowIdxBits.W)

  // instruction decode result
  val flowNum = UInt(flowIdxBits.W) // # of flows in a uop
  // val flowNumLog2 = UInt(log2Up(flowIdxBits).W) // log2(flowNum), for better timing of multiplication
  val nfields = UInt(fieldBits.W) // NFIELDS
  val vm = Bool() // whether vector masking is enabled
  val usWholeReg = Bool() // unit-stride, whole register load
  val usMaskReg = Bool() // unit-stride, masked store/load
  val eew = VEew() // size of memory elements
  val sew = UInt(ewBits.W)
  val emul = UInt(mulBits.W)
  val lmul = UInt(mulBits.W)
  val vlmax = UInt(elemIdxBits.W)
  val instType = UInt(3.W)
  val vd_last_uop = Bool()
  val vd_first_uop = Bool()
}

class VecFlowBundle(implicit p: Parameters) extends VLSUBundleWithMicroOp {
  val vaddr = UInt(VAddrBits.W)
  val mask = UInt(VLENB.W)
  val alignedType = UInt(alignTypeBits.W)
  val vecActive = Bool()
  val elemIdx = UInt(elemIdxBits.W)
  val is_first_ele = Bool()

  // pack
  val isPackage = Bool()
  val packageNum = UInt((log2Up(VLENB) + 1).W)
  val originAlignedType = UInt(alignTypeBits.W)
}

class VecMemExuOutput(isVector: Boolean = false)(implicit p: Parameters) extends VLSUBundle {
  val output = new MemExuOutput(isVector)
  val vecFeedback = Bool()
  val nc = Bool()
  val mmio = Bool()
  val usSecondInv = Bool()
  val hasException = Bool()
  val elemIdx = UInt(elemIdxBits.W)
  val alignedType = UInt(alignTypeBits.W)
  val mbIndex = UInt(vsmBindexBits.W)
  val mask = UInt(VLENB.W)
  val vaddr = UInt(XLEN.W)
  val vaNeedExt = Bool()
  val gpaddr = UInt(GPAddrBits.W)
  val isForVSnonLeafPTE = Bool()
  val vecTriggerMask = UInt((VLEN/8).W)
}

object MulNum {
  def apply (mul: UInt): UInt = { // mul means emul or lmul
    (LookupTree(mul,List(
      "b101".U -> 1.U , // 1/8
      "b110".U -> 1.U , // 1/4
      "b111".U -> 1.U , // 1/2
      "b000".U -> 1.U , // 1
      "b001".U -> 2.U , // 2
      "b010".U -> 4.U , // 4
      "b011".U -> 8.U   // 8
    )))}
}
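
// Encoding note (explanatory): the 3-bit emul/lmul values above follow the
// vtype vlmul encoding -- "b000".."b011" mean LMUL = 1/2/4/8 and
// "b101".."b111" mean LMUL = 1/8, 1/4, 1/2. A fractional EMUL still occupies
// (part of) one register, hence MulNum returns 1.U for all fractional cases.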

/**
 * When emul is greater than or equal to 1, the entire register needs to be
 * written; otherwise, only the specified number of bytes is written.
 */
object MulDataSize {
  def apply (mul: UInt): UInt = { // mul means emul or lmul
    (LookupTree(mul,List(
      "b101".U -> 2.U  , // 1/8
      "b110".U -> 4.U  , // 1/4
      "b111".U -> 8.U  , // 1/2
      "b000".U -> 16.U , // 1
      "b001".U -> 16.U , // 2
      "b010".U -> 16.U , // 4
      "b011".U -> 16.U   // 8
    )))}
}

object OneRegNum {
  def apply (eew: UInt): UInt = { // number of elements in one register, given eew
    require(eew.getWidth == 2, "The eew width must be 2.")
    (LookupTree(eew, List(
      "b00".U -> 16.U , // 1
      "b01".U -> 8.U  , // 2
      "b10".U -> 4.U  , // 4
      "b11".U -> 2.U    // 8
    )))}
}

// data bytes read per element by indexed instructions (determined by sew)
object SewDataSize {
  def apply (sew: UInt): UInt = {
    (LookupTree(sew,List(
      "b000".U -> 1.U , // 1
      "b001".U -> 2.U , // 2
      "b010".U -> 4.U , // 4
      "b011".U -> 8.U   // 8
    )))}
}

// data bytes read per element by strided instructions (determined by eew)
object EewDataSize {
  def apply (eew: UInt): UInt = {
    require(eew.getWidth == 2, "The eew width must be 2.")
    (LookupTree(eew, List(
      "b00".U -> 1.U , // 1
      "b01".U -> 2.U , // 2
      "b10".U -> 4.U , // 4
      "b11".U -> 8.U   // 8
    )))}
}

object loadDataSize {
  def apply (instType: UInt, emul: UInt, eew: UInt, sew: UInt): UInt = {
    (LookupTree(instType,List(
      "b000".U -> MulDataSize(emul), // unit-stride
      "b010".U -> EewDataSize(eew) , // strided
      "b001".U -> SewDataSize(sew) , // indexed-unordered
      "b011".U -> SewDataSize(sew) , // indexed-ordered
      "b100".U -> EewDataSize(eew) , // segment unit-stride
      "b110".U -> EewDataSize(eew) , // segment strided
      "b101".U -> SewDataSize(sew) , // segment indexed-unordered
      "b111".U -> SewDataSize(sew)   // segment indexed-ordered
    )))}
}

object storeDataSize {
  def apply (instType: UInt, eew: UInt, sew: UInt): UInt = {
    (LookupTree(instType,List(
      "b000".U -> EewDataSize(eew) , // unit-stride, do not use
      "b010".U -> EewDataSize(eew) , // strided
      "b001".U -> SewDataSize(sew) , // indexed-unordered
      "b011".U -> SewDataSize(sew) , // indexed-ordered
      "b100".U -> EewDataSize(eew) , // segment unit-stride
      "b110".U -> EewDataSize(eew) , // segment strided
      "b101".U -> SewDataSize(sew) , // segment indexed-unordered
      "b111".U -> SewDataSize(sew)   // segment indexed-ordered
    )))}
}
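
// Example (illustrative only): a strided load with eew = "b10" (32-bit
// elements) accesses EewDataSize(eew) = 4 bytes per flow, while a unit-stride
// load with emul >= 1 moves a full MulDataSize(emul) = 16-byte register slice
// at a time.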

/**
 * These are used to extract the index offsets for indexed instructions.
 */
object EewEq8 {
  def apply(index:UInt, flow_inner_idx: UInt): UInt = {
    (LookupTree(flow_inner_idx,List(
      0.U  -> index(7  ,0  ),
      1.U  -> index(15 ,8  ),
      2.U  -> index(23 ,16 ),
      3.U  -> index(31 ,24 ),
      4.U  -> index(39 ,32 ),
      5.U  -> index(47 ,40 ),
      6.U  -> index(55 ,48 ),
      7.U  -> index(63 ,56 ),
      8.U  -> index(71 ,64 ),
      9.U  -> index(79 ,72 ),
      10.U -> index(87 ,80 ),
      11.U -> index(95 ,88 ),
      12.U -> index(103,96 ),
      13.U -> index(111,104),
      14.U -> index(119,112),
      15.U -> index(127,120)
    )))}
}

object EewEq16 {
  def apply(index: UInt, flow_inner_idx: UInt): UInt = {
    (LookupTree(flow_inner_idx, List(
      0.U -> index(15, 0),
      1.U -> index(31, 16),
      2.U -> index(47, 32),
      3.U -> index(63, 48),
      4.U -> index(79, 64),
      5.U -> index(95, 80),
      6.U -> index(111, 96),
      7.U -> index(127, 112)
    )))}
}

object EewEq32 {
  def apply(index: UInt, flow_inner_idx: UInt): UInt = {
    (LookupTree(flow_inner_idx, List(
      0.U -> index(31, 0),
      1.U -> index(63, 32),
      2.U -> index(95, 64),
      3.U -> index(127, 96)
    )))}
}

object EewEq64 {
  def apply (index: UInt, flow_inner_idx: UInt): UInt = {
    (LookupTree(flow_inner_idx, List(
      0.U -> index(63, 0),
      1.U -> index(127, 64)
    )))}
}

object IndexAddr {
  def apply (index: UInt, flow_inner_idx: UInt, eew: UInt): UInt = {
    require(eew.getWidth == 2, "The eew width must be 2.")
    (LookupTree(eew, List(
      "b00".U -> EewEq8 (index = index, flow_inner_idx = flow_inner_idx ), // Imm is 1 Byte // TODO: index may cross registers
      "b01".U -> EewEq16(index = index, flow_inner_idx = flow_inner_idx ), // Imm is 2 Byte
      "b10".U -> EewEq32(index = index, flow_inner_idx = flow_inner_idx ), // Imm is 4 Byte
      "b11".U -> EewEq64(index = index, flow_inner_idx = flow_inner_idx )  // Imm is 8 Byte
    )))}
}
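
// IndexAddr example (illustrative only): with eew = "b01" (16-bit indices)
// and flow_inner_idx = 2.U, the selected offset is index(47, 32) -- the third
// 16-bit element of the 128-bit index register.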

object Log2Num {
  def apply (num: UInt): UInt = {
    (LookupTree(num,List(
      16.U -> 4.U,
      8.U  -> 3.U,
      4.U  -> 2.U,
      2.U  -> 1.U,
      1.U  -> 0.U
    )))}
}

object GenUopIdxInField {
  /**
   * Used in normal vector instructions.
   */
  def apply (instType: UInt, emul: UInt, lmul: UInt, uopIdx: UInt): UInt = {
    val isIndexed = instType(0)
    val mulInField = Mux(
      isIndexed,
      Mux(lmul.asSInt > emul.asSInt, lmul, emul),
      emul
    )
    LookupTree(mulInField, List(
      "b101".U -> 0.U,
      "b110".U -> 0.U,
      "b111".U -> 0.U,
      "b000".U -> 0.U,
      "b001".U -> uopIdx(0),
      "b010".U -> uopIdx(1, 0),
      "b011".U -> uopIdx(2, 0)
    ))
  }
  /**
   * Only used in segment instructions.
   */
  def apply (select: UInt, uopIdx: UInt): UInt = {
    LookupTree(select, List(
      "b101".U -> 0.U,
      "b110".U -> 0.U,
      "b111".U -> 0.U,
      "b000".U -> 0.U,
      "b001".U -> uopIdx(0),
      "b010".U -> uopIdx(1, 0),
      "b011".U -> uopIdx(2, 0)
    ))
  }
}

// eew decode
object EewLog2 extends VLSUConstants {
  // def apply (eew: UInt): UInt = {
  //   (LookupTree(eew,List(
  //     "b000".U -> "b000".U , // 1
  //     "b101".U -> "b001".U , // 2
  //     "b110".U -> "b010".U , // 4
  //     "b111".U -> "b011".U   // 8
  //   )))}
  def apply(eew: UInt): UInt = {
    require(eew.getWidth == 2, "The eew width must be 2.")
    ZeroExt(eew, ewBits)
  }
}

object GenRealFlowNum {
  /**
   * Unit-stride instructions do not use this method; other instructions
   * generate realFlowNum as EmulDataSize >> eew, where EmulDataSize is the
   * number of bytes that need to be written to the register and eew is the
   * number of bytes written at once.
   *
   * @param instType As the name implies.
   * @param emul As the name implies.
   * @param lmul As the name implies.
   * @param eew As the name implies.
   * @param sew As the name implies.
   * @param isSegment Only modules related to segment instructions need to set this to true.
   * @return FlowNum of the instruction.
   */
  def apply (instType: UInt, emul: UInt, lmul: UInt, eew: UInt, sew: UInt, isSegment: Boolean = false): UInt = {
    require(instType.getWidth == 3, "The instType width must be 3, (isSegment, mop)")
    require(eew.getWidth == 2, "The eew width must be 2.")
    // The new SegmentUnit handles segment instructions now; the previous
    // implementation is retained for the time being as a fallback.
    val segmentIndexFlowNum = if (isSegment) (MulDataSize(lmul) >> sew(1,0)).asUInt
      else Mux(emul.asSInt > lmul.asSInt, (MulDataSize(emul) >> eew).asUInt, (MulDataSize(lmul) >> sew(1,0)).asUInt)
    (LookupTree(instType,List(
      "b000".U -> (MulDataSize(emul) >> eew).asUInt, // store use, load do not use
      "b010".U -> (MulDataSize(emul) >> eew).asUInt, // strided
      "b001".U -> Mux(emul.asSInt > lmul.asSInt, (MulDataSize(emul) >> eew).asUInt, (MulDataSize(lmul) >> sew(1,0)).asUInt), // indexed-unordered
      "b011".U -> Mux(emul.asSInt > lmul.asSInt, (MulDataSize(emul) >> eew).asUInt, (MulDataSize(lmul) >> sew(1,0)).asUInt), // indexed-ordered
      "b100".U -> (MulDataSize(emul) >> eew).asUInt, // segment unit-stride
      "b110".U -> (MulDataSize(emul) >> eew).asUInt, // segment strided
      "b101".U -> segmentIndexFlowNum, // segment indexed-unordered
      "b111".U -> segmentIndexFlowNum  // segment indexed-ordered
    )))}
}

object GenRealFlowLog2 extends VLSUConstants {
  /**
   * GenRealFlowLog2 = Log2(GenRealFlowNum)
   *
   * @param instType As the name implies.
   * @param emul As the name implies.
   * @param lmul As the name implies.
   * @param eew As the name implies.
   * @param sew As the name implies.
   * @param isSegment Only modules related to segment instructions need to set this to true.
   * @return FlowNumLog2 of the instruction.
   */
  def apply(instType: UInt, emul: UInt, lmul: UInt, eew: UInt, sew: UInt, isSegment: Boolean = false): UInt = {
    require(instType.getWidth == 3, "The instType width must be 3, (isSegment, mop)")
    require(eew.getWidth == 2, "The eew width must be 2.")
    val emulLog2 = Mux(emul.asSInt >= 0.S, 0.U, emul)
    val lmulLog2 = Mux(lmul.asSInt >= 0.S, 0.U, lmul)
    val eewRealFlowLog2 = emulLog2 + log2Up(VLENB).U - eew
    val sewRealFlowLog2 = lmulLog2 + log2Up(VLENB).U - sew(1, 0)
    // The new SegmentUnit handles segment instructions now; the previous
    // implementation is retained for the time being as a fallback.
    val segmentIndexFlowLog2 = if (isSegment) sewRealFlowLog2 else Mux(emul.asSInt > lmul.asSInt, eewRealFlowLog2, sewRealFlowLog2)
    (LookupTree(instType, List(
      "b000".U -> eewRealFlowLog2, // unit-stride
      "b010".U -> eewRealFlowLog2, // strided
      "b001".U -> Mux(emul.asSInt > lmul.asSInt, eewRealFlowLog2, sewRealFlowLog2), // indexed-unordered
      "b011".U -> Mux(emul.asSInt > lmul.asSInt, eewRealFlowLog2, sewRealFlowLog2), // indexed-ordered
      "b100".U -> eewRealFlowLog2, // segment unit-stride
      "b110".U -> eewRealFlowLog2, // segment strided
      "b101".U -> segmentIndexFlowLog2, // segment indexed-unordered
      "b111".U -> segmentIndexFlowLog2  // segment indexed-ordered
    )))
  }
}
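
// Worked example (illustrative only): an indexed load with emul = "b001" (2),
// lmul = "b000" (1), eew = "b00" (8-bit) and sew = "b010" (32-bit): since
// emul > lmul, realFlowNum = MulDataSize(emul) >> eew = 16 >> 0 = 16 flows
// per uop.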

/**
 * GenElemIdx generates an element index within an instruction, given a certain
 * uopIdx and a known flowIdx inside the uop.
 */
object GenElemIdx extends VLSUConstants {
  def apply(instType: UInt, emul: UInt, lmul: UInt, eew: UInt, sew: UInt,
    uopIdx: UInt, flowIdx: UInt): UInt = {
    require(eew.getWidth == 2, "The eew width must be 2.")
    val isIndexed = instType(0).asBool
    val eewUopFlowsLog2 = Mux(emul.asSInt > 0.S, 0.U, emul) + log2Up(VLENB).U - eew
    val sewUopFlowsLog2 = Mux(lmul.asSInt > 0.S, 0.U, lmul) + log2Up(VLENB).U - sew(1, 0)
    val uopFlowsLog2 = Mux(
      isIndexed,
      Mux(emul.asSInt > lmul.asSInt, eewUopFlowsLog2, sewUopFlowsLog2),
      eewUopFlowsLog2
    )
    LookupTree(uopFlowsLog2, List(
      0.U -> uopIdx ## flowIdx(0), // for hardware misalign
      1.U -> uopIdx ## flowIdx(0),
      2.U -> uopIdx ## flowIdx(1, 0),
      3.U -> uopIdx ## flowIdx(2, 0),
      4.U -> uopIdx ## flowIdx(3, 0)
    ))
  }
}

/**
 * GenVLMAX calculates VLMAX = LMUL * VLEN / SEW
 * (lmul and sew are given in their log2 encodings).
 */
object GenVLMAXLog2 extends VLSUConstants {
  def apply(lmul: UInt, sew: UInt): UInt = lmul + log2Up(VLENB).U - sew
}
object GenVLMAX {
  def apply(lmul: UInt, sew: UInt): UInt = 1.U << GenVLMAXLog2(lmul, sew)
}
/**
 * Generate a mask based on vlmax.
 * example: vlmax = b100 gives mask = b011
 */
object GenVlMaxMask{
  def apply(vlmax: UInt, length: Int): UInt = (vlmax - 1.U)(length-1, 0)
}

object GenUSWholeRegVL extends VLSUConstants {
  def apply(nfields: UInt, eew: UInt): UInt = {
    require(eew.getWidth == 2, "The eew width must be 2.")
    LookupTree(eew, List(
      "b00".U -> (nfields << (log2Up(VLENB) - 0)),
      "b01".U -> (nfields << (log2Up(VLENB) - 1)),
      "b10".U -> (nfields << (log2Up(VLENB) - 2)),
      "b11".U -> (nfields << (log2Up(VLENB) - 3))
    ))
  }
}
object GenUSWholeEmul extends VLSUConstants{
  def apply(nf: UInt): UInt={
    LookupTree(nf,List(
      "b000".U -> "b000".U(mulBits.W),
      "b001".U -> "b001".U(mulBits.W),
      "b011".U -> "b010".U(mulBits.W),
      "b111".U -> "b011".U(mulBits.W)
    ))
  }
}

object GenUSMaskRegVL extends VLSUConstants {
  def apply(vl: UInt): UInt = {
    Mux(vl(2,0) === 0.U , (vl >> 3.U), ((vl >> 3.U) + 1.U))
  }
}

object GenUopByteMask {
  def apply(flowMask: UInt, alignedType: UInt): UInt = {
    LookupTree(alignedType, List(
      "b000".U -> flowMask,
      "b001".U -> FillInterleaved(2, flowMask),
      "b010".U -> FillInterleaved(4, flowMask),
      "b011".U -> FillInterleaved(8, flowMask),
      "b100".U -> FillInterleaved(16, flowMask)
    ))
  }
}

object GenVdIdxInField extends VLSUConstants {
  def apply(instType: UInt, emul: UInt, lmul: UInt, uopIdx: UInt): UInt = {
    val vdIdx = Wire(UInt(log2Up(maxMUL).W))
    when (instType(1,0) === "b00".U || instType(1,0) === "b10".U || lmul.asSInt > emul.asSInt) {
      // unit-stride or strided, or indexed with lmul > emul
      vdIdx := uopIdx
    }.otherwise {
      // indexed with lmul <= emul
      val multiple = emul - lmul
      val uopIdxWidth = uopIdx.getWidth
      vdIdx := LookupTree(multiple, List(
        0.U -> uopIdx,
        1.U -> (uopIdx >> 1),
        2.U -> (uopIdx >> 2),
        3.U -> (uopIdx >> 3)
      ))
    }
    vdIdx
  }
}
/**
 * Use vstart and vl to generate a flow-active mask.
 * mod = true: select elements whose elementMask bit is 1 (masked-off elements fill 0)
 * mod = false: select elements whose elementMask bit is 0 (masked-off elements fill 1)
 */
object GenFlowMask extends VLSUConstants {
  def apply(elementMask: UInt, start: UInt, vl: UInt , mod: Boolean): UInt = {
    val startMask = ~UIntToMask(start, VLEN)
    val vlMask = UIntToMask(vl, VLEN)
    val maskVlStart = vlMask & startMask
    if(mod){
      elementMask & maskVlStart
    }
    else{
      (~elementMask).asUInt & maskVlStart
    }
  }
}

object genVWmask128 {
  def apply(addr: UInt, sizeEncode: UInt): UInt = {
    (LookupTree(sizeEncode, List(
      "b000".U -> 0x1.U,    // 0001 << addr(3:0)
      "b001".U -> 0x3.U,    // 0011
      "b010".U -> 0xf.U,    // 1111
      "b011".U -> 0xff.U,   // 11111111
      "b100".U -> 0xffff.U, // 1111111111111111
      "b111".U -> 0xffff.U  // cbo
    )) << addr(3, 0)).asUInt
  }
}
/*
 * Only used when the maximum access width is 128 bits.
 */
object genVWdata {
  def apply(data: UInt, sizeEncode: UInt): UInt = {
    LookupTree(sizeEncode, List(
      "b000".U -> Fill(16, data(7, 0)),
      "b001".U -> Fill(8, data(15, 0)),
      "b010".U -> Fill(4, data(31, 0)),
      "b011".U -> Fill(2, data(63,0)),
      "b100".U -> data(127,0)
    ))
  }
}

object genUSSplitAddr{
  def apply(addr: UInt, index: UInt, width: Int): UInt = {
    val tmpAddr = Cat(addr(width - 1, 4), 0.U(4.W))
    val nextCacheline = tmpAddr + 16.U
    LookupTree(index, List(
      0.U -> tmpAddr,
      1.U -> nextCacheline
    ))
  }
}

object genUSSplitMask{
  def apply(mask: UInt, index: UInt): UInt = {
    require(mask.getWidth == 32) // must be 32 bits
    LookupTree(index, List(
      0.U -> mask(15, 0),
      1.U -> mask(31, 16)
    ))
  }
}

object genUSSplitData{
  def apply(data: UInt, index: UInt, addrOffset: UInt): UInt = {
    val tmpData = WireInit(0.U(256.W))
    val lookupTable = (0 until 16).map{case i =>
      if(i == 0){
        i.U -> Cat(0.U(128.W), data)
      }else{
        i.U -> Cat(0.U(((16-i)*8).W), data, 0.U((i*8).W))
      }
    }
    tmpData := LookupTree(addrOffset, lookupTable).asUInt

    LookupTree(index, List(
      0.U -> tmpData(127, 0),
      1.U -> tmpData(255, 128)
    ))
  }
}

object genVSData extends VLSUConstants {
  def apply(data: UInt, elemIdx: UInt, alignedType: UInt): UInt = {
    LookupTree(alignedType, List(
      "b000".U -> ZeroExt(LookupTree(elemIdx(3, 0), List.tabulate(VLEN/8)(i => i.U -> getByte(data, i))), VLEN),
      "b001".U -> ZeroExt(LookupTree(elemIdx(2, 0), List.tabulate(VLEN/16)(i => i.U -> getHalfWord(data, i))), VLEN),
      "b010".U -> ZeroExt(LookupTree(elemIdx(1, 0), List.tabulate(VLEN/32)(i => i.U -> getWord(data, i))), VLEN),
      "b011".U -> ZeroExt(LookupTree(elemIdx(0), List.tabulate(VLEN/64)(i => i.U -> getDoubleWord(data, i))), VLEN),
      "b100".U -> data // an element wider than 128 bits would break this assumption
    ))
  }
}

// TODO: more elegant
object genVStride extends VLSUConstants {
  def apply(uopIdx: UInt, stride: UInt): UInt = {
    LookupTree(uopIdx, List(
      0.U -> 0.U,
      1.U -> stride,
      2.U -> (stride << 1),
      3.U -> ((stride << 1).asUInt + stride),
      4.U -> (stride << 2),
      5.U -> ((stride << 2).asUInt + stride),
      6.U -> ((stride << 2).asUInt + (stride << 1)),
      7.U -> ((stride << 2).asUInt + (stride << 1) + stride)
    ))
  }
}
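
// genVStride example (illustrative only): for uopIdx = 5.U the accumulated
// offset is (stride << 2) + stride = 5 * stride, i.e. the base-address
// displacement of the sixth stride-spaced uop.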

/**
 * Generate uopOffset; not used by segment instructions.
 */
object genVUopOffset extends VLSUConstants {
  def apply(instType: UInt, isfof: Bool, uopidx: UInt, nf: UInt, eew: UInt, stride: UInt, alignedType: UInt): UInt = {
    val uopInsidefield = (uopidx >> nf).asUInt // when nf == 0, this is uopidx

//    val fofVUopOffset = (LookupTree(instType,List(
//      "b000".U -> ( genVStride(uopInsidefield, stride) << (log2Up(VLENB).U - eew) ) , // unit-stride fof
//      "b100".U -> ( genVStride(uopInsidefield, stride) << (log2Up(VLENB).U - eew) ) , // segment unit-stride fof
//    ))).asUInt

    val otherVUopOffset = (LookupTree(instType,List(
      "b000".U -> ( uopInsidefield << alignedType                                 ) , // unit-stride
      "b010".U -> ( genVStride(uopInsidefield, stride) << (log2Up(VLENB).U - eew) ) , // strided
      "b001".U -> ( 0.U                                                           ) , // indexed-unordered
      "b011".U -> ( 0.U                                                           ) , // indexed-ordered
      "b100".U -> ( uopInsidefield << alignedType                                 ) , // segment unit-stride
      "b110".U -> ( genVStride(uopInsidefield, stride) << (log2Up(VLENB).U - eew) ) , // segment strided
      "b101".U -> ( 0.U                                                           ) , // segment indexed-unordered
      "b111".U -> ( 0.U                                                           )   // segment indexed-ordered
    ))).asUInt

//    Mux(isfof, fofVUopOffset, otherVUopOffset)
    otherVUopOffset
  }
}

object genVFirstUnmask extends VLSUConstants {
  /**
   * Find the index of the lowest set (unmasked) bit.
   * example:
   *   mask = 16'b1111_1111_1110_0000
   *   returns 5
   * @param mask 16 bits of mask.
   * @return index of the lowest set bit.
   */
  def apply(mask: UInt): UInt = {
    require(mask.getWidth == 16, "The mask width must be 16")
    val select = (0 until 16).zip(mask.asBools).map{case (i, v) =>
      (v, i.U)
    }
    PriorityMuxDefault(select, 0.U)
  }

  def apply(mask: UInt, regOffset: UInt): UInt = {
    require(mask.getWidth == 16, "The mask width must be 16")
    val realMask = (mask >> regOffset).asUInt
    val select = (0 until 16).zip(realMask.asBools).map{case (i, v) =>
      (v, i.U)
    }
    PriorityMuxDefault(select, 0.U)
  }
}

class skidBufferConnect[T <: Data](gen: T) extends Module {
  val io = IO(new Bundle() {
    val in = Flipped(DecoupledIO(gen.cloneType))
    val flush = Input(Bool())
    val out = DecoupledIO(gen.cloneType)
  })

  skidBuffer.connect(io.in, io.out, io.flush)
}

object skidBuffer{
  /*
   * Skid buffer used to break the timing path of ready.
   */
  def connect[T <: Data](
    in: DecoupledIO[T],
    out: DecoupledIO[T],
    flush: Bool
  ): T = {
    val empty :: skid :: Nil = Enum(2)
    val state = RegInit(empty)
    val stateNext = WireInit(empty)
    val dataBuffer = RegEnable(in.bits, (!out.ready && in.fire))

    when(state === empty){
      stateNext := Mux(!out.ready && in.fire && !flush, skid, empty)
    }.elsewhen(state === skid){
      stateNext := Mux(out.ready || flush, empty, skid)
    }
    state := stateNext

    in.ready := state === empty
    out.bits := Mux(state === skid, dataBuffer, in.bits)
    out.valid := in.valid || (state === skid)

    dataBuffer
  }
  def apply[T <: Data](
    in: DecoupledIO[T],
    out: DecoupledIO[T],
    flush: Bool,
    moduleName: String
  ): Unit = {
    val buffer = Module(new skidBufferConnect(in.bits))
    buffer.suggestName(moduleName)
    buffer.io.in <> in
    buffer.io.flush := flush
    out <> buffer.io.out
  }
}
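
// skidBuffer usage example (a minimal sketch; `producer`, `consumer` and the
// surrounding module's io.flush are hypothetical endpoints, not part of this
// file):
//   skidBuffer(producer.out, consumer.in, io.flush, "vSkidBuffer")
// The buffer cuts the ready timing path: in.ready depends only on local
// state, and a beat accepted while out is stalled is replayed from dataBuffer.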