/***************************************************************************************
* Copyright (c) 2024 Beijing Institute of Open Source Chip (BOSC)
* Copyright (c) 2020-2024 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
*
*
* Acknowledgement
*
* This implementation is inspired by several key papers:
* [1] Glenn Reinman, Brad Calder, and Todd Austin. "[Fetch directed instruction prefetching.]
* (https://doi.org/10.1109/MICRO.1999.809439)" 32nd Annual ACM/IEEE International Symposium on Microarchitecture
* (MICRO). 1999.
***************************************************************************************/

package xiangshan.frontend.icache

import chisel3._
import chisel3.util._
import freechips.rocketchip.diplomacy.IdRange
import freechips.rocketchip.diplomacy.LazyModule
import freechips.rocketchip.diplomacy.LazyModuleImp
import freechips.rocketchip.tilelink._
import freechips.rocketchip.util.BundleFieldBase
import huancun.AliasField
import huancun.PrefetchField
import org.chipsalliance.cde.config.Parameters
import utility._
import xiangshan._
import xiangshan.cache._
import xiangshan.cache.mmu.TlbRequestIO
import xiangshan.frontend._

/**
 * Static configuration of the L1 instruction cache.
 *
 * @param nSets               number of sets
 * @param nWays               number of ways
 * @param tagECC              name of the tag check code, resolved through `Code.fromString`
 * @param dataECC             name of the data check code, resolved through `Code.fromString`
 * @param replacer            name of the replacement policy, resolved through `ReplacementPolicy.fromString`
 * @param PortNumber          number of read ports / meta banks
 * @param nFetchMshr          number of fetch MSHRs (contributes to the TileLink source id range)
 * @param nPrefetchMshr       number of prefetch MSHRs (contributes to the TileLink source id range)
 * @param DataCodeUnit        maximum number of data bits covered by one data check-code segment
 * @param ICacheDataBanks     number of data banks one cache block is split into
 * @param ICacheDataSRAMWidth physical word width handed to [[SRAMTemplateWithFixedWidth]]
 * @param partWayNum          number of copies of the data-array read port (fan-out control)
 * @param blockBytes          cache block size in bytes
 *
 * `rowBits`, `nTLBEntries`, `nWayLookupSize` and `nMMIOs` are not consumed by the definitions in this
 * part of the file.
 */
case class ICacheParameters(
    nSets:               Int = 256,
    nWays:               Int = 4,
    rowBits:             Int = 64,
    nTLBEntries:         Int = 32,
    tagECC:              Option[String] = None,
    dataECC:             Option[String] = None,
    replacer:            Option[String] = Some("random"),
    PortNumber:          Int = 2,
    nFetchMshr:          Int = 4,
    nPrefetchMshr:       Int = 10,
    nWayLookupSize:      Int = 32,
    DataCodeUnit:        Int = 64,
    ICacheDataBanks:     Int = 8,
    ICacheDataSRAMWidth: Int = 66,
    // TODO: hard code, need delete
    partWayNum: Int = 4,
    nMMIOs:     Int = 1,
    blockBytes: Int = 64
) extends L1CacheParameters {

  val setBytes: Int = nSets * blockBytes
  // Alias bits are only present when one way covers more than a page.
  val aliasBitsOpt: Option[Int] = Option.when(setBytes > pageSize)(log2Ceil(setBytes / pageSize))
  val reqFields: Seq[BundleFieldBase] = Seq(
    PrefetchField(),
    ReqSourceField()
  ) ++ aliasBitsOpt.map(AliasField)
  val echoFields: Seq[BundleFieldBase] = Nil
  def tagCode:     Code = Code.fromString(tagECC)
  def dataCode:    Code = Code.fromString(dataECC)
  def replacement = ReplacementPolicy.fromString(replacer, nWays, nSets)
}

/**
 * Derived constants and small hardware helpers shared by every ICache bundle and module.
 */
trait HasICacheParameters extends HasL1CacheParameters with HasInstrMMIOConst with HasIFUConst {
  val cacheParams: ICacheParameters = icacheParameters

  def ICacheSets:          Int = cacheParams.nSets
  def ICacheWays:          Int = cacheParams.nWays
  def PortNumber:          Int = cacheParams.PortNumber
  def nFetchMshr:          Int = cacheParams.nFetchMshr
  def nPrefetchMshr:       Int = cacheParams.nPrefetchMshr
  def nWayLookupSize:      Int = cacheParams.nWayLookupSize
  def DataCodeUnit:        Int = cacheParams.DataCodeUnit
  def ICacheDataBanks:     Int = cacheParams.ICacheDataBanks
  def ICacheDataSRAMWidth: Int = cacheParams.ICacheDataSRAMWidth
  def partWayNum:          Int = cacheParams.partWayNum

  def ICacheMetaBits:      Int = tagBits // FIXME: unportable: maybe use somemethod to get width
  def ICacheMetaCodeBits:  Int = 1       // FIXME: unportable: maybe use cacheParams.tagCode.somemethod to get width
  def ICacheMetaEntryBits: Int = ICacheMetaBits + ICacheMetaCodeBits

  def ICacheDataBits: Int = blockBits / ICacheDataBanks
  // Split data to segments for ECC checking.
  // The division must be done in floating point: with integer division the quotient is already
  // truncated before `math.ceil` sees it, so the result would be rounded down (and would be 0 when
  // DataCodeUnit > ICacheDataBits).
  def ICacheDataCodeSegs: Int =
    math.ceil(ICacheDataBits.toDouble / DataCodeUnit).toInt
  def ICacheDataCodeBits: Int =
    ICacheDataCodeSegs * 1 // FIXME: unportable: maybe use cacheParams.dataCode.somemethod to get width
  def ICacheDataEntryBits: Int = ICacheDataBits + ICacheDataCodeBits
  def ICacheBankVisitNum:  Int = 32 * 8 / ICacheDataBits + 1
  def highestIdxBit:       Int = log2Ceil(nSets) - 1

  require((ICacheDataBanks >= 2) && isPow2(ICacheDataBanks))
  require(ICacheDataSRAMWidth >= ICacheDataEntryBits)
  require(isPow2(ICacheSets), s"nSets($ICacheSets) must be pow2")
  require(isPow2(ICacheWays), s"nWays($ICacheWays) must be pow2")

  /**
   * Valid register of a pipeline stage.
   * Priority: `thisFlush` clears it; otherwise `lastFire && !lastFlush` sets it; otherwise `thisFire` clears it.
   */
  def generatePipeControl(lastFire: Bool, thisFire: Bool, thisFlush: Bool, lastFlush: Bool): Bool = {
    val valid = RegInit(false.B)
    when(thisFlush)(valid := false.B)
      .elsewhen(lastFire && !lastFlush)(valid := true.B)
      .elsewhen(thisFire)(valid := false.B)
    valid
  }

  /** Returns `data` while `valid` is high, otherwise the value captured the last time `valid` was high. */
  def ResultHoldBypass[T <: Data](data: T, valid: Bool): T =
    Mux(valid, data, RegEnable(data, valid))

  /** Same as the two-argument overload, but the holding register resets to `init`. */
  def ResultHoldBypass[T <: Data](data: T, init: T, valid: Bool): T =
    Mux(valid, data, RegEnable(data, init, valid))

  /**
   * Stretches `valid` until `release`: a latch is set by `valid && !release`, cleared by `release`,
   * and `flush` clears it with the highest priority. The result is `latch || valid`.
   */
  def holdReleaseLatch(valid: Bool, release: Bool, flush: Bool): Bool = {
    val bit = RegInit(false.B)
    when(flush)(bit := false.B)
      .elsewhen(valid && !release)(bit := true.B)
      .elsewhen(release)(bit := false.B)
    bit || valid
  }

  /**
   * Counts cycles in which `block` is high and reports when the count exceeds `threshold`.
   * `flush` resets the counter and wins over a simultaneous increment (it is the later connect).
   */
  def blockCounter(block: Bool, flush: Bool, threshold: Int): Bool = {
    // The counter must be able to hold `threshold + 1`, otherwise `counter > threshold` can never
    // become true when `threshold + 1` is a power of two.
    // NOTE(review): the counter still wraps around once it overflows its width — confirm with callers
    // whether saturation is wanted.
    val counter = RegInit(0.U(log2Up(threshold + 2).W))
    when(block)(counter := counter + 1.U)
    when(flush)(counter := 0.U)
    counter > threshold.U
  }

  /** A register vector of `size` entries shaped like `entry`, reset to all-zero. */
  def InitQueue[T <: Data](entry: T, size: Int): Vec[T] =
    RegInit(VecInit(Seq.fill(size)(0.U.asTypeOf(entry.cloneType))))

  /** Check bits of `meta`: the encoded word with its low `ICacheMetaBits` bits shifted out. */
  def encodeMetaECC(meta: UInt): UInt = {
    require(meta.getWidth == ICacheMetaBits)
    val code = cacheParams.tagCode.encode(meta) >> ICacheMetaBits
    code.asTypeOf(UInt(ICacheMetaCodeBits.W))
  }

  /** Check bits of one data bank: `data` is cut into `ICacheDataCodeSegs` equal segments, each encoded separately. */
  def encodeDataECC(data: UInt): UInt = {
    require(data.getWidth == ICacheDataBits)
    // asTypeOf below would silently drop bits if the segments did not tile the bank exactly
    require(
      ICacheDataBits % ICacheDataCodeSegs == 0,
      s"ICacheDataBits($ICacheDataBits) must be divisible by ICacheDataCodeSegs($ICacheDataCodeSegs)"
    )
    val segBits = ICacheDataBits / ICacheDataCodeSegs
    val datas   = data.asTypeOf(Vec(ICacheDataCodeSegs, UInt(segBits.W)))
    val codes   = VecInit(datas.map(cacheParams.dataCode.encode(_) >> segBits))
    codes.asTypeOf(UInt(ICacheDataCodeBits.W))
  }

  /**
   * Bank select masks for a 32-byte access starting at `blkOffset`.
   * Banks are numbered over two consecutive lines (`2 * ICacheDataBanks` bits); element 0 of the result
   * is the mask within the first line, element 1 the mask within the following line.
   * When `valid` is high, the number of selected banks is asserted to equal `ICacheBankVisitNum`.
   */
  def getBankSel(blkOffset: UInt, valid: Bool = true.B): Vec[UInt] = {
    val bankIdxLow  = (Cat(0.U(1.W), blkOffset) >> log2Ceil(blockBytes / ICacheDataBanks)).asUInt
    val bankIdxHigh = ((Cat(0.U(1.W), blkOffset) + 32.U) >> log2Ceil(blockBytes / ICacheDataBanks)).asUInt
    val bankSel     = VecInit((0 until ICacheDataBanks * 2).map(i => (i.U >= bankIdxLow) && (i.U <= bankIdxHigh)))
    assert(
      !valid || PopCount(bankSel) === ICacheBankVisitNum.U,
      "The number of bank visits must be %d, but bankSel=0x%x",
      ICacheBankVisitNum.U,
      bankSel.asUInt
    )
    bankSel.asTypeOf(UInt((ICacheDataBanks * 2).W)).asTypeOf(Vec(2, UInt(ICacheDataBanks.W)))
  }

  /** Per-bank flag: true for banks below the starting bank, i.e. banks that are read from the second line. */
  def getLineSel(blkOffset: UInt): Vec[Bool] = {
    val bankIdxLow = (blkOffset >> log2Ceil(blockBytes / ICacheDataBanks)).asUInt
    val lineSel    = VecInit((0 until ICacheDataBanks).map(i => i.U < bankIdxLow))
    lineSel
  }

  def getBlkAddr(addr:        UInt): UInt = (addr >> blockOffBits).asUInt
  def getPhyTagFromBlk(addr:  UInt): UInt = (addr >> (pgUntagBits - blockOffBits)).asUInt
  def getIdxFromBlk(addr:     UInt): UInt = addr(idxBits - 1, 0)
  def getPaddrFromPtag(vaddr: UInt, ptag: UInt): UInt = Cat(ptag, vaddr(pgUntagBits - 1, 0))
  def getPaddrFromPtag(vaddrVec: Vec[UInt], ptagVec: Vec[UInt]): Vec[UInt] =
    VecInit((vaddrVec zip ptagVec).map { case (vaddr, ptag) => getPaddrFromPtag(vaddr, ptag) })
}

abstract class ICacheBundle(implicit p: Parameters) extends XSBundle
    with HasICacheParameters

abstract class ICacheModule(implicit p: Parameters) extends XSModule
    with HasICacheParameters

abstract class ICacheArray(implicit p: Parameters) extends XSModule
    with HasICacheParameters

/** Content of one meta entry (without check bits): the tag. */
class ICacheMetadata(implicit p: Parameters) extends ICacheBundle {
  val tag: UInt = UInt(tagBits.W)
}

object ICacheMetadata {
  /** Builds a [[ICacheMetadata]] wire driven by `tag`. */
  def apply(tag: Bits)(implicit p: Parameters): ICacheMetadata = {
    val meta = Wire(new ICacheMetadata)
    meta.tag := tag
    meta
  }
}

class ICacheMetaArrayIO(implicit p: Parameters) extends ICacheBundle {
  val write:    DecoupledIO[ICacheMetaWriteBundle] = Flipped(DecoupledIO(new ICacheMetaWriteBundle))
  val read:     DecoupledIO[ICacheReadBundle]      = Flipped(DecoupledIO(new ICacheReadBundle))
  val readResp: ICacheMetaRespBundle               = Output(new ICacheMetaRespBundle)
  val flush:    Vec[Valid[ICacheMetaFlushBundle]]  = Vec(PortNumber, Flipped(ValidIO(new ICacheMetaFlushBundle)))
  val flushAll: Bool                               = Input(Bool())
}

/**
 * Tag storage of the ICache.
 *
 * Tags (plus check bits) live in `PortNumber` SRAM banks of `nSets / PortNumber` sets each; bit 0 of the
 * set index selects the bank and the remaining bits address inside it. Valid bits are kept in registers
 * (`valid_array`, one `nSets`-bit vector per way) so single sets or the whole cache can be invalidated.
 */
class ICacheMetaArray(implicit p: Parameters) extends ICacheArray {
  class ICacheMetaEntry(implicit p: Parameters) extends ICacheBundle {
    val meta: ICacheMetadata = new ICacheMetadata
    val code: UInt           = UInt(ICacheMetaCodeBits.W)
  }

  private object ICacheMetaEntry {
    def apply(meta: ICacheMetadata)(implicit p: Parameters): ICacheMetaEntry = {
      val entry = Wire(new ICacheMetaEntry)
      entry.meta := meta
      entry.code := encodeMetaECC(meta.asUInt)
      entry
    }
  }

  // sanity check
  require(ICacheMetaEntryBits == (new ICacheMetaEntry).getWidth)

  val io: ICacheMetaArrayIO = IO(new ICacheMetaArrayIO)

  // port_X_read_Y: read port X targets bank Y (port 1 is only active for double-line reads)
  private val port_0_read_0 = io.read.valid && !io.read.bits.vSetIdx(0)(0)
  private val port_0_read_1 = io.read.valid && io.read.bits.vSetIdx(0)(0)
  private val port_1_read_1 = io.read.valid && io.read.bits.vSetIdx(1)(0) && io.read.bits.isDoubleLine
  private val port_1_read_0 = io.read.valid && !io.read.bits.vSetIdx(1)(0) && io.read.bits.isDoubleLine

  private val port_0_read_0_reg = RegEnable(port_0_read_0, 0.U.asTypeOf(port_0_read_0), io.read.fire)
  private val port_0_read_1_reg = RegEnable(port_0_read_1, 0.U.asTypeOf(port_0_read_1), io.read.fire)
  private val port_1_read_1_reg = RegEnable(port_1_read_1, 0.U.asTypeOf(port_1_read_1), io.read.fire)
  private val port_1_read_0_reg = RegEnable(port_1_read_0, 0.U.asTypeOf(port_1_read_0), io.read.fire)

  private val bank_0_idx = Mux(port_0_read_0, io.read.bits.vSetIdx(0), io.read.bits.vSetIdx(1))
  private val bank_1_idx = Mux(port_0_read_1, io.read.bits.vSetIdx(0), io.read.bits.vSetIdx(1))

  private val write_bank_0 = io.write.valid && !io.write.bits.bankIdx
  private val write_bank_1 = io.write.valid && io.write.bits.bankIdx

  private val write_meta_bits = ICacheMetaEntry(meta =
    ICacheMetadata(
      tag = io.write.bits.phyTag
    )
  )

  private val tagArrays = (0 until PortNumber) map { bank =>
    val tagArray = Module(new SRAMTemplate(
      new ICacheMetaEntry(),
      set = nSets / PortNumber,
      way = nWays,
      shouldReset = true,
      holdRead = true,
      singlePort = true,
      withClockGate = true
    ))

    // meta connection
    if (bank == 0) {
      tagArray.io.r.req.valid := port_0_read_0 || port_1_read_0
      tagArray.io.r.req.bits.apply(setIdx = bank_0_idx(highestIdxBit, 1))
      tagArray.io.w.req.valid := write_bank_0
      tagArray.io.w.req.bits.apply(
        data = write_meta_bits,
        setIdx = io.write.bits.virIdx(highestIdxBit, 1),
        waymask = io.write.bits.waymask
      )
    } else {
      tagArray.io.r.req.valid := port_0_read_1 || port_1_read_1
      tagArray.io.r.req.bits.apply(setIdx = bank_1_idx(highestIdxBit, 1))
      tagArray.io.w.req.valid := write_bank_1
      tagArray.io.w.req.bits.apply(
        data = write_meta_bits,
        setIdx = io.write.bits.virIdx(highestIdxBit, 1),
        waymask = io.write.bits.waymask
      )
    }

    tagArray
  }

  private val read_set_idx_next = RegEnable(io.read.bits.vSetIdx, 0.U.asTypeOf(io.read.bits.vSetIdx), io.read.fire)
  private val valid_array       = RegInit(VecInit(Seq.fill(nWays)(0.U(nSets.W))))
  private val valid_metas       = Wire(Vec(PortNumber, Vec(nWays, Bool())))
  // valid read
  (0 until PortNumber).foreach(i =>
    (0 until nWays).foreach(way =>
      valid_metas(i)(way) := valid_array(way)(read_set_idx_next(i))
    )
  )
  io.readResp.entryValid := valid_metas

  io.read.ready := !io.write.valid && !io.flush.map(_.valid).reduce(_ || _) && !io.flushAll &&
    tagArrays.map(_.io.r.req.ready).reduce(_ && _)

  // valid write
  private val way_num = OHToUInt(io.write.bits.waymask)
  when(io.write.valid) {
    valid_array(way_num) := valid_array(way_num).bitSet(io.write.bits.virIdx, true.B)
  }

  XSPerfAccumulate("meta_refill_num", io.write.valid)

  io.readResp.metas <> DontCare
  io.readResp.codes <> DontCare
  private val readMetaEntries = tagArrays.map(port => port.io.r.resp.asTypeOf(Vec(nWays, new ICacheMetaEntry())))
  private val readMetas       = readMetaEntries.map(_.map(_.meta))
  private val readCodes       = readMetaEntries.map(_.map(_.code))

  // TEST: force ECC to fail by setting readCodes to 0
  if (ICacheForceMetaECCError) {
    readCodes.foreach(_.foreach(_ := 0.U))
  }

  // Route each bank's response back to the port that requested it one cycle earlier.
  when(port_0_read_0_reg) {
    io.readResp.metas(0) := readMetas(0)
    io.readResp.codes(0) := readCodes(0)
  }.elsewhen(port_0_read_1_reg) {
    io.readResp.metas(0) := readMetas(1)
    io.readResp.codes(0) := readCodes(1)
  }

  when(port_1_read_0_reg) {
    io.readResp.metas(1) := readMetas(0)
    io.readResp.codes(1) := readCodes(0)
  }.elsewhen(port_1_read_1_reg) {
    io.readResp.metas(1) := readMetas(1)
    io.readResp.codes(1) := readCodes(1)
  }

  io.write.ready := true.B // TODO : has bug ? should be !io.cacheOp.req.valid

  /*
   * flush logic
   */
  // flush standalone set (e.g. flushed by mainPipe before doing re-fetch)
  when(io.flush.map(_.valid).reduce(_ || _)) {
    (0 until nWays).foreach { w =>
      valid_array(w) := (0 until PortNumber).map { i =>
        Mux(
          // check if set `virIdx` in way `w` is requested to be flushed by port `i`
          io.flush(i).valid && io.flush(i).bits.waymask(w),
          valid_array(w).bitSet(io.flush(i).bits.virIdx, false.B),
          valid_array(w)
        )
      }.reduce(_ & _)
    }
  }

  // flush all (e.g. fence.i)
  when(io.flushAll) {
    (0 until nWays).foreach(w => valid_array(w) := 0.U)
  }

  // PERF: flush counter
  XSPerfAccumulate("flush", io.flush.map(_.valid).reduce(_ || _))
  XSPerfAccumulate("flush_all", io.flushAll)
}

class ICacheDataArrayIO(implicit p: Parameters) extends ICacheBundle {
  val write:    DecoupledIO[ICacheDataWriteBundle] = Flipped(DecoupledIO(new ICacheDataWriteBundle))
  val read:     Vec[DecoupledIO[ICacheReadBundle]] = Flipped(Vec(partWayNum, DecoupledIO(new ICacheReadBundle)))
  val readResp: ICacheDataRespBundle               = Output(new ICacheDataRespBundle)
}

/**
 * Data storage of the ICache: `nWays` x `ICacheDataBanks` SRAMs, each `nSets` deep and holding one
 * bank of data plus its check bits per set.
 */
class ICacheDataArray(implicit p: Parameters) extends ICacheArray {
  class ICacheDataEntry(implicit p: Parameters) extends ICacheBundle {
    val data: UInt = UInt(ICacheDataBits.W)
    val code: UInt = UInt(ICacheDataCodeBits.W)
  }

  private object ICacheDataEntry {
    def apply(data: UInt)(implicit p: Parameters): ICacheDataEntry = {
      val entry = Wire(new ICacheDataEntry)
      entry.data := data
      entry.code := encodeDataECC(data)
      entry
    }
  }

  val io: ICacheDataArrayIO = IO(new ICacheDataArrayIO)

  /**
    ******************************************************************************
    * data array
    ******************************************************************************
    */
  private val writeDatas   = io.write.bits.data.asTypeOf(Vec(ICacheDataBanks, UInt(ICacheDataBits.W)))
  private val writeEntries = writeDatas.map(ICacheDataEntry(_).asUInt)

  // io.read() are copies to control fan-out, we can simply use .head here
  private val bankSel  = getBankSel(io.read.head.bits.blkOffset, io.read.head.valid)
  private val lineSel  = getLineSel(io.read.head.bits.blkOffset)
  private val waymasks = io.read.head.bits.waymask
  private val masks    = Wire(Vec(nWays, Vec(ICacheDataBanks, Bool())))
  (0 until nWays).foreach { way =>
    (0 until ICacheDataBanks).foreach { bank =>
      masks(way)(bank) := Mux(
        lineSel(bank),
        waymasks(1)(way) && bankSel(1)(bank).asBool,
        waymasks(0)(way) && bankSel(0)(bank).asBool
      )
    }
  }

  private val dataArrays = (0 until nWays).map { way =>
    (0 until ICacheDataBanks).map { bank =>
      val sramBank = Module(new SRAMTemplateWithFixedWidth(
        UInt(ICacheDataEntryBits.W),
        set = nSets,
        width = ICacheDataSRAMWidth,
        shouldReset = true,
        holdRead = true,
        singlePort = true,
        withClockGate = true
      ))

      // io.read has partWayNum identical copies; spread the banks over them
      val readPort = io.read(bank % partWayNum)

      // read
      sramBank.io.r.req.valid := readPort.valid && masks(way)(bank)
      sramBank.io.r.req.bits.apply(setIdx =
        Mux(lineSel(bank), readPort.bits.vSetIdx(1), readPort.bits.vSetIdx(0))
      )
      // write
      sramBank.io.w.req.valid := io.write.valid && io.write.bits.waymask(way).asBool
      sramBank.io.w.req.bits.apply(
        data = writeEntries(bank),
        setIdx = io.write.bits.virIdx,
        // waymask is invalid when way of SRAMTemplate <= 1
        waymask = 0.U
      )
      sramBank
    }
  }

  /**
    ******************************************************************************
    * read logic
    ******************************************************************************
    */
  private val masksReg = RegEnable(masks, 0.U.asTypeOf(masks), io.read(0).valid)
  private val readDataWithCode = (0 until ICacheDataBanks).map { bank =>
    Mux1H(VecInit(masksReg.map(_(bank))).asTypeOf(UInt(nWays.W)), dataArrays.map(_(bank).io.r.resp.asUInt))
  }
  private val readEntries = readDataWithCode.map(_.asTypeOf(new ICacheDataEntry()))
  private val readDatas   = VecInit(readEntries.map(_.data))
  private val readCodes   = VecInit(readEntries.map(_.code))

  // TEST: force ECC to fail by setting readCodes to 0
  if (ICacheForceDataECCError) {
    readCodes.foreach(_ := 0.U)
  }

  /**
    ******************************************************************************
    * IO
    ******************************************************************************
    */
  io.readResp.datas := readDatas
  io.readResp.codes := readCodes
  io.write.ready    := true.B
  io.read.foreach(_.ready := !io.write.valid)
}

class ICacheReplacerIO(implicit p: Parameters) extends ICacheBundle {
  val touch:  Vec[Valid[ReplacerTouch]] = Vec(PortNumber, Flipped(ValidIO(new ReplacerTouch)))
  val victim: ReplacerVictim            = Flipped(new ReplacerVictim)
}

/**
 * Replacement state, split into `PortNumber` replacers of `nSets / PortNumber` sets each; bit 0 of the
 * set index selects the replacer. Each replacer receives two touch sources: slot 0 from the touch ports,
 * slot 1 from the victim chosen in the previous cycle.
 */
class ICacheReplacer(implicit p: Parameters) extends ICacheModule {
  val io: ICacheReplacerIO = IO(new ICacheReplacerIO)

  private val replacers =
    Seq.fill(PortNumber)(ReplacementPolicy.fromString(cacheParams.replacer, nWays, nSets / PortNumber))

  // touch
  private val touch_sets = Seq.fill(PortNumber)(Wire(Vec(PortNumber, UInt(log2Ceil(nSets / PortNumber).W))))
  private val touch_ways = Seq.fill(PortNumber)(Wire(Vec(PortNumber, Valid(UInt(wayBits.W)))))
  (0 until PortNumber).foreach { i =>
    touch_sets(i)(0) := Mux(
      io.touch(i).bits.vSetIdx(0),
      io.touch(1).bits.vSetIdx(highestIdxBit, 1),
      io.touch(0).bits.vSetIdx(highestIdxBit, 1)
    )
    touch_ways(i)(0).bits  := Mux(io.touch(i).bits.vSetIdx(0), io.touch(1).bits.way, io.touch(0).bits.way)
    touch_ways(i)(0).valid := Mux(io.touch(i).bits.vSetIdx(0), io.touch(1).valid, io.touch(0).valid)
  }

  // victim
  io.victim.way := Mux(
    io.victim.vSetIdx.bits(0),
    replacers(1).way(io.victim.vSetIdx.bits(highestIdxBit, 1)),
    replacers(0).way(io.victim.vSetIdx.bits(highestIdxBit, 1))
  )

  // touch the victim in next cycle
  private val victim_vSetIdx_reg =
    RegEnable(io.victim.vSetIdx.bits, 0.U.asTypeOf(io.victim.vSetIdx.bits), io.victim.vSetIdx.valid)
  private val victim_way_reg = RegEnable(io.victim.way, 0.U.asTypeOf(io.victim.way), io.victim.vSetIdx.valid)
  (0 until PortNumber).foreach { i =>
    touch_sets(i)(1)       := victim_vSetIdx_reg(highestIdxBit, 1)
    touch_ways(i)(1).bits  := victim_way_reg
    touch_ways(i)(1).valid := RegNext(io.victim.vSetIdx.valid) && (victim_vSetIdx_reg(0) === i.U)
  }

  ((replacers zip touch_sets) zip touch_ways).foreach { case ((r, s), w) => r.access(s, w) }
}

/** Top-level IO of the ICache module implementation. */
class ICacheIO(implicit p: Parameters) extends ICacheBundle {
  val hartId: UInt = Input(UInt(hartIdLen.W))
  // FTQ
  val fetch:       ICacheMainPipeBundle = new ICacheMainPipeBundle
  val ftqPrefetch: FtqToPrefetchIO      = Flipped(new FtqToPrefetchIO)
  // memblock
  val softPrefetch: Vec[Valid[SoftIfetchPrefetchBundle]] =
    Vec(backendParams.LduCnt, Flipped(Valid(new SoftIfetchPrefetchBundle)))
  // IFU
  val stop:  Bool = Input(Bool())
  val toIFU: Bool = Output(Bool())
  // PMP: mainPipe & prefetchPipe need PortNumber each
  val pmp: Vec[ICachePMPBundle] = Vec(2 * PortNumber, new ICachePMPBundle)
  // iTLB
  val itlb: Vec[TlbRequestIO] = Vec(PortNumber, new TlbRequestIO)
  // driven by the prefetch pipe; direction stated explicitly like every other scalar port here
  val itlbFlushPipe: Bool = Output(Bool())
  // backend/BEU
  val error: Valid[L1CacheErrorInfo] = ValidIO(new L1CacheErrorInfo)
  // backend/CSR
  val csr_pf_enable:     Bool = Input(Bool())
  val csr_parity_enable: Bool = Input(Bool())
  // flush
  val fencei: Bool = Input(Bool())
  val flush:  Bool = Input(Bool())

  // perf
  val perfInfo: ICachePerfInfo = Output(new ICachePerfInfo)
}

/** Diplomatic wrapper of the ICache: declares the TileLink client node and instantiates [[ICacheImp]]. */
class ICache()(implicit p: Parameters) extends LazyModule with HasICacheParameters {
  override def shouldBeInlined: Boolean = false

  val clientParameters: TLMasterPortParameters = TLMasterPortParameters.v1(
    Seq(TLMasterParameters.v1(
      name = "icache",
      sourceId = IdRange(0, cacheParams.nFetchMshr + cacheParams.nPrefetchMshr + 1)
    )),
    requestFields = cacheParams.reqFields,
    echoFields = cacheParams.echoFields
  )

  val clientNode: TLClientNode = TLClientNode(Seq(clientParameters))

  lazy val module: ICacheImp = new ICacheImp(this)
}

/**
 * ICache implementation: instantiates the meta/data arrays, main pipe, miss unit, replacer, prefetch
 * pipe and way-lookup queue, and wires them to each other, to the module IO and to the TileLink bus.
 */
class ICacheImp(outer: ICache) extends LazyModuleImp(outer) with HasICacheParameters with HasPerfEvents {
  val io: ICacheIO = IO(new ICacheIO)

  println("ICache:")
  println(" TagECC: " + cacheParams.tagECC)
  println(" DataECC: " + cacheParams.dataECC)
  println(" ICacheSets: " + cacheParams.nSets)
  println(" ICacheWays: " + cacheParams.nWays)
  println(" PortNumber: " + cacheParams.PortNumber)
  println(" nFetchMshr: " + cacheParams.nFetchMshr)
  println(" nPrefetchMshr: " + cacheParams.nPrefetchMshr)
  println(" nWayLookupSize: " + cacheParams.nWayLookupSize)
  println(" DataCodeUnit: " + cacheParams.DataCodeUnit)
  println(" ICacheDataBanks: " + cacheParams.ICacheDataBanks)
  println(" ICacheDataSRAMWidth: " + cacheParams.ICacheDataSRAMWidth)

  val (bus, edge) = outer.clientNode.out.head

  private val metaArray  = Module(new ICacheMetaArray)
  private val dataArray  = Module(new ICacheDataArray)
  private val mainPipe   = Module(new ICacheMainPipe)
  private val missUnit   = Module(new ICacheMissUnit(edge))
  private val replacer   = Module(new ICacheReplacer)
  private val prefetcher = Module(new IPrefetchPipe)
  private val wayLookup  = Module(new WayLookup)

  dataArray.io.write    <> missUnit.io.data_write
  dataArray.io.read     <> mainPipe.io.dataArray.toIData
  dataArray.io.readResp <> mainPipe.io.dataArray.fromIData

  metaArray.io.flushAll := io.fencei
  metaArray.io.flush    <> mainPipe.io.metaArrayFlush
  metaArray.io.write    <> missUnit.io.meta_write
  metaArray.io.read     <> prefetcher.io.metaRead.toIMeta
  metaArray.io.readResp <> prefetcher.io.metaRead.fromIMeta

  prefetcher.io.flush             := io.flush
  prefetcher.io.csr_pf_enable     := io.csr_pf_enable
  prefetcher.io.csr_parity_enable := io.csr_parity_enable
  prefetcher.io.MSHRResp          := missUnit.io.fetch_resp
  prefetcher.io.flushFromBpu      := io.ftqPrefetch.flushFromBpu
  // cache softPrefetch
  private val softPrefetchValid = RegInit(false.B)
  private val softPrefetch      = RegInit(0.U.asTypeOf(new IPrefetchReq))
  /* FIXME:
   * If there is already a pending softPrefetch request, it will be overwritten.
   * Also, if there are multiple softPrefetch requests in the same cycle, only the first one will be accepted.
   * We should implement a softPrefetchQueue (like ibuffer, multi-in, single-out) to solve this.
   * However, the impact on performance still needs to be assessed.
   * Considering that the frequency of prefetch.i may not be high, let's start with a temporary dummy solution.
   */
  when(io.softPrefetch.map(_.valid).reduce(_ || _)) {
    softPrefetchValid := true.B
    softPrefetch.fromSoftPrefetch(MuxCase(
      0.U.asTypeOf(new SoftIfetchPrefetchBundle),
      io.softPrefetch.map(req => req.valid -> req.bits)
    ))
  }.elsewhen(prefetcher.io.req.fire) {
    softPrefetchValid := false.B
  }
  // pass ftqPrefetch
  private val ftqPrefetch = WireInit(0.U.asTypeOf(new IPrefetchReq))
  ftqPrefetch.fromFtqICacheInfo(io.ftqPrefetch.req.bits)
  // software prefetch has higher priority
  prefetcher.io.req.valid                 := softPrefetchValid || io.ftqPrefetch.req.valid
  prefetcher.io.req.bits                  := Mux(softPrefetchValid, softPrefetch, ftqPrefetch)
  prefetcher.io.req.bits.backendException := io.ftqPrefetch.backendException
  io.ftqPrefetch.req.ready                := prefetcher.io.req.ready && !softPrefetchValid

  missUnit.io.hartId := io.hartId
  missUnit.io.fencei := io.fencei
  missUnit.io.flush  := io.flush
  missUnit.io.fetch_req    <> mainPipe.io.mshr.req
  missUnit.io.prefetch_req <> prefetcher.io.MSHRReq
  // defaults; the bulk connect to bus.d below is the later connect for these fields
  missUnit.io.mem_grant.valid := false.B
  missUnit.io.mem_grant.bits  := DontCare
  missUnit.io.mem_grant       <> bus.d

  mainPipe.io.flush             := io.flush
  mainPipe.io.respStall         := io.stop
  mainPipe.io.csr_parity_enable := io.csr_parity_enable
  mainPipe.io.hartId            := io.hartId
  mainPipe.io.mshr.resp         := missUnit.io.fetch_resp
  mainPipe.io.fetch.req         <> io.fetch.req
  mainPipe.io.wayLookupRead     <> wayLookup.io.read

  wayLookup.io.flush  := io.flush
  wayLookup.io.write  <> prefetcher.io.wayLookupWrite
  wayLookup.io.update := missUnit.io.fetch_resp

  replacer.io.touch  <> mainPipe.io.touch
  replacer.io.victim <> missUnit.io.victim

  io.pmp(0) <> mainPipe.io.pmp(0)
  io.pmp(1) <> mainPipe.io.pmp(1)
  io.pmp(2) <> prefetcher.io.pmp(0)
  io.pmp(3) <> prefetcher.io.pmp(1)

  io.itlb(0)       <> prefetcher.io.itlb(0)
  io.itlb(1)       <> prefetcher.io.itlb(1)
  io.itlbFlushPipe := prefetcher.io.itlbFlushPipe

  // notify IFU that Icache pipeline is available
  io.toIFU    := mainPipe.io.fetch.req.ready
  io.perfInfo := mainPipe.io.perfInfo

  io.fetch.resp              <> mainPipe.io.fetch.resp
  io.fetch.topdownIcacheMiss := mainPipe.io.fetch.topdownIcacheMiss
  io.fetch.topdownItlbMiss   := mainPipe.io.fetch.topdownItlbMiss

  bus.b.ready := false.B
  bus.c.valid := false.B
  bus.c.bits  := DontCare
  bus.e.valid := false.B
  bus.e.bits  := DontCare

  bus.a <> missUnit.io.mem_acquire

  // Parity error port
  private val errors       = mainPipe.io.errors
  private val errors_valid = errors.map(e => e.valid).reduce(_ | _)
  io.error.bits <> RegEnable(
    PriorityMux(errors.map(e => e.valid -> e.bits)),
    0.U.asTypeOf(errors(0).bits),
    errors_valid
  )
  io.error.valid := RegNext(errors_valid, false.B)

  XSPerfAccumulate(
    "softPrefetch_drop_not_ready",
    io.softPrefetch.map(_.valid).reduce(_ || _) && softPrefetchValid && !prefetcher.io.req.fire
  )
  XSPerfAccumulate("softPrefetch_drop_multi_req", PopCount(io.softPrefetch.map(_.valid)) > 1.U)
  XSPerfAccumulate("softPrefetch_block_ftq", softPrefetchValid && io.ftqPrefetch.req.valid)

  val perfEvents: Seq[(String, Bool)] = Seq(
    ("icache_miss_cnt ", false.B),
    ("icache_miss_penalty", BoolStopWatch(start = false.B, stop = false.B || false.B, startHighPriority = true))
  )
  generatePerfEvent()
}

//class ICachePartWayReadBundle[T <: Data](gen: T, pWay: Int)(implicit p: Parameters)
//    extends ICacheBundle {
//  val req = Flipped(Vec(
//    PortNumber,
//    Decoupled(new Bundle {
//      val ridx = UInt((log2Ceil(nSets) - 1).W)
//    })
//  ))
//  val resp = Output(new Bundle {
//    val rdata = Vec(PortNumber, Vec(pWay, gen))
//  })
//}

//class ICacheWriteBundle[T <: Data](gen: T, pWay: Int)(implicit p: Parameters)
//    extends ICacheBundle {
//  val wdata    = gen
//  val widx     = UInt((log2Ceil(nSets) - 1).W)
//  val wbankidx = Bool()
//  val wmask    = Vec(pWay, Bool())
//}

//class ICachePartWayArray[T <: Data](gen: T, pWay: Int)(implicit p: Parameters) extends ICacheArray {
//
//  // including part way data
//  val io = IO {
//    new Bundle {
//      val read  = new ICachePartWayReadBundle(gen, pWay)
//      val write = Flipped(ValidIO(new ICacheWriteBundle(gen, pWay)))
//    }
//  }
//
//  io.read.req.map(_.ready := !io.write.valid)
//
//  val srams = (0 until PortNumber) map { bank =>
//    val sramBank = Module(new SRAMTemplate(
//      gen,
//      set = nSets / 2,
//      way = pWay,
//      shouldReset = true,
//      holdRead = true,
//      singlePort = true,
//      withClockGate = true
//    ))
//
//    sramBank.io.r.req.valid := io.read.req(bank).valid
//    sramBank.io.r.req.bits.apply(setIdx = io.read.req(bank).bits.ridx)
//
//    if (bank == 0) sramBank.io.w.req.valid := io.write.valid && !io.write.bits.wbankidx
//    else sramBank.io.w.req.valid           := io.write.valid && io.write.bits.wbankidx
//    sramBank.io.w.req.bits.apply(
//      data = io.write.bits.wdata,
//      setIdx = io.write.bits.widx,
//      waymask = io.write.bits.wmask.asUInt
//    )
//
//    sramBank
//  }
//
//  io.read.req.map(_.ready := !io.write.valid && srams.map(_.io.r.req.ready).reduce(_ && _))
//
//  io.read.resp.rdata := VecInit(srams.map(bank => bank.io.r.resp.asTypeOf(Vec(pWay, gen))))
//
//}

/** IO of [[SRAMTemplateWithFixedWidth]]: one read bus and one write bus, both flipped (driven by the user). */
class SRAMTemplateWithFixedWidthIO[T <: Data](gen: T, set: Int, way: Int) extends Bundle {
  val r: SRAMReadBus[T]  = Flipped(new SRAMReadBus(gen, set, way))
  val w: SRAMWriteBus[T] = Flipped(new SRAMWriteBus(gen, set, way))
}

// Automatically partition the SRAM based on the width of the data and the desired width.
// final SRAM width = width * way
/**
 * An SRAM of `gen`-typed entries built from `ceil(gen.getWidth / width)` underlying [[SRAMTemplate]]
 * instances whose word type is `UInt(width.W)`.
 *
 * Written data is zero-extended to `bankNum * width` bits and sliced into words; read data is
 * re-assembled from the words and truncated back to `gen.getWidth` bits. All underlying SRAMs receive
 * the same request, and the handshake `ready` signals are taken from the first one.
 *
 * @param gen   entry type
 * @param set   number of sets
 * @param width word width of each underlying SRAM, must be positive
 * @param way   number of ways
 *
 * The remaining flags are forwarded unchanged to every underlying [[SRAMTemplate]].
 */
class SRAMTemplateWithFixedWidth[T <: Data](
    gen:           T,
    set:           Int,
    width:         Int,
    way:           Int = 1,
    shouldReset:   Boolean = false,
    holdRead:      Boolean = false,
    singlePort:    Boolean = false,
    bypassWrite:   Boolean = false,
    withClockGate: Boolean = false
) extends Module {
  // a non-positive width would make the bank count below meaningless
  require(width > 0, s"width($width) must be positive")

  private val dataBits  = gen.getWidth
  private val bankNum   = math.ceil(dataBits.toDouble / width.toDouble).toInt
  private val totalBits = bankNum * width

  val io: SRAMTemplateWithFixedWidthIO[T] = IO(new SRAMTemplateWithFixedWidthIO(gen, set, way))

  private val wordType = UInt(width.W)
  // writeDatas(bank)(i): the `bank`-th word of way i's write data
  private val writeDatas = (0 until bankNum).map { bank =>
    VecInit((0 until way).map { i =>
      io.w.req.bits.data(i).asTypeOf(UInt(totalBits.W)).asTypeOf(Vec(bankNum, wordType))(bank)
    })
  }

  private val srams = (0 until bankNum) map { bank =>
    val sramBank = Module(new SRAMTemplate(
      wordType,
      set = set,
      way = way,
      shouldReset = shouldReset,
      holdRead = holdRead,
      singlePort = singlePort,
      bypassWrite = bypassWrite,
      withClockGate = withClockGate
    ))
    // read req
    sramBank.io.r.req.valid       := io.r.req.valid
    sramBank.io.r.req.bits.setIdx := io.r.req.bits.setIdx

    // write req
    sramBank.io.w.req.valid       := io.w.req.valid
    sramBank.io.w.req.bits.setIdx := io.w.req.bits.setIdx
    sramBank.io.w.req.bits.data   := writeDatas(bank)
    // waymask is an Option on both sides; connect it only where both define it
    for {
      sink   <- sramBank.io.w.req.bits.waymask
      source <- io.w.req.bits.waymask
    } sink := source

    sramBank
  }

  (0 until way).foreach { i =>
    io.r.resp.data(i) := VecInit((0 until bankNum).map(bank =>
      srams(bank).io.r.resp.data(i)
    )).asTypeOf(UInt(totalBits.W))(dataBits - 1, 0).asTypeOf(gen.cloneType)
  }

  // All underlying SRAMs see identical requests, so the first one's handshake is used for all.
  io.r.req.ready := srams.head.io.r.req.ready
  io.w.req.ready := srams.head.io.w.req.ready
}