/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.cache

import chisel3._
import chisel3.experimental.ExtModule
import chisel3.util._
import coupledL2.VaddrField
import coupledL2.IsKeywordField
import coupledL2.IsKeywordKey
import freechips.rocketchip.diplomacy._
import freechips.rocketchip.tilelink._
import freechips.rocketchip.util.BundleFieldBase
import huancun.{AliasField, PrefetchField}
import org.chipsalliance.cde.config.Parameters
import utility._
import utils._
import xiangshan._
import xiangshan.backend.Bundles.DynInst
import xiangshan.backend.rob.RobDebugRollingIO
import xiangshan.cache.wpu._
import xiangshan.mem.{AddPipelineReg, HasL1PrefetchSourceParameter}
import xiangshan.mem.prefetch._
import xiangshan.mem.LqPtr

/** DCache specific parameters.
  *
  * `setBytes` is the number of bytes covered by one way (`nSets * blockBytes`).
  * When it exceeds `pageSize`, `aliasBitsOpt` holds the number of extra index
  * bits that lie above the page offset; otherwise it is `None`.
  */
case class DCacheParameters
(
  nSets: Int = 128,
  nWays: Int = 8,
  rowBits: Int = 64,
  tagECC: Option[String] = None,
  dataECC: Option[String] = None,
  replacer: Option[String] = Some("setplru"),
  updateReplaceOn2ndmiss: Boolean = true,
  nMissEntries: Int = 1,
  nProbeEntries: Int = 1,
  nReleaseEntries: Int = 1,
  nMMIOEntries: Int = 1,
  nMMIOs: Int = 1,
  blockBytes: Int = 64,
  nMaxPrefetchEntry: Int = 1,
  alwaysReleaseData: Boolean = false,
  isKeywordBitsOpt: Option[Boolean] = Some(true),
  enableDataEcc: Boolean = false,
  enableTagEcc: Boolean = false,
  cacheCtrlAddressOpt: Option[AddressSet] = None,
) extends L1CacheParameters {
  // if sets * blockBytes > 4KB(page size),
  // cache alias will happen,
  // we need to avoid this by recording additional bits in L2 cache
  val setBytes = nSets * blockBytes
  val aliasBitsOpt = if(setBytes > pageSize) Some(log2Ceil(setBytes / pageSize)) else None

  def tagCode: Code = Code.fromString(tagECC)

  def dataCode: Code = Code.fromString(dataECC)
}

//           Physical Address
// --------------------------------------
// |   Physical Tag |  PIndex  | Offset |
// --------------------------------------
//                  |
//                  DCacheTagOffset
//
//           Virtual Address
// --------------------------------------
// | Above index  | Set | Bank | Offset |
// --------------------------------------
//                |     |      |        |
//                |     |      |        0
//                |     |      DCacheBankOffset
//                |     DCacheSetOffset
//                DCacheAboveIndexOffset

// Default DCache size = 64 sets * 8 ways * 8 banks * 8 Byte = 32K Byte

/** Derived DCache constants and small address/arbiter helpers shared by every
  * DCache module and bundle (mixed in through [[DCacheModule]] / [[DCacheBundle]]).
  */
trait HasDCacheParameters extends HasL1CacheParameters with HasL1PrefetchSourceParameter{
  val cacheParams = dcacheParameters
  val cfg = cacheParams

  def blockProbeAfterGrantCycles = 8 // give the processor some time to issue a request after a grant

  // NOTE(review): the source ids below run 0..10 (11 distinct values) while nSourceType is 10.
  // sourceTypeWidth = log2Up(10) = 4 bits still encodes id 10; confirm whether 11 is intended.
  def nSourceType = 10
  def sourceTypeWidth = log2Up(nSourceType)
  // non-prefetch source < 3
  def LOAD_SOURCE = 0
  def STORE_SOURCE = 1
  def AMO_SOURCE = 2
  // prefetch source >= 3
  def DCACHE_PREFETCH_SOURCE = 3
  def SOFT_PREFETCH = 4
  // the following sources are only used inside SMS
  def HW_PREFETCH_AGT = 5
  def HW_PREFETCH_PHT_CUR = 6
  def HW_PREFETCH_PHT_INC = 7
  def HW_PREFETCH_PHT_DEC = 8
  def HW_PREFETCH_BOP = 9
  def HW_PREFETCH_STRIDE = 10

  def BLOOM_FILTER_ENTRY_NUM = 4096

  // each source uses an id to distinguish its multiple reqs
  def reqIdWidth = log2Up(nEntries) max log2Up(StoreBufferSize)

  require(isPow2(cfg.nMissEntries), s"nMissEntries(${cfg.nMissEntries}) must be pow2") // TODO
  // require(isPow2(cfg.nReleaseEntries))
  require(
    cfg.nMissEntries < cfg.nReleaseEntries,
    s"nMissEntries(${cfg.nMissEntries}) must be less than nReleaseEntries(${cfg.nReleaseEntries})"
  )
  val nEntries = cfg.nMissEntries + cfg.nReleaseEntries + 1 // nMissEntries + nReleaseEntries + 1CMO_Entry
  val releaseIdBase = cfg.nMissEntries + 1
  val EnableDataEcc = cacheParams.enableDataEcc
  val EnableTagEcc = cacheParams.enableTagEcc

  // banked dcache support
  val DCacheSetDiv = 1
  val DCacheSets = cacheParams.nSets
  val DCacheWayDiv = 2
  val DCacheWays = cacheParams.nWays
  val DCacheBanks = 8 // hardcoded
  val DCacheDupNum = 16
  val DCacheSRAMRowBits = cacheParams.rowBits // hardcoded
  val DCacheWordBits = 64 // hardcoded
  val DCacheWordBytes = DCacheWordBits / 8
  val MaxPrefetchEntry = cacheParams.nMaxPrefetchEntry
  val DCacheVWordBytes = VLEN / 8
  require(DCacheSRAMRowBits == 64, s"DCacheSRAMRowBits($DCacheSRAMRowBits) must be 64")

  val DCacheSetDivBits = log2Ceil(DCacheSetDiv)
  val DCacheSetBits = log2Ceil(DCacheSets)
  val DCacheSizeBits = DCacheSRAMRowBits * DCacheBanks * DCacheWays * DCacheSets
  val DCacheSizeBytes = DCacheSizeBits / 8
  val DCacheSizeWords = DCacheSizeBits / 64 // TODO

  val DCacheSameVPAddrLength = 12

  val DCacheSRAMRowBytes = DCacheSRAMRowBits / 8
  val DCacheWordOffset = log2Up(DCacheWordBytes)
  val DCacheVWordOffset = log2Up(DCacheVWordBytes)

  val DCacheBankOffset = log2Up(DCacheSRAMRowBytes)
  val DCacheSetOffset = DCacheBankOffset + log2Up(DCacheBanks)
  val DCacheAboveIndexOffset = DCacheSetOffset + log2Up(DCacheSets)
  val DCacheTagOffset = DCacheAboveIndexOffset min DCacheSameVPAddrLength
  val DCacheLineOffset = DCacheSetOffset

  def encWordBits = cacheParams.dataCode.width(wordBits)
  def encRowBits = encWordBits * rowWords // for DuplicatedDataArray only
  def eccBits = encWordBits - wordBits

  def encTagBits = if (EnableTagEcc) cacheParams.tagCode.width(tagBits) else tagBits
  def tagECCBits = encTagBits - tagBits

  def encDataBits = if (EnableDataEcc) cacheParams.dataCode.width(DCacheSRAMRowBits) else DCacheSRAMRowBits
  def dataECCBits = encDataBits - DCacheSRAMRowBits

  // L1 DCache controller
  val cacheCtrlParamsOpt = OptionWrapper(
    cacheParams.cacheCtrlAddressOpt.nonEmpty,
    L1CacheCtrlParams(cacheParams.cacheCtrlAddressOpt.get)
  )
  // uncache
  val uncacheIdxBits = log2Up(VirtualLoadQueueMaxStoreQueueSize + 1)
  // hardware prefetch parameters
  // high confidence hardware prefetch port
  val HighConfHWPFLoadPort = LoadPipelineWidth - 1 // use the last load port by default
  val IgnorePrefetchConfidence = false

  // parameters about duplicating regs to solve fanout
  // In Main Pipe:
  //    tag_write.ready -> data_write.valid * 8 banks
  //    tag_write.ready -> meta_write.valid
  //    tag_write.ready -> tag_write.valid
  //    tag_write.ready -> err_write.valid
  //    tag_write.ready -> wb.valid
  val nDupTagWriteReady = DCacheBanks + 4
  // In Main Pipe:
  //    data_write.ready -> data_write.valid * 8 banks
  //    data_write.ready -> meta_write.valid
  //    data_write.ready -> tag_write.valid
  //    data_write.ready -> err_write.valid
  //    data_write.ready -> wb.valid
  val nDupDataWriteReady = DCacheBanks + 4
  val nDupWbReady = DCacheBanks + 4
  val nDupStatus = nDupTagWriteReady + nDupDataWriteReady
  val dataWritePort = 0
  val metaWritePort = DCacheBanks
  val tagWritePort = metaWritePort + 1
  val errWritePort = tagWritePort + 1
  val wbPort = errWritePort + 1

  /** Low `DCacheSetDivBits` bits of a set index (constant 0 when there is a single division). */
  def set_to_dcache_div(set: UInt) = {
    require(set.getWidth >= DCacheSetBits)
    if (DCacheSetDivBits == 0) 0.U else set(DCacheSetDivBits-1, 0)
  }

  /** Set index bits above the division bits. */
  def set_to_dcache_div_set(set: UInt) = {
    require(set.getWidth >= DCacheSetBits)
    set(DCacheSetBits - 1, DCacheSetDivBits)
  }

  /** Bank field of an address: bits [DCacheSetOffset-1 : DCacheBankOffset]. */
  def addr_to_dcache_bank(addr: UInt) = {
    require(addr.getWidth >= DCacheSetOffset)
    addr(DCacheSetOffset-1, DCacheBankOffset)
  }

  /** Division field of an address (constant 0 when there is a single division). */
  def addr_to_dcache_div(addr: UInt) = {
    require(addr.getWidth >= DCacheAboveIndexOffset)
    if(DCacheSetDivBits == 0) 0.U else addr(DCacheSetOffset + DCacheSetDivBits - 1, DCacheSetOffset)
  }

  /** Set field of an address with the division bits stripped. */
  def addr_to_dcache_div_set(addr: UInt) = {
    require(addr.getWidth >= DCacheAboveIndexOffset)
    addr(DCacheAboveIndexOffset - 1, DCacheSetOffset + DCacheSetDivBits)
  }

  /** Full set field of an address: bits [DCacheAboveIndexOffset-1 : DCacheSetOffset]. */
  def addr_to_dcache_set(addr: UInt) = {
    require(addr.getWidth >= DCacheAboveIndexOffset)
    addr(DCacheAboveIndexOffset-1, DCacheSetOffset)
  }

  /** Slice the `DCacheSRAMRowBits`-wide row belonging to `bank` out of `data`. */
  def get_data_of_bank(bank: Int, data: UInt) = {
    require(data.getWidth >= (bank+1)*DCacheSRAMRowBits)
    data(DCacheSRAMRowBits * (bank + 1) - 1, DCacheSRAMRowBits * bank)
  }

  /** Slice the `DCacheSRAMRowBytes`-wide byte mask belonging to `bank` out of `data`. */
  def get_mask_of_bank(bank: Int, data: UInt) = {
    require(data.getWidth >= (bank+1)*DCacheSRAMRowBytes)
    data(DCacheSRAMRowBytes * (bank + 1) - 1, DCacheSRAMRowBytes * bank)
  }

  /** Index bits of `vaddr` that lie above the page offset, or 0 when there are none. */
  def get_alias(vaddr: UInt): UInt ={
    // require(blockOffBits + idxBits > pgIdxBits)
    if(blockOffBits + idxBits > pgIdxBits){
      vaddr(blockOffBits + idxBits - 1, pgIdxBits)
    }else{
      0.U
    }
  }

  /** True when both virtual addresses carry the same alias bits (always true without aliasing). */
  def is_alias_match(vaddr0: UInt, vaddr1: UInt): Bool = {
    require(vaddr0.getWidth == VAddrBits && vaddr1.getWidth == VAddrBits)
    if(blockOffBits + idxBits > pgIdxBits) {
      vaddr0(blockOffBits + idxBits - 1, pgIdxBits) === vaddr1(blockOffBits + idxBits - 1, pgIdxBits)
    }else {
      // no alias problem
      true.B
    }
  }

  /** The log2Up(DCacheWays) address bits directly above the set index. */
  def get_direct_map_way(addr:UInt): UInt = {
    addr(DCacheAboveIndexOffset + log2Up(DCacheWays) - 1, DCacheAboveIndexOffset)
  }

  /** Instantiate a chisel3.util.Arbiter over `in` and connect its output to `out`. */
  def arbiter[T <: Bundle](
    in: Seq[DecoupledIO[T]],
    out: DecoupledIO[T],
    name: Option[String] = None): Unit = {
    val arb = Module(new Arbiter[T](chiselTypeOf(out.bits), in.size))
    if (name.nonEmpty) { arb.suggestName(s"${name.get}_arb") }
    for ((a, req) <- arb.io.in.zip(in)) {
      a <> req
    }
    out <> arb.io.out
  }

  /** Same as [[arbiter]], but the arbiter output reaches `out` through `AddPipelineReg`. */
  def arbiter_with_pipereg[T <: Bundle](
    in: Seq[DecoupledIO[T]],
    out: DecoupledIO[T],
    name: Option[String] = None): Unit = {
    val arb = Module(new Arbiter[T](chiselTypeOf(out.bits), in.size))
    if (name.nonEmpty) { arb.suggestName(s"${name.get}_arb") }
    for ((a, req) <- arb.io.in.zip(in)) {
      a <> req
    }
    AddPipelineReg(arb.io.out, out, false.B)
  }

  /** Same as [[arbiter_with_pipereg]], with one additional `AddPipelineReg` per entry of `dups`. */
  def arbiter_with_pipereg_N_dup[T <: Bundle](
    in: Seq[DecoupledIO[T]],
    out: DecoupledIO[T],
    dups: Seq[DecoupledIO[T]],
    name: Option[String] = None): Unit = {
    val arb = Module(new Arbiter[T](chiselTypeOf(out.bits), in.size))
    if (name.nonEmpty) { arb.suggestName(s"${name.get}_arb") }
    for ((a, req) <- arb.io.in.zip(in)) {
      a <> req
    }
    for (dup <- dups) {
      AddPipelineReg(arb.io.out, dup, false.B)
    }
    AddPipelineReg(arb.io.out, out, false.B)
  }

  /** Instantiate a chisel3.util.RRArbiter over `in` and connect its output to `out`. */
  def rrArbiter[T <: Bundle](
    in: Seq[DecoupledIO[T]],
    out: DecoupledIO[T],
    name: Option[String] = None): Unit = {
    val arb = Module(new RRArbiter[T](chiselTypeOf(out.bits), in.size))
    if (name.nonEmpty) { arb.suggestName(s"${name.get}_arb") }
    for ((a, req) <- arb.io.in.zip(in)) {
      a <> req
    }
    out <> arb.io.out
  }

  /** Instantiate a `FastArbiter` over `in` and connect its output to `out`. */
  def fastArbiter[T <: Bundle](
    in: Seq[DecoupledIO[T]],
    out: DecoupledIO[T],
    name: Option[String] = None): Unit = {
    val arb = Module(new FastArbiter[T](chiselTypeOf(out.bits), in.size))
    if (name.nonEmpty) { arb.suggestName(s"${name.get}_arb") }
    for ((a, req) <- arb.io.in.zip(in)) {
      a <> req
    }
    out <> arb.io.out
  }

  val numReplaceRespPorts = 2

  require(isPow2(nSets), s"nSets($nSets) must be pow2")
  require(isPow2(nWays), s"nWays($nWays) must be pow2")
  require(full_divide(rowBits, wordBits), s"rowBits($rowBits) must be multiple of wordBits($wordBits)")
  require(full_divide(beatBits, rowBits), s"beatBits($beatBits) must be multiple of rowBits($rowBits)")
}

abstract class DCacheModule(implicit p: Parameters) extends L1CacheModule
  with HasDCacheParameters

abstract class DCacheBundle(implicit p: Parameters) extends L1CacheBundle
  with HasDCacheParameters

class ReplacementAccessBundle(implicit p: Parameters) extends DCacheBundle {
  val set = UInt(log2Up(nSets).W)
  val way = UInt(log2Up(nWays).W)
}

class ReplacementWayReqIO(implicit p: Parameters) extends DCacheBundle {
  val set = ValidIO(UInt(log2Up(nSets).W))
  val dmWay = Output(UInt(log2Up(nWays).W))
  val way = Input(UInt(log2Up(nWays).W))
}

class DCacheExtraMeta(implicit p: Parameters) extends DCacheBundle
{
  val error = Bool() // cache line has been marked as corrupted by l2 / ecc error detected when store
  val prefetch = UInt(L1PfSourceBits.W) // cache line is first required by prefetch
  val access = Bool() // cache line has been accessed by load / store

  // val debug_access_timestamp = UInt(64.W) // last time a load / store / refill access that cacheline
}

// memory request in word granularity(load, mmio, lr/sc, atomics)
class DCacheWordReq(implicit p: Parameters) extends DCacheBundle
{
  val cmd    = UInt(M_SZ.W)
  val vaddr  = UInt(VAddrBits.W)
  val vaddr_dup = UInt(VAddrBits.W)
  val data   = UInt(VLEN.W)
  val mask   = UInt((VLEN/8).W)
  val id     = UInt(reqIdWidth.W)
  val instrtype   = UInt(sourceTypeWidth.W)
  val isFirstIssue = Bool()
  val replayCarry = new ReplayCarry(nWays)
  val lqIdx = new LqPtr

  val debug_robIdx = UInt(log2Ceil(RobSize).W)
  def dump(cond: Bool) = {
    XSDebug(cond, "DCacheWordReq: cmd: %x vaddr: %x data: %x mask: %x id: %d\n",
      cmd, vaddr, data, mask, id)
  }
}

// memory request in line granularity(store): data and mask cover a whole cache block
class DCacheLineReq(implicit p: Parameters) extends DCacheBundle
{
  val cmd    = UInt(M_SZ.W)
  val vaddr  = UInt(VAddrBits.W)
  val addr   = UInt(PAddrBits.W)
  val data   = UInt((cfg.blockBytes * 8).W)
  val mask   = UInt(cfg.blockBytes.W)
  val id     = UInt(reqIdWidth.W)
  def dump(cond: Bool) = {
    XSDebug(cond, "DCacheLineReq: cmd: %x addr: %x data: %x mask: %x id: %d\n",
      cmd, addr, data, mask, id)
  }
  def idx: UInt = get_idx(vaddr)
}

class DCacheWordReqWithVaddr(implicit p: Parameters) extends DCacheWordReq {
  val addr = UInt(PAddrBits.W)
  val wline = Bool()
}

class DCacheWordReqWithVaddrAndPfFlag(implicit p: Parameters) extends DCacheWordReqWithVaddr {
  val prefetch = Bool()
  val vecValid = Bool()
  val sqNeedDeq = Bool()

  /** Build a [[DCacheWordReqWithVaddr]] wire carrying every field this request
    * shares with that type; only `prefetch`, `vecValid` and `sqNeedDeq` are dropped.
    */
  def toDCacheWordReqWithVaddr() = {
    val res = Wire(new DCacheWordReqWithVaddr)
    res.vaddr := vaddr
    // vaddr_dup and lqIdx are inherited from DCacheWordReq as well; drive them so
    // the wire is fully initialised instead of leaving two fields unconnected.
    res.vaddr_dup := vaddr_dup
    res.wline := wline
    res.cmd := cmd
    res.addr := addr
    res.data := data
    res.mask := mask
    res.id := id
    res.instrtype := instrtype
    res.replayCarry := replayCarry
    res.isFirstIssue := isFirstIssue
    res.lqIdx := lqIdx
    res.debug_robIdx := debug_robIdx

    res
  }
}

class BaseDCacheWordResp(implicit p: Parameters) extends DCacheBundle
{
  // read in s2
  val data = UInt(VLEN.W)
  // select in s3
  val data_delayed = UInt(VLEN.W)
  val id     = UInt(reqIdWidth.W)
  // cache req missed, send it to miss queue
  val miss   = Bool()
  // cache miss, and failed to enter the missqueue, replay from RS is needed
  val replay = Bool()
  val replayCarry = new ReplayCarry(nWays)
  // data has been corrupted
  val tag_error = Bool() // tag error
  val mshr_id = UInt(log2Up(cfg.nMissEntries).W)

  val debug_robIdx = UInt(log2Ceil(RobSize).W)
  def dump(cond: Bool) = {
    XSDebug(cond, "DCacheWordResp: data: %x id: %d miss: %b replay: %b\n",
      data, id, miss, replay)
  }
}

class DCacheWordResp(implicit p: Parameters) extends BaseDCacheWordResp
{
  val meta_prefetch = UInt(L1PfSourceBits.W)
  val meta_access = Bool()
  // s2
  val handled = Bool()
  val real_miss = Bool()
  // s3: 1 cycle after data resp
  val error_delayed = Bool() // all kinds of errors, include tag error
  val replacementUpdated = Bool()
}

class BankedDCacheWordResp(implicit p: Parameters) extends DCacheWordResp
{
  val bank_data = Vec(DCacheBanks, Bits(DCacheSRAMRowBits.W))
  val bank_oh = UInt(DCacheBanks.W)
}

class DCacheWordRespWithError(implicit p: Parameters) extends BaseDCacheWordResp
{
  val error = Bool() // all kinds of errors, include tag error
  val nderr = Bool()
}

class DCacheLineResp(implicit p: Parameters) extends DCacheBundle
{
  val data   = UInt((cfg.blockBytes * 8).W)
  // cache req missed, send it to miss queue
  val miss   = Bool()
  // cache req nacked, replay it later
  val replay = Bool()
  val id     = UInt(reqIdWidth.W)
  def dump(cond: Bool) = {
    XSDebug(cond, "DCacheLineResp: data: %x id: %d miss: %b replay: %b\n",
      data, id, miss, replay)
  }
}

class Refill(implicit p: Parameters) extends DCacheBundle
{
  val addr   = UInt(PAddrBits.W)
  val data   = UInt(l1BusDataWidth.W)
  val error  = Bool() // refilled data has been corrupted
  // for debug usage
  val data_raw = UInt((cfg.blockBytes * 8).W)
  val hasdata = Bool()
  val refill_done = Bool()
  def dump(cond: Bool) = {
    XSDebug(cond, "Refill: addr: %x data: %x\n", addr, data)
  }
  val id     = UInt(log2Up(cfg.nMissEntries).W)
}

class Release(implicit p: Parameters) extends DCacheBundle
{
  val paddr  = UInt(PAddrBits.W)
  def dump(cond: Bool) = {
    XSDebug(cond, "Release: paddr: %x\n", paddr(PAddrBits-1, DCacheTagOffset))
  }
}

class DCacheWordIO(implicit p: Parameters) extends DCacheBundle
{
  val req  = DecoupledIO(new DCacheWordReq)
  val resp = Flipped(DecoupledIO(new DCacheWordResp))
}


class UncacheWordReq(implicit p: Parameters) extends DCacheBundle
{
  val cmd  = UInt(M_SZ.W)
  val addr = UInt(PAddrBits.W)
  val vaddr = UInt(VAddrBits.W) // for uncache buffer forwarding
  val data = UInt(XLEN.W)
  val mask = UInt((XLEN/8).W)
  val id   = UInt(uncacheIdxBits.W)
  val instrtype = UInt(sourceTypeWidth.W)
  val atomic = Bool()
  val nc = Bool()
  val memBackTypeMM = Bool()
  val isFirstIssue = Bool()
  val replayCarry = new ReplayCarry(nWays)

  def dump(cond: Bool) = {
    XSDebug(cond, "UncacheWordReq: cmd: %x addr: %x data: %x mask: %x id: %d\n",
      cmd, addr, data, mask, id)
  }
}

class UncacheIdResp(implicit p: Parameters) extends DCacheBundle {
  val mid = UInt(uncacheIdxBits.W)
  val sid = UInt(UncacheBufferIndexWidth.W)
  val is2lq = Bool()
  val nc = Bool()
}

class UncacheWordResp(implicit p: Parameters) extends DCacheBundle
{
  val data      = UInt(XLEN.W)
  val data_delayed = UInt(XLEN.W)
  val id        = UInt(UncacheBufferIndexWidth.W) // resp identified signals
  val nc        = Bool() // resp identified signals
  val is2lq     = Bool() // resp identified signals
  val miss      = Bool()
  val replay    = Bool()
  val tag_error = Bool()
  val error     = Bool()
  val nderr     = Bool()
  val replayCarry = new ReplayCarry(nWays)
  val mshr_id = UInt(log2Up(cfg.nMissEntries).W)  // FIXME: why uncacheWordResp is not merged to baseDcacheResp

  val debug_robIdx = UInt(log2Ceil(RobSize).W)
  def dump(cond: Bool) = {
    XSDebug(cond, "UncacheWordResp: data: %x id: %d miss: %b replay: %b, tag_error: %b, error: %b\n",
      data, id, miss, replay, tag_error, error)
  }
}

class UncacheWordIO(implicit p: Parameters) extends DCacheBundle
{
  val req  = DecoupledIO(new UncacheWordReq)
  val idResp = Flipped(ValidIO(new UncacheIdResp))
  val resp = Flipped(DecoupledIO(new UncacheWordResp))
}

class MainPipeResp(implicit p: Parameters) extends DCacheBundle {
  //distinguish amo
  val source  = UInt(sourceTypeWidth.W)
  val data    = UInt(QuadWordBits.W)
  val miss    = Bool()
  val miss_id = UInt(log2Up(cfg.nMissEntries).W)
  val replay  = Bool()
  val error   = Bool()

  val ack_miss_queue = Bool()

  val id     = UInt(reqIdWidth.W)

  def isAMO: Bool = source === AMO_SOURCE.U
  def isStore: Bool = source === STORE_SOURCE.U
}

class AtomicWordIO(implicit p: Parameters) extends DCacheBundle
{
  val req  = DecoupledIO(new MainPipeReq)
  val resp = Flipped(ValidIO(new MainPipeResp))
  val block_lr = Input(Bool())
}

class CMOReq(implicit p: Parameters) extends Bundle {
  val opcode = UInt(3.W)   // 0-cbo.clean, 1-cbo.flush, 2-cbo.inval, 3-cbo.zero
  val address = UInt(64.W)
}

class CMOResp(implicit p: Parameters) extends Bundle {
  val address = UInt(64.W)
  val nderr   = Bool()
}

// used by load unit
class DCacheLoadIO(implicit p: Parameters) extends DCacheWordIO
{
  // kill previous cycle's req
  val s1_kill_data_read = Output(Bool()) // only kill bankedDataRead at s1
  val s1_kill           = Output(Bool()) // kill loadpipe req at s1
  val s2_kill           = Output(Bool())
  val s0_pc = Output(UInt(VAddrBits.W))
  val s1_pc = Output(UInt(VAddrBits.W))
  val s2_pc = Output(UInt(VAddrBits.W))
  // cycle 0: load has updated replacement before
  val replacementUpdated = Output(Bool())
  val is128Req = Bool()
  // cycle 0: prefetch source bits
  val pf_source = Output(UInt(L1PfSourceBits.W))
  // cycle0: load microop
  // val s0_uop = Output(new MicroOp)
  // cycle 0: virtual address: req.addr
  // cycle 1: physical address: s1_paddr
  val s1_paddr_dup_lsu = Output(UInt(PAddrBits.W)) // lsu side paddr
  val s1_paddr_dup_dcache = Output(UInt(PAddrBits.W)) // dcache side paddr
  val s1_disable_fast_wakeup = Input(Bool())
  // cycle 2: hit signal
  val s2_hit = Input(Bool()) // hit signal for lsu,
  val s2_first_hit = Input(Bool())
  val s2_bank_conflict = Input(Bool())
  val s2_wpu_pred_fail = Input(Bool())
  val s2_mq_nack = Input(Bool())

  // debug
  val debug_s1_hit_way = Input(UInt(nWays.W))
  val debug_s2_pred_way_num = Input(UInt(XLEN.W))
  val debug_s2_dm_way_num = Input(UInt(XLEN.W))
  val debug_s2_real_way_num = Input(UInt(XLEN.W))
}

class DCacheLineIO(implicit p: Parameters) extends DCacheBundle
{
  val req  = DecoupledIO(new DCacheLineReq)
  val resp = Flipped(DecoupledIO(new DCacheLineResp))
}

class DCacheToSbufferIO(implicit p: Parameters) extends DCacheBundle {
  // sbuffer will directly send request to dcache main pipe
  val req = Flipped(Decoupled(new DCacheLineReq))

  val main_pipe_hit_resp = ValidIO(new DCacheLineResp)
  //val refill_hit_resp = ValidIO(new DCacheLineResp)

  val replay_resp = ValidIO(new DCacheLineResp)

  //def hit_resps: Seq[ValidIO[DCacheLineResp]] = Seq(main_pipe_hit_resp, refill_hit_resp)
  def hit_resps: Seq[ValidIO[DCacheLineResp]] = Seq(main_pipe_hit_resp)
}

// forward tilelink channel D's data to ldu
class DcacheToLduForwardIO(implicit p: Parameters) extends DCacheBundle {
  val valid = Bool()
  val data = UInt(l1BusDataWidth.W)
  val mshrid = UInt(log2Up(cfg.nMissEntries).W)
  val last = Bool()
  val corrupt = Bool()

  /** Drive this bundle from a TileLink D beat. */
  def apply(d: DecoupledIO[TLBundleD], edge: TLEdgeOut) = {
    val isKeyword = d.bits.echo.lift(IsKeywordKey).getOrElse(false.B)
    val (_, _, done, _) = edge.count(d)
    valid := d.valid
    data := d.bits.data
    mshrid := d.bits.source
    last := isKeyword ^ done
    corrupt := d.bits.corrupt || d.bits.denied
  }

  /** Tie the bundle off: no valid beat, no corruption, payload left as DontCare. */
  def dontCare() = {
    valid := false.B
    data := DontCare
    mshrid := DontCare
    last := DontCare
    corrupt := false.B
  }

  /** Try to forward the current D beat to a load.
    *
    * @return (registered match flag, registered byte vector, corrupt)
    */
  def forward(req_valid : Bool, req_mshr_id : UInt, req_paddr : UInt) = {
    val all_match = req_valid && valid &&
                req_mshr_id === mshrid &&
                req_paddr(log2Up(refillBytes)) === last
    val forward_D = RegInit(false.B)
    val forwardData = RegInit(VecInit(List.fill(VLEN/8)(0.U(8.W))))

    val block_idx = req_paddr(log2Up(refillBytes) - 1, 3)
    val block_data = Wire(Vec(l1BusDataWidth / 64, UInt(64.W)))
    (0 until l1BusDataWidth / 64).map(i => {
      block_data(i) := data(64 * i + 63, 64 * i)
    })
    val selected_data = Wire(UInt(128.W))
    selected_data := Mux(req_paddr(3), Fill(2, block_data(block_idx)), Cat(block_data(block_idx + 1.U), block_data(block_idx)))

    forward_D := all_match
    for (i <- 0 until VLEN/8) {
      when (all_match) {
        forwardData(i) := selected_data(8 * i + 7, 8 * i)
      }
    }

    (forward_D, forwardData, corrupt)
  }
}

class MissEntryForwardIO(implicit p: Parameters) extends DCacheBundle {
  val inflight = Bool()
  val paddr = UInt(PAddrBits.W)
  val raw_data = Vec(blockRows, UInt(rowBits.W))
  val firstbeat_valid = Bool()
  val lastbeat_valid = Bool()
  val corrupt = Bool()

  // check if we can forward from mshr or D channel
  def check(req_valid : Bool, req_paddr : UInt) = {
    RegNext(req_valid && inflight && req_paddr(PAddrBits - 1, blockOffBits) === paddr(PAddrBits - 1, blockOffBits)) // TODO: clock gate(1-bit)
  }

  /** Select the requested data out of `raw_data`.
    *
    * @return (registered "beat is available" flag, registered byte vector)
    */
  def forward(req_valid : Bool, req_paddr : UInt) = {
    val all_match = (req_paddr(log2Up(refillBytes)) === 0.U && firstbeat_valid) ||
                    (req_paddr(log2Up(refillBytes)) === 1.U && lastbeat_valid)

    val forward_mshr = RegInit(false.B)
    val forwardData = RegInit(VecInit(List.fill(VLEN/8)(0.U(8.W))))

    val block_idx = req_paddr(log2Up(refillBytes), 3)
    val block_data = raw_data

    val selected_data = Wire(UInt(128.W))
    selected_data := Mux(req_paddr(3), Fill(2, block_data(block_idx)), Cat(block_data(block_idx + 1.U), block_data(block_idx)))

    forward_mshr := all_match
    for (i <- 0 until VLEN/8) {
      forwardData(i) := selected_data(8 * i + 7, 8 * i)
    }

    (forward_mshr, forwardData)
  }
}

// forward mshr's data to ldu
class LduToMissqueueForwardIO(implicit p: Parameters) extends DCacheBundle {
  // TODO: use separate Bundles for req and resp
  // req
  val valid = Input(Bool())
  val mshrid = Input(UInt(log2Up(cfg.nMissEntries).W))
  val paddr = Input(UInt(PAddrBits.W))
  // resp
  val forward_mshr = Output(Bool())
  val forwardData = Output(Vec(VLEN/8, UInt(8.W)))
  val forward_result_valid = Output(Bool())
  val corrupt = Output(Bool())

  // Why? What is the purpose of `connect`???
  def connect(sink: LduToMissqueueForwardIO) = {
    sink.valid := valid
    sink.mshrid := mshrid
    sink.paddr := paddr
    forward_mshr := sink.forward_mshr
    forwardData := sink.forwardData
    forward_result_valid := sink.forward_result_valid
    corrupt := sink.corrupt
  }

  def forward() = {
    (forward_result_valid, forward_mshr, forwardData, corrupt)
  }
}

class StorePrefetchReq(implicit p: Parameters) extends DCacheBundle {
  val paddr = UInt(PAddrBits.W)
  val vaddr = UInt(VAddrBits.W)
}

class DCacheToLsuIO(implicit p: Parameters) extends DCacheBundle {
  val load  = Vec(LoadPipelineWidth, Flipped(new DCacheLoadIO)) // for speculative load
  val sta   = Vec(StorePipelineWidth, Flipped(new DCacheStoreIO)) // for non-blocking store
  //val lsq = ValidIO(new Refill)  // refill to load queue, wake up load misses
  val tl_d_channel = Output(new DcacheToLduForwardIO)
  val store = new DCacheToSbufferIO // for sbuffer
  val atomics  = Flipped(new AtomicWordIO)  // atomics reqs
  val release = ValidIO(new Release) // cacheline release hint for ld-ld violation check
  val forward_D = Output(Vec(LoadPipelineWidth, new DcacheToLduForwardIO))
  val forward_mshr = Vec(LoadPipelineWidth, new LduToMissqueueForwardIO)
}

class DCacheTopDownIO(implicit p: Parameters) extends DCacheBundle {
  val robHeadVaddr = Flipped(Valid(UInt(VAddrBits.W)))
  val robHeadMissInDCache = Output(Bool())
  val robHeadOtherReplay = Input(Bool())
}

class DCacheIO(implicit p: Parameters) extends DCacheBundle {
  val hartId = Input(UInt(hartIdLen.W))
  val l2_pf_store_only = Input(Bool())
  val lsu = new DCacheToLsuIO
  val error = ValidIO(new L1CacheErrorInfo)
  val mshrFull = Output(Bool())
  val memSetPattenDetected = Output(Bool())
  val lqEmpty = Input(Bool())
  val pf_ctrl = Output(new PrefetchControlBundle)
  val force_write = Input(Bool())
  val sms_agt_evict_req = DecoupledIO(new AGTEvictReq)
  val debugTopDown = new DCacheTopDownIO
  val debugRolling = Flipped(new RobDebugRollingIO)
  val l2_hint = Input(Valid(new L2ToL1Hint()))
  val cmoOpReq = Flipped(DecoupledIO(new CMOReq))
  val cmoOpResp = DecoupledIO(new CMOResp)
  val l1Miss = Output(Bool())
}

/** Fixed-priority grant vector: entry i is true when no request with a lower index is asserted. */
private object ArbiterCtrl {
  def apply(request: Seq[Bool]): Seq[Bool] = request.length match {
    case 0 => Seq()
    case 1 => Seq(true.B)
    case _ => true.B +: request.tail.init.scanLeft(request.head)(_ || _).map(!_)
  }
}

/** Fixed-priority arbiter (lower index wins) whose payload and index are selected
  * by a recursive binary mux tree instead of a linear priority chain.
  */
class TreeArbiter[T <: MissReqWoStoreData](val gen: T, val n: Int) extends Module{
  val io = IO(new ArbiterIO(gen, n))

  /** Recursively pick the first valid entry of `in`; `sIdx` is the index of `in(0)`. */
  def selectTree(in: Vec[Valid[T]], sIdx: UInt): Tuple2[UInt, T] = {
    if (in.length == 1) {
      (sIdx, in(0).bits)
    } else if (in.length == 2) {
      (
        Mux(in(0).valid, sIdx, sIdx + 1.U),
        Mux(in(0).valid, in(0).bits, in(1).bits)
      )
    } else {
      val half = in.length / 2
      val leftValid = in.slice(0, half).map(_.valid).reduce(_ || _)
      val (leftIdx, leftSel) = selectTree(VecInit(in.slice(0, half)), sIdx)
      val (rightIdx, rightSel) = selectTree(VecInit(in.slice(half, in.length)), sIdx + half.U)
      (
        Mux(leftValid, leftIdx, rightIdx),
        Mux(leftValid, leftSel, rightSel)
      )
    }
  }
  val ins = Wire(Vec(n, Valid(gen)))
  for (i <- 0 until n) {
    ins(i).valid := io.in(i).valid
    ins(i).bits := io.in(i).bits
  }
  val (idx, sel) = selectTree(ins, 0.U)
  // NOTE: io.chosen is very slow, dont use it
  io.chosen := idx
  io.out.bits := sel

  val grant = ArbiterCtrl(io.in.map(_.valid))
  for ((in, g) <- io.in.zip(grant))
    in.ready := g && io.out.ready
  io.out.valid := !grant.last || io.in.last.valid
}

class DCacheMEQueryIOBundle(implicit p: Parameters) extends DCacheBundle
{
  val req              = ValidIO(new MissReqWoStoreData)
  val primary_ready    = Input(Bool())
  val secondary_ready  = Input(Bool())
  val secondary_reject = Input(Bool())
}

class DCacheMQQueryIOBundle(implicit p: Parameters) extends DCacheBundle
{
  val req   = ValidIO(new MissReq)
  val ready = Input(Bool())
}

/** Forwards each miss request to its `queryMQ` port and derives `in(i).ready`:
  * port i is ready when its query reports ready and no lower-indexed input is valid.
  */
class MissReadyGen(val n: Int)(implicit p: Parameters) extends XSModule {
  val io = IO(new Bundle {
    val in = Vec(n, Flipped(DecoupledIO(new MissReq)))
    val queryMQ = Vec(n, new DCacheMQQueryIOBundle)
  })

  val mqReadyVec = io.queryMQ.map(_.ready)

  io.queryMQ.zipWithIndex.foreach{
    case (q, idx) => {
      q.req.valid := io.in(idx).valid
      q.req.bits  := io.in(idx).bits
    }
  }
  // connections only, no result is collected: foreach rather than map
  io.in.zipWithIndex.foreach {
    case (r, idx) => {
      if (idx == 0) {
        r.ready := mqReadyVec(idx)
      } else {
        r.ready := mqReadyVec(idx) && !Cat(io.in.slice(0, idx).map(_.valid)).orR
      }
    }
  }

}

/** Diplomatic wrapper of the L1 DCache: declares the TileLink client node and the
  * optional control unit, and instantiates [[DCacheImp]] as its implementation.
  */
class DCache()(implicit p: Parameters) extends LazyModule with HasDCacheParameters {
  override def shouldBeInlined: Boolean = false

  val reqFields: Seq[BundleFieldBase] = Seq(
    PrefetchField(),
    ReqSourceField(),
    VaddrField(VAddrBits - blockOffBits),
    //  IsKeywordField()
  ) ++ cacheParams.aliasBitsOpt.map(AliasField)
  val echoFields: Seq[BundleFieldBase] = Seq(
    IsKeywordField()
  )

  val clientParameters = TLMasterPortParameters.v1(
    Seq(TLMasterParameters.v1(
      name = "dcache",
      sourceId = IdRange(0, nEntries + 1),
      supportsProbe = TransferSizes(cfg.blockBytes)
    )),
    requestFields = reqFields,
    echoFields = echoFields
  )

  val clientNode = TLClientNode(Seq(clientParameters))
  val cacheCtrlOpt = cacheCtrlParamsOpt.map(params => LazyModule(new CtrlUnit(params)))

  lazy val module = new DCacheImp(this)
}


class DCacheImp(outer: DCache) extends LazyModuleImp(outer) with HasDCacheParameters with HasPerfEvents with HasL1PrefetchSourceParameter {

  val io = IO(new DCacheIO)

  val (bus, edge) = outer.clientNode.out.head
  require(bus.d.bits.data.getWidth == l1BusDataWidth, "DCache: tilelink width does not match")

  println("DCache:")
  println(" DCacheSets: " + DCacheSets)
  println(" DCacheSetDiv: " + DCacheSetDiv)
  println(" DCacheWays: " + DCacheWays)
  println(" DCacheBanks: " + DCacheBanks)
  println(" DCacheSRAMRowBits: " + DCacheSRAMRowBits)
  println(" DCacheWordOffset: " + DCacheWordOffset)
  println(" DCacheBankOffset: " + DCacheBankOffset)
  println(" DCacheSetOffset: " + DCacheSetOffset)
  println(" DCacheTagOffset: " + DCacheTagOffset)
  println(" DCacheAboveIndexOffset: " + DCacheAboveIndexOffset)
  println(" DcacheMaxPrefetchEntry: " + MaxPrefetchEntry)
  println(" WPUEnable: " + dwpuParam.enWPU)
  println(" WPUEnableCfPred: " + dwpuParam.enCfPred)
  println(" WPUAlgorithm: " + dwpuParam.algoName)
  println(" HasCMO: " + HasCMO)

  // Enable L1 Store prefetch
  val StorePrefetchL1Enabled = EnableStorePrefetchAtCommit || EnableStorePrefetchAtIssue || EnableStorePrefetchSPB
  val MetaReadPort =
    if (StorePrefetchL1Enabled)
      1 + backendParams.LduCnt + backendParams.StaCnt + backendParams.HyuCnt
    else
      1 + backendParams.LduCnt + backendParams.HyuCnt
  val TagReadPort =
    if (StorePrefetchL1Enabled)
      1 + backendParams.LduCnt + backendParams.StaCnt + backendParams.HyuCnt
    else
      1 + backendParams.LduCnt + backendParams.HyuCnt

  // Enable L1 Load prefetch
  val LoadPrefetchL1Enabled = true
  val AccessArrayReadPort = if(LoadPrefetchL1Enabled) LoadPipelineWidth + 1 + 1 else LoadPipelineWidth + 1
  val PrefetchArrayReadPort = if(LoadPrefetchL1Enabled) LoadPipelineWidth + 1 + 1 else LoadPipelineWidth + 1

  //----------------------------------------
  // core data structures
  val bankedDataArray = if(dwpuParam.enWPU) Module(new SramedDataArray) else Module(new BankedDataArray)
  val metaArray = Module(new L1CohMetaArray(readPorts = LoadPipelineWidth + 1, writePorts = 1))
  val errorArray = Module(new L1FlagMetaArray(readPorts = LoadPipelineWidth + 1, writePorts = 1))
  val prefetchArray = Module(new L1PrefetchSourceArray(readPorts = PrefetchArrayReadPort, writePorts = 1 + LoadPipelineWidth)) // prefetch flag array
  val accessArray = Module(new L1FlagMetaArray(readPorts = AccessArrayReadPort, writePorts = LoadPipelineWidth + 1))
  val tagArray = Module(new DuplicatedTagArray(readPorts = TagReadPort))
  val prefetcherMonitor = Module(new PrefetcherMonitor)
  val fdpMonitor =  Module(new FDPrefetcherMonitor)
  val bloomFilter =  Module(new BloomFilter(BLOOM_FILTER_ENTRY_NUM, true))
  val counterFilter = Module(new CounterFilter)
  bankedDataArray.dump()

  //----------------------------------------
  // miss queue
  // missReqArb port:
  // enableStorePrefetch: main pipe * 1 + load pipe * 2 + store pipe * 1 +
  // hybrid * 1; disable: main pipe * 1 + load pipe * 2 + hybrid * 1
  // higher priority is given to lower indices
  val MissReqPortCount = if(StorePrefetchL1Enabled) 1 + backendParams.LduCnt + backendParams.StaCnt + backendParams.HyuCnt else 1 + backendParams.LduCnt + backendParams.HyuCnt
  val MainPipeMissReqPort = 0
  val HybridMissReqBase = MissReqPortCount - backendParams.HyuCnt

  //----------------------------------------
  // core modules
  val ldu = Seq.tabulate(LoadPipelineWidth)({ i => Module(new LoadPipe(i))})
  val stu = Seq.tabulate(StorePipelineWidth)({ i => Module(new StorePipe(i))})
  val mainPipe     = Module(new MainPipe)
  //
val refillPipe = Module(new RefillPipe) 1015 val missQueue = Module(new MissQueue(edge, MissReqPortCount)) 1016 val probeQueue = Module(new ProbeQueue(edge)) 1017 val wb = Module(new WritebackQueue(edge)) 1018 1019 missQueue.io.lqEmpty := io.lqEmpty 1020 missQueue.io.hartId := io.hartId 1021 missQueue.io.l2_pf_store_only := RegNext(io.l2_pf_store_only, false.B) 1022 missQueue.io.debugTopDown <> io.debugTopDown 1023 missQueue.io.l2_hint <> RegNext(io.l2_hint) 1024 missQueue.io.mainpipe_info := mainPipe.io.mainpipe_info 1025 mainPipe.io.refill_info := missQueue.io.refill_info 1026 mainPipe.io.replace_block := missQueue.io.replace_block 1027 mainPipe.io.sms_agt_evict_req <> io.sms_agt_evict_req 1028 io.memSetPattenDetected := missQueue.io.memSetPattenDetected 1029 1030 // l1 dcache controller 1031 outer.cacheCtrlOpt.foreach { 1032 case mod => 1033 mod.module.io_pseudoError.foreach { 1034 case x => x.ready := false.B 1035 } 1036 } 1037 ldu.foreach { 1038 case mod => 1039 mod.io.pseudo_error.valid := false.B 1040 mod.io.pseudo_error.bits := DontCare 1041 } 1042 mainPipe.io.pseudo_error.valid := false.B 1043 mainPipe.io.pseudo_error.bits := DontCare 1044 bankedDataArray.io.pseudo_error.valid := false.B 1045 bankedDataArray.io.pseudo_error.bits := DontCare 1046 1047 // pseudo tag ecc error 1048 if (outer.cacheCtrlOpt.nonEmpty && EnableTagEcc) { 1049 val ctrlUnit = outer.cacheCtrlOpt.head.module 1050 ldu.map(mod => mod.io.pseudo_error <> ctrlUnit.io_pseudoError(0)) 1051 mainPipe.io.pseudo_error <> ctrlUnit.io_pseudoError(0) 1052 ctrlUnit.io_pseudoError(0).ready := mainPipe.io.pseudo_tag_error_inj_done || 1053 ldu.map(_.io.pseudo_tag_error_inj_done).reduce(_|_) 1054 } 1055 1056 // pseudo data ecc error 1057 if (outer.cacheCtrlOpt.nonEmpty && EnableDataEcc) { 1058 val ctrlUnit = outer.cacheCtrlOpt.head.module 1059 bankedDataArray.io.pseudo_error <> ctrlUnit.io_pseudoError(1) 1060 ctrlUnit.io_pseudoError(1).ready := bankedDataArray.io.pseudo_error.ready && 1061 
(mainPipe.io.pseudo_data_error_inj_done || 1062 ldu.map(_.io.pseudo_data_error_inj_done).reduce(_|_)) 1063 } 1064 1065 val errors = ldu.map(_.io.error) ++ // load error 1066 Seq(mainPipe.io.error) // store / misc error 1067 val error_valid = errors.map(e => e.valid).reduce(_|_) 1068 io.error.bits <> RegEnable( 1069 ParallelMux(errors.map(e => RegNext(e.valid) -> RegEnable(e.bits, e.valid))), 1070 RegNext(error_valid)) 1071 io.error.valid := RegNext(RegNext(error_valid, init = false.B), init = false.B) 1072 1073 //---------------------------------------- 1074 // meta array 1075 val HybridLoadReadBase = LoadPipelineWidth - backendParams.HyuCnt 1076 val HybridStoreReadBase = StorePipelineWidth - backendParams.HyuCnt 1077 1078 val hybrid_meta_read_ports = Wire(Vec(backendParams.HyuCnt, DecoupledIO(new MetaReadReq))) 1079 val hybrid_meta_resp_ports = Wire(Vec(backendParams.HyuCnt, ldu(0).io.meta_resp.cloneType)) 1080 for (i <- 0 until backendParams.HyuCnt) { 1081 val HybridLoadMetaReadPort = HybridLoadReadBase + i 1082 val HybridStoreMetaReadPort = HybridStoreReadBase + i 1083 1084 hybrid_meta_read_ports(i).valid := ldu(HybridLoadMetaReadPort).io.meta_read.valid || 1085 (stu(HybridStoreMetaReadPort).io.meta_read.valid && StorePrefetchL1Enabled.B) 1086 hybrid_meta_read_ports(i).bits := Mux(ldu(HybridLoadMetaReadPort).io.meta_read.valid, ldu(HybridLoadMetaReadPort).io.meta_read.bits, 1087 stu(HybridStoreMetaReadPort).io.meta_read.bits) 1088 1089 ldu(HybridLoadMetaReadPort).io.meta_read.ready := hybrid_meta_read_ports(i).ready 1090 stu(HybridStoreMetaReadPort).io.meta_read.ready := hybrid_meta_read_ports(i).ready && StorePrefetchL1Enabled.B 1091 1092 ldu(HybridLoadMetaReadPort).io.meta_resp := hybrid_meta_resp_ports(i) 1093 stu(HybridStoreMetaReadPort).io.meta_resp := hybrid_meta_resp_ports(i) 1094 } 1095 1096 // read / write coh meta 1097 val meta_read_ports = ldu.map(_.io.meta_read).take(HybridLoadReadBase) ++ 1098 Seq(mainPipe.io.meta_read) ++ 1099 
stu.map(_.io.meta_read).take(HybridStoreReadBase) ++ hybrid_meta_read_ports 1100 1101 val meta_resp_ports = ldu.map(_.io.meta_resp).take(HybridLoadReadBase) ++ 1102 Seq(mainPipe.io.meta_resp) ++ 1103 stu.map(_.io.meta_resp).take(HybridStoreReadBase) ++ hybrid_meta_resp_ports 1104 1105 val meta_write_ports = Seq( 1106 mainPipe.io.meta_write 1107 // refillPipe.io.meta_write 1108 ) 1109 if(StorePrefetchL1Enabled) { 1110 meta_read_ports.zip(metaArray.io.read).foreach { case (p, r) => r <> p } 1111 meta_resp_ports.zip(metaArray.io.resp).foreach { case (p, r) => p := r } 1112 } else { 1113 (meta_read_ports.take(HybridLoadReadBase + 1) ++ 1114 meta_read_ports.takeRight(backendParams.HyuCnt)).zip(metaArray.io.read).foreach { case (p, r) => r <> p } 1115 (meta_resp_ports.take(HybridLoadReadBase + 1) ++ 1116 meta_resp_ports.takeRight(backendParams.HyuCnt)).zip(metaArray.io.resp).foreach { case (p, r) => p := r } 1117 1118 meta_read_ports.drop(HybridLoadReadBase + 1).take(HybridStoreReadBase).foreach { case p => p.ready := false.B } 1119 meta_resp_ports.drop(HybridLoadReadBase + 1).take(HybridStoreReadBase).foreach { case p => p := 0.U.asTypeOf(p) } 1120 } 1121 meta_write_ports.zip(metaArray.io.write).foreach { case (p, w) => w <> p } 1122 1123 // read extra meta (exclude stu) 1124 (meta_read_ports.take(HybridLoadReadBase + 1) ++ 1125 meta_read_ports.takeRight(backendParams.HyuCnt)).zip(errorArray.io.read).foreach { case (p, r) => r <> p } 1126 (meta_read_ports.take(HybridLoadReadBase + 1) ++ 1127 meta_read_ports.takeRight(backendParams.HyuCnt)).zip(prefetchArray.io.read).foreach { case (p, r) => r <> p } 1128 (meta_read_ports.take(HybridLoadReadBase + 1) ++ 1129 meta_read_ports.takeRight(backendParams.HyuCnt)).zip(accessArray.io.read).foreach { case (p, r) => r <> p } 1130 val extra_meta_resp_ports = ldu.map(_.io.extra_meta_resp).take(HybridLoadReadBase) ++ 1131 Seq(mainPipe.io.extra_meta_resp) ++ 1132 ldu.map(_.io.extra_meta_resp).takeRight(backendParams.HyuCnt) 1133 
extra_meta_resp_ports.zip(errorArray.io.resp).foreach { case (p, r) => { 1134 (0 until nWays).map(i => { p(i).error := r(i) }) 1135 }} 1136 extra_meta_resp_ports.zip(prefetchArray.io.resp).foreach { case (p, r) => { 1137 (0 until nWays).map(i => { p(i).prefetch := r(i) }) 1138 }} 1139 extra_meta_resp_ports.zip(accessArray.io.resp).foreach { case (p, r) => { 1140 (0 until nWays).map(i => { p(i).access := r(i) }) 1141 }} 1142 1143 if(LoadPrefetchL1Enabled) { 1144 // use last port to read prefetch and access flag 1145// prefetchArray.io.read.last.valid := refillPipe.io.prefetch_flag_write.valid 1146// prefetchArray.io.read.last.bits.idx := refillPipe.io.prefetch_flag_write.bits.idx 1147// prefetchArray.io.read.last.bits.way_en := refillPipe.io.prefetch_flag_write.bits.way_en 1148// 1149// accessArray.io.read.last.valid := refillPipe.io.prefetch_flag_write.valid 1150// accessArray.io.read.last.bits.idx := refillPipe.io.prefetch_flag_write.bits.idx 1151// accessArray.io.read.last.bits.way_en := refillPipe.io.prefetch_flag_write.bits.way_en 1152 prefetchArray.io.read.last.valid := mainPipe.io.prefetch_flag_write.valid 1153 prefetchArray.io.read.last.bits.idx := mainPipe.io.prefetch_flag_write.bits.idx 1154 prefetchArray.io.read.last.bits.way_en := mainPipe.io.prefetch_flag_write.bits.way_en 1155 1156 accessArray.io.read.last.valid := mainPipe.io.prefetch_flag_write.valid 1157 accessArray.io.read.last.bits.idx := mainPipe.io.prefetch_flag_write.bits.idx 1158 accessArray.io.read.last.bits.way_en := mainPipe.io.prefetch_flag_write.bits.way_en 1159 1160 val extra_flag_valid = RegNext(mainPipe.io.prefetch_flag_write.valid) 1161 val extra_flag_way_en = RegEnable(mainPipe.io.prefetch_flag_write.bits.way_en, mainPipe.io.prefetch_flag_write.valid) 1162 val extra_flag_prefetch = Mux1H(extra_flag_way_en, prefetchArray.io.resp.last) 1163 val extra_flag_access = Mux1H(extra_flag_way_en, accessArray.io.resp.last) 1164 1165 prefetcherMonitor.io.validity.good_prefetch := 
extra_flag_valid && isPrefetchRelated(extra_flag_prefetch) && extra_flag_access 1166 prefetcherMonitor.io.validity.bad_prefetch := extra_flag_valid && isPrefetchRelated(extra_flag_prefetch) && !extra_flag_access 1167 } 1168 1169 // write extra meta 1170 val error_flag_write_ports = Seq( 1171 mainPipe.io.error_flag_write // error flag generated by corrupted store 1172 // refillPipe.io.error_flag_write // corrupted signal from l2 1173 ) 1174 error_flag_write_ports.zip(errorArray.io.write).foreach { case (p, w) => w <> p } 1175 1176 val prefetch_flag_write_ports = ldu.map(_.io.prefetch_flag_write) ++ Seq( 1177 mainPipe.io.prefetch_flag_write // set prefetch_flag to false if coh is set to Nothing 1178 // refillPipe.io.prefetch_flag_write // refill required by prefetch will set prefetch_flag 1179 ) 1180 prefetch_flag_write_ports.zip(prefetchArray.io.write).foreach { case (p, w) => w <> p } 1181 1182 // FIXME: add hybrid unit? 1183 val same_cycle_update_pf_flag = ldu(0).io.prefetch_flag_write.valid && ldu(1).io.prefetch_flag_write.valid && (ldu(0).io.prefetch_flag_write.bits.idx === ldu(1).io.prefetch_flag_write.bits.idx) && (ldu(0).io.prefetch_flag_write.bits.way_en === ldu(1).io.prefetch_flag_write.bits.way_en) 1184 XSPerfAccumulate("same_cycle_update_pf_flag", same_cycle_update_pf_flag) 1185 1186 val access_flag_write_ports = ldu.map(_.io.access_flag_write) ++ Seq( 1187 mainPipe.io.access_flag_write 1188 // refillPipe.io.access_flag_write 1189 ) 1190 access_flag_write_ports.zip(accessArray.io.write).foreach { case (p, w) => w <> p } 1191 1192 //---------------------------------------- 1193 // tag array 1194 if(StorePrefetchL1Enabled) { 1195 require(tagArray.io.read.size == (LoadPipelineWidth + StorePipelineWidth - backendParams.HyuCnt + 1)) 1196 }else { 1197 require(tagArray.io.read.size == (LoadPipelineWidth + 1)) 1198 } 1199 // val tag_write_intend = missQueue.io.refill_pipe_req.valid || mainPipe.io.tag_write_intend 1200 val tag_write_intend = 
mainPipe.io.tag_write_intend 1201 assert(!RegNext(!tag_write_intend && tagArray.io.write.valid)) 1202 ldu.take(HybridLoadReadBase).zipWithIndex.foreach { 1203 case (ld, i) => 1204 tagArray.io.read(i) <> ld.io.tag_read 1205 ld.io.tag_resp := tagArray.io.resp(i) 1206 ld.io.tag_read.ready := !tag_write_intend 1207 } 1208 if(StorePrefetchL1Enabled) { 1209 stu.take(HybridStoreReadBase).zipWithIndex.foreach { 1210 case (st, i) => 1211 tagArray.io.read(HybridLoadReadBase + i) <> st.io.tag_read 1212 st.io.tag_resp := tagArray.io.resp(HybridLoadReadBase + i) 1213 st.io.tag_read.ready := !tag_write_intend 1214 } 1215 }else { 1216 stu.foreach { 1217 case st => 1218 st.io.tag_read.ready := false.B 1219 st.io.tag_resp := 0.U.asTypeOf(st.io.tag_resp) 1220 } 1221 } 1222 for (i <- 0 until backendParams.HyuCnt) { 1223 val HybridLoadTagReadPort = HybridLoadReadBase + i 1224 val HybridStoreTagReadPort = HybridStoreReadBase + i 1225 val TagReadPort = 1226 if (EnableStorePrefetchSPB) 1227 HybridLoadReadBase + HybridStoreReadBase + i 1228 else 1229 HybridLoadReadBase + i 1230 1231 // read tag 1232 ldu(HybridLoadTagReadPort).io.tag_read.ready := false.B 1233 stu(HybridStoreTagReadPort).io.tag_read.ready := false.B 1234 1235 if (StorePrefetchL1Enabled) { 1236 when (ldu(HybridLoadTagReadPort).io.tag_read.valid) { 1237 tagArray.io.read(TagReadPort) <> ldu(HybridLoadTagReadPort).io.tag_read 1238 ldu(HybridLoadTagReadPort).io.tag_read.ready := !tag_write_intend 1239 } .otherwise { 1240 tagArray.io.read(TagReadPort) <> stu(HybridStoreTagReadPort).io.tag_read 1241 stu(HybridStoreTagReadPort).io.tag_read.ready := !tag_write_intend 1242 } 1243 } else { 1244 tagArray.io.read(TagReadPort) <> ldu(HybridLoadTagReadPort).io.tag_read 1245 ldu(HybridLoadTagReadPort).io.tag_read.ready := !tag_write_intend 1246 } 1247 1248 // tag resp 1249 ldu(HybridLoadTagReadPort).io.tag_resp := tagArray.io.resp(TagReadPort) 1250 stu(HybridStoreTagReadPort).io.tag_resp := tagArray.io.resp(TagReadPort) 1251 } 1252 
tagArray.io.read.last <> mainPipe.io.tag_read 1253 mainPipe.io.tag_resp := tagArray.io.resp.last 1254 1255 val fake_tag_read_conflict_this_cycle = PopCount(ldu.map(ld=> ld.io.tag_read.valid)) 1256 XSPerfAccumulate("fake_tag_read_conflict", fake_tag_read_conflict_this_cycle) 1257 1258 val tag_write_arb = Module(new Arbiter(new TagWriteReq, 1)) 1259 // tag_write_arb.io.in(0) <> refillPipe.io.tag_write 1260 tag_write_arb.io.in(0) <> mainPipe.io.tag_write 1261 tagArray.io.write <> tag_write_arb.io.out 1262 1263 ldu.map(m => { 1264 m.io.vtag_update.valid := tagArray.io.write.valid 1265 m.io.vtag_update.bits := tagArray.io.write.bits 1266 }) 1267 1268 //---------------------------------------- 1269 // data array 1270 mainPipe.io.data_read.zip(ldu).map(x => x._1 := x._2.io.lsu.req.valid) 1271 1272 val dataWriteArb = Module(new Arbiter(new L1BankedDataWriteReq, 1)) 1273 // dataWriteArb.io.in(0) <> refillPipe.io.data_write 1274 dataWriteArb.io.in(0) <> mainPipe.io.data_write 1275 1276 bankedDataArray.io.write <> dataWriteArb.io.out 1277 1278 for (bank <- 0 until DCacheBanks) { 1279 val dataWriteArb_dup = Module(new Arbiter(new L1BankedDataWriteReqCtrl, 1)) 1280 // dataWriteArb_dup.io.in(0).valid := refillPipe.io.data_write_dup(bank).valid 1281 // dataWriteArb_dup.io.in(0).bits := refillPipe.io.data_write_dup(bank).bits 1282 dataWriteArb_dup.io.in(0).valid := mainPipe.io.data_write_dup(bank).valid 1283 dataWriteArb_dup.io.in(0).bits := mainPipe.io.data_write_dup(bank).bits 1284 1285 bankedDataArray.io.write_dup(bank) <> dataWriteArb_dup.io.out 1286 } 1287 1288 bankedDataArray.io.readline <> mainPipe.io.data_readline 1289 bankedDataArray.io.readline_can_go := mainPipe.io.data_readline_can_go 1290 bankedDataArray.io.readline_stall := mainPipe.io.data_readline_stall 1291 bankedDataArray.io.readline_can_resp := mainPipe.io.data_readline_can_resp 1292 bankedDataArray.io.readline_intend := mainPipe.io.data_read_intend 1293 mainPipe.io.readline_error_delayed := 
bankedDataArray.io.readline_error_delayed 1294 mainPipe.io.data_resp := bankedDataArray.io.readline_resp 1295 1296 (0 until LoadPipelineWidth).map(i => { 1297 bankedDataArray.io.read(i) <> ldu(i).io.banked_data_read 1298 bankedDataArray.io.is128Req(i) <> ldu(i).io.is128Req 1299 bankedDataArray.io.read_error_delayed(i) <> ldu(i).io.read_error_delayed 1300 1301 ldu(i).io.banked_data_resp := bankedDataArray.io.read_resp(i) 1302 1303 ldu(i).io.bank_conflict_slow := bankedDataArray.io.bank_conflict_slow(i) 1304 }) 1305 1306 (0 until LoadPipelineWidth).map(i => { 1307 when(bus.d.bits.opcode === TLMessages.GrantData) { 1308 io.lsu.forward_D(i).apply(bus.d, edge) 1309 }.otherwise { 1310 io.lsu.forward_D(i).dontCare() 1311 } 1312 }) 1313 // tl D channel wakeup 1314 when (bus.d.bits.opcode === TLMessages.GrantData || bus.d.bits.opcode === TLMessages.Grant) { 1315 io.lsu.tl_d_channel.apply(bus.d, edge) 1316 } .otherwise { 1317 io.lsu.tl_d_channel.dontCare() 1318 } 1319 mainPipe.io.force_write <> io.force_write 1320 1321 /** dwpu */ 1322 if (dwpuParam.enWPU) { 1323 val dwpu = Module(new DCacheWpuWrapper(LoadPipelineWidth)) 1324 for(i <- 0 until LoadPipelineWidth){ 1325 dwpu.io.req(i) <> ldu(i).io.dwpu.req(0) 1326 dwpu.io.resp(i) <> ldu(i).io.dwpu.resp(0) 1327 dwpu.io.lookup_upd(i) <> ldu(i).io.dwpu.lookup_upd(0) 1328 dwpu.io.cfpred(i) <> ldu(i).io.dwpu.cfpred(0) 1329 } 1330 dwpu.io.tagwrite_upd.valid := tagArray.io.write.valid 1331 dwpu.io.tagwrite_upd.bits.vaddr := tagArray.io.write.bits.vaddr 1332 dwpu.io.tagwrite_upd.bits.s1_real_way_en := tagArray.io.write.bits.way_en 1333 } else { 1334 for(i <- 0 until LoadPipelineWidth){ 1335 ldu(i).io.dwpu.req(0).ready := true.B 1336 ldu(i).io.dwpu.resp(0).valid := false.B 1337 ldu(i).io.dwpu.resp(0).bits := DontCare 1338 } 1339 } 1340 1341 //---------------------------------------- 1342 // load pipe 1343 // the s1 kill signal 1344 // only lsu uses this, replay never kills 1345 for (w <- 0 until LoadPipelineWidth) { 1346 ldu(w).io.lsu 
<> io.lsu.load(w) 1347 1348 // TODO:when have load128Req 1349 ldu(w).io.load128Req := io.lsu.load(w).is128Req 1350 1351 // replay and nack not needed anymore 1352 // TODO: remove replay and nack 1353 ldu(w).io.nack := false.B 1354 1355 ldu(w).io.disable_ld_fast_wakeup := 1356 bankedDataArray.io.disable_ld_fast_wakeup(w) // load pipe fast wake up should be disabled when bank conflict 1357 } 1358 1359 prefetcherMonitor.io.timely.total_prefetch := ldu.map(_.io.prefetch_info.naive.total_prefetch).reduce(_ || _) 1360 prefetcherMonitor.io.timely.late_hit_prefetch := ldu.map(_.io.prefetch_info.naive.late_hit_prefetch).reduce(_ || _) 1361 prefetcherMonitor.io.timely.late_miss_prefetch := missQueue.io.prefetch_info.naive.late_miss_prefetch 1362 prefetcherMonitor.io.timely.prefetch_hit := PopCount(ldu.map(_.io.prefetch_info.naive.prefetch_hit)) 1363 io.pf_ctrl <> prefetcherMonitor.io.pf_ctrl 1364 XSPerfAccumulate("useless_prefetch", ldu.map(_.io.prefetch_info.naive.total_prefetch).reduce(_ || _) && !(ldu.map(_.io.prefetch_info.naive.useful_prefetch).reduce(_ || _))) 1365 XSPerfAccumulate("useful_prefetch", ldu.map(_.io.prefetch_info.naive.useful_prefetch).reduce(_ || _)) 1366 XSPerfAccumulate("late_prefetch_hit", ldu.map(_.io.prefetch_info.naive.late_prefetch_hit).reduce(_ || _)) 1367 XSPerfAccumulate("late_load_hit", ldu.map(_.io.prefetch_info.naive.late_load_hit).reduce(_ || _)) 1368 1369 /** LoadMissDB: record load miss state */ 1370 val hartId = p(XSCoreParamsKey).HartId 1371 val isWriteLoadMissTable = Constantin.createRecord(s"isWriteLoadMissTable$hartId") 1372 val isFirstHitWrite = Constantin.createRecord(s"isFirstHitWrite$hartId") 1373 val tableName = s"LoadMissDB$hartId" 1374 val siteName = s"DcacheWrapper$hartId" 1375 val loadMissTable = ChiselDB.createTable(tableName, new LoadMissEntry) 1376 for( i <- 0 until LoadPipelineWidth){ 1377 val loadMissEntry = Wire(new LoadMissEntry) 1378 val loadMissWriteEn = 1379 (!ldu(i).io.lsu.resp.bits.replay && 
ldu(i).io.miss_req.fire) || 1380 (ldu(i).io.lsu.s2_first_hit && ldu(i).io.lsu.resp.valid && isFirstHitWrite.orR) 1381 loadMissEntry.timeCnt := GTimer() 1382 loadMissEntry.robIdx := ldu(i).io.lsu.resp.bits.debug_robIdx 1383 loadMissEntry.paddr := ldu(i).io.miss_req.bits.addr 1384 loadMissEntry.vaddr := ldu(i).io.miss_req.bits.vaddr 1385 loadMissEntry.missState := OHToUInt(Cat(Seq( 1386 ldu(i).io.miss_req.fire & ldu(i).io.miss_resp.merged, 1387 ldu(i).io.miss_req.fire & !ldu(i).io.miss_resp.merged, 1388 ldu(i).io.lsu.s2_first_hit && ldu(i).io.lsu.resp.valid 1389 ))) 1390 loadMissTable.log( 1391 data = loadMissEntry, 1392 en = isWriteLoadMissTable.orR && loadMissWriteEn, 1393 site = siteName, 1394 clock = clock, 1395 reset = reset 1396 ) 1397 } 1398 1399 val isWriteLoadAccessTable = Constantin.createRecord(s"isWriteLoadAccessTable$hartId") 1400 val loadAccessTable = ChiselDB.createTable(s"LoadAccessDB$hartId", new LoadAccessEntry) 1401 for (i <- 0 until LoadPipelineWidth) { 1402 val loadAccessEntry = Wire(new LoadAccessEntry) 1403 loadAccessEntry.timeCnt := GTimer() 1404 loadAccessEntry.robIdx := ldu(i).io.lsu.resp.bits.debug_robIdx 1405 loadAccessEntry.paddr := ldu(i).io.miss_req.bits.addr 1406 loadAccessEntry.vaddr := ldu(i).io.miss_req.bits.vaddr 1407 loadAccessEntry.missState := OHToUInt(Cat(Seq( 1408 ldu(i).io.miss_req.fire & ldu(i).io.miss_resp.merged, 1409 ldu(i).io.miss_req.fire & !ldu(i).io.miss_resp.merged, 1410 ldu(i).io.lsu.s2_first_hit && ldu(i).io.lsu.resp.valid 1411 ))) 1412 loadAccessEntry.pred_way_num := ldu(i).io.lsu.debug_s2_pred_way_num 1413 loadAccessEntry.real_way_num := ldu(i).io.lsu.debug_s2_real_way_num 1414 loadAccessEntry.dm_way_num := ldu(i).io.lsu.debug_s2_dm_way_num 1415 loadAccessTable.log( 1416 data = loadAccessEntry, 1417 en = isWriteLoadAccessTable.orR && ldu(i).io.lsu.resp.valid, 1418 site = siteName + "_loadpipe" + i.toString, 1419 clock = clock, 1420 reset = reset 1421 ) 1422 } 1423 1424 //---------------------------------------- 
1425 // Sta pipe 1426 for (w <- 0 until StorePipelineWidth) { 1427 stu(w).io.lsu <> io.lsu.sta(w) 1428 } 1429 1430 //---------------------------------------- 1431 // atomics 1432 // atomics not finished yet 1433 val atomic_resp_valid = mainPipe.io.atomic_resp.valid && mainPipe.io.atomic_resp.bits.isAMO 1434 io.lsu.atomics.resp.valid := RegNext(atomic_resp_valid) 1435 io.lsu.atomics.resp.bits := RegEnable(mainPipe.io.atomic_resp.bits, atomic_resp_valid) 1436 io.lsu.atomics.block_lr := mainPipe.io.block_lr 1437 1438 // Request 1439 val missReqArb = Module(new TreeArbiter(new MissReq, MissReqPortCount)) 1440 // seperately generating miss queue enq ready for better timeing 1441 val missReadyGen = Module(new MissReadyGen(MissReqPortCount)) 1442 1443 missReqArb.io.in(MainPipeMissReqPort) <> mainPipe.io.miss_req 1444 missReadyGen.io.in(MainPipeMissReqPort) <> mainPipe.io.miss_req 1445 for (w <- 0 until backendParams.LduCnt) { 1446 missReqArb.io.in(w + 1) <> ldu(w).io.miss_req 1447 missReadyGen.io.in(w + 1) <> ldu(w).io.miss_req 1448 } 1449 1450 for (w <- 0 until LoadPipelineWidth) { ldu(w).io.miss_resp := missQueue.io.resp } 1451 mainPipe.io.miss_resp := missQueue.io.resp 1452 1453 if(StorePrefetchL1Enabled) { 1454 for (w <- 0 until backendParams.StaCnt) { 1455 missReqArb.io.in(1 + backendParams.LduCnt + w) <> stu(w).io.miss_req 1456 missReadyGen.io.in(1 + backendParams.LduCnt + w) <> stu(w).io.miss_req 1457 } 1458 }else { 1459 for (w <- 0 until backendParams.StaCnt) { stu(w).io.miss_req.ready := false.B } 1460 } 1461 1462 for (i <- 0 until backendParams.HyuCnt) { 1463 val HybridLoadReqPort = HybridLoadReadBase + i 1464 val HybridStoreReqPort = HybridStoreReadBase + i 1465 val HybridMissReqPort = HybridMissReqBase + i 1466 1467 ldu(HybridLoadReqPort).io.miss_req.ready := false.B 1468 stu(HybridStoreReqPort).io.miss_req.ready := false.B 1469 1470 if (StorePrefetchL1Enabled) { 1471 when (ldu(HybridLoadReqPort).io.miss_req.valid) { 1472 missReqArb.io.in(HybridMissReqPort) <> 
ldu(HybridLoadReqPort).io.miss_req 1473 missReadyGen.io.in(HybridMissReqPort) <> ldu(HybridLoadReqPort).io.miss_req 1474 } .otherwise { 1475 missReqArb.io.in(HybridMissReqPort) <> stu(HybridStoreReqPort).io.miss_req 1476 missReadyGen.io.in(HybridMissReqPort) <> stu(HybridStoreReqPort).io.miss_req 1477 } 1478 } else { 1479 missReqArb.io.in(HybridMissReqPort) <> ldu(HybridLoadReqPort).io.miss_req 1480 missReadyGen.io.in(HybridMissReqPort) <> ldu(HybridLoadReqPort).io.miss_req 1481 } 1482 } 1483 1484 for(w <- 0 until LoadPipelineWidth) { 1485 wb.io.miss_req_conflict_check(w) := ldu(w).io.wbq_conflict_check 1486 ldu(w).io.wbq_block_miss_req := wb.io.block_miss_req(w) 1487 } 1488 1489 wb.io.miss_req_conflict_check(3) := mainPipe.io.wbq_conflict_check 1490 mainPipe.io.wbq_block_miss_req := wb.io.block_miss_req(3) 1491 1492 wb.io.miss_req_conflict_check(4).valid := missReqArb.io.out.valid 1493 wb.io.miss_req_conflict_check(4).bits := missReqArb.io.out.bits.addr 1494 missQueue.io.wbq_block_miss_req := wb.io.block_miss_req(4) 1495 1496 missReqArb.io.out <> missQueue.io.req 1497 missReadyGen.io.queryMQ <> missQueue.io.queryMQ 1498 io.cmoOpReq <> missQueue.io.cmo_req 1499 io.cmoOpResp <> missQueue.io.cmo_resp 1500 1501 XSPerfAccumulate("miss_queue_fire", PopCount(VecInit(missReqArb.io.in.map(_.fire))) >= 1.U) 1502 XSPerfAccumulate("miss_queue_muti_fire", PopCount(VecInit(missReqArb.io.in.map(_.fire))) > 1.U) 1503 1504 XSPerfAccumulate("miss_queue_has_enq_req", PopCount(VecInit(missReqArb.io.in.map(_.valid))) >= 1.U) 1505 XSPerfAccumulate("miss_queue_has_muti_enq_req", PopCount(VecInit(missReqArb.io.in.map(_.valid))) > 1.U) 1506 XSPerfAccumulate("miss_queue_has_muti_enq_but_not_fire", PopCount(VecInit(missReqArb.io.in.map(_.valid))) > 1.U && PopCount(VecInit(missReqArb.io.in.map(_.fire))) === 0.U) 1507 1508 // forward missqueue 1509 (0 until LoadPipelineWidth).map(i => io.lsu.forward_mshr(i).connect(missQueue.io.forward(i))) 1510 1511 // refill to load queue 1512 // io.lsu.lsq 
<> missQueue.io.refill_to_ldq 1513 1514 // tilelink stuff 1515 bus.a <> missQueue.io.mem_acquire 1516 bus.e <> missQueue.io.mem_finish 1517 missQueue.io.probe_addr := bus.b.bits.address 1518 missQueue.io.replace_addr := mainPipe.io.replace_addr 1519 1520 missQueue.io.main_pipe_resp.valid := RegNext(mainPipe.io.atomic_resp.valid) 1521 missQueue.io.main_pipe_resp.bits := RegEnable(mainPipe.io.atomic_resp.bits, mainPipe.io.atomic_resp.valid) 1522 1523 //---------------------------------------- 1524 // probe 1525 // probeQueue.io.mem_probe <> bus.b 1526 block_decoupled(bus.b, probeQueue.io.mem_probe, missQueue.io.probe_block) 1527 probeQueue.io.lrsc_locked_block <> mainPipe.io.lrsc_locked_block 1528 probeQueue.io.update_resv_set <> mainPipe.io.update_resv_set 1529 1530 val refill_req = RegNext(missQueue.io.main_pipe_req.valid && ((missQueue.io.main_pipe_req.bits.isLoad) | (missQueue.io.main_pipe_req.bits.isStore))) 1531 //---------------------------------------- 1532 // mainPipe 1533 // when a req enters main pipe, if it is set-conflict with replace pipe or refill pipe, 1534 // block the req in main pipe 1535 probeQueue.io.pipe_req <> mainPipe.io.probe_req 1536 io.lsu.store.req <> mainPipe.io.store_req 1537 1538 io.lsu.store.replay_resp.valid := RegNext(mainPipe.io.store_replay_resp.valid) 1539 io.lsu.store.replay_resp.bits := RegEnable(mainPipe.io.store_replay_resp.bits, mainPipe.io.store_replay_resp.valid) 1540 io.lsu.store.main_pipe_hit_resp := mainPipe.io.store_hit_resp 1541 1542 mainPipe.io.atomic_req <> io.lsu.atomics.req 1543 1544 mainPipe.io.invalid_resv_set := RegNext( 1545 wb.io.req.fire && 1546 wb.io.req.bits.addr === mainPipe.io.lrsc_locked_block.bits && 1547 mainPipe.io.lrsc_locked_block.valid 1548 ) 1549 1550 //---------------------------------------- 1551 // replace (main pipe) 1552 val mpStatus = mainPipe.io.status 1553 mainPipe.io.refill_req <> missQueue.io.main_pipe_req 1554 1555 mainPipe.io.data_write_ready_dup := 
VecInit(Seq.fill(nDupDataWriteReady)(true.B)) 1556 mainPipe.io.tag_write_ready_dup := VecInit(Seq.fill(nDupDataWriteReady)(true.B)) 1557 mainPipe.io.wb_ready_dup := wb.io.req_ready_dup 1558 1559 //---------------------------------------- 1560 // wb 1561 // add a queue between MainPipe and WritebackUnit to reduce MainPipe stalls due to WritebackUnit busy 1562 1563 wb.io.req <> mainPipe.io.wb 1564 bus.c <> wb.io.mem_release 1565 // wb.io.release_wakeup := refillPipe.io.release_wakeup 1566 // wb.io.release_update := mainPipe.io.release_update 1567 //wb.io.probe_ttob_check_req <> mainPipe.io.probe_ttob_check_req 1568 //wb.io.probe_ttob_check_resp <> mainPipe.io.probe_ttob_check_resp 1569 1570 io.lsu.release.valid := RegNext(wb.io.req.fire) 1571 io.lsu.release.bits.paddr := RegEnable(wb.io.req.bits.addr, wb.io.req.fire) 1572 // Note: RegNext() is required by: 1573 // * load queue released flag update logic 1574 // * load / load violation check logic 1575 // * and timing requirements 1576 // CHANGE IT WITH CARE 1577 1578 // connect bus d 1579 missQueue.io.mem_grant.valid := false.B 1580 missQueue.io.mem_grant.bits := DontCare 1581 1582 wb.io.mem_grant.valid := false.B 1583 wb.io.mem_grant.bits := DontCare 1584 1585 // in L1DCache, we ony expect Grant[Data] and ReleaseAck 1586 bus.d.ready := false.B 1587 when (bus.d.bits.opcode === TLMessages.Grant || bus.d.bits.opcode === TLMessages.GrantData || bus.d.bits.opcode === TLMessages.CBOAck) { 1588 missQueue.io.mem_grant <> bus.d 1589 } .elsewhen (bus.d.bits.opcode === TLMessages.ReleaseAck) { 1590 wb.io.mem_grant <> bus.d 1591 } .otherwise { 1592 assert (!bus.d.fire) 1593 } 1594 1595 //---------------------------------------- 1596 // Feedback Direct Prefetch Monitor 1597 fdpMonitor.io.refill := missQueue.io.prefetch_info.fdp.prefetch_monitor_cnt 1598 fdpMonitor.io.timely.late_prefetch := missQueue.io.prefetch_info.fdp.late_miss_prefetch 1599 fdpMonitor.io.accuracy.total_prefetch := 
missQueue.io.prefetch_info.fdp.total_prefetch 1600 for (w <- 0 until LoadPipelineWidth) { 1601 if(w == 0) { 1602 fdpMonitor.io.accuracy.useful_prefetch(w) := ldu(w).io.prefetch_info.fdp.useful_prefetch 1603 }else { 1604 fdpMonitor.io.accuracy.useful_prefetch(w) := Mux(same_cycle_update_pf_flag, false.B, ldu(w).io.prefetch_info.fdp.useful_prefetch) 1605 } 1606 } 1607 for (w <- 0 until LoadPipelineWidth) { fdpMonitor.io.pollution.cache_pollution(w) := ldu(w).io.prefetch_info.fdp.pollution } 1608 for (w <- 0 until LoadPipelineWidth) { fdpMonitor.io.pollution.demand_miss(w) := ldu(w).io.prefetch_info.fdp.demand_miss } 1609 fdpMonitor.io.debugRolling := io.debugRolling 1610 1611 //---------------------------------------- 1612 // Bloom Filter 1613 // bloomFilter.io.set <> missQueue.io.bloom_filter_query.set 1614 // bloomFilter.io.clr <> missQueue.io.bloom_filter_query.clr 1615 bloomFilter.io.set <> mainPipe.io.bloom_filter_query.set 1616 bloomFilter.io.clr <> mainPipe.io.bloom_filter_query.clr 1617 1618 for (w <- 0 until LoadPipelineWidth) { bloomFilter.io.query(w) <> ldu(w).io.bloom_filter_query.query } 1619 for (w <- 0 until LoadPipelineWidth) { bloomFilter.io.resp(w) <> ldu(w).io.bloom_filter_query.resp } 1620 1621 for (w <- 0 until LoadPipelineWidth) { counterFilter.io.ld_in(w) <> ldu(w).io.counter_filter_enq } 1622 for (w <- 0 until LoadPipelineWidth) { counterFilter.io.query(w) <> ldu(w).io.counter_filter_query } 1623 1624 //---------------------------------------- 1625 // replacement algorithm 1626 val replacer = ReplacementPolicy.fromString(cacheParams.replacer, nWays, nSets) 1627 val replWayReqs = ldu.map(_.io.replace_way) ++ Seq(mainPipe.io.replace_way) ++ stu.map(_.io.replace_way) 1628 1629 if (dwpuParam.enCfPred) { 1630 val victimList = VictimList(nSets) 1631 replWayReqs.foreach { 1632 case req => 1633 req.way := DontCare 1634 when(req.set.valid) { 1635 when(victimList.whether_sa(req.set.bits)) { 1636 req.way := replacer.way(req.set.bits) 1637 }.otherwise { 
1638 req.way := req.dmWay 1639 } 1640 } 1641 } 1642 } else { 1643 replWayReqs.foreach { 1644 case req => 1645 req.way := DontCare 1646 when(req.set.valid) { 1647 req.way := replacer.way(req.set.bits) 1648 } 1649 } 1650 } 1651 1652 val replAccessReqs = ldu.map(_.io.replace_access) ++ Seq( 1653 mainPipe.io.replace_access 1654 ) ++ stu.map(_.io.replace_access) 1655 val touchWays = Seq.fill(replAccessReqs.size)(Wire(ValidIO(UInt(log2Up(nWays).W)))) 1656 touchWays.zip(replAccessReqs).foreach { 1657 case (w, req) => 1658 w.valid := req.valid 1659 w.bits := req.bits.way 1660 } 1661 val touchSets = replAccessReqs.map(_.bits.set) 1662 replacer.access(touchSets, touchWays) 1663 1664 //---------------------------------------- 1665 // assertions 1666 // dcache should only deal with DRAM addresses 1667 import freechips.rocketchip.util._ 1668 when (bus.a.fire) { 1669 assert(PmemRanges.map(_.cover(bus.a.bits.address)).reduce(_ || _)) 1670 } 1671 when (bus.b.fire) { 1672 assert(PmemRanges.map(_.cover(bus.b.bits.address)).reduce(_ || _)) 1673 } 1674 when (bus.c.fire) { 1675 assert(PmemRanges.map(_.cover(bus.c.bits.address)).reduce(_ || _)) 1676 } 1677 1678 //---------------------------------------- 1679 // utility functions 1680 def block_decoupled[T <: Data](source: DecoupledIO[T], sink: DecoupledIO[T], block_signal: Bool) = { 1681 sink.valid := source.valid && !block_signal 1682 source.ready := sink.ready && !block_signal 1683 sink.bits := source.bits 1684 } 1685 1686 //---------------------------------------- 1687 // performance counters 1688 val num_loads = PopCount(ldu.map(e => e.io.lsu.req.fire)) 1689 XSPerfAccumulate("num_loads", num_loads) 1690 1691 io.mshrFull := missQueue.io.full 1692 io.l1Miss := missQueue.io.l1Miss 1693 1694 // performance counter 1695 // val ld_access = Wire(Vec(LoadPipelineWidth, missQueue.io.debug_early_replace.last.cloneType)) 1696 // val st_access = Wire(ld_access.last.cloneType) 1697 // ld_access.zip(ldu).foreach { 1698 // case (a, u) => 1699 // 
//     a.valid := RegNext(u.io.lsu.req.fire) && !u.io.lsu.s1_kill
  //     a.bits.idx := RegEnable(get_idx(u.io.lsu.req.bits.vaddr), u.io.lsu.req.fire)
  //     a.bits.tag := get_tag(u.io.lsu.s1_paddr_dup_dcache)
  // }
  // st_access.valid := RegNext(mainPipe.io.store_req.fire)
  // st_access.bits.idx := RegEnable(get_idx(mainPipe.io.store_req.bits.vaddr), mainPipe.io.store_req.fire)
  // st_access.bits.tag := RegEnable(get_tag(mainPipe.io.store_req.bits.addr), mainPipe.io.store_req.fire)
  // val access_info = ld_access.toSeq ++ Seq(st_access)
  // val early_replace = RegNext(missQueue.io.debug_early_replace) // TODO: clock gate
  // val access_early_replace = access_info.map {
  //   case acc =>
  //     Cat(early_replace.map {
  //       case r =>
  //         acc.valid && r.valid &&
  //         acc.bits.tag === r.bits.tag &&
  //         acc.bits.idx === r.bits.idx
  //     })
  // }
  // XSPerfAccumulate("access_early_replace", PopCount(Cat(access_early_replace)))

  // Gather the perf events exposed by wb, mainPipe, missQueue, probeQueue and
  // every element of ldu, then hand them to generatePerfEvent().
  val perfEvents = (Seq(wb, mainPipe, missQueue, probeQueue) ++ ldu).flatMap(_.getPerfEvents)
  generatePerfEvent()
}

/** External module (`ExtModule`) that only declares a port list; its
  * implementation is supplied outside of Chisel.
  *
  * Ports: `clock` and `enable` inputs, a 5-bit `cmd`, a 64-bit `addr`,
  * a 64-bit `wdata` with an 8-bit `mask`, and a 64-bit `rdata` output.
  *
  * NOTE(review): presumably a simulation-side model for atomic memory
  * operations -- confirm against the external implementation.
  */
class AMOHelper() extends ExtModule {
  val clock  = IO(Input(Clock()))
  val enable = IO(Input(Bool()))
  val cmd    = IO(Input(UInt(5.W)))
  val addr   = IO(Input(UInt(64.W)))
  val wdata  = IO(Input(UInt(64.W)))
  val mask   = IO(Input(UInt(8.W)))
  val rdata  = IO(Output(UInt(64.W)))
}

/** Diplomacy wrapper around the data cache.
  *
  * When `coreParams.dcacheParametersOpt` is non-empty a real [[DCache]] is
  * instantiated and its client node is exported through `clientNode`.
  * Otherwise `clientNode` and `dcache` are `null` and the module
  * implementation instantiates a `FakeDCache` instead.
  *
  * `uncacheNode` exists only when `cacheCtrlParamsOpt` is defined; it must be
  * paired with the DCache's `cacheCtrlOpt`, which is checked at elaboration.
  */
class DCacheWrapper()(implicit p: Parameters) extends LazyModule
  with HasXSParameter
  with HasDCacheParameters
{
  override def shouldBeInlined: Boolean = false

  val useDcache = coreParams.dcacheParametersOpt.nonEmpty
  // Kept as nullable vals (rather than Options) so existing users of
  // `clientNode` / `dcache` keep compiling.
  val clientNode = if (useDcache) TLIdentityNode() else null
  val dcache = if (useDcache) LazyModule(new DCache()) else null
  if (useDcache) {
    clientNode := dcache.clientNode
  }
  val uncacheNode = OptionWrapper(cacheCtrlParamsOpt.isDefined, TLIdentityNode())

  // `dcache` is null when the fake DCache is selected, so it must not be
  // dereferenced unless `useDcache` holds; `&&` short-circuits the access.
  private val hasCacheCtrl = useDcache && dcache.cacheCtrlOpt.isDefined

  // Either both the uncache node and the control unit exist, or neither does.
  require(uncacheNode.isDefined == hasCacheCtrl, "uncacheNode and ctrlUnitOpt are not connected!")
  if (uncacheNode.isDefined && hasCacheCtrl) {
    dcache.cacheCtrlOpt.get.node := uncacheNode.get
  }

  /** Module implementation: forwards `io` to the real DCache, or to a
    * `FakeDCache` when no DCache parameters are configured.
    */
  class DCacheWrapperImp(wrapper: LazyModule) extends LazyModuleImp(wrapper) with HasPerfEvents {
    val io = IO(new DCacheIO)
    val perfEvents = if (!useDcache) {
      // a fake dcache which uses dpi-c to access memory, only for debug usage!
      val fake_dcache = Module(new FakeDCache())
      io <> fake_dcache.io
      // the fake dcache contributes no perf events
      Seq()
    }
    else {
      io <> dcache.module.io
      dcache.module.getPerfEvents
    }
    generatePerfEvent()
  }

  lazy val module = new DCacheWrapperImp(this)
}