/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
*
*
* Acknowledgement
*
* This implementation is inspired by several key papers:
* [1] Pierre Michaud. "[A PPM-like, tag-based branch predictor.](https://inria.hal.science/hal-03406188)" The Journal
* of Instruction-Level Parallelism (JILP) 7: 10. 2005.
* [2] André Seznec, and Pierre Michaud. "[A case for (partially) tagged geometric history length branch prediction.]
* (https://inria.hal.science/hal-03408381)" The Journal of Instruction-Level Parallelism (JILP) 8: 23. 2006.
* [3] André Seznec. "[A 256 kbits l-tage branch predictor.](http://www.irisa.fr/caps/people/seznec/L-TAGE.pdf)" The
* Journal of Instruction-Level Parallelism (JILP) Special Issue: The Second Championship Branch Prediction Competition
* (CBP) 9: 1-6. 2007.
***************************************************************************************/

package xiangshan.frontend

import chisel3._
import chisel3.util._
import org.chipsalliance.cde.config.Parameters
import scala.{Tuple2 => &}
import scala.math.min
import utility._
import utility.mbist.MbistPipeline
import utility.sram.FoldedSRAMTemplate
import utility.sram.SRAMConflictBehavior
import xiangshan._

/** Compile-time constants and small combinational helpers shared by the TAGE
  * bundles and modules in this file.
  */
trait TageParams extends HasBPUConst with HasXSParameter {
  // println(BankTageTableInfos)
  val TageNTables = TageTableInfos.size
  // val BankTageNTables = BankTageTableInfos.map(_.size) // Number of tage tables
  // val UBitPeriod = 256
  val TageCtrBits = 3
  val TickWidth = 7

  val USE_ALT_ON_NA_WIDTH = 4
  val NUM_USE_ALT_ON_NA = 128

  /** Index into the use-alt-on-na counters: the low log2Ceil(NUM_USE_ALT_ON_NA)
    * bits of the pc after dropping the instruction offset bits.
    */
  def use_alt_idx(pc: UInt): UInt = (pc >> instOffsetBits)(log2Ceil(NUM_USE_ALT_ON_NA) - 1, 0)

  // Sum over all tables of rows * (1 + tagLen + TageCtrBits + 1).
  // NOTE(review): the two 1-bit terms are presumably the valid and useful bits - confirm.
  // `sum` (instead of `reduce`) also yields 0 for an empty table list rather than throwing.
  val TotalBits = TageTableInfos.map {
    case (nRows, _, tagLen) => nRows * (1 + tagLen + TageCtrBits + 1)
  }.sum

  /** True when `ctr` equals 2^(width-1), i.e. only its top bit is set. */
  def posUnconf(ctr: UInt): Bool = ctr === (1 << (ctr.getWidth - 1)).U

  /** True when `ctr` equals 2^(width-1) - 1, i.e. every bit but the top one is set. */
  def negUnconf(ctr: UInt): Bool = ctr === ((1 << (ctr.getWidth - 1)) - 1).U

  /** True when `ctr` holds either of the two middle values around the top-bit flip. */
  def unconf(ctr: UInt): Bool = posUnconf(ctr) || negUnconf(ctr)

  val unshuffleBitWidth = log2Ceil(numBr)

  /** Low `unshuffleBitWidth` bits of `idx`, used as the XOR key of the branch-slot shuffle. */
  def get_unshuffle_bits(idx: UInt): UInt = idx(unshuffleBitWidth - 1, 0)

  // xor hashes are reversible: applying the same key twice restores the input,
  // so the two helpers below are inverses of each other for a given unhashed_idx.

  /** Physical branch slot for logical branch `br_lidx`. */
  def get_phy_br_idx(unhashed_idx: UInt, br_lidx: Int): UInt =
    get_unshuffle_bits(unhashed_idx) ^ br_lidx.U(unshuffleBitWidth.W)

  /** Logical branch index stored in physical slot `br_pidx`. */
  def get_lgc_br_idx(unhashed_idx: UInt, br_pidx: UInt): UInt =
    get_unshuffle_bits(unhashed_idx) ^ br_pidx

}

/** Helper for folding a history vector down to a narrower width. */
trait HasFoldedHistory {
  val histLen: Int

  /** Folds the low `histLen` bits of `hist` into at most `l` bits.
    *
    * The history is cut into ceil(histLen / l) chunks of `l` bits (the last chunk
    * may be shorter) and the chunks are combined with `ParallelXOR`.
    *
    * @param hist    history bits to fold
    * @param l       chunk width
    * @param histLen number of history bits to use
    * @return the folded history, or `0.U` when `histLen` is not positive
    */
  def compute_folded_hist(hist: UInt, l: Int)(histLen: Int): UInt =
    if (histLen > 0) {
      val nChunks = (histLen + l - 1) / l
      val hist_chunks = (0 until nChunks) map { i => hist(min((i + 1) * l, histLen) - 1, i * l) }
      ParallelXOR(hist_chunks)
    } else 0.U

  // `compute_folded_hist` with the history length fixed to this instance's `histLen`.
  val compute_folded_ghist = compute_folded_hist(_: UInt, _: Int)(histLen)
}

abstract class
TageBundle(implicit p: Parameters)
    extends XSBundle with TageParams with BPUUtils
// ^ Common base of the TAGE bundles: XSBundle with TageParams and BPUUtils mixed in.

/** Common base of the TAGE modules: XSModule with TageParams and BPUUtils mixed in. */
abstract class TageModule(implicit p: Parameters)
    extends XSModule with TageParams with BPUUtils {}

/** Lookup request for a [[TageTable]]: pc, global history and the folded histories. */
class TageReq(implicit p: Parameters) extends TageBundle {
  val pc = UInt(VAddrBits.W)
  val ghist = UInt(HistoryLength.W)
  val folded_hist = new AllFoldedHistories(foldedGHistInfos)
}

/** Part of a table response that is carried in the prediction meta: counter and useful bit. */
class TageResp_meta(implicit p: Parameters) extends TageBundle with TageParams {
  val ctr = UInt(TageCtrBits.W)
  val u = Bool()
}

/** Full per-branch table response: [[TageResp_meta]] plus the `unconf` flag. */
class TageResp(implicit p: Parameters) extends TageResp_meta {
  val unconf = Bool()
}

/** Update request for a [[TageTable]]; every Vec is indexed by logical branch. */
class TageUpdate(implicit p: Parameters) extends TageBundle {
  val pc = UInt(VAddrBits.W)
  val ghist = UInt(HistoryLength.W)
  // update tag and ctr
  val mask = Vec(numBr, Bool())
  val takens = Vec(numBr, Bool())
  val alloc = Vec(numBr, Bool())
  val oldCtrs = Vec(numBr, UInt(TageCtrBits.W))
  // update u
  val uMask = Vec(numBr, Bool())
  val us = Vec(numBr, Bool())
  val reset_u = Vec(numBr, Bool())
}

/** Prediction-time meta information of the TAGE predictor, one entry per branch slot.
  *
  * Field order defines the bit layout when this bundle is converted with
  * `asUInt` / `asTypeOf`, so it must not be reordered.
  */
class TageMeta(implicit p: Parameters)
    extends TageBundle with HasSCParameter {
  val providers = Vec(numBr, ValidUndirectioned(UInt(log2Ceil(TageNTables).W)))
  val providerResps = Vec(numBr, new TageResp_meta)
  // val altProviders = Vec(numBr, ValidUndirectioned(UInt(log2Ceil(TageNTables).W)))
  // val altProviderResps = Vec(numBr, new TageResp)
  val altUsed = Vec(numBr, Bool())
  val basecnts = Vec(numBr, UInt(2.W))
  val allocates = Vec(numBr, UInt(TageNTables.W))
  val scMeta = if (EnableSC) Some(new SCMeta(SCNTables)) else None
  // The two fields below only exist when not building for the FPGA platform.
  val pred_cycle = if (!env.FPGAPlatform) Some(UInt(64.W)) else None
  val use_alt_on_na = if (!env.FPGAPlatform) Some(Vec(numBr, Bool())) else None

  /** Direction given by the base counters (bit 1 of each 2-bit counter). */
  def altPreds = basecnts.map(_(1))

  /** Per branch: whether any bit of the `allocates` mask is set. */
  def allocateValid = allocates.map(_.orR)

  /** Whether the base-counter direction differs from the provider counter's top bit. */
  def altDiffers(i: Int) = basecnts(i)(1) =/= providerResps(i).ctr(TageCtrBits - 1)

  /** Direction for branch `i`: base counter when `altUsed(i)`, provider counter otherwise. */
  def takens(i: Int) = Mux(altUsed(i), basecnts(i)(1), providerResps(i).ctr(TageCtrBits - 1))
}

/** Sizing constants of the base table. */
trait TBTParams extends HasXSParameter with TageParams {
  val BtSize = 2048
  val bypassEntries = 8
}

/** Base table of the TAGE predictor: `BtSize` rows of `numBr` 2-bit saturating
  * counters, indexed by pc.
  *
  * After reset every row is written with the weak-taken value (2) before any
  * request is accepted. Reads take one cycle (request in s0, counters in s1).
  * Updates go through a write-bypass buffer so that back-to-back updates of
  * the same row see the most recently written counter.
  */
class TageBTable(implicit p: Parameters) extends XSModule with TBTParams {
  val io = IO(new Bundle {
    val req = Flipped(DecoupledIO(UInt(VAddrBits.W))) // s0_pc
    val s1_cnt = Output(Vec(numBr, UInt(2.W)))
    val update_mask = Input(Vec(TageBanks, Bool()))
    val update_pc = Input(UInt(VAddrBits.W))
    val update_cnt = Input(Vec(numBr, UInt(2.W)))
    val update_takens = Input(Vec(TageBanks, Bool()))
    // val update = Input(new TageUpdate)
  })

  val bimAddr = new TableAddr(log2Up(BtSize), instOffsetBits)

  // Physical SRAM Size
  val SRAMSize = 512
  val foldWidth = BtSize / SRAMSize

  val bt = Module(
    new FoldedSRAMTemplate(
      UInt(2.W),
      setSplit = 2,
      waySplit = 1,
      dataSplit = 1,
      set = BtSize,
      width = foldWidth,
      way = numBr,
      shouldReset = false,
      holdRead = true,
      conflictBehavior = SRAMConflictBehavior.BufferWriteLossy,
      withClockGate = true,
      hasMbist = hasMbist,
      hasSramCtl = hasSramCtl
    )
  )

  // Power-on reset to weak taken
  // resetRow walks over all rows while doing_reset is set (adding the Bool
  // increments by one per cycle) and the walk ends once the last row is reached.
  val doing_reset = RegInit(true.B)
  val resetRow = RegInit(0.U(log2Ceil(BtSize).W))
  resetRow := resetRow + doing_reset
  when(resetRow === (BtSize - 1).U) {
    doing_reset := false.B
  }

  // Require power-on reset done before handling any request
  io.req.ready := !doing_reset

  val s0_pc = io.req.bits
  val s0_fire = io.req.valid
  val s0_idx = bimAddr.getIdx(s0_pc)
  bt.io.r.req.valid := s0_fire
  bt.io.r.req.bits.setIdx := s0_idx

  val s1_read = bt.io.r.resp.data
  val s1_idx = RegEnable(s0_idx, s0_fire)

  // Un-shuffle: pick, for each logical branch, the physical way it was stored in.
  val per_br_ctr = VecInit((0 until numBr).map(i => Mux1H(UIntToOH(get_phy_br_idx(s1_idx, i), numBr), s1_read)))
  io.s1_cnt := per_br_ctr

  // Update logic
  val u_idx = bimAddr.getIdx(io.update_pc)

  val newCtrs = Wire(Vec(numBr, UInt(2.W))) // physical bridx

  val wrbypass = Module(new WrBypass(UInt(2.W), bypassEntries, log2Up(BtSize), numWays = numBr)) // logical bridx
  wrbypass.io.wen := io.update_mask.reduce(_ || _)
  wrbypass.io.write_idx := u_idx
  // Connection is a side effect, so use foreach rather than map.
  wrbypass.io.write_way_mask.foreach(_ := io.update_mask)
  for (li <- 0 until numBr) {
    val br_pidx = get_phy_br_idx(u_idx, li)
    wrbypass.io.write_data(li) := newCtrs(br_pidx)
  }

  // Old counter per physical way: the bypassed value when the bypass hits with
  // valid data for that logical branch, otherwise the value supplied by the update.
  val oldCtrs =
    VecInit((0 until numBr).map { pi =>
      val br_lidx = get_lgc_br_idx(u_idx, pi.U(log2Ceil(numBr).W))
      Mux(
        wrbypass.io.hit && wrbypass.io.hit_data(br_lidx).valid,
        wrbypass.io.hit_data(br_lidx).bits,
        io.update_cnt(br_lidx)
      )
    })

  /** Saturating up/down update of a `len`-bit counter.
    *
    * @param old   current counter value
    * @param len   counter width in bits
    * @param taken count up when true, down when false
    * @return `old` +/- 1, clamped to [0, 2^len - 1]
    */
  def satUpdate(old: UInt, len: Int, taken: Bool): UInt = {
    val oldSatTaken = old === ((1 << len) - 1).U
    val oldSatNotTaken = old === 0.U
    Mux(oldSatTaken && taken, ((1 << len) - 1).U, Mux(oldSatNotTaken && !taken, 0.U, Mux(taken, old + 1.U, old - 1.U)))
  }

  val newTakens = io.update_takens
  newCtrs := VecInit((0 until numBr).map { pi =>
    val br_lidx = get_lgc_br_idx(u_idx, pi.U(log2Ceil(numBr).W))
    satUpdate(oldCtrs(pi), 2, newTakens(br_lidx))
  })

  // A physical way is written when any logical branch that maps to it is updated.
  val updateWayMask = VecInit((0 until numBr).map(pi =>
    (0 until numBr).map(li =>
      io.update_mask(li) && get_phy_br_idx(u_idx, li) === pi.U
    ).reduce(_ || _)
  )).asUInt

  // During power-on reset the write port is used to initialise every way of resetRow.
  bt.io.w.apply(
    valid = io.update_mask.reduce(_ || _) || doing_reset,
    data = Mux(doing_reset, VecInit(Seq.fill(numBr)(2.U(2.W))), newCtrs), // Weak taken
    setIdx = Mux(doing_reset, resetRow, u_idx),
    waymask = Mux(doing_reset, Fill(numBr, 1.U(1.W)).asUInt, updateWayMask)
  )
}

/** One tagged TAGE table.
  *
  * Entries (valid, tag, counter) live in `nBanks` single-port SRAM banks that
  * are selected by the low bits of the hashed index; the useful bits live in
  * a separate SRAM (`us`). A lookup is issued in s0 and answered in s1.
  * Updates are filtered by per-bank write-bypass buffers and silent updates
  * (counter already saturated in the update direction) are not written.
  *
  * @param nRows    total number of rows; must be a multiple of `bankSRAMSize`
  * @param histLen  number of global-history bits used by this table
  * @param tagLen   tag width in bits
  * @param tableIdx index of this table, used in messages
  */
class TageTable(
    val nRows: Int,
    val histLen: Int,
    val tagLen: Int,
    val tableIdx: Int
)(implicit p: Parameters)
    extends TageModule with HasFoldedHistory {
  val io = IO(new Bundle() {
    val req = Flipped(DecoupledIO(new TageReq))
    val resps = Output(Vec(numBr, Valid(new TageResp)))
    val update = Input(new TageUpdate)
  })

  class TageEntry() extends TageBundle {
    val valid = Bool()
    val tag = UInt(tagLen.W)
    val ctr = UInt(TageCtrBits.W)
  }

  // Physical SRAM size
  val bankSRAMSize = 512
  val uSRAMSize = 256
  require(nRows % bankSRAMSize == 0)
  require(isPow2(numBr))
  val nRowsPerBr = nRows / numBr
  val nBanks = 4 // Tage banks
  val bankSize = nRowsPerBr / nBanks
  val bankFoldWidth = if (bankSize >= bankSRAMSize) bankSize / bankSRAMSize else 1
  val uFoldedWidth = nRowsPerBr / uSRAMSize
  if (bankSize < bankSRAMSize) {
    println(f"warning: tage table $tableIdx has small sram depth of $bankSize")
  }
  val bankIdxWidth = log2Ceil(nBanks)

  /** One-hot bank select derived from the low `bankIdxWidth` bits of `idx`. */
  def get_bank_mask(idx: UInt) = VecInit((0 until nBanks).map(idx(bankIdxWidth - 1, 0) === _.U))

  /** Row index inside a bank: `idx` without its bank-select bits. */
  def get_bank_idx(idx: UInt) = idx >> bankIdxWidth

  // bypass entries for tage update
  val perBankWrbypassEntries = 8

  // (history length, folded length) pairs for the index hash and the two tag hashes.
  val idxFhInfo = (histLen, min(log2Ceil(nRowsPerBr), histLen))
  val tagFhInfo = (histLen, min(histLen, tagLen))
  val altTagFhInfo = (histLen, min(histLen, tagLen - 1))
  val allFhInfos = Seq(idxFhInfo, tagFhInfo, altTagFhInfo)

  /** Folded-history configurations this table needs (those with a non-zero history length). */
  def getFoldedHistoryInfo = allFhInfos.filter(_._1 > 0).toSet

  /** Computes the table index and tag for a lookup or update.
    *
    * @param unhashed_idx pc-derived index before hashing
    * @param allFh        folded histories to hash with
    * @return (index of log2Ceil(nRowsPerBr) bits, tag of tagLen bits)
    */
  def compute_tag_and_hash(unhashed_idx: UInt, allFh: AllFoldedHistories) = {
    val idx_fh = allFh.getHistWithInfo(idxFhInfo).folded_hist
    val tag_fh = allFh.getHistWithInfo(tagFhInfo).folded_hist
    val alt_tag_fh = allFh.getHistWithInfo(altTagFhInfo).folded_hist
    // require(idx_fh.getWidth == log2Ceil(nRows))
    val idx = (unhashed_idx ^ idx_fh)(log2Ceil(nRowsPerBr) - 1, 0)
    val tag = (unhashed_idx ^ tag_fh ^ (alt_tag_fh << 1))(tagLen - 1, 0)
    (idx, tag)
  }

  /** Saturating update of a `TageCtrBits`-wide counter in the `taken` direction. */
  def inc_ctr(ctr: UInt, taken: Bool): UInt = satUpdate(ctr, TageCtrBits, taken)

  // Optional self-check: the folded history supplied with the request must equal
  // the one recomputed here from the full global history.
  if (EnableGHistDiff) {
    val idx_history = compute_folded_ghist(io.req.bits.ghist, log2Ceil(nRowsPerBr))
    val idx_fh = io.req.bits.folded_hist.getHistWithInfo(idxFhInfo)
    XSError(
      idx_history =/= idx_fh.folded_hist,
      p"tage table $tableIdx has different fh," +
        p" ghist: ${Binary(idx_history)}, fh: ${Binary(idx_fh.folded_hist)}\n"
    )
  }
  // pc is the start address of a basic block; a block holds at most 2 branch instructions
  // def getUnhashedIdx(pc: UInt) = pc >> (instOffsetBits+log2Ceil(TageBanks))
  def getUnhashedIdx(pc: UInt): UInt = pc >> instOffsetBits

  // val s1_pc = io.req.bits.pc
  val req_unhashed_idx = getUnhashedIdx(io.req.bits.pc)

  // Useful bits, one per branch way.
  val us = Module(new FoldedSRAMTemplate(
    Bool(),
    set = nRowsPerBr,
    width = uFoldedWidth,
    way = numBr,
    shouldReset = true,
    extraReset = true,
    holdRead = true,
    singlePort = true,
    withClockGate = true,
    hasMbist = hasMbist,
    hasSramCtl = hasSramCtl
  ))
  us.extra_reset.get := io.update.reset_u.reduce(_ || _) && io.update.mask.reduce(_ || _)

  val table_banks = Seq.fill(nBanks)(
    Module(new FoldedSRAMTemplate(
      new TageEntry,
      set = bankSize,
      width = bankFoldWidth,
      way = numBr,
      shouldReset = true,
      holdRead = true,
      singlePort = true,
      withClockGate = true,
      hasMbist = hasMbist,
      hasSramCtl = hasSramCtl
    ))
  )

  val (s0_idx, s0_tag) = compute_tag_and_hash(req_unhashed_idx, io.req.bits.folded_hist)
  val s0_bank_req_1h = get_bank_mask(s0_idx)

  // Only the selected bank is read.
  for (b <- 0 until nBanks) {
    table_banks(b).io.r.req.valid := io.req.fire && s0_bank_req_1h(b)
    table_banks(b).io.r.req.bits.setIdx := get_bank_idx(s0_idx)
  }

  us.io.r.req.valid := io.req.fire
  us.io.r.req.bits.setIdx := s0_idx

  val s1_unhashed_idx = RegEnable(req_unhashed_idx, io.req.fire)
  val s1_idx = RegEnable(s0_idx, io.req.fire)
  val s1_tag = RegEnable(s0_tag, io.req.fire)
  val s1_pc = RegEnable(io.req.bits.pc, io.req.fire)
  val s1_bank_req_1h = RegEnable(s0_bank_req_1h, io.req.fire)
  // NOTE(review): this register is enabled by io.req.valid while the other s1
  // registers use io.req.fire - confirm the difference is intentional.
  val s1_bank_has_write_on_this_req = RegEnable(VecInit(table_banks.map(_.io.w.req.valid)), io.req.valid)

  // Set when the bank read by this request was being written in the request cycle;
  // the response is then reported as a miss.
  val resp_invalid_by_write = Wire(Bool())

  val tables_r = table_banks.map(_.io.r.resp.data) // s1
  val unconfs = tables_r.map(r => VecInit(r.map(e => WireInit(unconf(e.ctr))))) // do unconf cal in parallel
  val hits =
    tables_r.map(r =>
      VecInit(r.map(e => e.tag === s1_tag && e.valid && !resp_invalid_by_write))
    ) // do tag compare in parallel

  val resp_selected = Mux1H(s1_bank_req_1h, tables_r)
  val unconf_selected = Mux1H(s1_bank_req_1h, unconfs)
  val hit_selected = Mux1H(s1_bank_req_1h, hits)
  resp_invalid_by_write := Mux1H(s1_bank_req_1h, s1_bank_has_write_on_this_req)

  // Un-shuffle the physical ways back into logical branch order.
  val per_br_resp =
    VecInit((0 until numBr).map(i => Mux1H(UIntToOH(get_phy_br_idx(s1_unhashed_idx, i), numBr), resp_selected)))
  val per_br_unconf =
    VecInit((0 until numBr).map(i => Mux1H(UIntToOH(get_phy_br_idx(s1_unhashed_idx, i), numBr), unconf_selected)))
  val per_br_hit =
    VecInit((0 until numBr).map(i => Mux1H(UIntToOH(get_phy_br_idx(s1_unhashed_idx, i), numBr), hit_selected)))
  val per_br_u =
    VecInit((0 until numBr).map(i => Mux1H(UIntToOH(get_phy_br_idx(s1_unhashed_idx, i), numBr), us.io.r.resp.data)))

  for (i <- 0 until numBr) {
    io.resps(i).valid := per_br_hit(i)
    io.resps(i).bits.ctr := per_br_resp(i).ctr
    io.resps(i).bits.u := per_br_u(i)
    io.resps(i).bits.unconf := per_br_unconf(i)
  }

  // Use fetchpc to compute hash
  // The folded histories for an update are recomputed from the full update ghist.
  val update_folded_hist = WireInit(0.U.asTypeOf(new AllFoldedHistories(foldedGHistInfos)))
  update_folded_hist.getHistWithInfo(idxFhInfo).folded_hist := compute_folded_ghist(
    io.update.ghist,
    log2Ceil(nRowsPerBr)
  )
  update_folded_hist.getHistWithInfo(tagFhInfo).folded_hist := compute_folded_ghist(io.update.ghist, tagLen)
  update_folded_hist.getHistWithInfo(altTagFhInfo).folded_hist := compute_folded_ghist(io.update.ghist, tagLen - 1)

  val per_bank_update_wdata = Wire(Vec(nBanks, Vec(numBr, new TageEntry))) // corresponds to physical branches

  val update_unhashed_idx = getUnhashedIdx(io.update.pc)
  val (update_idx, update_tag) = compute_tag_and_hash(update_unhashed_idx, update_folded_hist)
  val update_req_bank_1h = get_bank_mask(update_idx)
  val update_idx_in_bank = get_bank_idx(update_idx)

  val per_bank_not_silent_update = Wire(Vec(nBanks, Vec(numBr, Bool()))) // corresponds to physical branches
  val per_bank_update_way_mask =
    VecInit((0 until nBanks).map(b =>
      VecInit((0 until numBr).map { pi =>
        // whether any of the logical branches updates on each slot
        Seq.tabulate(numBr)(li =>
          get_phy_br_idx(update_unhashed_idx, li) === pi.U &&
            io.update.mask(li)
        ).reduce(_ || _) && per_bank_not_silent_update(b)(pi)
      }).asUInt
    ))

  // val silent_update_from_wrbypass = Wire(Bool())

  for (b <- 0 until nBanks) {
    table_banks(b).io.w.apply(
      valid = per_bank_update_way_mask(b).orR && update_req_bank_1h(b),
      data = per_bank_update_wdata(b),
      setIdx = update_idx_in_bank,
      waymask = per_bank_update_way_mask(b)
    )
  }

  // Power-on reset
  val powerOnResetState = RegInit(true.B)
  when(us.io.r.req.ready && table_banks.map(_.io.r.req.ready).reduce(_ && _)) {
    // When all the SRAM first reach ready state, we consider power-on reset is done
    powerOnResetState := false.B
  }
  // Do not use table banks io.r.req.ready directly
  // All the us & table_banks are single port SRAM, ready := !wen
  // We do not want write request block the whole BPU pipeline
  io.req.ready := !powerOnResetState

  val bank_conflict = (0 until nBanks).map(b => table_banks(b).io.w.req.valid && s0_bank_req_1h(b)).reduce(_ || _)
  XSPerfAccumulate(f"tage_table_bank_conflict", bank_conflict)

  val update_u_idx = update_idx
  val update_u_way_mask = VecInit((0 until numBr).map { pi =>
    Seq.tabulate(numBr)(li =>
      get_phy_br_idx(update_unhashed_idx, li) === pi.U &&
        io.update.uMask(li)
    ).reduce(_ || _)
  }).asUInt

  val update_u_wdata = VecInit((0 until numBr).map(pi =>
    Mux1H(Seq.tabulate(numBr)(li =>
      (get_phy_br_idx(update_unhashed_idx, li) === pi.U, io.update.us(li))
    ))
  ))

  us.io.w.apply(
    io.update.mask.reduce(_ || _) && io.update.uMask.reduce(_ || _),
    update_u_wdata,
    update_u_idx,
    update_u_way_mask
  )

  // remove silent updates
  /** True when the update would not change `ctr`: all ones and taken, or all zeros and not taken. */
  def silentUpdate(ctr: UInt, taken: Bool) =
    ctr.andR && taken || !ctr.orR && !taken

  val bank_wrbypasses = Seq.fill(nBanks)(Seq.fill(numBr)(
    Module(new WrBypass(UInt(TageCtrBits.W), perBankWrbypassEntries, log2Ceil(bankSize)))
  )) // let it corresponds to logical brIdx

  for (b <- 0 until nBanks) {
    val not_silent_update = per_bank_not_silent_update(b)
    for (pi <- 0 until numBr) { // physical brIdx
      val update_wdata = per_bank_update_wdata(b)(pi)
      val br_lidx = get_lgc_br_idx(update_unhashed_idx, pi.U(log2Ceil(numBr).W))
      // select the write-bypass buffer of the logical branch stored in this physical way
      val wrbypass_io = Mux1H(UIntToOH(br_lidx, numBr), bank_wrbypasses(b).map(_.io))
      val wrbypass_hit = wrbypass_io.hit
      val wrbypass_ctr = wrbypass_io.hit_data(0).bits
      val wrbypass_data_valid = wrbypass_hit && wrbypass_io.hit_data(0).valid
      // A newly allocated entry starts at 4 (taken) or 3 (not taken), the two middle
      // values of the counter when TageCtrBits is 3; otherwise the bypassed counter
      // (if valid) or the counter carried by the update is moved one step.
      update_wdata.ctr :=
        Mux(
          io.update.alloc(br_lidx),
          Mux(io.update.takens(br_lidx), 4.U, 3.U),
          Mux(
            wrbypass_data_valid,
            inc_ctr(wrbypass_ctr, io.update.takens(br_lidx)),
            inc_ctr(io.update.oldCtrs(br_lidx), io.update.takens(br_lidx))
          )
        )
      // Allocations are always written, even if the counter alone would be a silent update.
      not_silent_update(pi) :=
        Mux(
          wrbypass_data_valid,
          !silentUpdate(wrbypass_ctr, io.update.takens(br_lidx)),
          !silentUpdate(io.update.oldCtrs(br_lidx), io.update.takens(br_lidx))
        ) ||
          io.update.alloc(br_lidx)

      update_wdata.valid := true.B
      update_wdata.tag := update_tag
    }

    for (li <- 0 until numBr) {
      val wrbypass = bank_wrbypasses(b)(li)
      val br_pidx = get_phy_br_idx(update_unhashed_idx, li)
      wrbypass.io.wen := io.update.mask(li) && update_req_bank_1h(b)
      wrbypass.io.write_idx := get_bank_idx(update_idx)
      wrbypass.io.write_data(0) := Mux1H(UIntToOH(br_pidx, numBr), per_bank_update_wdata(b)).ctr
    }
  }

  for (i <- 0 until numBr) {
    for (b <- 0 until nBanks) {
      val wrbypass = bank_wrbypasses(b)(i)
      XSPerfAccumulate(
        f"tage_table_bank_${b}_wrbypass_enq_$i",
        io.update.mask(i) && update_req_bank_1h(b) && !wrbypass.io.hit
      )
      XSPerfAccumulate(
        f"tage_table_bank_${b}_wrbypass_hit_$i",
        io.update.mask(i) && update_req_bank_1h(b) && wrbypass.io.hit
      )
    }
  }

  for (b <- 0 until nBanks) {
    val not_silent_update = per_bank_not_silent_update(b)
    XSPerfAccumulate(
      f"tage_table_bank_${b}_real_updates",
      io.update.mask.reduce(_ || _) && update_req_bank_1h(b) && not_silent_update.reduce(_ || _)
    )
    XSPerfAccumulate(
      f"tage_table_bank_${b}_silent_updates_eliminated",
      io.update.mask.reduce(_ || _) && update_req_bank_1h(b) && !not_silent_update.reduce(_ || _)
    )
  }

  XSPerfAccumulate("tage_table_hits", PopCount(io.resps.map(_.valid)))

  for (b <- 0 until nBanks) {
    XSPerfAccumulate(f"tage_table_bank_${b}_update_req", io.update.mask.reduce(_ || _) && update_req_bank_1h(b))
    for (i <- 0 until numBr) {
      val li = i
      val pidx = get_phy_br_idx(update_unhashed_idx, li)
      XSPerfAccumulate(
        f"tage_table_bank_${b}_br_li_${li}_updated",
        table_banks(b).io.w.req.valid && table_banks(b).io.w.req.bits.waymask.get(pidx)
      )
      val pi = i
      XSPerfAccumulate(
        f"tage_table_bank_${b}_br_pi_${pi}_updated",
        table_banks(b).io.w.req.valid && table_banks(b).io.w.req.bits.waymask.get(pi)
      )
    }
  }

  val u = io.update
  // NOTE(review): `b` and `ub` are not referenced anywhere in the visible part of this file.
  val b = PriorityEncoder(u.mask)
  val ub = PriorityEncoder(u.uMask)
  XSDebug(
    io.req.fire,
    p"tableReq: pc=0x${Hexadecimal(io.req.bits.pc)}, " +
      p"idx=$s0_idx, tag=$s0_tag\n"
  )
  for (i <- 0 until numBr) {
    XSDebug(
      RegNext(io.req.fire) && per_br_hit(i),
      p"TageTableResp_br_$i: idx=$s1_idx, hit:${per_br_hit(i)}, " +
        p"ctr:${io.resps(i).bits.ctr}, u:${io.resps(i).bits.u}\n"
    )
    // The format string used to end the pc field with a stray '}' that was printed literally.
    XSDebug(
      io.update.mask(i),
      p"update Table_br_$i: pc:${Hexadecimal(u.pc)}, " +
        p"taken:${u.takens(i)}, alloc:${u.alloc(i)}, oldCtrs:${u.oldCtrs(i)}\n"
    )
    val bank = OHToUInt(update_req_bank_1h.asUInt, nBanks)
    val pi = get_phy_br_idx(update_unhashed_idx, i)
    XSDebug(
      io.update.mask(i),
      p"update Table_$i: writing tag:$update_tag, " +
        p"ctr: ${per_bank_update_wdata(bank)(pi).ctr} in idx ${update_idx}\n"
    )
    XSDebug(RegNext(io.req.fire) && !per_br_hit(i), p"TageTableResp_$i: not hit!\n")
  }

  // ------------------------------Debug-------------------------------------
  // One flag per row, set when any update targets that row's index.
  val valids = RegInit(VecInit(Seq.fill(nRowsPerBr)(false.B)))
  when(io.update.mask.reduce(_ || _))(valids(update_idx) := true.B)
  XSDebug("Table usage:------------------------\n")
  XSDebug("%d out of %d rows are valid\n", PopCount(valids), nRowsPerBr.U)

}

/** Common base of the TAGE predictor implementations. */
abstract class BaseTage(implicit p: Parameters) extends BasePredictor with TageParams with BPUUtils {}

/** Placeholder predictor: drives an all-zero output and is always ready in s1 and s2. */
class FakeTage(implicit p: Parameters) extends BaseTage {
  io.out <> 0.U.asTypeOf(DecoupledIO(new BasePredictorOutput))

  // io.s0_ready := true.B
  io.s1_ready := true.B
  io.s2_ready := true.B
}

class Tage(implicit p: Parameters) extends BaseTage {

  val resp_meta = Wire(new TageMeta)
  override val meta_size = resp_meta.getWidth
  val tables = TageTableInfos.zipWithIndex.map {
    case ((nRows, histLen, tagLen), i) => {
      val t = Module(new TageTable(nRows, histLen, tagLen, i))
      t.io.req.valid := io.s0_fire(1)
      t.io.req.bits.pc := s0_pc_dup(1)
      t.io.req.bits.folded_hist := io.in.bits.folded_hist(1)
      t.io.req.bits.ghist
:= io.in.bits.ghist 628 t 629 } 630 } 631 val bt = Module(new TageBTable) 632 bt.io.req.valid := io.s0_fire(1) 633 bt.io.req.bits := s0_pc_dup(1) 634 private val mbistPl = MbistPipeline.PlaceMbistPipeline(1, "MbistPipeTage", hasMbist) 635 val bankTickCtrDistanceToTops = Seq.fill(numBr)(RegInit(((1 << TickWidth) - 1).U(TickWidth.W))) 636 val bankTickCtrs = Seq.fill(numBr)(RegInit(0.U(TickWidth.W))) 637 val useAltOnNaCtrs = RegInit( 638 VecInit(Seq.fill(numBr)( 639 VecInit(Seq.fill(NUM_USE_ALT_ON_NA)((1 << (USE_ALT_ON_NA_WIDTH - 1)).U(USE_ALT_ON_NA_WIDTH.W))) 640 )) 641 ) 642 643 val tage_fh_info = tables.map(_.getFoldedHistoryInfo).reduce(_ ++ _).toSet 644 override def getFoldedHistoryInfo = Some(tage_fh_info) 645 646 val s1_resps = VecInit(tables.map(_.io.resps)) 647 648 // val s1_bim = io.in.bits.resp_in(0).s1.full_pred 649 // val s2_bim = RegEnable(s1_bim, io.s1_fire) 650 651 val debug_pc_s0 = s0_pc_dup(1) 652 val debug_pc_s1 = RegEnable(s0_pc_dup(1), io.s0_fire(1)) 653 val debug_pc_s2 = RegEnable(debug_pc_s1, io.s1_fire(1)) 654 655 val s1_provideds = Wire(Vec(numBr, Bool())) 656 val s1_providers = Wire(Vec(numBr, UInt(log2Ceil(TageNTables).W))) 657 val s1_providerResps = Wire(Vec(numBr, new TageResp)) 658 // val s1_altProvideds = Wire(Vec(numBr, Bool())) 659 // val s1_altProviders = Wire(Vec(numBr, UInt(log2Ceil(TageNTables).W))) 660 // val s1_altProviderResps = Wire(Vec(numBr, new TageResp)) 661 val s1_altUsed = Wire(Vec(numBr, Bool())) 662 val s1_tageTakens = Wire(Vec(numBr, Bool())) 663 val s1_basecnts = Wire(Vec(numBr, UInt(2.W))) 664 val s1_useAltOnNa = Wire(Vec(numBr, Bool())) 665 666 val s2_provideds = RegEnable(s1_provideds, io.s1_fire(1)) 667 val s2_providers = RegEnable(s1_providers, io.s1_fire(1)) 668 val s2_providerResps = RegEnable(s1_providerResps, io.s1_fire(1)) 669 // val s2_altProvideds = RegEnable(s1_altProvideds, io.s1_fire) 670 // val s2_altProviders = RegEnable(s1_altProviders, io.s1_fire) 671 // val s2_altProviderResps = 
RegEnable(s1_altProviderResps, io.s1_fire) 672 val s2_altUsed = RegEnable(s1_altUsed, io.s1_fire(1)) 673 val s2_tageTakens_dup = io.s1_fire.map(f => RegEnable(s1_tageTakens, f)) 674 val s2_basecnts = RegEnable(s1_basecnts, io.s1_fire(1)) 675 val s2_useAltOnNa = RegEnable(s1_useAltOnNa, io.s1_fire(1)) 676 677 io.out := io.in.bits.resp_in(0) 678 io.out.last_stage_meta := resp_meta.asUInt 679 680 val resp_s2 = io.out.s2 681 682 // Update logic 683 val u_valid = RegNext(io.update.valid, init = false.B) 684 val update = Wire(new BranchPredictionUpdate) 685 update := RegEnable(io.update.bits, io.update.valid) 686 687 // The pc register has been moved outside of predictor, pc field of update bundle and other update data are not in the same stage 688 // so io.update.bits.pc is used directly here 689 val update_pc = io.update.bits.pc 690 691 // To improve Clock Gating Efficiency 692 val u_valids_for_cge = 693 VecInit((0 until TageBanks).map(w => 694 io.update.bits.ftb_entry.brValids(w) && io.update.valid 695 )) // io.update.bits.ftb_entry.always_taken has timing issues(FTQEntryGen) 696 val u_meta = io.update.bits.meta.asTypeOf(new TageMeta) 697 val updateMeta = Wire(new TageMeta) 698 update.meta := updateMeta.asUInt 699 updateMeta := RegEnable(u_meta, io.update.valid) 700 for (i <- 0 until numBr) { 701 updateMeta.providers(i).bits := RegEnable( 702 u_meta.providers(i).bits, 703 u_meta.providers(i).valid && u_valids_for_cge(i) 704 ) 705 updateMeta.providerResps(i) := RegEnable( 706 u_meta.providerResps(i), 707 u_meta.providers(i).valid && u_valids_for_cge(i) 708 ) 709 updateMeta.altUsed(i) := RegEnable(u_meta.altUsed(i), u_valids_for_cge(i)) 710 updateMeta.allocates(i) := RegEnable( 711 u_meta.allocates(i), 712 io.update.valid // not using mispred_mask, because mispred_mask timing is bad 713 ) 714 } 715 if (EnableSC) { 716 for (w <- 0 until TageBanks) { 717 updateMeta.scMeta.get.scPreds(w) := RegEnable( 718 u_meta.scMeta.get.scPreds(w), 719 u_valids_for_cge(w) && 
u_meta.providers(w).valid 720 ) 721 updateMeta.scMeta.get.ctrs(w) := RegEnable( 722 u_meta.scMeta.get.ctrs(w), 723 u_valids_for_cge(w) && u_meta.providers(w).valid 724 ) 725 } 726 } 727 update.ghist := RegEnable(io.update.bits.ghist, io.update.valid) // TODO: CGE 728 729 val updateValids = VecInit((0 until TageBanks).map(w => 730 update.ftb_entry.brValids(w) && u_valid && !update.ftb_entry.strong_bias(w) && 731 !(PriorityEncoder(update.br_taken_mask) < w.U) 732 )) 733 734 val updateMask = WireInit(0.U.asTypeOf(Vec(numBr, Vec(TageNTables, Bool())))) 735 val updateUMask = WireInit(0.U.asTypeOf(Vec(numBr, Vec(TageNTables, Bool())))) 736 val updateResetU = WireInit(0.U.asTypeOf(Vec(numBr, Bool()))) // per predictor 737 val updateTakens = Wire(Vec(numBr, Vec(TageNTables, Bool()))) 738 val updateAlloc = WireInit(0.U.asTypeOf(Vec(numBr, Vec(TageNTables, Bool())))) 739 val updateOldCtrs = Wire(Vec(numBr, Vec(TageNTables, UInt(TageCtrBits.W)))) 740 val updateU = Wire(Vec(numBr, Vec(TageNTables, Bool()))) 741 val updatebcnt = Wire(Vec(TageBanks, UInt(2.W))) 742 val baseupdate = WireInit(0.U.asTypeOf(Vec(TageBanks, Bool()))) 743 val bUpdateTakens = Wire(Vec(TageBanks, Bool())) 744 updateTakens := DontCare 745 updateOldCtrs := DontCare 746 updateU := DontCare 747 748 val updateMisPreds = update.mispred_mask 749 750 class TageTableInfo(implicit p: Parameters) extends XSBundle { 751 val resp = new TageResp 752 val tableIdx = UInt(log2Ceil(TageNTables).W) 753 val use_alt_on_unconf = Bool() 754 } 755 // access tag tables and output meta info 756 757 for (i <- 0 until numBr) { 758 val useAltCtr = Mux1H(UIntToOH(use_alt_idx(s1_pc_dup(0)), NUM_USE_ALT_ON_NA), useAltOnNaCtrs(i)) 759 val useAltOnNa = useAltCtr(USE_ALT_ON_NA_WIDTH - 1) // highest bit 760 761 val s1_per_br_resp = VecInit(s1_resps.map(_(i))) 762 val inputRes = s1_per_br_resp.zipWithIndex.map { 763 case (r, idx) => { 764 val tableInfo = Wire(new TageTableInfo) 765 tableInfo.resp := r.bits 766 tableInfo.use_alt_on_unconf := 
r.bits.unconf && useAltOnNa 767 tableInfo.tableIdx := idx.U(log2Ceil(TageNTables).W) 768 (r.valid, tableInfo) 769 } 770 } 771 val providerInfo = ParallelPriorityMux(inputRes.reverse) 772 val provided = inputRes.map(_._1).reduce(_ || _) 773 // val altProvided = selectedInfo.hasTwo 774 // val providerInfo = selectedInfo 775 // val altProviderInfo = selectedInfo.second 776 s1_provideds(i) := provided 777 s1_providers(i) := providerInfo.tableIdx 778 s1_providerResps(i) := providerInfo.resp 779 // s1_altProvideds(i) := altProvided 780 // s1_altProviders(i) := altProviderInfo.tableIdx 781 // s1_altProviderResps(i) := altProviderInfo.resp 782 783 resp_meta.providers(i).valid := RegEnable(s2_provideds(i), io.s2_fire(1)) 784 resp_meta.providers(i).bits := RegEnable(s2_providers(i), io.s2_fire(1)) 785 resp_meta.providerResps(i) := RegEnable(s2_providerResps(i), io.s2_fire(1)) 786 // resp_meta.altProviders(i).valid := RegEnable(s2_altProvideds(i), io.s2_fire) 787 // resp_meta.altProviders(i).bits := RegEnable(s2_altProviders(i), io.s2_fire) 788 // resp_meta.altProviderResps(i) := RegEnable(s2_altProviderResps(i), io.s2_fire) 789 resp_meta.pred_cycle.map(_ := RegEnable(GTimer(), io.s2_fire(1))) 790 resp_meta.use_alt_on_na.map(_(i) := RegEnable(s2_useAltOnNa(i), io.s2_fire(1))) 791 792 // Create a mask fo tables which did not hit our query, and also contain useless entries 793 // and also uses a longer history than the provider 794 val allocatableSlots = 795 RegEnable( 796 VecInit(s1_per_br_resp.map(r => !r.valid && !r.bits.u)).asUInt & 797 ~(LowerMask(UIntToOH(s1_providers(i)), TageNTables) & 798 Fill(TageNTables, s1_provideds(i).asUInt)), 799 io.s1_fire(1) 800 ) 801 802 resp_meta.allocates(i) := RegEnable(allocatableSlots, io.s2_fire(1)) 803 804 val s1_bimCtr = bt.io.s1_cnt(i) 805 s1_altUsed(i) := !provided || providerInfo.use_alt_on_unconf 806 s1_tageTakens(i) := 807 Mux(s1_altUsed(i), s1_bimCtr(1), providerInfo.resp.ctr(TageCtrBits - 1)) 808 s1_basecnts(i) := s1_bimCtr 809 
s1_useAltOnNa(i) := providerInfo.use_alt_on_unconf 810 811 resp_meta.altUsed(i) := RegEnable(s2_altUsed(i), io.s2_fire(1)) 812 resp_meta.basecnts(i) := RegEnable(s2_basecnts(i), io.s2_fire(1)) 813 814 val tage_enable_dup = dup(RegNext(io.ctrl.tage_enable)) 815 for (tage_enable & fp & s2_tageTakens <- tage_enable_dup zip resp_s2.full_pred zip s2_tageTakens_dup) { 816 when(tage_enable) { 817 fp.br_taken_mask(i) := s2_tageTakens(i) 818 } 819 } 820 821 // ---------------- update logics below ------------------// 822 val hasUpdate = updateValids(i) 823 val updateMispred = updateMisPreds(i) 824 val updateTaken = hasUpdate && update.br_taken_mask(i) 825 826 val updateProvided = updateMeta.providers(i).valid 827 val updateProvider = updateMeta.providers(i).bits 828 val updateProviderResp = updateMeta.providerResps(i) 829 val updateProviderCorrect = updateProviderResp.ctr(TageCtrBits - 1) === updateTaken 830 val updateUseAlt = updateMeta.altUsed(i) 831 val updateAltDiffers = updateMeta.altDiffers(i) 832 val updateAltIdx = use_alt_idx(update_pc) 833 val updateUseAltCtr = Mux1H(UIntToOH(updateAltIdx, NUM_USE_ALT_ON_NA), useAltOnNaCtrs(i)) 834 val updateAltPred = updateMeta.altPreds(i) 835 val updateAltCorrect = updateAltPred === updateTaken 836 837 val updateProviderWeakTaken = posUnconf(updateProviderResp.ctr) 838 val updateProviderWeaknotTaken = negUnconf(updateProviderResp.ctr) 839 val updateProviderWeak = unconf(updateProviderResp.ctr) 840 841 when(hasUpdate) { 842 when(updateProvided && updateProviderWeak && updateAltDiffers) { 843 val newCtr = satUpdate(updateUseAltCtr, USE_ALT_ON_NA_WIDTH, updateAltCorrect) 844 useAltOnNaCtrs(i)(updateAltIdx) := newCtr 845 } 846 } 847 848 XSPerfAccumulate(f"tage_bank_${i}_use_alt_pred", hasUpdate && updateUseAlt) 849 XSPerfAccumulate(f"tage_bank_${i}_alt_correct", hasUpdate && updateUseAlt && updateAltCorrect) 850 XSPerfAccumulate(f"tage_bank_${i}_alt_wrong", hasUpdate && updateUseAlt && !updateAltCorrect) 851 
// Update-side perf counters, table update / allocation logic, update wiring and debug output.
    XSPerfAccumulate(f"tage_bank_${i}_alt_differs", hasUpdate && updateAltDiffers)
    XSPerfAccumulate(
      f"tage_bank_${i}_use_alt_on_na_ctr_updated",
      hasUpdate && updateAltDiffers && updateProvided && updateProviderWeak
    )
    XSPerfAccumulate(
      f"tage_bank_${i}_use_alt_on_na_ctr_inc",
      hasUpdate && updateAltDiffers && updateProvided && updateProviderWeak && updateAltCorrect
    )
    XSPerfAccumulate(
      f"tage_bank_${i}_use_alt_on_na_ctr_dec",
      hasUpdate && updateAltDiffers && updateProvided && updateProviderWeak && !updateAltCorrect
    )

    XSPerfAccumulate(f"tage_bank_${i}_na", hasUpdate && updateProvided && updateProviderWeak)
    XSPerfAccumulate(
      f"tage_bank_${i}_use_na_correct",
      hasUpdate && updateProvided && updateProviderWeak && !updateUseAlt && !updateMispred
    )
    XSPerfAccumulate(
      f"tage_bank_${i}_use_na_wrong",
      hasUpdate && updateProvided && updateProviderWeak && !updateUseAlt && updateMispred
    )

    // Only the side effect is wanted here, hence foreach rather than map.
    updateMeta.use_alt_on_na.foreach(uaon =>
      XSPerfAccumulate(f"tage_bank_${i}_use_alt_on_na", hasUpdate && uaon(i))
    )

    // Update the provider entry: counter moves towards the resolved direction; the useful bit is
    // written only when the alternate prediction differed, and is set iff the provider was right.
    when(hasUpdate) {
      when(updateProvided) {
        updateMask(i)(updateProvider)    := true.B
        updateUMask(i)(updateProvider)   := updateAltDiffers
        updateU(i)(updateProvider)       := updateProviderCorrect
        updateTakens(i)(updateProvider)  := updateTaken
        updateOldCtrs(i)(updateProvider) := updateProviderResp.ctr
        updateAlloc(i)(updateProvider)   := false.B
      }
    }

    // update base table if used base table to predict
    baseupdate(i)    := hasUpdate && updateUseAlt
    updatebcnt(i)    := updateMeta.basecnts(i)
    bUpdateTakens(i) := updateTaken

    // Allocate on a misprediction, except when the alternate was used although the provider
    // itself was correct.
    val needToAllocate  = hasUpdate && updateMispred && !(updateUseAlt && updateProviderCorrect && updateProvided)
    val allocatableMask = updateMeta.allocates(i)
    val canAllocate     = updateMeta.allocateValid(i)

    val allocLFSR = random.LFSR(width = 15)(TageNTables - 1, 0)
    // Candidate tables relative to the provider; every table is a candidate when nothing provided.
    val longerHistoryTableMask =
      ~(LowerMask(UIntToOH(updateProvider), TageNTables) & Fill(TageNTables, updateProvided.asUInt))
    val canAllocMask     = allocatableMask & longerHistoryTableMask
    val allocFailureMask = ~allocatableMask & longerHistoryTableMask

    // The tick counter moves by the difference between blocked and free candidate tables.
    val allocFailureCnt = PopCount(allocFailureMask)
    val canAllocCnt     = PopCount(canAllocMask)
    val tickInc         = allocFailureCnt > canAllocCnt
    val tickDec         = canAllocCnt > allocFailureCnt
    val tickIncVal      = allocFailureCnt - canAllocCnt
    val tickDecVal      = canAllocCnt - allocFailureCnt
    val tickToPosSat    = tickIncVal >= bankTickCtrDistanceToTops(i) && tickInc
    val tickToNegSat    = tickDecVal >= bankTickCtrs(i) && tickDec
    val tickCtrMax      = ((1 << TickWidth) - 1).U

    // Prefer the first free table that survives the LFSR mask; fall back to the first free table
    // when the masked pick is not actually allocatable.
    val firstEntry  = PriorityEncoder(canAllocMask)
    val maskedEntry = PriorityEncoder(canAllocMask & allocLFSR)
    val allocate    = Mux(canAllocMask(maskedEntry), maskedEntry, firstEntry)

    when(needToAllocate) {
      // val allocate = updateMeta.allocates(i).bits
      // bankTickCtrDistanceToTops mirrors bankTickCtrs as the remaining distance to the maximum.
      when(tickInc) {
        when(tickToPosSat) {
          bankTickCtrs(i)              := tickCtrMax
          bankTickCtrDistanceToTops(i) := 0.U
        }.otherwise {
          bankTickCtrs(i)              := bankTickCtrs(i) + tickIncVal
          bankTickCtrDistanceToTops(i) := bankTickCtrDistanceToTops(i) - tickIncVal
        }
      }.elsewhen(tickDec) {
        when(tickToNegSat) {
          bankTickCtrs(i)              := 0.U
          bankTickCtrDistanceToTops(i) := tickCtrMax
        }.otherwise {
          bankTickCtrs(i)              := bankTickCtrs(i) - tickDecVal
          bankTickCtrDistanceToTops(i) := bankTickCtrDistanceToTops(i) + tickDecVal
        }
      }
      when(canAllocate) {
        updateMask(i)(allocate)   := true.B
        updateTakens(i)(allocate) := updateTaken
        updateAlloc(i)(allocate)  := true.B
        updateUMask(i)(allocate)  := true.B
        updateU(i)(allocate)      := false.B
      }
      // Must stay last: once the counter has reached its maximum, this overrides the tick update
      // above (last connect wins), clears the counter and requests a reset of the useful bits.
      when(bankTickCtrs(i) === tickCtrMax) {
        bankTickCtrs(i)              := 0.U
        bankTickCtrDistanceToTops(i) := tickCtrMax
        updateResetU(i)              := true.B
      }
    }
    XSPerfAccumulate(f"tage_bank_${i}_update_allocate_failure", needToAllocate && !canAllocate)
    XSPerfAccumulate(f"tage_bank_${i}_update_allocate_success", needToAllocate && canAllocate)
    XSPerfAccumulate(s"tage_bank_${i}_mispred", hasUpdate && updateMispred)
    XSPerfAccumulate(s"tage_bank_${i}_reset_u", updateResetU(i))
    for (t <- 0 to TageNTables) {
      XSPerfAccumulate(f"tage_bank_${i}_tick_inc_${t}", needToAllocate && tickInc && tickIncVal === t.U)
      XSPerfAccumulate(f"tage_bank_${i}_tick_dec_${t}", needToAllocate && tickDec && tickDecVal === t.U)
    }
  }

  // Per-table write enable: high when any bank updates that table this cycle.
  val realWens = updateMask.transpose.map(v => v.reduce(_ | _))
  for (i <- 0 until TageNTables) {
    val realWen = realWens(i)
    // Per-bank update fields, delayed by one cycle. Payload registers are only clocked when the
    // table is actually written.
    for (w <- 0 until TageBanks) {
      tables(i).io.update.reset_u(w) := RegNext(updateResetU(w))
      tables(i).io.update.mask(w)    := RegNext(updateMask(w)(i))
      tables(i).io.update.takens(w)  := RegEnable(updateTakens(w)(i), realWen)
      tables(i).io.update.alloc(w)   := RegEnable(updateAlloc(w)(i), realWen)
      tables(i).io.update.oldCtrs(w) := RegEnable(updateOldCtrs(w)(i), realWen)

      tables(i).io.update.uMask(w) := RegEnable(updateUMask(w)(i), realWen)
      tables(i).io.update.us(w)    := RegEnable(updateU(w)(i), realWen)
    }
    // pc and ghist do not depend on the bank, so they are registered once per table instead of
    // being re-created (and overridden by last connect) for every bank.
    // use fetch pc instead of instruction pc
    tables(i).io.update.pc    := RegEnable(update_pc, realWen)
    tables(i).io.update.ghist := RegEnable(update.ghist, realWen)
  }

  // Base-table update, delayed by one cycle; payload is captured when any slot updates it.
  val baseWen = baseupdate.reduce(_ | _)
  bt.io.update_mask   := RegNext(baseupdate)
  bt.io.update_cnt    := RegEnable(updatebcnt, baseWen)
  bt.io.update_pc     := RegEnable(update_pc, baseWen)
  bt.io.update_takens := RegEnable(bUpdateTakens, baseWen)

  // all should be ready for req
  io.s1_ready := tables.map(_.io.req.ready).reduce(_ && _) && bt.io.req.ready
  XSPerfAccumulate(f"tage_write_blocks_read", !io.s1_ready)

  /** Registers a perf counter named `<name>_at_pred`. */
  def pred_perf(name: String, cnt: UInt) = XSPerfAccumulate(s"${name}_at_pred", cnt)

  /** Registers a perf counter named `<name>_at_commit`. */
  def commit_perf(name: String, cnt: UInt) = XSPerfAccumulate(s"${name}_at_commit", cnt)

  /** Registers the prediction-time and commit-time counters for the same event `name`. */
  def tage_perf(name: String, pred_cnt: UInt, commit_cnt: UInt) = {
    pred_perf(name, pred_cnt)
    commit_perf(name, commit_cnt)
  }

  // Debug and perf info
  for (b <- 0 until TageBanks) {
    val updateProvided = updateMeta.providers(b).valid
    val updateProvider = updateMeta.providers(b).bits
    for (i <- 0 until TageNTables) {
      val pred_i_provided =
        s2_provideds(b) && s2_providers(b) === i.U
      val commit_i_provided =
        updateProvided && updateProvider === i.U && updateValids(b)
      tage_perf(
        s"bank_${b}_tage_table_${i}_provided",
        PopCount(pred_i_provided),
        PopCount(commit_i_provided)
      )
    }
    tage_perf(
      s"bank_${b}_tage_use_bim",
      PopCount(!s2_provideds(b)),
      PopCount(!updateProvided && updateValids(b))
    )
    // Uses the shared, width-derived `unconf` helper (the same one the update logic uses) rather
    // than a local copy with hard-coded 3/4, so the counter tracks any change of counter width.
    tage_perf(
      s"bank_${b}_tage_use_altpred",
      PopCount(s2_provideds(b) && unconf(s2_providerResps(b).ctr)),
      PopCount(updateProvided &&
        unconf(updateMeta.providerResps(b).ctr) && updateValids(b))
    )
    tage_perf(
      s"bank_${b}_tage_provided",
      PopCount(s2_provideds(b)),
      PopCount(updateProvided && updateValids(b))
    )
  }

  for (b <- 0 until TageBanks) {
    val m = updateMeta
    // val bri = u.metas(b)
    // The `cycle` and `bimctr` fields are printed as constant 0.
    XSDebug(
      updateValids(b),
      "update(%d): pc=%x, cycle=%d, taken:%b, misPred:%d, bimctr:%d, pvdr(%d):%d, altDiff:%d, pvdrU:%d, pvdrCtr:%d, alloc:%b\n",
      b.U,
      update_pc,
      0.U,
      update.br_taken_mask(b),
      update.mispred_mask(b),
      0.U,
      m.providers(b).valid,
      m.providers(b).bits,
      m.altDiffers(b),
      m.providerResps(b).u,
      m.providerResps(b).ctr,
      m.allocates(b)
    )
  }
  val s2_resps = RegEnable(s1_resps, io.s1_fire(1))
  XSDebug("req: v=%d, pc=0x%x\n", io.s0_fire(1), s0_pc_dup(1))
  XSDebug("s1_fire:%d, resp: pc=%x\n", io.s1_fire(1), debug_pc_s1)
  // NOTE(review): despite the "s2_fireOnLastCycle" label this print has no condition argument,
  // unlike the update print above - confirm whether gating on s2 fire was intended.
  XSDebug(
    "s2_fireOnLastCycle: resp: pc=%x, target=%x, hits=%b, takens=%b\n",
    debug_pc_s2,
    io.out.s2.getTarget(1),
    s2_provideds.asUInt,
    s2_tageTakens_dup(0).asUInt
  )

  for (b <- 0 until TageBanks) {
    for (i <- 0 until TageNTables) {
      XSDebug(
        "bank(%d)_tage_table(%d): valid:%b, resp_ctr:%d, resp_us:%d\n",
        b.U,
        i.U,
        s2_resps(i)(b).valid,
        s2_resps(i)(b).bits.ctr,
        s2_resps(i)(b).bits.u
      )
    }
  }
  // XSDebug(io.update.valid && updateIsBr, p"update: sc: ${updateSCMeta}\n")
  // XSDebug(true.B, p"scThres: use(${useThreshold}), update(${updateThreshold})\n")
}

/** TAGE predictor combined with the `HasSC` mix-in; adds no members of its own. */
class Tage_SC(implicit p: Parameters) extends Tage with HasSC {}