xref: /XiangShan/src/main/scala/xiangshan/frontend/Tage.scala (revision 30f35717e23156cb95b30a36db530384545b48a4)
1/***************************************************************************************
2* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
3* Copyright (c) 2020-2021 Peng Cheng Laboratory
4*
5* XiangShan is licensed under Mulan PSL v2.
6* You can use this software according to the terms and conditions of the Mulan PSL v2.
7* You may obtain a copy of Mulan PSL v2 at:
8*          http://license.coscl.org.cn/MulanPSL2
9*
10* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13*
14* See the Mulan PSL v2 for more details.
15*
16*
17* Acknowledgement
18*
19* This implementation is inspired by several key papers:
20* [1] Pierre Michaud. "[A PPM-like, tag-based branch predictor.](https://inria.hal.science/hal-03406188)" The Journal
21* of Instruction-Level Parallelism (JILP) 7: 10. 2005.
22* [2] André Seznec, and Pierre Michaud. "[A case for (partially) tagged geometric history length branch prediction.]
23* (https://inria.hal.science/hal-03408381)" The Journal of Instruction-Level Parallelism (JILP) 8: 23. 2006.
24* [3] André Seznec. "[A 256 kbits l-tage branch predictor.](http://www.irisa.fr/caps/people/seznec/L-TAGE.pdf)" The
25* Journal of Instruction-Level Parallelism (JILP) Special Issue: The Second Championship Branch Prediction Competition
26* (CBP) 9: 1-6. 2007.
27***************************************************************************************/
28
29package xiangshan.frontend
30
31import chisel3._
32import chisel3.util._
33import org.chipsalliance.cde.config.Parameters
34import scala.{Tuple2 => &}
35import scala.math.min
36import utility._
37import utility.mbist.MbistPipeline
38import utility.sram.FoldedSRAMTemplate
39import utility.sram.SRAMConflictBehavior
40import xiangshan._
41
/** Elaboration-time constants and small combinational helpers shared by the TAGE bundles and modules. */
trait TageParams extends HasBPUConst with HasXSParameter {
  // One tagged table per entry of TageTableInfos
  val TageNTables = TageTableInfos.size
  // Width of the prediction counter stored in each tagged-table entry
  val TageCtrBits = 3
  val TickWidth   = 7

  // Width and number of the use-alt-on-na counters
  val USE_ALT_ON_NA_WIDTH = 4
  val NUM_USE_ALT_ON_NA   = 128

  /** Selects a use-alt-on-na counter: the low log2Ceil(NUM_USE_ALT_ON_NA) bits of pc above the instruction offset. */
  def use_alt_idx(pc: UInt) = {
    val idxBits = log2Ceil(NUM_USE_ALT_ON_NA)
    (pc >> instOffsetBits)(idxBits - 1, 0)
  }

  // Sum over all tables of rows * (1 + tagLen + TageCtrBits + 1);
  // the two single bits are presumably the valid and useful bits (not shown here, confirm).
  val TotalBits = TageTableInfos.map { case (rows, _, tagBits) =>
    rows * (1 + tagBits + TageCtrBits + 1)
  }.reduce(_ + _)

  /** True when ctr equals 2^(width-1), i.e. only its top bit is set. */
  def posUnconf(ctr: UInt) = {
    val weakUpper = 1 << (ctr.getWidth - 1)
    ctr === weakUpper.U
  }

  /** True when ctr equals 2^(width-1) - 1, i.e. every bit except the top one is set. */
  def negUnconf(ctr: UInt) = {
    val weakLower = (1 << (ctr.getWidth - 1)) - 1
    ctr === weakLower.U
  }

  /** True when ctr holds either of the two middle values matched by posUnconf / negUnconf. */
  def unconf(ctr: UInt) = posUnconf(ctr) || negUnconf(ctr)

  val unshuffleBitWidth = log2Ceil(numBr)

  /** Low unshuffleBitWidth bits of idx, used to permute branch slots. */
  def get_unshuffle_bits(idx: UInt) = idx(unshuffleBitWidth - 1, 0)

  // xor hashes are reversible: applying the same xor mask maps logical -> physical and back

  /** Physical slot index for logical branch br_lidx under the shuffle derived from unhashed_idx. */
  def get_phy_br_idx(unhashed_idx: UInt, br_lidx: Int) =
    get_unshuffle_bits(unhashed_idx) ^ br_lidx.U(unshuffleBitWidth.W)

  /** Logical branch index for physical slot br_pidx under the shuffle derived from unhashed_idx. */
  def get_lgc_br_idx(unhashed_idx: UInt, br_pidx: UInt) =
    get_unshuffle_bits(unhashed_idx) ^ br_pidx

}
72
/** Mix-in that folds a history vector into a narrower value by xor-ing fixed-width chunks. */
trait HasFoldedHistory {
  val histLen: Int

  /**
    * Splits the low `histLen` bits of `hist` into chunks of `l` bits (the last chunk may be
    * shorter) and xors all chunks together.
    *
    * @param hist    history bits
    * @param l       chunk width
    * @param histLen number of history bits to fold; a non-positive value yields 0.U
    */
  def compute_folded_hist(hist: UInt, l: Int)(histLen: Int) =
    if (histLen <= 0) 0.U
    else {
      val chunkCount = (histLen + l - 1) / l
      val chunks = Seq.tabulate(chunkCount) { i =>
        val lo = i * l
        val hi = min(lo + l, histLen) - 1
        hist(hi, lo)
      }
      ParallelXOR(chunks)
    }

  // compute_folded_hist with the history length fixed to this instance's histLen
  val compute_folded_ghist = (hist: UInt, l: Int) => compute_folded_hist(hist, l)(histLen)
}
83
/** Common base for every TAGE bundle: an XSBundle with the TAGE parameters and BPU helpers mixed in. */
abstract class TageBundle(implicit p: Parameters) extends XSBundle with TageParams with BPUUtils
86
/** Common base for every TAGE module: an XSModule with the TAGE parameters and BPU helpers mixed in. */
abstract class TageModule(implicit p: Parameters) extends XSModule with TageParams with BPUUtils
89
/** Read request sent to a tagged table. */
class TageReq(implicit p: Parameters) extends TageBundle {
  // pc used to derive the unhashed table index
  val pc = UInt(VAddrBits.W)
  // full global history
  val ghist = UInt(HistoryLength.W)
  // folded histories for every (histLen, foldedLen) pair in foldedGHistInfos
  val folded_hist = new AllFoldedHistories(foldedGHistInfos)
}
95
/** Part of a tagged-table response that is also kept in the prediction meta: counter and useful bit. */
class TageResp_meta(implicit p: Parameters) extends TageBundle {
  val ctr = UInt(TageCtrBits.W) // prediction counter
  val u   = Bool()              // useful bit
}
100
/** Full tagged-table response: counter and useful bit plus the precomputed "unconfident" flag. */
class TageResp(implicit p: Parameters) extends TageResp_meta {
  // set when ctr holds one of the two middle values (see TageParams.unconf)
  val unconf = Bool()
}
104
/** Update request sent to a tagged table; every Vec is indexed by logical branch slot. */
class TageUpdate(implicit p: Parameters) extends TageBundle {
  val pc    = UInt(VAddrBits.W)
  val ghist = UInt(HistoryLength.W)

  // --- tag / counter update ---
  val mask    = Vec(numBr, Bool())              // slot has a counter update
  val takens  = Vec(numBr, Bool())              // resolved direction
  val alloc   = Vec(numBr, Bool())              // allocate a new entry instead of stepping the counter
  val oldCtrs = Vec(numBr, UInt(TageCtrBits.W)) // counter value to step from

  // --- useful-bit update ---
  val uMask   = Vec(numBr, Bool()) // slot has a useful-bit update
  val us      = Vec(numBr, Bool()) // new useful-bit value
  val reset_u = Vec(numBr, Bool()) // request a reset of the useful bits
}
118
/** Per-prediction meta information that is carried along and handed back on update. */
class TageMeta(implicit p: Parameters) extends TageBundle with HasSCParameter {
  // which tagged table provided the prediction (valid = some table hit)
  val providers = Vec(numBr, ValidUndirectioned(UInt(log2Ceil(TageNTables).W)))
  // counter / useful bit read from the provider
  val providerResps = Vec(numBr, new TageResp_meta)
  // val altProviders = Vec(numBr, ValidUndirectioned(UInt(log2Ceil(TageNTables).W)))
  // val altProviderResps = Vec(numBr, new TageResp)
  // the base-table prediction was used instead of the provider
  val altUsed = Vec(numBr, Bool())
  // base-table counters
  val basecnts = Vec(numBr, UInt(2.W))
  // one bit per tagged table
  val allocates = Vec(numBr, UInt(TageNTables.W))
  // present only when the corresponding feature / platform switch is set
  val scMeta        = if (EnableSC) Some(new SCMeta(SCNTables)) else None
  val pred_cycle    = if (!env.FPGAPlatform) Some(UInt(64.W)) else None
  val use_alt_on_na = if (!env.FPGAPlatform) Some(Vec(numBr, Bool())) else None

  /** Base-table direction per slot: the top bit of each 2-bit counter. */
  def altPreds = basecnts.map(cnt => cnt(1))

  /** Per slot, whether any table is marked in `allocates`. */
  def allocateValid = allocates.map(slots => slots.orR)

  /** True when the base-table direction and the provider direction disagree for slot i. */
  def altDiffers(i: Int) = {
    val altPred      = basecnts(i)(1)
    val providerPred = providerResps(i).ctr(TageCtrBits - 1)
    altPred =/= providerPred
  }

  /** Final direction for slot i: base table when altUsed, otherwise the provider counter's top bit. */
  def takens(i: Int) = {
    val altPred      = basecnts(i)(1)
    val providerPred = providerResps(i).ctr(TageCtrBits - 1)
    Mux(altUsed(i), altPred, providerPred)
  }
}
137
/** Sizing parameters of the TAGE base table. */
trait TBTParams extends HasXSParameter with TageParams {
  // number of rows in the base table
  val BtSize = 2048
  // number of entries in the base table's write bypass
  val bypassEntries = 8
}
142
/**
  * TAGE base table: an SRAM of 2-bit saturating counters with BtSize rows and numBr ways,
  * indexed by pc.
  *
  * - Read: the request pc is turned into a row index in s0; the counters are available on
  *   `io.s1_cnt` (indexed by logical branch) one stage later.
  * - Update: `io.update_*` step the selected counters up or down; a write bypass supplies the
  *   most recently written value when it hits, otherwise `io.update_cnt` is used.
  * - Reset: after reset every row is written with 2 (weak taken), one row per cycle;
  *   `io.req.ready` stays low until this sweep is finished.
  */
class TageBTable(implicit p: Parameters) extends XSModule with TBTParams {
  val io = IO(new Bundle {
    val req           = Flipped(DecoupledIO(UInt(VAddrBits.W))) // s0_pc
    val s1_cnt        = Output(Vec(numBr, UInt(2.W)))
    val update_mask   = Input(Vec(TageBanks, Bool()))
    val update_pc     = Input(UInt(VAddrBits.W))
    val update_cnt    = Input(Vec(numBr, UInt(2.W)))
    val update_takens = Input(Vec(TageBanks, Bool()))
    // val update  = Input(new TageUpdate)
  })

  val bimAddr = new TableAddr(log2Up(BtSize), instOffsetBits)

  // Physical SRAM Size
  val SRAMSize  = 512
  val foldWidth = BtSize / SRAMSize

  val bt = Module(
    new FoldedSRAMTemplate(
      UInt(2.W),
      setSplit = 2,
      waySplit = 1,
      dataSplit = 1,
      set = BtSize,
      width = foldWidth,
      way = numBr,
      shouldReset = false,
      holdRead = true,
      conflictBehavior = SRAMConflictBehavior.BufferWriteLossy,
      withClockGate = true,
      hasMbist = hasMbist,
      hasSramCtl = hasSramCtl
    )
  )

  // Power-on reset to weak taken
  // resetRow walks over all rows while doing_reset is set; the sweep ends on the last row.
  val doing_reset = RegInit(true.B)
  val resetRow    = RegInit(0.U(log2Ceil(BtSize).W))
  resetRow := resetRow + doing_reset
  when(resetRow === (BtSize - 1).U) {
    doing_reset := false.B
  }

  // Require power-on reset done before handling any request
  io.req.ready := !doing_reset

  // s0: issue the SRAM read
  val s0_pc   = io.req.bits
  val s0_fire = io.req.valid
  val s0_idx  = bimAddr.getIdx(s0_pc)
  bt.io.r.req.valid       := s0_fire
  bt.io.r.req.bits.setIdx := s0_idx

  // s1: read data comes back in physical slot order
  val s1_read = bt.io.r.resp.data
  val s1_idx  = RegEnable(s0_idx, s0_fire)

  // Map physical slots back to logical branches
  val per_br_ctr = VecInit((0 until numBr).map(i => Mux1H(UIntToOH(get_phy_br_idx(s1_idx, i), numBr), s1_read)))
  io.s1_cnt := per_br_ctr

  // Update logic
  val u_idx = bimAddr.getIdx(io.update_pc)

  val newCtrs = Wire(Vec(numBr, UInt(2.W))) // physical bridx

  val wrbypass = Module(new WrBypass(UInt(2.W), bypassEntries, log2Up(BtSize), numWays = numBr)) // logical bridx
  wrbypass.io.wen       := io.update_mask.reduce(_ || _)
  wrbypass.io.write_idx := u_idx
  // foreach rather than map: the connection is done purely for its side effect
  wrbypass.io.write_way_mask.foreach(_ := io.update_mask)
  for (li <- 0 until numBr) {
    val br_pidx = get_phy_br_idx(u_idx, li)
    wrbypass.io.write_data(li) := newCtrs(br_pidx)
  }

  // Counter value to step from, per physical slot: the bypassed value when the write bypass
  // hits with valid data for that branch, otherwise the counter supplied with the update.
  val oldCtrs =
    VecInit((0 until numBr).map { pi =>
      val br_lidx = get_lgc_br_idx(u_idx, pi.U(log2Ceil(numBr).W))
      Mux(
        wrbypass.io.hit && wrbypass.io.hit_data(br_lidx).valid,
        wrbypass.io.hit_data(br_lidx).bits,
        io.update_cnt(br_lidx)
      )
    })

  /**
    * Saturating counter step.
    *
    * @param old   current counter value
    * @param len   counter width in bits
    * @param taken step up when true, down when false
    * @return `old` unchanged when already saturated in the step direction, otherwise old +/- 1
    */
  def satUpdate(old: UInt, len: Int, taken: Bool): UInt = {
    val oldSatTaken    = old === ((1 << len) - 1).U
    val oldSatNotTaken = old === 0.U
    Mux(oldSatTaken && taken, ((1 << len) - 1).U, Mux(oldSatNotTaken && !taken, 0.U, Mux(taken, old + 1.U, old - 1.U)))
  }

  val newTakens = io.update_takens
  newCtrs := VecInit((0 until numBr).map { pi =>
    val br_lidx = get_lgc_br_idx(u_idx, pi.U(log2Ceil(numBr).W))
    satUpdate(oldCtrs(pi), 2, newTakens(br_lidx))
  })

  // A physical way is written when any logical branch that maps onto it is being updated
  val updateWayMask = VecInit((0 until numBr).map(pi =>
    (0 until numBr).map(li =>
      io.update_mask(li) && get_phy_br_idx(u_idx, li) === pi.U
    ).reduce(_ || _)
  )).asUInt

  // During the reset sweep the write port is used to initialise all ways of resetRow
  bt.io.w.apply(
    valid = io.update_mask.reduce(_ || _) || doing_reset,
    data = Mux(doing_reset, VecInit(Seq.fill(numBr)(2.U(2.W))), newCtrs), // Weak taken
    setIdx = Mux(doing_reset, resetRow, u_idx),
    waymask = Mux(doing_reset, Fill(numBr, 1.U(1.W)).asUInt, updateWayMask)
  )
}
250
/**
  * One tagged TAGE table.
  *
  * Entries (valid, tag, ctr) live in `nBanks` single-port SRAM banks selected by the low bits
  * of the hashed index; the useful bits live in a separate SRAM (`us`). Each row has `numBr`
  * ways; which physical way a logical branch uses is derived from the unhashed index
  * (see get_phy_br_idx / get_lgc_br_idx).
  *
  * @param nRows    total number of rows; split evenly across the numBr branch slots
  * @param histLen  history length used for the folded histories of this table
  * @param tagLen   width of the stored tag
  * @param tableIdx index of this table, used in elaboration warnings and error messages
  */
class TageTable(
    val nRows:    Int,
    val histLen:  Int,
    val tagLen:   Int,
    val tableIdx: Int
)(implicit p: Parameters)
    extends TageModule with HasFoldedHistory {
  val io = IO(new Bundle() {
    val req    = Flipped(DecoupledIO(new TageReq))
    val resps  = Output(Vec(numBr, Valid(new TageResp)))
    val update = Input(new TageUpdate)
  })

  /** Entry stored in the table banks. */
  class TageEntry() extends TageBundle {
    val valid = Bool()
    val tag   = UInt(tagLen.W)
    val ctr   = UInt(TageCtrBits.W)
  }

  // Physical SRAM size
  val bankSRAMSize = 512
  val uSRAMSize    = 256
  require(nRows % bankSRAMSize == 0)
  require(isPow2(numBr))
  val nRowsPerBr    = nRows / numBr
  val nBanks        = 4 // Tage banks
  val bankSize      = nRowsPerBr / nBanks
  val bankFoldWidth = if (bankSize >= bankSRAMSize) bankSize / bankSRAMSize else 1
  val uFoldedWidth  = nRowsPerBr / uSRAMSize
  if (bankSize < bankSRAMSize) {
    println(f"warning: tage table $tableIdx has small sram depth of $bankSize")
  }
  val bankIdxWidth = log2Ceil(nBanks)
  // The low bankIdxWidth bits of the hashed index select the bank; the rest address the row in it
  def get_bank_mask(idx: UInt) = VecInit((0 until nBanks).map(idx(bankIdxWidth - 1, 0) === _.U))
  def get_bank_idx(idx:  UInt) = idx >> bankIdxWidth

  // bypass entries for tage update
  val perBankWrbypassEntries = 8

  // (history length, folded length) pairs needed for the index and the two tag hashes
  val idxFhInfo    = (histLen, min(log2Ceil(nRowsPerBr), histLen))
  val tagFhInfo    = (histLen, min(histLen, tagLen))
  val altTagFhInfo = (histLen, min(histLen, tagLen - 1))
  val allFhInfos   = Seq(idxFhInfo, tagFhInfo, altTagFhInfo)

  def getFoldedHistoryInfo = allFhInfos.filter(_._1 > 0).toSet

  /**
    * Hashes the unhashed index with the folded histories.
    *
    * @return (idx, tag): idx is the low log2Ceil(nRowsPerBr) bits of `unhashed_idx ^ idx_fh`;
    *         tag is the low tagLen bits of `unhashed_idx ^ tag_fh ^ (alt_tag_fh << 1)`
    */
  def compute_tag_and_hash(unhashed_idx: UInt, allFh: AllFoldedHistories) = {
    val idx_fh     = allFh.getHistWithInfo(idxFhInfo).folded_hist
    val tag_fh     = allFh.getHistWithInfo(tagFhInfo).folded_hist
    val alt_tag_fh = allFh.getHistWithInfo(altTagFhInfo).folded_hist
    // require(idx_fh.getWidth == log2Ceil(nRows))
    val idx = (unhashed_idx ^ idx_fh)(log2Ceil(nRowsPerBr) - 1, 0)
    val tag = (unhashed_idx ^ tag_fh ^ (alt_tag_fh << 1))(tagLen - 1, 0)
    (idx, tag)
  }

  /** Steps a TageCtrBits-wide counter via satUpdate. */
  def inc_ctr(ctr: UInt, taken: Bool): UInt = satUpdate(ctr, TageCtrBits, taken)

  // Optional self-check: the folded index history delivered with the request must equal the
  // one recomputed from the full global history.
  if (EnableGHistDiff) {
    val idx_history = compute_folded_ghist(io.req.bits.ghist, log2Ceil(nRowsPerBr))
    val idx_fh      = io.req.bits.folded_hist.getHistWithInfo(idxFhInfo)
    XSError(
      idx_history =/= idx_fh.folded_hist,
      p"tage table $tableIdx has different fh," +
        p" ghist: ${Binary(idx_history)}, fh: ${Binary(idx_fh.folded_hist)}\n"
    )
  }
  // pc is start address of basic block, most 2 branch inst in block
  // def getUnhashedIdx(pc: UInt) = pc >> (instOffsetBits+log2Ceil(TageBanks))
  def getUnhashedIdx(pc: UInt): UInt = pc >> instOffsetBits

  // val s1_pc = io.req.bits.pc
  val req_unhashed_idx = getUnhashedIdx(io.req.bits.pc)

  // Useful-bit storage: one Bool per way, addressed by the full hashed index
  val us = Module(new FoldedSRAMTemplate(
    Bool(),
    set = nRowsPerBr,
    width = uFoldedWidth,
    way = numBr,
    shouldReset = true,
    extraReset = true,
    holdRead = true,
    singlePort = true,
    withClockGate = true,
    hasMbist = hasMbist,
    hasSramCtl = hasSramCtl
  ))
  us.extra_reset.get := io.update.reset_u.reduce(_ || _) && io.update.mask.reduce(_ || _)

  // Entry storage, split into nBanks banks
  val table_banks = Seq.fill(nBanks)(
    Module(new FoldedSRAMTemplate(
      new TageEntry,
      set = bankSize,
      width = bankFoldWidth,
      way = numBr,
      shouldReset = true,
      holdRead = true,
      singlePort = true,
      withClockGate = true,
      hasMbist = hasMbist,
      hasSramCtl = hasSramCtl
    ))
  )

  // ---------------- s0: hash and issue reads ----------------
  val (s0_idx, s0_tag) = compute_tag_and_hash(req_unhashed_idx, io.req.bits.folded_hist)
  val s0_bank_req_1h   = get_bank_mask(s0_idx)

  // Only the addressed bank is read
  for (b <- 0 until nBanks) {
    table_banks(b).io.r.req.valid       := io.req.fire && s0_bank_req_1h(b)
    table_banks(b).io.r.req.bits.setIdx := get_bank_idx(s0_idx)
  }

  us.io.r.req.valid       := io.req.fire
  us.io.r.req.bits.setIdx := s0_idx

  // ---------------- s1: tag compare and response ----------------
  val s1_unhashed_idx               = RegEnable(req_unhashed_idx, io.req.fire)
  val s1_idx                        = RegEnable(s0_idx, io.req.fire)
  val s1_tag                        = RegEnable(s0_tag, io.req.fire)
  val s1_pc                         = RegEnable(io.req.bits.pc, io.req.fire)
  val s1_bank_req_1h                = RegEnable(s0_bank_req_1h, io.req.fire)
  // Per bank: a write was in flight in the cycle the request was presented
  val s1_bank_has_write_on_this_req = RegEnable(VecInit(table_banks.map(_.io.w.req.valid)), io.req.valid)

  val resp_invalid_by_write = Wire(Bool())

  val tables_r = table_banks.map(_.io.r.resp.data)                               // s1
  val unconfs  = tables_r.map(r => VecInit(r.map(e => WireInit(unconf(e.ctr))))) // do unconf cal in parallel
  val hits =
    tables_r.map(r =>
      VecInit(r.map(e => e.tag === s1_tag && e.valid && !resp_invalid_by_write))
    ) // do tag compare in parallel

  // Pick the results of the bank that was actually read
  val resp_selected   = Mux1H(s1_bank_req_1h, tables_r)
  val unconf_selected = Mux1H(s1_bank_req_1h, unconfs)
  val hit_selected    = Mux1H(s1_bank_req_1h, hits)
  // A hit is suppressed when the selected bank had a write on the request cycle
  resp_invalid_by_write := Mux1H(s1_bank_req_1h, s1_bank_has_write_on_this_req)

  // Map physical ways back to logical branches
  val per_br_resp =
    VecInit((0 until numBr).map(i => Mux1H(UIntToOH(get_phy_br_idx(s1_unhashed_idx, i), numBr), resp_selected)))
  val per_br_unconf =
    VecInit((0 until numBr).map(i => Mux1H(UIntToOH(get_phy_br_idx(s1_unhashed_idx, i), numBr), unconf_selected)))
  val per_br_hit =
    VecInit((0 until numBr).map(i => Mux1H(UIntToOH(get_phy_br_idx(s1_unhashed_idx, i), numBr), hit_selected)))
  val per_br_u =
    VecInit((0 until numBr).map(i => Mux1H(UIntToOH(get_phy_br_idx(s1_unhashed_idx, i), numBr), us.io.r.resp.data)))

  for (i <- 0 until numBr) {
    io.resps(i).valid       := per_br_hit(i)
    io.resps(i).bits.ctr    := per_br_resp(i).ctr
    io.resps(i).bits.u      := per_br_u(i)
    io.resps(i).bits.unconf := per_br_unconf(i)
  }

  // ---------------- update path ----------------
  // Use fetchpc to compute hash
  // The folded histories for the update are recomputed from the full update ghist
  val update_folded_hist = WireInit(0.U.asTypeOf(new AllFoldedHistories(foldedGHistInfos)))
  update_folded_hist.getHistWithInfo(idxFhInfo).folded_hist := compute_folded_ghist(
    io.update.ghist,
    log2Ceil(nRowsPerBr)
  )
  update_folded_hist.getHistWithInfo(tagFhInfo).folded_hist    := compute_folded_ghist(io.update.ghist, tagLen)
  update_folded_hist.getHistWithInfo(altTagFhInfo).folded_hist := compute_folded_ghist(io.update.ghist, tagLen - 1)

  val per_bank_update_wdata = Wire(Vec(nBanks, Vec(numBr, new TageEntry))) // corresponds to physical branches

  val update_unhashed_idx      = getUnhashedIdx(io.update.pc)
  val (update_idx, update_tag) = compute_tag_and_hash(update_unhashed_idx, update_folded_hist)
  val update_req_bank_1h       = get_bank_mask(update_idx)
  val update_idx_in_bank       = get_bank_idx(update_idx)

  val per_bank_not_silent_update = Wire(Vec(nBanks, Vec(numBr, Bool()))) // corresponds to physical branches
  val per_bank_update_way_mask =
    VecInit((0 until nBanks).map(b =>
      VecInit((0 until numBr).map { pi =>
        // whether any of the logical branches updates on each slot
        Seq.tabulate(numBr)(li =>
          get_phy_br_idx(update_unhashed_idx, li) === pi.U &&
            io.update.mask(li)
        ).reduce(_ || _) && per_bank_not_silent_update(b)(pi)
      }).asUInt
    ))

  // val silent_update_from_wrbypass = Wire(Bool())

  for (b <- 0 until nBanks) {
    table_banks(b).io.w.apply(
      valid = per_bank_update_way_mask(b).orR && update_req_bank_1h(b),
      data = per_bank_update_wdata(b),
      setIdx = update_idx_in_bank,
      waymask = per_bank_update_way_mask(b)
    )
  }

  // Power-on reset
  val powerOnResetState = RegInit(true.B)
  when(us.io.r.req.ready && table_banks.map(_.io.r.req.ready).reduce(_ && _)) {
    // When all the SRAM first reach ready state, we consider power-on reset is done
    powerOnResetState := false.B
  }
  // Do not use table banks io.r.req.ready directly
  // All the us & table_banks are single port SRAM, ready := !wen
  // We do not want write request block the whole BPU pipeline
  io.req.ready := !powerOnResetState

  // A read and a write target the same bank in the same cycle
  val bank_conflict = (0 until nBanks).map(b => table_banks(b).io.w.req.valid && s0_bank_req_1h(b)).reduce(_ || _)
  XSPerfAccumulate(f"tage_table_bank_conflict", bank_conflict)

  // Useful-bit write: way mask and data are permuted from logical to physical slots
  val update_u_idx = update_idx
  val update_u_way_mask = VecInit((0 until numBr).map { pi =>
    Seq.tabulate(numBr)(li =>
      get_phy_br_idx(update_unhashed_idx, li) === pi.U &&
        io.update.uMask(li)
    ).reduce(_ || _)
  }).asUInt

  val update_u_wdata = VecInit((0 until numBr).map(pi =>
    Mux1H(Seq.tabulate(numBr)(li =>
      (get_phy_br_idx(update_unhashed_idx, li) === pi.U, io.update.us(li))
    ))
  ))

  us.io.w.apply(
    io.update.mask.reduce(_ || _) && io.update.uMask.reduce(_ || _),
    update_u_wdata,
    update_u_idx,
    update_u_way_mask
  )

  // remove silent updates
  // An update is silent when the counter is already saturated in the direction of the outcome
  def silentUpdate(ctr: UInt, taken: Bool) =
    ctr.andR && taken || !ctr.orR && !taken

  val bank_wrbypasses = Seq.fill(nBanks)(Seq.fill(numBr)(
    Module(new WrBypass(UInt(TageCtrBits.W), perBankWrbypassEntries, log2Ceil(bankSize)))
  )) // let it corresponds to logical brIdx

  for (b <- 0 until nBanks) {
    val not_silent_update = per_bank_not_silent_update(b)
    for (pi <- 0 until numBr) { // physical brIdx
      val update_wdata = per_bank_update_wdata(b)(pi)
      val br_lidx      = get_lgc_br_idx(update_unhashed_idx, pi.U(log2Ceil(numBr).W))
      // Select the write bypass that belongs to the logical branch mapped onto this physical way
      val wrbypass_io         = Mux1H(UIntToOH(br_lidx, numBr), bank_wrbypasses(b).map(_.io))
      val wrbypass_hit        = wrbypass_io.hit
      val wrbypass_ctr        = wrbypass_io.hit_data(0).bits
      val wrbypass_data_valid = wrbypass_hit && wrbypass_io.hit_data(0).valid
      // New counter: on allocation start at 4 (taken) or 3 (not taken), the two middle values
      // of a 3-bit counter; otherwise step the bypassed counter if valid, else the old
      // counter supplied with the update.
      update_wdata.ctr :=
        Mux(
          io.update.alloc(br_lidx),
          Mux(io.update.takens(br_lidx), 4.U, 3.U),
          Mux(
            wrbypass_data_valid,
            inc_ctr(wrbypass_ctr, io.update.takens(br_lidx)),
            inc_ctr(io.update.oldCtrs(br_lidx), io.update.takens(br_lidx))
          )
        )
      // Allocations are never silent
      not_silent_update(pi) :=
        Mux(
          wrbypass_data_valid,
          !silentUpdate(wrbypass_ctr, io.update.takens(br_lidx)),
          !silentUpdate(io.update.oldCtrs(br_lidx), io.update.takens(br_lidx))
        ) ||
          io.update.alloc(br_lidx)

      update_wdata.valid := true.B
      update_wdata.tag   := update_tag
    }

    // Record the written counter in the write bypass of each updated logical branch
    for (li <- 0 until numBr) {
      val wrbypass = bank_wrbypasses(b)(li)
      val br_pidx  = get_phy_br_idx(update_unhashed_idx, li)
      wrbypass.io.wen           := io.update.mask(li) && update_req_bank_1h(b)
      wrbypass.io.write_idx     := get_bank_idx(update_idx)
      wrbypass.io.write_data(0) := Mux1H(UIntToOH(br_pidx, numBr), per_bank_update_wdata(b)).ctr
    }
  }

  // ---------------- performance counters ----------------
  for (i <- 0 until numBr) {
    for (b <- 0 until nBanks) {
      val wrbypass = bank_wrbypasses(b)(i)
      XSPerfAccumulate(
        f"tage_table_bank_${b}_wrbypass_enq_$i",
        io.update.mask(i) && update_req_bank_1h(b) && !wrbypass.io.hit
      )
      XSPerfAccumulate(
        f"tage_table_bank_${b}_wrbypass_hit_$i",
        io.update.mask(i) && update_req_bank_1h(b) && wrbypass.io.hit
      )
    }
  }

  for (b <- 0 until nBanks) {
    val not_silent_update = per_bank_not_silent_update(b)
    XSPerfAccumulate(
      f"tage_table_bank_${b}_real_updates",
      io.update.mask.reduce(_ || _) && update_req_bank_1h(b) && not_silent_update.reduce(_ || _)
    )
    XSPerfAccumulate(
      f"tage_table_bank_${b}_silent_updates_eliminated",
      io.update.mask.reduce(_ || _) && update_req_bank_1h(b) && !not_silent_update.reduce(_ || _)
    )
  }

  XSPerfAccumulate("tage_table_hits", PopCount(io.resps.map(_.valid)))

  for (b <- 0 until nBanks) {
    XSPerfAccumulate(f"tage_table_bank_${b}_update_req", io.update.mask.reduce(_ || _) && update_req_bank_1h(b))
    for (i <- 0 until numBr) {
      val li   = i
      val pidx = get_phy_br_idx(update_unhashed_idx, li)
      XSPerfAccumulate(
        f"tage_table_bank_${b}_br_li_${li}_updated",
        table_banks(b).io.w.req.valid && table_banks(b).io.w.req.bits.waymask.get(pidx)
      )
      val pi = i
      XSPerfAccumulate(
        f"tage_table_bank_${b}_br_pi_${pi}_updated",
        table_banks(b).io.w.req.valid && table_banks(b).io.w.req.bits.waymask.get(pi)
      )
    }
  }

  // ---------------- debug printing ----------------
  val u  = io.update
  val b  = PriorityEncoder(u.mask)
  val ub = PriorityEncoder(u.uMask)
  XSDebug(
    io.req.fire,
    p"tableReq: pc=0x${Hexadecimal(io.req.bits.pc)}, " +
      p"idx=$s0_idx, tag=$s0_tag\n"
  )
  for (i <- 0 until numBr) {
    XSDebug(
      RegNext(io.req.fire) && per_br_hit(i),
      p"TageTableResp_br_$i: idx=$s1_idx, hit:${per_br_hit(i)}, " +
        p"ctr:${io.resps(i).bits.ctr}, u:${io.resps(i).bits.u}\n"
    )
    // Fixed: the message used to print a stray '}' after the pc value
    XSDebug(
      io.update.mask(i),
      p"update Table_br_$i: pc:${Hexadecimal(u.pc)}, " +
        p"taken:${u.takens(i)}, alloc:${u.alloc(i)}, oldCtrs:${u.oldCtrs(i)}\n"
    )
    val bank = OHToUInt(update_req_bank_1h.asUInt, nBanks)
    val pi   = get_phy_br_idx(update_unhashed_idx, i)
    XSDebug(
      io.update.mask(i),
      p"update Table_$i: writing tag:$update_tag, " +
        p"ctr: ${per_bank_update_wdata(bank)(pi).ctr} in idx ${update_idx}\n"
    )
    XSDebug(RegNext(io.req.fire) && !per_br_hit(i), p"TageTableResp_$i: not hit!\n")
  }

  // ------------------------------Debug-------------------------------------
  // Shadow bitmap of rows that have ever been targeted by an update; only read by XSDebug
  val valids = RegInit(VecInit(Seq.fill(nRowsPerBr)(false.B)))
  when(io.update.mask.reduce(_ || _))(valids(update_idx) := true.B)
  XSDebug("Table usage:------------------------\n")
  XSDebug("%d out of %d rows are valid\n", PopCount(valids), nRowsPerBr.U)

}
606
/** Common base of the real and the fake TAGE predictor. */
abstract class BaseTage(implicit p: Parameters) extends BasePredictor with TageParams with BPUUtils
608
/** Placeholder predictor: drives an all-zero output and is always ready in s1 and s2. */
class FakeTage(implicit p: Parameters) extends BaseTage {
  // Tie the whole output bundle to zero
  io.out <> 0.U.asTypeOf(DecoupledIO(new BasePredictorOutput))

  // io.s0_ready := true.B
  // Never stall the pipeline
  io.s1_ready := true.B
  io.s2_ready := true.B
}
616
617class Tage(implicit p: Parameters) extends BaseTage {
618
619  val resp_meta          = Wire(new TageMeta)
620  override val meta_size = resp_meta.getWidth
621  val tables = TageTableInfos.zipWithIndex.map {
622    case ((nRows, histLen, tagLen), i) => {
623      val t = Module(new TageTable(nRows, histLen, tagLen, i))
624      t.io.req.valid            := io.s0_fire(1)
625      t.io.req.bits.pc          := s0_pc_dup(1)
626      t.io.req.bits.folded_hist := io.in.bits.folded_hist(1)
627      t.io.req.bits.ghist       := io.in.bits.ghist
628      t
629    }
630  }
631  val bt = Module(new TageBTable)
632  bt.io.req.valid := io.s0_fire(1)
633  bt.io.req.bits  := s0_pc_dup(1)
634  private val mbistPl           = MbistPipeline.PlaceMbistPipeline(1, "MbistPipeTage", hasMbist)
635  val bankTickCtrDistanceToTops = Seq.fill(numBr)(RegInit(((1 << TickWidth) - 1).U(TickWidth.W)))
636  val bankTickCtrs              = Seq.fill(numBr)(RegInit(0.U(TickWidth.W)))
637  val useAltOnNaCtrs = RegInit(
638    VecInit(Seq.fill(numBr)(
639      VecInit(Seq.fill(NUM_USE_ALT_ON_NA)((1 << (USE_ALT_ON_NA_WIDTH - 1)).U(USE_ALT_ON_NA_WIDTH.W)))
640    ))
641  )
642
643  val tage_fh_info                  = tables.map(_.getFoldedHistoryInfo).reduce(_ ++ _).toSet
644  override def getFoldedHistoryInfo = Some(tage_fh_info)
645
646  val s1_resps = VecInit(tables.map(_.io.resps))
647
648  // val s1_bim = io.in.bits.resp_in(0).s1.full_pred
649  // val s2_bim = RegEnable(s1_bim, io.s1_fire)
650
651  val debug_pc_s0 = s0_pc_dup(1)
652  val debug_pc_s1 = RegEnable(s0_pc_dup(1), io.s0_fire(1))
653  val debug_pc_s2 = RegEnable(debug_pc_s1, io.s1_fire(1))
654
655  val s1_provideds     = Wire(Vec(numBr, Bool()))
656  val s1_providers     = Wire(Vec(numBr, UInt(log2Ceil(TageNTables).W)))
657  val s1_providerResps = Wire(Vec(numBr, new TageResp))
658  // val s1_altProvideds     = Wire(Vec(numBr, Bool()))
659  // val s1_altProviders     = Wire(Vec(numBr, UInt(log2Ceil(TageNTables).W)))
660  // val s1_altProviderResps = Wire(Vec(numBr, new TageResp))
661  val s1_altUsed    = Wire(Vec(numBr, Bool()))
662  val s1_tageTakens = Wire(Vec(numBr, Bool()))
663  val s1_basecnts   = Wire(Vec(numBr, UInt(2.W)))
664  val s1_useAltOnNa = Wire(Vec(numBr, Bool()))
665
666  val s2_provideds     = RegEnable(s1_provideds, io.s1_fire(1))
667  val s2_providers     = RegEnable(s1_providers, io.s1_fire(1))
668  val s2_providerResps = RegEnable(s1_providerResps, io.s1_fire(1))
669  // val s2_altProvideds     = RegEnable(s1_altProvideds, io.s1_fire)
670  // val s2_altProviders     = RegEnable(s1_altProviders, io.s1_fire)
671  // val s2_altProviderResps = RegEnable(s1_altProviderResps, io.s1_fire)
672  val s2_altUsed        = RegEnable(s1_altUsed, io.s1_fire(1))
673  val s2_tageTakens_dup = io.s1_fire.map(f => RegEnable(s1_tageTakens, f))
674  val s2_basecnts       = RegEnable(s1_basecnts, io.s1_fire(1))
675  val s2_useAltOnNa     = RegEnable(s1_useAltOnNa, io.s1_fire(1))
676
677  io.out                 := io.in.bits.resp_in(0)
678  io.out.last_stage_meta := resp_meta.asUInt
679
680  val resp_s2 = io.out.s2
681
682  // Update logic
683  val u_valid = RegNext(io.update.valid, init = false.B)
684  val update  = Wire(new BranchPredictionUpdate)
685  update := RegEnable(io.update.bits, io.update.valid)
686
687  // The pc register has been moved outside of predictor, pc field of update bundle and other update data are not in the same stage
688  // so io.update.bits.pc is used directly here
689  val update_pc = io.update.bits.pc
690
691  // To improve Clock Gating Efficiency
692  val u_valids_for_cge =
693    VecInit((0 until TageBanks).map(w =>
694      io.update.bits.ftb_entry.brValids(w) && io.update.valid
695    )) // io.update.bits.ftb_entry.always_taken has timing issues(FTQEntryGen)
696  val u_meta     = io.update.bits.meta.asTypeOf(new TageMeta)
697  val updateMeta = Wire(new TageMeta)
698  update.meta := updateMeta.asUInt
699  updateMeta  := RegEnable(u_meta, io.update.valid)
700  for (i <- 0 until numBr) {
701    updateMeta.providers(i).bits := RegEnable(
702      u_meta.providers(i).bits,
703      u_meta.providers(i).valid && u_valids_for_cge(i)
704    )
705    updateMeta.providerResps(i) := RegEnable(
706      u_meta.providerResps(i),
707      u_meta.providers(i).valid && u_valids_for_cge(i)
708    )
709    updateMeta.altUsed(i) := RegEnable(u_meta.altUsed(i), u_valids_for_cge(i))
710    updateMeta.allocates(i) := RegEnable(
711      u_meta.allocates(i),
712      io.update.valid // not using mispred_mask, because mispred_mask timing is bad
713    )
714  }
715  if (EnableSC) {
716    for (w <- 0 until TageBanks) {
717      updateMeta.scMeta.get.scPreds(w) := RegEnable(
718        u_meta.scMeta.get.scPreds(w),
719        u_valids_for_cge(w) && u_meta.providers(w).valid
720      )
721      updateMeta.scMeta.get.ctrs(w) := RegEnable(
722        u_meta.scMeta.get.ctrs(w),
723        u_valids_for_cge(w) && u_meta.providers(w).valid
724      )
725    }
726  }
727  update.ghist := RegEnable(io.update.bits.ghist, io.update.valid) // TODO: CGE
728
729  val updateValids = VecInit((0 until TageBanks).map(w =>
730    update.ftb_entry.brValids(w) && u_valid && !update.ftb_entry.strong_bias(w) &&
731      !(PriorityEncoder(update.br_taken_mask) < w.U)
732  ))
733
734  val updateMask    = WireInit(0.U.asTypeOf(Vec(numBr, Vec(TageNTables, Bool()))))
735  val updateUMask   = WireInit(0.U.asTypeOf(Vec(numBr, Vec(TageNTables, Bool()))))
736  val updateResetU  = WireInit(0.U.asTypeOf(Vec(numBr, Bool()))) // per predictor
737  val updateTakens  = Wire(Vec(numBr, Vec(TageNTables, Bool())))
738  val updateAlloc   = WireInit(0.U.asTypeOf(Vec(numBr, Vec(TageNTables, Bool()))))
739  val updateOldCtrs = Wire(Vec(numBr, Vec(TageNTables, UInt(TageCtrBits.W))))
740  val updateU       = Wire(Vec(numBr, Vec(TageNTables, Bool())))
741  val updatebcnt    = Wire(Vec(TageBanks, UInt(2.W)))
742  val baseupdate    = WireInit(0.U.asTypeOf(Vec(TageBanks, Bool())))
743  val bUpdateTakens = Wire(Vec(TageBanks, Bool()))
744  updateTakens  := DontCare
745  updateOldCtrs := DontCare
746  updateU       := DontCare
747
748  val updateMisPreds = update.mispred_mask
749
750  class TageTableInfo(implicit p: Parameters) extends XSBundle {
751    val resp              = new TageResp
752    val tableIdx          = UInt(log2Ceil(TageNTables).W)
753    val use_alt_on_unconf = Bool()
754  }
755  // access tag tables and output meta info
756
757  for (i <- 0 until numBr) {
758    val useAltCtr  = Mux1H(UIntToOH(use_alt_idx(s1_pc_dup(0)), NUM_USE_ALT_ON_NA), useAltOnNaCtrs(i))
759    val useAltOnNa = useAltCtr(USE_ALT_ON_NA_WIDTH - 1) // highest bit
760
761    val s1_per_br_resp = VecInit(s1_resps.map(_(i)))
762    val inputRes = s1_per_br_resp.zipWithIndex.map {
763      case (r, idx) => {
764        val tableInfo = Wire(new TageTableInfo)
765        tableInfo.resp              := r.bits
766        tableInfo.use_alt_on_unconf := r.bits.unconf && useAltOnNa
767        tableInfo.tableIdx          := idx.U(log2Ceil(TageNTables).W)
768        (r.valid, tableInfo)
769      }
770    }
771    val providerInfo = ParallelPriorityMux(inputRes.reverse)
772    val provided     = inputRes.map(_._1).reduce(_ || _)
773    // val altProvided = selectedInfo.hasTwo
774    // val providerInfo = selectedInfo
775    // val altProviderInfo = selectedInfo.second
776    s1_provideds(i)     := provided
777    s1_providers(i)     := providerInfo.tableIdx
778    s1_providerResps(i) := providerInfo.resp
779    // s1_altProvideds(i)   := altProvided
780    // s1_altProviders(i)   := altProviderInfo.tableIdx
781    // s1_altProviderResps(i) := altProviderInfo.resp
782
783    resp_meta.providers(i).valid := RegEnable(s2_provideds(i), io.s2_fire(1))
784    resp_meta.providers(i).bits  := RegEnable(s2_providers(i), io.s2_fire(1))
785    resp_meta.providerResps(i)   := RegEnable(s2_providerResps(i), io.s2_fire(1))
786    // resp_meta.altProviders(i).valid := RegEnable(s2_altProvideds(i), io.s2_fire)
787    // resp_meta.altProviders(i).bits  := RegEnable(s2_altProviders(i), io.s2_fire)
788    // resp_meta.altProviderResps(i)   := RegEnable(s2_altProviderResps(i), io.s2_fire)
789    resp_meta.pred_cycle.map(_ := RegEnable(GTimer(), io.s2_fire(1)))
790    resp_meta.use_alt_on_na.map(_(i) := RegEnable(s2_useAltOnNa(i), io.s2_fire(1)))
791
792    // Create a mask fo tables which did not hit our query, and also contain useless entries
793    // and also uses a longer history than the provider
794    val allocatableSlots =
795      RegEnable(
796        VecInit(s1_per_br_resp.map(r => !r.valid && !r.bits.u)).asUInt &
797          ~(LowerMask(UIntToOH(s1_providers(i)), TageNTables) &
798            Fill(TageNTables, s1_provideds(i).asUInt)),
799        io.s1_fire(1)
800      )
801
802    resp_meta.allocates(i) := RegEnable(allocatableSlots, io.s2_fire(1))
803
804    val s1_bimCtr = bt.io.s1_cnt(i)
805    s1_altUsed(i) := !provided || providerInfo.use_alt_on_unconf
806    s1_tageTakens(i) :=
807      Mux(s1_altUsed(i), s1_bimCtr(1), providerInfo.resp.ctr(TageCtrBits - 1))
808    s1_basecnts(i)   := s1_bimCtr
809    s1_useAltOnNa(i) := providerInfo.use_alt_on_unconf
810
811    resp_meta.altUsed(i)  := RegEnable(s2_altUsed(i), io.s2_fire(1))
812    resp_meta.basecnts(i) := RegEnable(s2_basecnts(i), io.s2_fire(1))
813
814    val tage_enable_dup = dup(RegNext(io.ctrl.tage_enable))
815    for (tage_enable & fp & s2_tageTakens <- tage_enable_dup zip resp_s2.full_pred zip s2_tageTakens_dup) {
816      when(tage_enable) {
817        fp.br_taken_mask(i) := s2_tageTakens(i)
818      }
819    }
820
821    // ---------------- update logics below ------------------//
822    val hasUpdate     = updateValids(i)
823    val updateMispred = updateMisPreds(i)
824    val updateTaken   = hasUpdate && update.br_taken_mask(i)
825
826    val updateProvided        = updateMeta.providers(i).valid
827    val updateProvider        = updateMeta.providers(i).bits
828    val updateProviderResp    = updateMeta.providerResps(i)
829    val updateProviderCorrect = updateProviderResp.ctr(TageCtrBits - 1) === updateTaken
830    val updateUseAlt          = updateMeta.altUsed(i)
831    val updateAltDiffers      = updateMeta.altDiffers(i)
832    val updateAltIdx          = use_alt_idx(update_pc)
833    val updateUseAltCtr       = Mux1H(UIntToOH(updateAltIdx, NUM_USE_ALT_ON_NA), useAltOnNaCtrs(i))
834    val updateAltPred         = updateMeta.altPreds(i)
835    val updateAltCorrect      = updateAltPred === updateTaken
836
837    val updateProviderWeakTaken    = posUnconf(updateProviderResp.ctr)
838    val updateProviderWeaknotTaken = negUnconf(updateProviderResp.ctr)
839    val updateProviderWeak         = unconf(updateProviderResp.ctr)
840
841    when(hasUpdate) {
842      when(updateProvided && updateProviderWeak && updateAltDiffers) {
843        val newCtr = satUpdate(updateUseAltCtr, USE_ALT_ON_NA_WIDTH, updateAltCorrect)
844        useAltOnNaCtrs(i)(updateAltIdx) := newCtr
845      }
846    }
847
848    XSPerfAccumulate(f"tage_bank_${i}_use_alt_pred", hasUpdate && updateUseAlt)
849    XSPerfAccumulate(f"tage_bank_${i}_alt_correct", hasUpdate && updateUseAlt && updateAltCorrect)
850    XSPerfAccumulate(f"tage_bank_${i}_alt_wrong", hasUpdate && updateUseAlt && !updateAltCorrect)
851    XSPerfAccumulate(f"tage_bank_${i}_alt_differs", hasUpdate && updateAltDiffers)
852    XSPerfAccumulate(
853      f"tage_bank_${i}_use_alt_on_na_ctr_updated",
854      hasUpdate && updateAltDiffers && updateProvided && updateProviderWeak
855    )
856    XSPerfAccumulate(
857      f"tage_bank_${i}_use_alt_on_na_ctr_inc",
858      hasUpdate && updateAltDiffers && updateProvided && updateProviderWeak && updateAltCorrect
859    )
860    XSPerfAccumulate(
861      f"tage_bank_${i}_use_alt_on_na_ctr_dec",
862      hasUpdate && updateAltDiffers && updateProvided && updateProviderWeak && !updateAltCorrect
863    )
864
865    XSPerfAccumulate(f"tage_bank_${i}_na", hasUpdate && updateProvided && updateProviderWeak)
866    XSPerfAccumulate(
867      f"tage_bank_${i}_use_na_correct",
868      hasUpdate && updateProvided && updateProviderWeak && !updateUseAlt && !updateMispred
869    )
870    XSPerfAccumulate(
871      f"tage_bank_${i}_use_na_wrong",
872      hasUpdate && updateProvided && updateProviderWeak && !updateUseAlt && updateMispred
873    )
874
875    updateMeta.use_alt_on_na.map(uaon => XSPerfAccumulate(f"tage_bank_${i}_use_alt_on_na", hasUpdate && uaon(i)))
876
877    when(hasUpdate) {
878      when(updateProvided) {
879        updateMask(i)(updateProvider)    := true.B
880        updateUMask(i)(updateProvider)   := updateAltDiffers
881        updateU(i)(updateProvider)       := updateProviderCorrect
882        updateTakens(i)(updateProvider)  := updateTaken
883        updateOldCtrs(i)(updateProvider) := updateProviderResp.ctr
884        updateAlloc(i)(updateProvider)   := false.B
885      }
886    }
887
888    // update base table if used base table to predict
889    baseupdate(i)    := hasUpdate && updateUseAlt
890    updatebcnt(i)    := updateMeta.basecnts(i)
891    bUpdateTakens(i) := updateTaken
892
893    val needToAllocate  = hasUpdate && updateMispred && !(updateUseAlt && updateProviderCorrect && updateProvided)
894    val allocatableMask = updateMeta.allocates(i)
895    val canAllocate     = updateMeta.allocateValid(i)
896
897    val allocLFSR = random.LFSR(width = 15)(TageNTables - 1, 0)
898    val longerHistoryTableMask =
899      ~(LowerMask(UIntToOH(updateProvider), TageNTables) & Fill(TageNTables, updateProvided.asUInt))
900    val canAllocMask     = allocatableMask & longerHistoryTableMask
901    val allocFailureMask = ~allocatableMask & longerHistoryTableMask
902    val tickInc          = PopCount(allocFailureMask) > PopCount(canAllocMask)
903    val tickDec          = PopCount(canAllocMask) > PopCount(allocFailureMask)
904    val tickIncVal       = PopCount(allocFailureMask) - PopCount(canAllocMask)
905    val tickDecVal       = PopCount(canAllocMask) - PopCount(allocFailureMask)
906    val tickToPosSat     = tickIncVal >= bankTickCtrDistanceToTops(i) && tickInc
907    val tickToNegSat     = tickDecVal >= bankTickCtrs(i) && tickDec
908
909    val firstEntry  = PriorityEncoder(canAllocMask)
910    val maskedEntry = PriorityEncoder(canAllocMask & allocLFSR)
911    val allocate    = Mux(canAllocMask(maskedEntry), maskedEntry, firstEntry)
912
913    when(needToAllocate) {
914      // val allocate = updateMeta.allocates(i).bits
915      when(tickInc) {
916        when(tickToPosSat) {
917          bankTickCtrs(i)              := ((1 << TickWidth) - 1).U
918          bankTickCtrDistanceToTops(i) := 0.U
919        }.otherwise {
920          bankTickCtrs(i)              := bankTickCtrs(i) + tickIncVal
921          bankTickCtrDistanceToTops(i) := bankTickCtrDistanceToTops(i) - tickIncVal
922        }
923      }.elsewhen(tickDec) {
924        when(tickToNegSat) {
925          bankTickCtrs(i)              := 0.U
926          bankTickCtrDistanceToTops(i) := ((1 << TickWidth) - 1).U
927        }.otherwise {
928          bankTickCtrs(i)              := bankTickCtrs(i) - tickDecVal
929          bankTickCtrDistanceToTops(i) := bankTickCtrDistanceToTops(i) + tickDecVal
930        }
931      }
932      when(canAllocate) {
933        updateMask(i)(allocate)   := true.B
934        updateTakens(i)(allocate) := updateTaken
935        updateAlloc(i)(allocate)  := true.B
936        updateUMask(i)(allocate)  := true.B
937        updateU(i)(allocate)      := false.B
938      }
939      when(bankTickCtrs(i) === ((1 << TickWidth) - 1).U) {
940        bankTickCtrs(i)              := 0.U
941        bankTickCtrDistanceToTops(i) := ((1 << TickWidth) - 1).U
942        updateResetU(i)              := true.B
943      }
944    }
945    XSPerfAccumulate(f"tage_bank_${i}_update_allocate_failure", needToAllocate && !canAllocate)
946    XSPerfAccumulate(f"tage_bank_${i}_update_allocate_success", needToAllocate && canAllocate)
947    XSPerfAccumulate(s"tage_bank_${i}_mispred", hasUpdate && updateMispred)
948    XSPerfAccumulate(s"tage_bank_${i}_reset_u", updateResetU(i))
949    for (t <- 0 to TageNTables) {
950      XSPerfAccumulate(f"tage_bank_${i}_tick_inc_${t}", needToAllocate && tickInc && tickIncVal === t.U)
951      XSPerfAccumulate(f"tage_bank_${i}_tick_dec_${t}", needToAllocate && tickDec && tickDecVal === t.U)
952    }
953  }
954
955  val realWens = updateMask.transpose.map(v => v.reduce(_ | _))
956  for (w <- 0 until TageBanks) {
957    for (i <- 0 until TageNTables) {
958      val realWen = realWens(i)
959      tables(i).io.update.reset_u(w) := RegNext(updateResetU(w))
960      tables(i).io.update.mask(w)    := RegNext(updateMask(w)(i))
961      tables(i).io.update.takens(w)  := RegEnable(updateTakens(w)(i), realWen)
962      tables(i).io.update.alloc(w)   := RegEnable(updateAlloc(w)(i), realWen)
963      tables(i).io.update.oldCtrs(w) := RegEnable(updateOldCtrs(w)(i), realWen)
964
965      tables(i).io.update.uMask(w) := RegEnable(updateUMask(w)(i), realWen)
966      tables(i).io.update.us(w)    := RegEnable(updateU(w)(i), realWen)
967      // use fetch pc instead of instruction pc
968      tables(i).io.update.pc    := RegEnable(update_pc, realWen)
969      tables(i).io.update.ghist := RegEnable(update.ghist, realWen)
970    }
971  }
972  bt.io.update_mask   := RegNext(baseupdate)
973  bt.io.update_cnt    := RegEnable(updatebcnt, baseupdate.reduce(_ | _))
974  bt.io.update_pc     := RegEnable(update_pc, baseupdate.reduce(_ | _))
975  bt.io.update_takens := RegEnable(bUpdateTakens, baseupdate.reduce(_ | _))
976
977  // all should be ready for req
978  io.s1_ready := tables.map(_.io.req.ready).reduce(_ && _) && bt.io.req.ready
979  XSPerfAccumulate(f"tage_write_blocks_read", !io.s1_ready)
980
981  def pred_perf(name:   String, cnt: UInt) = XSPerfAccumulate(s"${name}_at_pred", cnt)
982  def commit_perf(name: String, cnt: UInt) = XSPerfAccumulate(s"${name}_at_commit", cnt)
983  def tage_perf(name: String, pred_cnt: UInt, commit_cnt: UInt) = {
984    pred_perf(name, pred_cnt)
985    commit_perf(name, commit_cnt)
986  }
987
988  // Debug and perf info
989  for (b <- 0 until TageBanks) {
990    val updateProvided = updateMeta.providers(b).valid
991    val updateProvider = updateMeta.providers(b).bits
992    for (i <- 0 until TageNTables) {
993      val pred_i_provided =
994        s2_provideds(b) && s2_providers(b) === i.U
995      val commit_i_provided =
996        updateProvided && updateProvider === i.U && updateValids(b)
997      tage_perf(
998        s"bank_${b}_tage_table_${i}_provided",
999        PopCount(pred_i_provided),
1000        PopCount(commit_i_provided)
1001      )
1002    }
1003    tage_perf(
1004      s"bank_${b}_tage_use_bim",
1005      PopCount(!s2_provideds(b)),
1006      PopCount(!updateProvided && updateValids(b))
1007    )
1008    def unconf(providerCtr: UInt) = providerCtr === 3.U || providerCtr === 4.U
1009    tage_perf(
1010      s"bank_${b}_tage_use_altpred",
1011      PopCount(s2_provideds(b) && unconf(s2_providerResps(b).ctr)),
1012      PopCount(updateProvided &&
1013        unconf(updateMeta.providerResps(b).ctr) && updateValids(b))
1014    )
1015    tage_perf(
1016      s"bank_${b}_tage_provided",
1017      PopCount(s2_provideds(b)),
1018      PopCount(updateProvided && updateValids(b))
1019    )
1020  }
1021
1022  for (b <- 0 until TageBanks) {
1023    val m = updateMeta
1024    // val bri = u.metas(b)
1025    XSDebug(
1026      updateValids(b),
1027      "update(%d): pc=%x, cycle=%d, taken:%b, misPred:%d, bimctr:%d, pvdr(%d):%d, altDiff:%d, pvdrU:%d, pvdrCtr:%d, alloc:%b\n",
1028      b.U,
1029      update_pc,
1030      0.U,
1031      update.br_taken_mask(b),
1032      update.mispred_mask(b),
1033      0.U,
1034      m.providers(b).valid,
1035      m.providers(b).bits,
1036      m.altDiffers(b),
1037      m.providerResps(b).u,
1038      m.providerResps(b).ctr,
1039      m.allocates(b)
1040    )
1041  }
  // Copy of the s1 table responses, captured when s1 fires. Within this part of the file it is
  // read only by the per-table debug print that follows.
  val s2_resps = RegEnable(s1_resps, io.s1_fire(1))
  // Unconditional debug prints: the s0 request, the s1 response pc, and the s2 result
  // (target, per-bank provided bits and predicted taken bits).
  XSDebug("req: v=%d, pc=0x%x\n", io.s0_fire(1), s0_pc_dup(1))
  XSDebug("s1_fire:%d, resp: pc=%x\n", io.s1_fire(1), debug_pc_s1)
  XSDebug(
    "s2_fireOnLastCycle: resp: pc=%x, target=%x, hits=%b, takens=%b\n",
    debug_pc_s2,
    io.out.s2.getTarget(1),
    s2_provideds.asUInt,
    s2_tageTakens_dup(0).asUInt
  )
1052
1053  for (b <- 0 until TageBanks) {
1054    for (i <- 0 until TageNTables) {
1055      XSDebug(
1056        "bank(%d)_tage_table(%d): valid:%b, resp_ctr:%d, resp_us:%d\n",
1057        b.U,
1058        i.U,
1059        s2_resps(i)(b).valid,
1060        s2_resps(i)(b).bits.ctr,
1061        s2_resps(i)(b).bits.u
1062      )
1063    }
1064  }
1065  // XSDebug(io.update.valid && updateIsBr, p"update: sc: ${updateSCMeta}\n")
1066  // XSDebug(true.B, p"scThres: use(${useThreshold}), update(${updateThreshold})\n")
1067}
1068
// Tage with the HasSC trait mixed in; the class adds no members of its own.
class Tage_SC(implicit p: Parameters) extends Tage with HasSC
1070