xref: /XiangShan/src/main/scala/xiangshan/frontend/ITTAGE.scala (revision 30f35717e23156cb95b30a36db530384545b48a4)
/***************************************************************************************
 * Copyright (c) 2024 Beijing Institute of Open Source Chip (BOSC)
 * Copyright (c) 2020-2024 Institute of Computing Technology, Chinese Academy of Sciences
 * Copyright (c) 2020-2021 Peng Cheng Laboratory
 *
 * XiangShan is licensed under Mulan PSL v2.
 * You can use this software according to the terms and conditions of the Mulan PSL v2.
 * You may obtain a copy of Mulan PSL v2 at:
 * http://license.coscl.org.cn/MulanPSL2
 *
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 *
 * See the Mulan PSL v2 for more details.
 *
 *
 * Acknowledgement
 *
 * This implementation is inspired by several key papers:
 * [1] André Seznec. "[A 64-Kbytes ITTAGE indirect branch predictor.](https://inria.hal.science/hal-00639041)" The
 * Journal of Instruction-Level Parallelism (JILP) 2nd JILP Workshop on Computer Architecture Competitions (JWAC):
 * Championship Branch Prediction (CBP). 2011.
 ***************************************************************************************/

package xiangshan.frontend

import chisel3._
import chisel3.util._
import freechips.rocketchip.util.SeqBoolBitwiseOps
import org.chipsalliance.cde.config.Parameters
import scala.{Tuple2 => &}
import scala.math.min
import utility._
import utility.mbist.MbistPipeline
import utility.sram.FoldedSRAMTemplate
import xiangshan._

// Static configuration shared by all ITTAGE components (tables, region storage, top level).
trait ITTageParams extends HasXSParameter with HasBPUParameter {

  val ITTageNTables    = ITTageTableInfos.size // Number of tage tables
  val ITTageCtrBits    = 2  // width of the per-entry confidence counter
  val uFoldedWidth     = 16
  val TickWidth        = 8  // width of the tick counter that triggers periodic useful-bit reset
  val ITTageUsBits     = 1  // width of the per-entry useful bit
  val TargetOffsetBits = 20 // low target bits stored directly inside a table entry
  val RegionNums       = 16 // number of entries in the shared target-region table (RegionWays)
  val RegionBits       = VAddrBits - TargetOffsetBits // high target bits kept in the region table
  val RegionPorts      = 2  // region-table lookup ports used by the update path
  // Counter is at its weakest ("null") value.
  // NOTE(review): `ctrBits` is unused here; kept for signature symmetry with ctrUnconf.
  def ctrNull(ctr: UInt, ctrBits: Int = ITTageCtrBits): Bool =
    ctr === 0.U
  // Counter is in the unconfident (lower) half of its range.
  def ctrUnconf(ctr: UInt, ctrBits: Int = ITTageCtrBits): Bool =
    ctr < (1 << (ctrBits - 1)).U
  val UAONA_bits = 4 // width of the use-alt-on-weak-provider counter

  // Split a full virtual address into high (region) and low (offset) parts.
  def targetGetRegion(target: UInt): UInt = target(VAddrBits - 1, TargetOffsetBits)
  def targetGetOffset(target: UInt): UInt = target(TargetOffsetBits - 1, 0)

  // Nominal total storage across all tables, counting a full VAddr-wide target per entry.
  lazy val TotalBits: Int = ITTageTableInfos.map {
    case (s, h, t) => {
      s * (1 + t + ITTageCtrBits + ITTageUsBits + VAddrBits)
    }
  }.sum
}
65
// Common base for ITTAGE data/IO bundles: brings in parameters and BPU helper methods.
abstract class ITTageBundle(implicit p: Parameters)
    extends XSBundle with ITTageParams with BPUUtils
68
// Common base for ITTAGE hardware modules: brings in parameters and BPU helper methods.
abstract class ITTageModule(implicit p: Parameters)
    extends XSModule with ITTageParams with BPUUtils {}
71
// Per-table prediction request: fetch-block PC plus the folded global histories
// used to derive each table's index and tag.
class ITTageReq(implicit p: Parameters) extends ITTageBundle {
  val pc          = UInt(VAddrBits.W)
  val folded_hist = new AllFoldedHistories(foldedGHistInfos)
}
76
// Compressed target representation kept in a table entry: the low target bits,
// plus a pointer into the shared region table (RegionWays) for the high bits.
class ITTageOffset(implicit p: Parameters) extends ITTageBundle {
  val offset      = UInt(TargetOffsetBits.W)     // low bits of the target address
  val pointer     = UInt(log2Ceil(RegionNums).W) // index into the region table
  val usePCRegion = Bool()                       // target shares the high bits of the fetch PC
}
82
// Per-table prediction response: confidence counter, useful bit and compressed target.
class ITTageResp(implicit p: Parameters) extends ITTageBundle {
  val ctr           = UInt(ITTageCtrBits.W)
  val u             = UInt(ITTageUsBits.W)
  val target_offset = new ITTageOffset()
}
88
// Update request delivered to a single ITTAGE table.
class ITTageUpdate(implicit p: Parameters) extends ITTageBundle {
  val pc    = UInt(VAddrBits.W)
  val ghist = UInt(HistoryLength.W) // raw global history, re-folded inside the table
  // update tag and ctr
  val valid   = Bool()
  val correct = Bool() // prediction matched the resolved target
  val alloc   = Bool() // allocate a fresh entry instead of updating in place
  val oldCtr  = UInt(ITTageCtrBits.W)
  // update u
  val uValid  = Bool()
  val u       = Bool()
  val reset_u = Bool() // request the periodic clearing of all useful bits
  // target
  val target_offset     = new ITTageOffset() // resolved (correct) target, compressed
  val old_target_offset = new ITTageOffset() // target previously stored in the entry
}
105
// Prediction metadata carried through the BPU pipeline and handed back on update,
// so the update path knows which tables provided/allocated for this prediction.
class ITTageMeta(implicit p: Parameters) extends XSBundle with ITTageParams {
  val provider          = ValidUndirectioned(UInt(log2Ceil(ITTageNTables).W)) // longest-history hitting table
  val altProvider       = ValidUndirectioned(UInt(log2Ceil(ITTageNTables).W)) // second-longest hitting table
  val altDiffers        = Bool() // provider and altProvider targets differ
  val providerU         = Bool()
  val providerCtr       = UInt(ITTageCtrBits.W)
  val altProviderCtr    = UInt(ITTageCtrBits.W)
  val allocate          = ValidUndirectioned(UInt(log2Ceil(ITTageNTables).W)) // table to allocate into on mispredict
  val providerTarget    = UInt(VAddrBits.W)
  val altProviderTarget = UInt(VAddrBits.W)
  val pred_cycle        = if (!env.FPGAPlatform) Some(UInt(64.W)) else None // debug-only prediction timestamp

  override def toPrintable =
    p"pvdr(v:${provider.valid} num:${provider.bits} ctr:$providerCtr u:$providerU tar:${Hexadecimal(providerTarget)}), " +
      p"altpvdr(v:${altProvider.valid} num:${altProvider.bits}, ctr:$altProviderCtr, tar:${Hexadecimal(altProviderTarget)})"
}
122
// Stub table with the same IO as ITTageTable but no storage; its response is
// left unspecified (DontCare). Useful as an elaboration placeholder.
class FakeITTageTable()(implicit p: Parameters) extends ITTageModule {
  val io = IO(new Bundle() {
    val req    = Input(Valid(new ITTageReq))
    val resp   = Output(Valid(new ITTageResp))
    val update = Input(new ITTageUpdate)
  })
  io.resp := DontCare

}
132
// One entry of the shared region table: valid bit plus the high bits of a target.
class RegionEntry(implicit p: Parameters) extends ITTageBundle {
  val valid  = Bool()
  val region = UInt(RegionBits.W)
}
137
// Shared storage for the high (region) bits of indirect targets. ITTAGE entries
// keep only a short pointer into this table, so entries whose targets fall in
// the same region share one RegionEntry, saving storage versus full targets.
class RegionWays()(implicit p: Parameters) extends XSModule with ITTageParams {
  val io = IO(new Bundle {
    // prediction-side reads: one pointer per ITTAGE table
    val req_pointer = Input(Vec(ITTageNTables, UInt(log2Ceil(RegionNums).W)))
    val resp_hit    = Output(Vec(ITTageNTables, Bool()))
    val resp_region = Output(Vec(ITTageNTables, UInt(RegionBits.W)))

    // update-side lookups: translate a region value back into a pointer
    val update_region  = Input(Vec(RegionPorts, UInt(RegionBits.W)))
    val update_hit     = Output(Vec(RegionPorts, Bool()))
    val update_pointer = Output(Vec(RegionPorts, UInt(log2Ceil(RegionNums).W)))

    // allocation write port; write_pointer reports where the region was/will be stored
    val write_valid   = Input(Bool())
    val write_region  = Input(UInt(RegionBits.W))
    val write_pointer = Output(UInt(log2Ceil(RegionNums).W))
  })

  val regions             = RegInit(VecInit(Seq.fill(RegionNums)(0.U.asTypeOf(new RegionEntry()))))
  val replacer            = ReplacementPolicy.fromString("plru", RegionNums) // PLRU victim selection
  val replacer_touch_ways = Wire(Vec(1, Valid(UInt(log2Ceil(RegionNums).W))))

  val valids = VecInit((0 until RegionNums).map(w => regions(w).valid))
  val valid  = WireInit(valids.andR) // table completely full
  // write data
  // Write pointer priority: matching existing entry > first free slot > PLRU victim.
  val w_total_hits = VecInit((0 until RegionNums).map(w => regions(w).region === io.write_region && regions(w).valid))
  val w_hit        = w_total_hits.reduce(_ || _)
  val w_pointer    = Mux(w_hit, OHToUInt(w_total_hits), Mux(!valid, PriorityEncoder(~valids), replacer.way))
  XSError(PopCount(w_total_hits) > 1.U, "region has multiple hits!\n")
  XSPerfAccumulate("Region_entry_replace", !w_hit && valid && io.write_valid)

  io.write_pointer := w_pointer
  // read and metaTarget update read ports
  for (i <- 0 until ITTageNTables) {
    // read region use pointer
    io.resp_hit(i)    := regions(io.req_pointer(i)).valid
    io.resp_region(i) := regions(io.req_pointer(i)).region
  }

  for (i <- 0 until RegionPorts) {
    // When using metaTarget for updates, redefine the pointer
    val u_total_hits =
      VecInit((0 until RegionNums).map(w => regions(w).region === io.update_region(i) && regions(w).valid))
    // bypass a same-cycle write so the update sees the entry being allocated right now
    val u_bypass  = (io.update_region(i) === io.write_region) && io.write_valid
    val u_hit     = u_total_hits.reduce(_ || _) || u_bypass
    val u_pointer = Mux(u_bypass, w_pointer, OHToUInt(u_total_hits))
    io.update_hit(i)     := u_hit
    io.update_pointer(i) := u_pointer
    XSError(PopCount(u_total_hits) > 1.U, "region has multiple hits!\n")
  }
  // write
  when(io.write_valid) {
    when(!regions(w_pointer).valid) {
      regions(w_pointer).valid := true.B
    }
    regions(w_pointer).region := io.write_region
  }
  // every valid write (hit or allocation) counts as a replacer touch
  replacer_touch_ways(0).valid := io.write_valid
  replacer_touch_ways(0).bits  := w_pointer
  replacer.access(replacer_touch_ways)
}
196
// One tagged ITTAGE table: a folded single-port SRAM of (valid, tag, ctr,
// target_offset, useful) entries, indexed and tagged by the PC hashed with
// folded global history. The base table (histLen == tagLen == 0) always hits.
class ITTageTable(
    val nRows:    Int, // number of entries
    val histLen:  Int, // history length folded into index/tag (0 for the base table)
    val tagLen:   Int, // tag width (0 for the base table)
    val tableIdx: Int  // table number, used for debug printing only
)(implicit p: Parameters)
    extends ITTageModule with HasFoldedHistory {
  val io = IO(new Bundle() {
    val req    = Flipped(DecoupledIO(new ITTageReq))
    val resp   = Output(Valid(new ITTageResp))
    val update = Input(new ITTageUpdate)
  })

  val SRAM_SIZE = 128 // physical depth of one folded SRAM bank

  val foldedWidth = if (nRows >= SRAM_SIZE) nRows / SRAM_SIZE else 1
  val dataSplit   = if (nRows <= 2 * SRAM_SIZE) 1 else 2

  if (nRows < SRAM_SIZE) {
    println(f"warning: ittage table $tableIdx has small sram depth of $nRows")
  }

  // override val debug = true
  // bypass entries for tage update
  val wrBypassEntries = 4

  // A table either has both history and tag (tagged table) or neither (base table).
  require(histLen == 0 && tagLen == 0 || histLen != 0 && tagLen != 0)
  val idxFhInfo    = (histLen, min(log2Ceil(nRows), histLen))
  val tagFhInfo    = (histLen, min(histLen, tagLen))
  val altTagFhInfo = (histLen, min(histLen, tagLen - 1))
  val allFhInfos   = Seq(idxFhInfo, tagFhInfo, altTagFhInfo)

  // Folded-history configurations this table needs from the history manager.
  def getFoldedHistoryInfo: Set[(Int, Int)] = allFhInfos.filter(_._1 > 0).toSet

  // Hash PC with folded histories into (index, tag); base table uses plain PC bits.
  private def computeTagAndHash(unhashed_idx: UInt, allFh: AllFoldedHistories): (UInt, UInt) =
    if (histLen > 0) {
      val idx_fh     = allFh.getHistWithInfo(idxFhInfo).folded_hist
      val tag_fh     = allFh.getHistWithInfo(tagFhInfo).folded_hist
      val alt_tag_fh = allFh.getHistWithInfo(altTagFhInfo).folded_hist
      // require(idx_fh.getWidth == log2Ceil(nRows))
      val idx = (unhashed_idx ^ idx_fh)(log2Ceil(nRows) - 1, 0)
      val tag = ((unhashed_idx >> log2Ceil(nRows)).asUInt ^ tag_fh ^ (alt_tag_fh << 1).asUInt)(tagLen - 1, 0)
      (idx, tag)
    } else {
      require(tagLen == 0)
      (unhashed_idx(log2Ceil(nRows) - 1, 0), 0.U)
    }

  // Saturating increment/decrement of the confidence counter.
  def incCtr(ctr: UInt, taken: Bool): UInt = satUpdate(ctr, ITTageCtrBits, taken)

  class ITTageEntry extends ITTageBundle {
    val valid         = Bool()
    val tag           = UInt(tagLen.W)
    val ctr           = UInt(ITTageCtrBits.W)
    val target_offset = new ITTageOffset()
    val useful        = Bool() // Due to the bitMask the useful bit needs to be at the lowest bit
  }

  val ittageEntrySz = 1 + tagLen + ITTageCtrBits + ITTageUsBits + TargetOffsetBits + log2Ceil(RegionNums) + 1
  require(ittageEntrySz == (new ITTageEntry).getWidth)

  // pc is start address of basic block, most 2 branch inst in block
  def getUnhashedIdx(pc: UInt): UInt = (pc >> instOffsetBits).asUInt

  // ----- s0: hash; s1: SRAM data available -----
  val s0_valid        = io.req.valid
  val s0_pc           = io.req.bits.pc
  val s0_unhashed_idx = getUnhashedIdx(io.req.bits.pc)

  val (s0_idx, s0_tag) = computeTagAndHash(s0_unhashed_idx, io.req.bits.folded_hist)
  val (s1_idx, s1_tag) = (RegEnable(s0_idx, io.req.fire), RegEnable(s0_tag, io.req.fire))
  val s1_valid         = RegNext(s0_valid)

  val table = Module(new FoldedSRAMTemplate(
    new ITTageEntry,
    setSplit = 1,
    waySplit = 1,
    dataSplit = dataSplit,
    set = nRows,
    width = foldedWidth,
    shouldReset = true,
    holdRead = true,
    singlePort = true,
    useBitmask = true,
    withClockGate = true,
    hasMbist = hasMbist,
    hasSramCtl = hasSramCtl
  ))
  private val mbistPl = MbistPipeline.PlaceMbistPipeline(1, "MbistPipeIttage", hasMbist)
  table.io.r.req.valid       := io.req.fire
  table.io.r.req.bits.setIdx := s0_idx

  val table_read_data = table.io.r.resp.data(0)

  // hit = entry valid and tag matches
  val s1_req_rhit = table_read_data.valid && table_read_data.tag === s1_tag

  // single-port SRAM: a concurrent write corrupts this cycle's read, so mask the hit
  val read_write_conflict    = io.update.valid && io.req.valid
  val s1_read_write_conflict = RegEnable(read_write_conflict, io.req.valid)

  // base table (tagLen == 0) always responds
  io.resp.valid    := (if (tagLen != 0) s1_req_rhit && !s1_read_write_conflict else true.B) && s1_valid // && s1_mask(b)
  io.resp.bits.ctr := table_read_data.ctr
  io.resp.bits.u   := table_read_data.useful
  io.resp.bits.target_offset := table_read_data.target_offset

  // Use fetchpc to compute hash
  // Re-fold the raw history at update time to recompute index/tag.
  val update_folded_hist = WireInit(0.U.asTypeOf(new AllFoldedHistories(foldedGHistInfos)))

  update_folded_hist.getHistWithInfo(idxFhInfo).folded_hist    := compute_folded_ghist(io.update.ghist, log2Ceil(nRows))
  update_folded_hist.getHistWithInfo(tagFhInfo).folded_hist    := compute_folded_ghist(io.update.ghist, tagLen)
  update_folded_hist.getHistWithInfo(altTagFhInfo).folded_hist := compute_folded_ghist(io.update.ghist, tagLen - 1)
  val (update_idx, update_tag) = computeTagAndHash(getUnhashedIdx(io.update.pc), update_folded_hist)
  val update_wdata             = Wire(new ITTageEntry)

  // Bitmasks select which entry fields a write touches; useful sits at bit 0.
  val updateAllBitmask = VecInit.fill(ittageEntrySz)(1.U).asUInt // update all entry
  val updateNoBitmask  = VecInit.fill(ittageEntrySz)(0.U).asUInt // update no
  val updateNoUsBitmask =
    VecInit.tabulate(ittageEntrySz)(_.U >= ITTageUsBits.U).asUInt // update others besides useful bit
  val updateUsBitmask = VecInit.tabulate(ittageEntrySz)(_.U < ITTageUsBits.U).asUInt // update useful bit

  // Periodic useful-bit reset: walk all rows when the port is otherwise idle.
  val needReset               = RegInit(false.B)
  val useful_can_reset        = !(io.req.fire || io.update.valid) && needReset
  val (resetSet, resetFinish) = Counter(useful_can_reset, nRows)
  when(io.update.reset_u) {
    needReset := true.B
  }.elsewhen(resetFinish) {
    needReset := false.B
  }
  val update_bitmask = Mux(
    io.update.uValid && io.update.valid,
    updateAllBitmask,
    Mux(io.update.valid, updateNoUsBitmask, Mux(useful_can_reset, updateUsBitmask, updateNoBitmask))
  )

  table.io.w.apply(
    valid = io.update.valid || useful_can_reset,
    data = update_wdata,
    setIdx = Mux(useful_can_reset, resetSet, update_idx),
    waymask = true.B,
    bitmask = update_bitmask
  )

  // Power-on reset
  val powerOnResetState = RegInit(true.B)
  when(table.io.r.req.ready) {
    // When all the SRAM first reach ready state, we consider power-on reset is done
    powerOnResetState := false.B
  }
  // Do not use table banks io.r.req.ready directly
  // All table_banks are single port SRAM, ready := !wen
  // We do not want write request block the whole BPU pipeline
  // Once read priority is higher than write, table_banks(*).io.r.req.ready can be used
  io.req.ready := !powerOnResetState

  // Forward recently-written counters so back-to-back updates to the same index
  // see the newest ctr value instead of the stale oldCtr from prediction time.
  val wrbypass = Module(new WrBypass(UInt(ITTageCtrBits.W), wrBypassEntries, log2Ceil(nRows)))

  wrbypass.io.wen       := io.update.valid
  wrbypass.io.write_idx := update_idx
  wrbypass.io.write_data.map(_ := update_wdata.ctr)

  val old_ctr = Mux(wrbypass.io.hit, wrbypass.io.hit_data(0).bits, io.update.oldCtr)
  update_wdata.valid  := true.B
  // allocation seeds the counter at weakly-confident (2)
  update_wdata.ctr    := Mux(io.update.alloc, 2.U, incCtr(old_ctr, io.update.correct))
  update_wdata.tag    := update_tag
  update_wdata.useful := Mux(useful_can_reset, false.B, io.update.u)
  // only when ctr is null
  update_wdata.target_offset := Mux(
    io.update.alloc || ctrNull(old_ctr),
    io.update.target_offset,
    io.update.old_target_offset
  )

  XSPerfAccumulate("ittage_table_updates", io.update.valid)
  XSPerfAccumulate("ittage_table_hits", io.resp.valid)
  XSPerfAccumulate("ittage_us_tick_reset", io.update.reset_u)
  XSPerfAccumulate("ittage_table_read_write_conflict", read_write_conflict)

  if (BPUDebug && debug) {
    val u   = io.update
    val idx = s0_idx
    val tag = s0_tag
    XSDebug(
      io.req.fire,
      p"ITTageTableReq: pc=0x${Hexadecimal(io.req.bits.pc)}, " +
        p"idx=$idx, tag=$tag\n"
    )
    XSDebug(
      RegNext(io.req.fire) && s1_req_rhit,
      p"ITTageTableResp: idx=$s1_idx, hit:${s1_req_rhit}, " +
        p"ctr:${io.resp.bits.ctr}, u:${io.resp.bits.u}, tar:${Hexadecimal(io.resp.bits.target_offset.offset)}\n"
    )
    XSDebug(
      io.update.valid,
      p"update ITTAGE Table: pc:${Hexadecimal(u.pc)}}, " +
        p"correct:${u.correct}, alloc:${u.alloc}, oldCtr:${u.oldCtr}, " +
        p"target:${Hexadecimal(u.target_offset.offset)}, old_target:${Hexadecimal(u.old_target_offset.offset)}\n"
    )
    XSDebug(
      io.update.valid,
      p"update ITTAGE Table: writing tag:${update_tag}, " +
        p"ctr: ${update_wdata.ctr}, target:${Hexadecimal(update_wdata.target_offset.offset)}" +
        p" in idx $update_idx\n"
    )
    XSDebug(RegNext(io.req.fire) && !s1_req_rhit, "TageTableResp: no hits!\n")

    // ------------------------------Debug-------------------------------------
    val valids = RegInit(0.U.asTypeOf(Vec(nRows, Bool())))
    when(io.update.valid)(valids(update_idx) := true.B)
    XSDebug("ITTAGE Table usage:------------------------\n")
    XSDebug("%d out of %d rows are valid\n", PopCount(valids), nRows.U)
  }

}
408
409abstract class BaseITTage(implicit p: Parameters) extends BasePredictor with ITTageParams with BPUUtils {}
410
// Predictor stand-in: produces a zeroed output and is always ready, so the
// pipeline behaves as if ITTAGE never predicts.
class FakeITTage(implicit p: Parameters) extends BaseITTage {
  io.out <> 0.U.asTypeOf(DecoupledIO(new BasePredictorOutput))

  io.s1_ready := true.B
  io.s2_ready := true.B
}
417
418class ITTage(implicit p: Parameters) extends BaseITTage {
419  override val meta_size = 0.U.asTypeOf(new ITTageMeta).getWidth
420
421  val tables = ITTageTableInfos.zipWithIndex.map {
422    case ((nRows, histLen, tagLen), i) =>
423      val t = Module(new ITTageTable(nRows, histLen, tagLen, i))
424      t
425  }
426  override def getFoldedHistoryInfo: Option[Set[(Int, Int)]] = Some(tables.map(_.getFoldedHistoryInfo).reduce(_ ++ _))
427
428  val useAltOnNa = RegInit((1 << (UAONA_bits - 1)).U(UAONA_bits.W))
429  val tickCtr    = RegInit(0.U(TickWidth.W))
430
431  val rTable = Module(new RegionWays)
432
433  // uftb miss or hasIndirect
434  val s1_uftbHit         = io.in.bits.resp_in(0).s1_uftbHit
435  val s1_uftbHasIndirect = io.in.bits.resp_in(0).s1_uftbHasIndirect
436  val s1_isIndirect      = (!s1_uftbHit && !io.in.bits.resp_in(0).s1_ftbCloseReq) || s1_uftbHasIndirect
437
438  // Keep the table responses to process in s2
439
440  val s2_resps = VecInit(tables.map(t => t.io.resp))
441
442  val debug_pc_s1 = RegEnable(s0_pc_dup(3), io.s0_fire(3))
443  val debug_pc_s2 = RegEnable(debug_pc_s1, io.s1_fire(3))
444  val debug_pc_s3 = RegEnable(debug_pc_s2, io.s2_fire(3))
445
446  val s2_tageTarget        = Wire(UInt(VAddrBits.W))
447  val s2_providerTarget    = Wire(UInt(VAddrBits.W))
448  val s2_altProviderTarget = Wire(UInt(VAddrBits.W))
449  val s2_provided          = Wire(Bool())
450  val s2_provider          = Wire(UInt(log2Ceil(ITTageNTables).W))
451  val s2_altProvided       = Wire(Bool())
452  val s2_altProvider       = Wire(UInt(log2Ceil(ITTageNTables).W))
453  val s2_providerU         = Wire(Bool())
454  val s2_providerCtr       = Wire(UInt(ITTageCtrBits.W))
455  val s2_altProviderCtr    = Wire(UInt(ITTageCtrBits.W))
456
457  val s3_tageTarget_dup    = io.s2_fire.map(f => RegEnable(s2_tageTarget, f))
458  val s3_providerTarget    = RegEnable(s2_providerTarget, io.s2_fire(3))
459  val s3_altProviderTarget = RegEnable(s2_altProviderTarget, io.s2_fire(3))
460  val s3_provided          = RegEnable(s2_provided, io.s2_fire(3))
461  val s3_provider          = RegEnable(s2_provider, io.s2_fire(3))
462  val s3_altProvided       = RegEnable(s2_altProvided, io.s2_fire(3))
463  val s3_altProvider       = RegEnable(s2_altProvider, io.s2_fire(3))
464  val s3_providerU         = RegEnable(s2_providerU, io.s2_fire(3))
465  val s3_providerCtr       = RegEnable(s2_providerCtr, io.s2_fire(3))
466  val s3_altProviderCtr    = RegEnable(s2_altProviderCtr, io.s2_fire(3))
467
468  val resp_meta = WireInit(0.U.asTypeOf(new ITTageMeta))
469
470  io.out.last_stage_meta := resp_meta.asUInt
471
472  // Update logic
473  val u_valid = RegNext(io.update.valid, init = false.B)
474
475  val update = Wire(new BranchPredictionUpdate)
476  update := RegEnable(io.update.bits, io.update.valid)
477
478  // meta is splited by composer
479  val updateMeta = Wire(new ITTageMeta)
480  update.meta := updateMeta.asUInt
481
482  // The pc register has been moved outside of predictor
483  // pc field of update bundle and other update data are not in the same stage
484  // so io.update.bits.pc is used directly here
485  val update_pc = io.update.bits.pc
486
487  // To improve Clock Gating Efficiency
488  val u_meta = io.update.bits.meta.asTypeOf(new ITTageMeta)
489  updateMeta := RegEnable(u_meta, io.update.valid)
490  updateMeta.provider.bits := RegEnable(
491    u_meta.provider.bits,
492    io.update.valid && u_meta.provider.valid
493  )
494  updateMeta.providerTarget := RegEnable(
495    u_meta.providerTarget,
496    io.update.valid && u_meta.provider.valid
497  )
498  updateMeta.allocate.bits := RegEnable(
499    u_meta.allocate.bits,
500    io.update.valid && u_meta.allocate.valid
501  )
502  updateMeta.altProvider.bits := RegEnable(
503    u_meta.altProvider.bits,
504    io.update.valid && u_meta.altProvider.valid
505  )
506  updateMeta.altProviderTarget := RegEnable(
507    u_meta.altProviderTarget,
508    io.update.valid && u_meta.provider.valid && u_meta.altProvider.valid && u_meta.providerCtr === 0.U
509  )
510  update.full_target := RegEnable(
511    io.update.bits.full_target,
512    io.update.valid // not using mispred_mask, because mispred_mask timing is bad
513  )
514  update.cfi_idx.bits := RegEnable(io.update.bits.cfi_idx.bits, io.update.valid && io.update.bits.cfi_idx.valid)
515  update.ghist        := RegEnable(io.update.bits.ghist, io.update.valid) // TODO: CGE
516
517  val updateValid = update.is_jalr && !update.is_ret && u_valid && update.ftb_entry.jmpValid &&
518    update.jmp_taken && update.cfi_idx.valid &&
519    update.cfi_idx.bits === update.ftb_entry.tailSlot.offset && !update.ftb_entry.strong_bias(numBr - 1)
520
521  val updateMask            = WireInit(0.U.asTypeOf(Vec(ITTageNTables, Bool())))
522  val updateUMask           = WireInit(0.U.asTypeOf(Vec(ITTageNTables, Bool())))
523  val updateResetU          = WireInit(false.B)
524  val updateCorrect         = Wire(Vec(ITTageNTables, Bool()))
525  val updateAlloc           = Wire(Vec(ITTageNTables, Bool()))
526  val updateOldCtr          = Wire(Vec(ITTageNTables, UInt(ITTageCtrBits.W)))
527  val updateU               = Wire(Vec(ITTageNTables, Bool()))
528  val updateTargetOffset    = Wire(Vec(ITTageNTables, new ITTageOffset))
529  val updateOldTargetOffset = Wire(Vec(ITTageNTables, new ITTageOffset))
530  updateCorrect         := DontCare
531  updateAlloc           := DontCare
532  updateOldCtr          := DontCare
533  updateU               := DontCare
534  updateTargetOffset    := DontCare
535  updateOldTargetOffset := DontCare
536
537  val updateMisPred = update.mispred_mask(numBr) // the last one indicates jmp results
538
539  // Predict
540  tables.map { t =>
541    t.io.req.valid            := io.s1_fire(3) && s1_isIndirect
542    t.io.req.bits.pc          := s1_pc_dup(3)
543    t.io.req.bits.folded_hist := io.in.bits.s1_folded_hist(3)
544  }
545
546  // access tag tables and output meta info
547  class ITTageTableInfo(implicit p: Parameters) extends ITTageResp {
548    val tableIdx   = UInt(log2Ceil(ITTageNTables).W)
549    val maskTarget = Vec(ITTageNTables, UInt(VAddrBits.W))
550  }
551
552  val inputRes = VecInit(s2_resps.zipWithIndex.map {
553    case (r, i) =>
554      val tableInfo = Wire(new ITTageTableInfo)
555      tableInfo.u             := r.bits.u
556      tableInfo.ctr           := r.bits.ctr
557      tableInfo.target_offset := r.bits.target_offset
558      tableInfo.tableIdx      := i.U(log2Ceil(ITTageNTables).W)
559      tableInfo.maskTarget    := VecInit(Seq.fill(ITTageNTables)(0.U(VAddrBits.W)))
560      tableInfo.maskTarget(i) := "hffff_ffff_ffff_ffff".U
561      SelectTwoInterRes(r.valid, tableInfo)
562  })
563
564  val selectedInfo = ParallelSelectTwo(inputRes.reverse)
565  val provided     = selectedInfo.hasOne
566  val altProvided  = selectedInfo.hasTwo
567
568  val providerInfo    = selectedInfo.first
569  val altProviderInfo = selectedInfo.second
570  val providerNull    = providerInfo.ctr === 0.U
571
572  val baseTarget             = io.in.bits.resp_in(0).s2.full_pred(3).jalr_target // use ftb pred as base target
573  val region_r_target_offset = VecInit(s2_resps.map(r => r.bits.target_offset))
574
575  rTable.io.req_pointer.zipWithIndex.map { case (req_pointer, i) =>
576    req_pointer := region_r_target_offset(i).pointer
577  }
578  // When the entry corresponding to the pointer is valid and does not use PCRegion, use rTable region.
579  val region_targets = Wire(Vec(ITTageNTables, UInt(VAddrBits.W)))
580  for (i <- 0 until ITTageNTables) {
581    region_targets(i) := Mux(
582      rTable.io.resp_hit(i) && !region_r_target_offset(i).usePCRegion,
583      Cat(rTable.io.resp_region(i), region_r_target_offset(i).offset),
584      Cat(targetGetRegion(s2_pc_dup(0).getAddr()), region_r_target_offset(i).offset)
585    )
586  }
587
588  val providerCatTarget = providerInfo.maskTarget.zipWithIndex.map {
589    case (mask, i) => mask & region_targets(i)
590  }.reduce(_ | _)
591
592  val altproviderCatTarget = altProviderInfo.maskTarget.zipWithIndex.map {
593    case (mask, i) => mask & region_targets(i)
594  }.reduce(_ | _)
595
596  s2_tageTarget := Mux1H(Seq(
597    (provided && !(providerNull && altProvided), providerCatTarget),
598    (altProvided && providerNull, altproviderCatTarget),
599    (!provided, baseTarget)
600  ))
601  s2_provided          := provided
602  s2_provider          := providerInfo.tableIdx
603  s2_altProvided       := altProvided
604  s2_altProvider       := altProviderInfo.tableIdx
605  s2_providerU         := providerInfo.u
606  s2_providerCtr       := providerInfo.ctr
607  s2_altProviderCtr    := altProviderInfo.ctr
608  s2_providerTarget    := providerCatTarget
609  s2_altProviderTarget := altproviderCatTarget
610
611  XSDebug(io.s2_fire(3), p"hit_taken_jalr:")
612
613  for (
614    fp & s3_tageTarget <-
615      io.out.s3.full_pred zip s3_tageTarget_dup
616  )
617    yield fp.jalr_target := s3_tageTarget
618
619  resp_meta.provider.valid    := s3_provided
620  resp_meta.provider.bits     := s3_provider
621  resp_meta.altProvider.valid := s3_altProvided
622  resp_meta.altProvider.bits  := s3_altProvider
623  resp_meta.altDiffers        := s3_providerTarget =/= s3_altProviderTarget
624  resp_meta.providerU         := s3_providerU
625  resp_meta.providerCtr       := s3_providerCtr
626  resp_meta.altProviderCtr    := s3_altProviderCtr
627  resp_meta.providerTarget    := s3_providerTarget
628  resp_meta.altProviderTarget := s3_altProviderTarget
629  resp_meta.pred_cycle.foreach(_ := GTimer())
630  // TODO: adjust for ITTAGE
631  // Create a mask fo tables which did not hit our query, and also contain useless entries
632  // and also uses a longer history than the provider
633  val s2_allocatableSlots = VecInit(s2_resps.map(r => !r.valid && !r.bits.u)).asUInt &
634    (~(LowerMask(UIntToOH(s2_provider), ITTageNTables) & Fill(ITTageNTables, s2_provided.asUInt))).asUInt
635  val s2_allocLFSR   = random.LFSR(width = 15)(ITTageNTables - 1, 0)
636  val s2_firstEntry  = PriorityEncoder(s2_allocatableSlots)
637  val s2_maskedEntry = PriorityEncoder(s2_allocatableSlots & s2_allocLFSR)
638  val s2_allocEntry  = Mux(s2_allocatableSlots(s2_maskedEntry), s2_maskedEntry, s2_firstEntry)
639  resp_meta.allocate.valid := RegEnable(s2_allocatableSlots =/= 0.U, io.s2_fire(3))
640  resp_meta.allocate.bits  := RegEnable(s2_allocEntry, io.s2_fire(3))
641
642  // Update in loop
643  val updateRealTarget       = update.full_target
644  val updatePCRegion         = targetGetRegion(update.pc)
645  val updateRealTargetRegion = targetGetRegion(updateRealTarget)
646  val metaProviderTargetOffset, metaAltProviderTargetOffset, updateRealTargetOffset =
647    WireInit(0.U.asTypeOf(new ITTageOffset))
648  updateRealTargetOffset.offset := targetGetOffset(updateRealTarget)
649  val updateRealUsePCRegion = updateRealTargetRegion === updatePCRegion
650  // If rTable is not written in Region, the pointer value will be invalid.
651  // At this time, it is necessary to raise usePCRegion.
652  // The update mechanism of the usePCRegion bit requires further consideration.
653  updateRealTargetOffset.usePCRegion := updateRealUsePCRegion || !updateAlloc.reduce(_ || _)
654  rTable.io.write_valid              := !updateRealUsePCRegion && updateAlloc.reduce(_ || _)
655  rTable.io.write_region             := updateRealTargetRegion
656  updateRealTargetOffset.pointer     := rTable.io.write_pointer
657
658  val metaProviderTargetRegion    = targetGetRegion(updateMeta.providerTarget)
659  val metaAltProviderTargetRegion = targetGetRegion(updateMeta.altProviderTarget)
660
661  rTable.io.update_region              := VecInit(metaProviderTargetRegion, metaAltProviderTargetRegion)
662  metaProviderTargetOffset.offset      := targetGetOffset(updateMeta.providerTarget)
663  metaProviderTargetOffset.pointer     := rTable.io.update_pointer(0)
664  metaProviderTargetOffset.usePCRegion := !rTable.io.update_hit(0)
665
666  metaAltProviderTargetOffset.offset      := targetGetOffset(updateMeta.altProviderTarget)
667  metaAltProviderTargetOffset.pointer     := rTable.io.update_pointer(1)
668  metaAltProviderTargetOffset.usePCRegion := !rTable.io.update_hit(1)
669
670  val provider    = updateMeta.provider.bits
671  val altProvider = updateMeta.altProvider.bits
672  val usedAltpred = updateMeta.altProvider.valid && updateMeta.providerCtr === 0.U
673  when(updateValid) {
674    when(updateMeta.provider.valid) {
675      when(usedAltpred && updateMisPred) { // update altpred if used as pred
676        updateMask(altProvider)            := true.B
677        updateUMask(altProvider)           := false.B
678        updateCorrect(altProvider)         := false.B
679        updateOldCtr(altProvider)          := updateMeta.altProviderCtr
680        updateAlloc(altProvider)           := false.B
681        updateTargetOffset(altProvider)    := updateRealTargetOffset
682        updateOldTargetOffset(altProvider) := metaAltProviderTargetOffset
683      }
684
685      updateMask(provider)  := true.B
686      updateUMask(provider) := true.B
687
688      updateU(provider) := Mux(
689        !updateMeta.altDiffers,
690        updateMeta.providerU,
691        updateMeta.providerTarget === updateRealTarget
692      )
693      updateCorrect(provider)         := updateMeta.providerTarget === updateRealTarget
694      updateOldCtr(provider)          := updateMeta.providerCtr
695      updateAlloc(provider)           := false.B
696      updateTargetOffset(provider)    := updateRealTargetOffset
697      updateOldTargetOffset(provider) := metaProviderTargetOffset
698    }
699  }
700  XSDebug(
701    updateValid && updateMeta.provider.valid,
702    p"update provider $provider, pred cycle ${updateMeta.pred_cycle.getOrElse(0.U)}\n"
703  )
704  XSDebug(
705    updateValid && updateMeta.provider.valid && usedAltpred && updateMisPred,
706    p"update altprovider $altProvider, pred cycle ${updateMeta.pred_cycle.getOrElse(0.U)}\n"
707  )
708
  // if mispredicted and not the case that
  // provider offered correct target but used altpred due to unconfident
  val providerCorrect = updateMeta.provider.valid && updateMeta.providerTarget === updateRealTarget
  val providerUnconf  = updateMeta.providerCtr === 0.U
  // `allocate` points at a table with a longer history than the provider whose
  // entry is free to steal (chosen at update-meta generation time).
  val allocate = updateMeta.allocate
  when(updateValid && updateMisPred && !(providerCorrect && providerUnconf)) {
    // tickCtr saturates upward on failed allocations and decays on successful
    // ones; when it saturates, all useful bits are reset (see below).
    tickCtr := satUpdate(tickCtr, TickWidth, !allocate.valid)
    when(allocate.valid) {
      updateMask(allocate.bits)         := true.B
      updateCorrect(allocate.bits)      := DontCare // useless for alloc
      updateAlloc(allocate.bits)        := true.B
      updateUMask(allocate.bits)        := true.B
      updateU(allocate.bits)            := false.B // fresh entries start not-useful
      updateTargetOffset(allocate.bits) := updateRealTargetOffset
    }
  }
  XSDebug(
    updateValid && updateMisPred && !(providerCorrect && providerUnconf) && allocate.valid,
    p"allocate new table entry, pred cycle ${updateMeta.pred_cycle.getOrElse(0.U)}\n"
  )

  // Tick counter saturated: trigger a global useful-bit reset and restart the
  // counter. Chisel last-connect semantics let this override the satUpdate
  // assignment above in the same cycle.
  when(tickCtr === ((1 << TickWidth) - 1).U) {
    tickCtr      := 0.U
    updateResetU := true.B
  }
734
  // Pipeline the per-table update signals by one cycle before they reach the
  // tables. Control signals use RegNext with a reset value; payloads use
  // RegEnable gated on updateMask(i) so they only toggle when that table is
  // actually being written.
  for (i <- 0 until ITTageNTables) {
    tables(i).io.update.valid             := RegNext(updateMask(i), init = false.B)
    tables(i).io.update.reset_u           := RegNext(updateResetU, init = false.B)
    tables(i).io.update.correct           := RegEnable(updateCorrect(i), updateMask(i))
    tables(i).io.update.alloc             := RegEnable(updateAlloc(i), updateMask(i))
    tables(i).io.update.oldCtr            := RegEnable(updateOldCtr(i), updateMask(i))
    tables(i).io.update.target_offset     := RegEnable(updateTargetOffset(i), updateMask(i))
    tables(i).io.update.old_target_offset := RegEnable(updateOldTargetOffset(i), updateMask(i))

    // uValid carries an explicit false.B reset so a stale useful-bit write
    // cannot fire when no update is in flight.
    tables(i).io.update.uValid := RegEnable(updateUMask(i), false.B, updateMask(i))
    tables(i).io.update.u      := RegEnable(updateU(i), updateMask(i))
    tables(i).io.update.pc     := RegEnable(update_pc, updateMask(i))
    tables(i).io.update.ghist  := RegEnable(update.ghist, updateMask(i))
  }
749
  // all should be ready for req
  io.s1_ready := tables.map(_.io.req.ready).reduce(_ && _)

  // Debug and perf info
  XSPerfAccumulate("ittage_reset_u", updateResetU)
  // s1_isIndirect gates whether ITTAGE is consulted at all for this slot.
  XSPerfAccumulate("ittage_used", io.s1_fire(0) && s1_isIndirect)
  XSPerfAccumulate("ittage_closed_due_to_uftb_info", io.s1_fire(0) && !s1_isIndirect)
  XSPerfAccumulate("ittage_allocate", updateAlloc.reduce(_ || _))
758
759  private def pred_perf(name:   String, cond: Bool) = XSPerfAccumulate(s"${name}_at_pred", cond && io.s2_fire(3))
760  private def commit_perf(name: String, cond: Bool) = XSPerfAccumulate(s"${name}_at_commit", cond && updateValid)
761  private def ittage_perf(name: String, pred_cond: Bool, commit_cond: Bool) = {
762    pred_perf(s"ittage_${name}", pred_cond)
763    commit_perf(s"ittage_${name}", commit_cond)
764  }
  // Classify, at prediction time (s2), which source produced the final
  // prediction: the provider itself, a hit in a shorter-history table acting
  // as altpred, or the base predictor when nothing (or only the provider)
  // hit. ctrNull means the provider's confidence counter is at its null
  // state, so its prediction is not trusted.
  // NOTE(review): "bim" in the pred_* names and "ftb" in the commit_* names
  // appear to refer to the same base-predictor fallback — confirm and unify.
  val pred_use_provider       = s2_provided && !ctrNull(s2_providerCtr)
  val pred_use_altpred        = s2_provided && ctrNull(s2_providerCtr)
  val pred_use_ht_as_altpred  = pred_use_altpred && s2_altProvided
  val pred_use_bim_as_altpred = pred_use_altpred && !s2_altProvided
  val pred_use_bim_as_pred    = !s2_provided

  // The same classification replayed at commit time from the saved metadata.
  val commit_use_provider       = updateMeta.provider.valid && !ctrNull(updateMeta.providerCtr)
  val commit_use_altpred        = updateMeta.provider.valid && ctrNull(updateMeta.providerCtr)
  val commit_use_ht_as_altpred  = commit_use_altpred && updateMeta.altProvider.valid
  val commit_use_ftb_as_altpred = commit_use_altpred && !updateMeta.altProvider.valid
  val commit_use_ftb_as_pred    = !updateMeta.provider.valid
776
777  for (i <- 0 until ITTageNTables) {
778    val pred_this_is_provider   = s2_provider === i.U
779    val pred_this_is_altpred    = s2_altProvider === i.U
780    val commit_this_is_provider = updateMeta.provider.bits === i.U
781    val commit_this_is_altpred  = updateMeta.altProvider.bits === i.U
782    ittage_perf(
783      s"table_${i}_final_provided",
784      pred_use_provider && pred_this_is_provider,
785      commit_use_provider && commit_this_is_provider
786    )
787    ittage_perf(
788      s"table_${i}_provided_not_used",
789      pred_use_altpred && pred_this_is_provider,
790      commit_use_altpred && commit_this_is_provider
791    )
792    ittage_perf(
793      s"table_${i}_alt_provider_as_final_pred",
794      pred_use_ht_as_altpred && pred_this_is_altpred,
795      commit_use_ht_as_altpred && commit_this_is_altpred
796    )
797    ittage_perf(
798      s"table_${i}_alt_provider_not_used",
799      pred_use_provider && pred_this_is_altpred,
800      commit_use_provider && commit_this_is_altpred
801    )
802  }
803
  // Aggregate (table-independent) versions of the same events.
  ittage_perf("provided", s2_provided, updateMeta.provider.valid)
  ittage_perf("use_provider", pred_use_provider, commit_use_provider)
  ittage_perf("use_altpred", pred_use_altpred, commit_use_altpred)
  ittage_perf("use_ht_as_altpred", pred_use_ht_as_altpred, commit_use_ht_as_altpred)
  ittage_perf("use_ftb_when_no_provider", pred_use_bim_as_pred, commit_use_ftb_as_pred)
  ittage_perf("use_ftb_as_alt_provider", pred_use_bim_as_altpred, commit_use_ftb_as_altpred)
  XSPerfAccumulate("updated", updateValid)
811
  // Elaboration-time debug dump: only generated when `debug` is set, so it
  // adds no hardware in normal builds.
  if (debug) {
    // Hold the s2 table responses one extra cycle so they line up with the
    // s2-fire-based prints below.
    val s2_resps_regs = RegEnable(s2_resps, io.s2_fire(3))
    XSDebug("req: v=%d, pc=0x%x\n", io.s0_fire(3), s0_pc_dup(3))
    XSDebug("s1_fire:%d, resp: pc=%x\n", io.s1_fire(3), debug_pc_s1)
    XSDebug("s2_fireOnLastCycle: resp: pc=%x, target=%x, hit=%b\n", debug_pc_s2, io.out.s2.getTarget(3), s2_provided)
    for (i <- 0 until ITTageNTables) {
      XSDebug(
        "TageTable(%d): valids:%b, resp_ctrs:%b, resp_us:%b, target:%x\n",
        i.U,
        VecInit(s2_resps_regs(i).valid).asUInt,
        s2_resps_regs(i).bits.ctr,
        s2_resps_regs(i).bits.u,
        s2_resps_regs(i).bits.target_offset.offset
      )
    }
  }
  XSDebug(updateValid, p"pc: ${Hexadecimal(update_pc)}, target: ${Hexadecimal(update.full_target)}\n")
  XSDebug(updateValid, updateMeta.toPrintable + p"\n")
  XSDebug(updateValid, p"correct(${!updateMisPred})\n")

  // Materialize the XSPerfAccumulate counters declared above.
  generatePerfEvent()
833}
834