1/*************************************************************************************** 2 * Copyright (c) 2024 Beijing Institute of Open Source Chip (BOSC) 3 * Copyright (c) 2020-2024 Institute of Computing Technology, Chinese Academy of Sciences 4 * Copyright (c) 2020-2021 Peng Cheng Laboratory 5 * 6 * XiangShan is licensed under Mulan PSL v2. 7 * You can use this software according to the terms and conditions of the Mulan PSL v2. 8 * You may obtain a copy of Mulan PSL v2 at: 9 * http://license.coscl.org.cn/MulanPSL2 10 * 11 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, 12 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, 13 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. 14 * 15 * See the Mulan PSL v2 for more details. 16 * 17 * 18 * Acknowledgement 19 * 20 * This implementation is inspired by several key papers: 21 * [1] André Seznec. "[A 64-Kbytes ITTAGE indirect branch predictor.](https://inria.hal.science/hal-00639041)" The 22 * Journal of Instruction-Level Parallelism (JILP) 2nd JILP Workshop on Computer Architecture Competitions (JWAC): 23 * Championship Branch Prediction (CBP). 2011. 
 ***************************************************************************************/

package xiangshan.frontend

import chisel3._
import chisel3.util._
import freechips.rocketchip.util.SeqBoolBitwiseOps
import org.chipsalliance.cde.config.Parameters
import scala.{Tuple2 => &} // `&` is an infix alias for Tuple2, used as a pattern extractor in for-comprehensions below
import scala.math.min
import utility._
import utility.mbist.MbistPipeline
import utility.sram.FoldedSRAMTemplate
import xiangshan._

// Compile-time parameters shared by all ITTAGE components (tables, region cache, top predictor).
trait ITTageParams extends HasXSParameter with HasBPUParameter {

  val ITTageNTables = ITTageTableInfos.size // Number of tage tables
  val ITTageCtrBits = 2                     // confidence counter width per entry
  val uFoldedWidth = 16
  val TickWidth = 8                         // width of the global "tick" counter that triggers useful-bit reset
  val ITTageUsBits = 1                      // useful-bit width per entry
  val TargetOffsetBits = 20                 // low bits of the target stored directly in a table entry
  val RegionNums = 16                       // number of entries in the shared region cache (RegionWays)
  val RegionBits = VAddrBits - TargetOffsetBits // upper target bits kept in the region cache
  val RegionPorts = 2                       // region lookup ports used at update time (provider + altProvider)
  // ctr == 0 means "null" confidence; note: the ctrBits parameter is not used in this check
  def ctrNull(ctr: UInt, ctrBits: Int = ITTageCtrBits): Bool =
    ctr === 0.U
  // ctr below the midpoint means unconfident
  def ctrUnconf(ctr: UInt, ctrBits: Int = ITTageCtrBits): Bool =
    ctr < (1 << (ctrBits - 1)).U
  val UAONA_bits = 4 // width of the use-alt-on-not-confident counter

  // Split a full virtual address into region (upper bits) and offset (lower bits).
  def targetGetRegion(target: UInt): UInt = target(VAddrBits - 1, TargetOffsetBits)
  def targetGetOffset(target: UInt): UInt = target(TargetOffsetBits - 1, 0)

  // Rough storage estimate across all tables.
  // NOTE(review): counts a full VAddrBits target per entry, while ITTageEntry actually stores
  // a compressed offset + region pointer — this estimate looks conservative; confirm intent.
  lazy val TotalBits: Int = ITTageTableInfos.map {
    case (s, h, t) => {
      s * (1 + t + ITTageCtrBits + ITTageUsBits + VAddrBits)
    }
  }.sum
}

abstract class ITTageBundle(implicit p: Parameters)
    extends XSBundle with ITTageParams with BPUUtils

abstract class ITTageModule(implicit p: Parameters)
    extends XSModule with ITTageParams with BPUUtils {}

// Per-prediction request: lookup PC plus the pre-folded global history used for index/tag hashing.
class ITTageReq(implicit p: Parameters) extends ITTageBundle {
  val pc = UInt(VAddrBits.W)
  val folded_hist = new AllFoldedHistories(foldedGHistInfos)
}

// Compressed target representation stored in a table entry:
// low TargetOffsetBits of the target, a pointer into the region cache for the upper bits,
// and a flag selecting the PC's own region instead of the cached one.
class ITTageOffset(implicit p: Parameters) extends ITTageBundle {
  val offset = UInt(TargetOffsetBits.W)
  val pointer = UInt(log2Ceil(RegionNums).W)
  val usePCRegion = Bool()
}

// Per-table prediction response.
class ITTageResp(implicit p: Parameters) extends ITTageBundle {
  val ctr = UInt(ITTageCtrBits.W)
  val u = UInt(ITTageUsBits.W)
  val target_offset = new ITTageOffset()
}

// Update command sent to a single table at commit time.
class ITTageUpdate(implicit p: Parameters) extends ITTageBundle {
  val pc = UInt(VAddrBits.W)
  val ghist = UInt(HistoryLength.W)
  // update tag and ctr
  val valid = Bool()
  val correct = Bool()
  val alloc = Bool()
  val oldCtr = UInt(ITTageCtrBits.W)
  // update u
  val uValid = Bool()
  val u = Bool()
  val reset_u = Bool()
  // target
  val target_offset = new ITTageOffset()
  val old_target_offset = new ITTageOffset()
}

// Prediction metadata carried through the pipeline and returned at update time,
// recording which table provided the prediction (and the alternate), their counters,
// targets, and a pre-chosen allocation candidate.
class ITTageMeta(implicit p: Parameters) extends XSBundle with ITTageParams {
  val provider = ValidUndirectioned(UInt(log2Ceil(ITTageNTables).W))
  val altProvider = ValidUndirectioned(UInt(log2Ceil(ITTageNTables).W))
  val altDiffers = Bool()
  val providerU = Bool()
  val providerCtr = UInt(ITTageCtrBits.W)
  val altProviderCtr = UInt(ITTageCtrBits.W)
  val allocate = ValidUndirectioned(UInt(log2Ceil(ITTageNTables).W))
  val providerTarget = UInt(VAddrBits.W)
  val altProviderTarget = UInt(VAddrBits.W)
  // debug-only timestamp, elided on FPGA builds
  val pred_cycle = if (!env.FPGAPlatform) Some(UInt(64.W)) else None

  override def toPrintable =
    p"pvdr(v:${provider.valid} num:${provider.bits} ctr:$providerCtr u:$providerU tar:${Hexadecimal(providerTarget)}), " +
      p"altpvdr(v:${altProvider.valid} num:${altProvider.bits}, ctr:$altProviderCtr, tar:${Hexadecimal(altProviderTarget)})"
}

// Stub table: drives the response with DontCare; useful as a placeholder implementation.
class FakeITTageTable()(implicit p: Parameters) extends ITTageModule {
  val io = IO(new Bundle() {
    val req = Input(Valid(new ITTageReq))
    val resp = Output(Valid(new ITTageResp))
    val update = Input(new ITTageUpdate)
  })
  io.resp := DontCare

}

// One entry of the shared region cache: the upper RegionBits of a target.
class RegionEntry(implicit p: Parameters) extends ITTageBundle {
  val valid = Bool()
  val region = UInt(RegionBits.W)
}

// Small fully-associative cache of target regions (upper address bits), shared by all tables.
// Table entries store only a pointer into this cache; PLRU replacement on writes.
class RegionWays()(implicit p: Parameters) extends XSModule with ITTageParams {
  val io = IO(new Bundle {
    // prediction-side lookup: one pointer per table, returns the cached region (if valid)
    val req_pointer = Input(Vec(ITTageNTables, UInt(log2Ceil(RegionNums).W)))
    val resp_hit = Output(Vec(ITTageNTables, Bool()))
    val resp_region = Output(Vec(ITTageNTables, UInt(RegionBits.W)))

    // update-side reverse lookup: region value -> pointer (content-addressed)
    val update_region = Input(Vec(RegionPorts, UInt(RegionBits.W)))
    val update_hit = Output(Vec(RegionPorts, Bool()))
    val update_pointer = Output(Vec(RegionPorts, UInt(log2Ceil(RegionNums).W)))

    // write/allocate port: returns the pointer the region was (or will be) placed at
    val write_valid = Input(Bool())
    val write_region = Input(UInt(RegionBits.W))
    val write_pointer = Output(UInt(log2Ceil(RegionNums).W))
  })

  val regions = RegInit(VecInit(Seq.fill(RegionNums)(0.U.asTypeOf(new RegionEntry()))))
  val replacer = ReplacementPolicy.fromString("plru", RegionNums)
  val replacer_touch_ways = Wire(Vec(1, Valid(UInt(log2Ceil(RegionNums).W))))

  val valids = VecInit((0 until RegionNums).map(w => regions(w).valid))
  val valid = WireInit(valids.andR) // all entries occupied
  // write data
  // On a write: reuse the matching entry if the region is already present; otherwise
  // prefer an invalid slot, and only fall back to the PLRU victim when the cache is full.
  val w_total_hits = VecInit((0 until RegionNums).map(w => regions(w).region === io.write_region && regions(w).valid))
  val w_hit = w_total_hits.reduce(_ || _)
  val w_pointer = Mux(w_hit, OHToUInt(w_total_hits), Mux(!valid, PriorityEncoder(~valids), replacer.way))
  XSError(PopCount(w_total_hits) > 1.U, "region has multiple hits!\n")
  XSPerfAccumulate("Region_entry_replace", !w_hit && valid && io.write_valid)

  io.write_pointer := w_pointer
  // read and metaTarget update read ports
  for (i <- 0 until ITTageNTables) {
    // read region use pointer
    io.resp_hit(i) := regions(io.req_pointer(i)).valid
    io.resp_region(i) := regions(io.req_pointer(i)).region
  }

  for (i <- 0 until RegionPorts) {
    // When using metaTarget for updates, redefine the pointer
    val u_total_hits =
      VecInit((0 until RegionNums).map(w => regions(w).region === io.update_region(i) && regions(w).valid))
    // bypass an in-flight write of the same region so the freshly allocated pointer is visible
    val u_bypass = (io.update_region(i) === io.write_region) && io.write_valid
    val u_hit = u_total_hits.reduce(_ || _) || u_bypass
    val u_pointer = Mux(u_bypass, w_pointer, OHToUInt(u_total_hits))
    io.update_hit(i) := u_hit
    io.update_pointer(i) := u_pointer
    XSError(PopCount(u_total_hits) > 1.U, "region has multiple hits!\n")
  }
  // write
  when(io.write_valid) {
    when(!regions(w_pointer).valid) {
      regions(w_pointer).valid := true.B
    }
    regions(w_pointer).region := io.write_region
  }
  // PLRU state is only touched on writes; reads do not update recency
  replacer_touch_ways(0).valid := io.write_valid
  replacer_touch_ways(0).bits := w_pointer
  replacer.access(replacer_touch_ways)
}

// One tagged ITTAGE component table: nRows entries indexed/tagged by PC hashed with
// histLen bits of folded global history. histLen == 0 && tagLen == 0 denotes the base table.
class ITTageTable(
    val nRows: Int,
    val histLen: Int,
    val tagLen: Int,
    val tableIdx: Int
)(implicit p: Parameters)
    extends ITTageModule with HasFoldedHistory {
  val io = IO(new Bundle() {
    val req = Flipped(DecoupledIO(new ITTageReq))
    val resp = Output(Valid(new ITTageResp))
    val update = Input(new ITTageUpdate)
  })

  val SRAM_SIZE = 128 // physical SRAM depth used for folding

  // fold logical rows into wider, shallower SRAM macros when the table is deep
  val foldedWidth = if (nRows >= SRAM_SIZE) nRows / SRAM_SIZE else 1
  val dataSplit = if (nRows <= 2 * SRAM_SIZE) 1 else 2

  if (nRows < SRAM_SIZE) {
    println(f"warning: ittage table $tableIdx has small sram depth of $nRows")
  }

  // override val debug = true
  // bypass entries for tage update
  val wrBypassEntries = 4

  // histLen and tagLen must be both zero (base table) or both non-zero (tagged table)
  require(histLen == 0 && tagLen == 0 || histLen != 0 && tagLen != 0)
  // (full history length, folded width) pairs for index, tag, and alternate tag hashes
  val idxFhInfo = (histLen, min(log2Ceil(nRows), histLen))
  val tagFhInfo = (histLen, min(histLen, tagLen))
  val altTagFhInfo = (histLen, min(histLen, tagLen - 1))
  val allFhInfos = Seq(idxFhInfo, tagFhInfo, altTagFhInfo)

  def getFoldedHistoryInfo: Set[(Int, Int)] = allFhInfos.filter(_._1 > 0).toSet

  // Hash the (shifted) PC with the folded histories to produce the set index and tag.
  private def computeTagAndHash(unhashed_idx: UInt, allFh: AllFoldedHistories): (UInt, UInt) =
    if (histLen > 0) {
      val idx_fh = allFh.getHistWithInfo(idxFhInfo).folded_hist
      val tag_fh = allFh.getHistWithInfo(tagFhInfo).folded_hist
      val alt_tag_fh = allFh.getHistWithInfo(altTagFhInfo).folded_hist
      // require(idx_fh.getWidth == log2Ceil(nRows))
      val idx = (unhashed_idx ^ idx_fh)(log2Ceil(nRows) - 1, 0)
      // alternate tag hash is shifted left by 1 to decorrelate it from the primary tag hash
      val tag = ((unhashed_idx >> log2Ceil(nRows)).asUInt ^ tag_fh ^ (alt_tag_fh << 1).asUInt)(tagLen - 1, 0)
      (idx, tag)
    } else {
      // base table: direct-indexed, untagged
      require(tagLen == 0)
      (unhashed_idx(log2Ceil(nRows) - 1, 0), 0.U)
    }

  // saturating confidence-counter update
  def incCtr(ctr: UInt, taken: Bool): UInt = satUpdate(ctr, ITTageCtrBits, taken)

  class ITTageEntry extends ITTageBundle {
    val valid = Bool()
    val tag = UInt(tagLen.W)
    val ctr = UInt(ITTageCtrBits.W)
    val target_offset = new ITTageOffset()
    val useful = Bool() // Due to the bitMask the useful bit needs to be at the lowest bit
  }

  // valid + tag + ctr + (offset + pointer + usePCRegion) + useful
  val ittageEntrySz = 1 + tagLen + ITTageCtrBits + ITTageUsBits + TargetOffsetBits + log2Ceil(RegionNums) + 1
  require(ittageEntrySz == (new ITTageEntry).getWidth)

  // pc is start address of basic block, most 2 branch inst in block
  def getUnhashedIdx(pc: UInt): UInt = (pc >> instOffsetBits).asUInt

  // ---- s0: hash, s1: SRAM data back + tag compare ----
  val s0_valid = io.req.valid
  val s0_pc = io.req.bits.pc
  val s0_unhashed_idx = getUnhashedIdx(io.req.bits.pc)

  val (s0_idx, s0_tag) = computeTagAndHash(s0_unhashed_idx, io.req.bits.folded_hist)
  val (s1_idx, s1_tag) = (RegEnable(s0_idx, io.req.fire), RegEnable(s0_tag, io.req.fire))
  val s1_valid = RegNext(s0_valid)

  val table = Module(new FoldedSRAMTemplate(
    new ITTageEntry,
    setSplit = 1,
    waySplit = 1,
    dataSplit = dataSplit,
    set = nRows,
    width = foldedWidth,
    shouldReset = true,
    holdRead = true,
    singlePort = true, // read and write share one port; see read_write_conflict below
    useBitmask = true, // per-bit write enables allow updating the useful bit alone
    withClockGate = true,
    hasMbist = hasMbist,
    hasSramCtl = hasSramCtl
  ))
  private val mbistPl = MbistPipeline.PlaceMbistPipeline(1, "MbistPipeIttage", hasMbist)
  table.io.r.req.valid := io.req.fire
  table.io.r.req.bits.setIdx := s0_idx

  val table_read_data = table.io.r.resp.data(0)

  val s1_req_rhit = table_read_data.valid && table_read_data.tag === s1_tag

  // single-port SRAM: a concurrent write corrupts/blocks the read, so the s1 hit is suppressed
  val read_write_conflict = io.update.valid && io.req.valid
  val s1_read_write_conflict = RegEnable(read_write_conflict, io.req.valid)

  // the untagged base table (tagLen == 0) always "hits"
  io.resp.valid := (if (tagLen != 0) s1_req_rhit && !s1_read_write_conflict else true.B) && s1_valid // && s1_mask(b)
  io.resp.bits.ctr := table_read_data.ctr
  io.resp.bits.u := table_read_data.useful
  io.resp.bits.target_offset := table_read_data.target_offset

  // Use fetchpc to compute hash
  // Update-side hashing recomputes folded histories from the raw ghist snapshot.
  val update_folded_hist = WireInit(0.U.asTypeOf(new AllFoldedHistories(foldedGHistInfos)))

  update_folded_hist.getHistWithInfo(idxFhInfo).folded_hist := compute_folded_ghist(io.update.ghist, log2Ceil(nRows))
  update_folded_hist.getHistWithInfo(tagFhInfo).folded_hist := compute_folded_ghist(io.update.ghist, tagLen)
  update_folded_hist.getHistWithInfo(altTagFhInfo).folded_hist := compute_folded_ghist(io.update.ghist, tagLen - 1)
  val (update_idx, update_tag) = computeTagAndHash(getUnhashedIdx(io.update.pc), update_folded_hist)
  val update_wdata = Wire(new ITTageEntry)

  // Write bitmasks: the useful bit sits at the lowest ITTageUsBits bits of the entry,
  // so it can be written (or preserved) independently of the rest.
  val updateAllBitmask = VecInit.fill(ittageEntrySz)(1.U).asUInt // update all entry
  val updateNoBitmask = VecInit.fill(ittageEntrySz)(0.U).asUInt // update no
  val updateNoUsBitmask =
    VecInit.tabulate(ittageEntrySz)(_.U >= ITTageUsBits.U).asUInt // update others besides useful bit
  val updateUsBitmask = VecInit.tabulate(ittageEntrySz)(_.U < ITTageUsBits.U).asUInt // update useful bit

  // Gradual useful-bit reset: after reset_u fires, sweep all rows clearing only the useful
  // bit, one row per idle cycle (no read or update in flight).
  val needReset = RegInit(false.B)
  val useful_can_reset = !(io.req.fire || io.update.valid) && needReset
  val (resetSet, resetFinish) = Counter(useful_can_reset, nRows)
  when(io.update.reset_u) {
    needReset := true.B
  }.elsewhen(resetFinish) {
    needReset := false.B
  }
  val update_bitmask = Mux(
    io.update.uValid && io.update.valid,
    updateAllBitmask,
    Mux(io.update.valid, updateNoUsBitmask, Mux(useful_can_reset, updateUsBitmask, updateNoBitmask))
  )

  table.io.w.apply(
    valid = io.update.valid || useful_can_reset,
    data = update_wdata,
    setIdx = Mux(useful_can_reset, resetSet, update_idx),
    waymask = true.B,
    bitmask = update_bitmask
  )

  // Power-on reset
  val powerOnResetState = RegInit(true.B)
  when(table.io.r.req.ready) {
    // When all the SRAM first reach ready state, we consider power-on reset is done
    powerOnResetState := false.B
  }
  // Do not use table banks io.r.req.ready directly
  // All table_banks are single port SRAM, ready := !wen
  // We do not want write request block the whole BPU pipeline
  // Once read priority is higher than write, table_banks(*).io.r.req.ready can be used
  io.req.ready := !powerOnResetState

  // Forward recently written ctr values so back-to-back updates to the same row
  // chain correctly instead of using the stale oldCtr from prediction time.
  val wrbypass = Module(new WrBypass(UInt(ITTageCtrBits.W), wrBypassEntries, log2Ceil(nRows)))

  wrbypass.io.wen := io.update.valid
  wrbypass.io.write_idx := update_idx
  wrbypass.io.write_data.map(_ := update_wdata.ctr)

  val old_ctr = Mux(wrbypass.io.hit, wrbypass.io.hit_data(0).bits, io.update.oldCtr)
  update_wdata.valid := true.B
  // fresh allocations start at ctr = 2 (the weakest confident value, since ctrUnconf is ctr < 2)
  update_wdata.ctr := Mux(io.update.alloc, 2.U, incCtr(old_ctr, io.update.correct))
  update_wdata.tag := update_tag
  update_wdata.useful := Mux(useful_can_reset, false.B, io.update.u)
  // only when ctr is null
  update_wdata.target_offset := Mux(
    io.update.alloc || ctrNull(old_ctr),
    io.update.target_offset,
    io.update.old_target_offset
  )

  XSPerfAccumulate("ittage_table_updates", io.update.valid)
  XSPerfAccumulate("ittage_table_hits", io.resp.valid)
  XSPerfAccumulate("ittage_us_tick_reset", io.update.reset_u)
  XSPerfAccumulate("ittage_table_read_write_conflict", read_write_conflict)

  if (BPUDebug && debug) {
    val u = io.update
    val idx = s0_idx
    val tag = s0_tag
    XSDebug(
      io.req.fire,
      p"ITTageTableReq: pc=0x${Hexadecimal(io.req.bits.pc)}, " +
        p"idx=$idx, tag=$tag\n"
    )
    XSDebug(
      RegNext(io.req.fire) && s1_req_rhit,
      p"ITTageTableResp: idx=$s1_idx, hit:${s1_req_rhit}, " +
        p"ctr:${io.resp.bits.ctr}, u:${io.resp.bits.u}, tar:${Hexadecimal(io.resp.bits.target_offset.offset)}\n"
    )
    XSDebug(
      io.update.valid,
      p"update ITTAGE Table: pc:${Hexadecimal(u.pc)}}, " +
        p"correct:${u.correct}, alloc:${u.alloc}, oldCtr:${u.oldCtr}, " +
        p"target:${Hexadecimal(u.target_offset.offset)}, old_target:${Hexadecimal(u.old_target_offset.offset)}\n"
    )
    XSDebug(
      io.update.valid,
      p"update ITTAGE Table: writing tag:${update_tag}, " +
        p"ctr: ${update_wdata.ctr}, target:${Hexadecimal(update_wdata.target_offset.offset)}" +
        p" in idx $update_idx\n"
    )
    XSDebug(RegNext(io.req.fire) && !s1_req_rhit, "TageTableResp: no hits!\n")

    // ------------------------------Debug-------------------------------------
    val valids = RegInit(0.U.asTypeOf(Vec(nRows, Bool())))
    when(io.update.valid)(valids(update_idx) := true.B)
    XSDebug("ITTAGE Table usage:------------------------\n")
    XSDebug("%d out of %d rows are valid\n", PopCount(valids), nRows.U)
  }

}

abstract class BaseITTage(implicit p: Parameters) extends BasePredictor with ITTageParams with BPUUtils {}

// Stub predictor: always ready, produces no predictions.
class FakeITTage(implicit p: Parameters) extends BaseITTage {
  io.out <> 0.U.asTypeOf(DecoupledIO(new BasePredictorOutput))

  io.s1_ready := true.B
  io.s2_ready := true.B
}

// Top-level ITTAGE predictor: instantiates the component tables and the shared region
// cache, selects provider/altProvider in s2, and drives table updates at commit.
class ITTage(implicit p: Parameters) extends BaseITTage {
  // meta width derived from a zero-initialized ITTageMeta literal
  override val meta_size = 0.U.asTypeOf(new ITTageMeta).getWidth

  val tables = ITTageTableInfos.zipWithIndex.map {
    case ((nRows, histLen, tagLen), i) =>
      val t = Module(new ITTageTable(nRows, histLen, tagLen, i))
      t
  }
  override def getFoldedHistoryInfo: Option[Set[(Int, Int)]] = Some(tables.map(_.getFoldedHistoryInfo).reduce(_ ++ _))

  val useAltOnNa = RegInit((1 << (UAONA_bits - 1)).U(UAONA_bits.W))
  val tickCtr = RegInit(0.U(TickWidth.W)) // saturates -> triggers global useful-bit reset

  val rTable = Module(new RegionWays)

  // uftb miss or hasIndirect
  // Only query the tables when the block may actually contain an indirect jump.
  val s1_uftbHit = io.in.bits.resp_in(0).s1_uftbHit
  val s1_uftbHasIndirect = io.in.bits.resp_in(0).s1_uftbHasIndirect
  val s1_isIndirect = (!s1_uftbHit && !io.in.bits.resp_in(0).s1_ftbCloseReq) || s1_uftbHasIndirect

  // Keep the table responses to process in s2

  val s2_resps = VecInit(tables.map(t => t.io.resp))

  // duplicated-pc lane 3 is used throughout this predictor's pipeline
  val debug_pc_s1 = RegEnable(s0_pc_dup(3), io.s0_fire(3))
  val debug_pc_s2 = RegEnable(debug_pc_s1, io.s1_fire(3))
  val debug_pc_s3 = RegEnable(debug_pc_s2, io.s2_fire(3))

  // s2 selection results, registered into s3 below
  val s2_tageTarget = Wire(UInt(VAddrBits.W))
  val s2_providerTarget = Wire(UInt(VAddrBits.W))
  val s2_altProviderTarget = Wire(UInt(VAddrBits.W))
  val s2_provided = Wire(Bool())
  val s2_provider = Wire(UInt(log2Ceil(ITTageNTables).W))
  val s2_altProvided = Wire(Bool())
  val s2_altProvider = Wire(UInt(log2Ceil(ITTageNTables).W))
  val s2_providerU = Wire(Bool())
  val s2_providerCtr = Wire(UInt(ITTageCtrBits.W))
  val s2_altProviderCtr = Wire(UInt(ITTageCtrBits.W))

  val s3_tageTarget_dup = io.s2_fire.map(f => RegEnable(s2_tageTarget, f))
  val s3_providerTarget = RegEnable(s2_providerTarget, io.s2_fire(3))
  val s3_altProviderTarget = RegEnable(s2_altProviderTarget, io.s2_fire(3))
  val s3_provided = RegEnable(s2_provided, io.s2_fire(3))
  val s3_provider = RegEnable(s2_provider, io.s2_fire(3))
  val s3_altProvided = RegEnable(s2_altProvided, io.s2_fire(3))
  val s3_altProvider = RegEnable(s2_altProvider, io.s2_fire(3))
  val s3_providerU = RegEnable(s2_providerU, io.s2_fire(3))
  val s3_providerCtr = RegEnable(s2_providerCtr, io.s2_fire(3))
  val s3_altProviderCtr = RegEnable(s2_altProviderCtr, io.s2_fire(3))

  val resp_meta = WireInit(0.U.asTypeOf(new ITTageMeta))

  io.out.last_stage_meta := resp_meta.asUInt

  // Update logic
  val u_valid = RegNext(io.update.valid, init = false.B)

  val update = Wire(new BranchPredictionUpdate)
  update := RegEnable(io.update.bits, io.update.valid)

  // meta is split by composer
  val updateMeta = Wire(new ITTageMeta)
  update.meta := updateMeta.asUInt

  // The pc register has been moved outside of predictor
  // pc field of update bundle and other update data are not in the same stage
  // so io.update.bits.pc is used directly here
  val update_pc = io.update.bits.pc

  // To improve Clock Gating Efficiency
  // The bulk register below is overridden field-by-field (Chisel last-connect) with
  // more tightly gated enables for the fields that are only meaningful conditionally.
  val u_meta = io.update.bits.meta.asTypeOf(new ITTageMeta)
  updateMeta := RegEnable(u_meta, io.update.valid)
  updateMeta.provider.bits := RegEnable(
    u_meta.provider.bits,
    io.update.valid && u_meta.provider.valid
  )
  updateMeta.providerTarget := RegEnable(
    u_meta.providerTarget,
    io.update.valid && u_meta.provider.valid
  )
  updateMeta.allocate.bits := RegEnable(
    u_meta.allocate.bits,
    io.update.valid && u_meta.allocate.valid
  )
  updateMeta.altProvider.bits := RegEnable(
    u_meta.altProvider.bits,
    io.update.valid && u_meta.altProvider.valid
  )
  updateMeta.altProviderTarget := RegEnable(
    u_meta.altProviderTarget,
    // altProviderTarget is only consumed when the altProvider's prediction was used
    // in place of a null-confidence provider (see the update loop below)
    io.update.valid && u_meta.provider.valid && u_meta.altProvider.valid && u_meta.providerCtr === 0.U
  )
  update.full_target := RegEnable(
    io.update.bits.full_target,
    io.update.valid // not using mispred_mask, because mispred_mask timing is bad
  )
  update.cfi_idx.bits := RegEnable(io.update.bits.cfi_idx.bits, io.update.valid && io.update.bits.cfi_idx.valid)
  update.ghist := RegEnable(io.update.bits.ghist, io.update.valid) // TODO: CGE

  // Train only on taken, non-return indirect jumps that sit in the FTB tail slot
  // and are not statically strongly biased.
  val updateValid = update.is_jalr && !update.is_ret && u_valid && update.ftb_entry.jmpValid &&
    update.jmp_taken && update.cfi_idx.valid &&
    update.cfi_idx.bits === update.ftb_entry.tailSlot.offset && !update.ftb_entry.strong_bias(numBr - 1)

  // Per-table update command vectors, filled in by the update logic below.
  val updateMask = WireInit(0.U.asTypeOf(Vec(ITTageNTables, Bool())))
  val updateUMask = WireInit(0.U.asTypeOf(Vec(ITTageNTables, Bool())))
  val updateResetU = WireInit(false.B)
  val updateCorrect = Wire(Vec(ITTageNTables, Bool()))
  val updateAlloc = Wire(Vec(ITTageNTables, Bool()))
  val updateOldCtr = Wire(Vec(ITTageNTables, UInt(ITTageCtrBits.W)))
  val updateU = Wire(Vec(ITTageNTables, Bool()))
  val updateTargetOffset = Wire(Vec(ITTageNTables, new ITTageOffset))
  val updateOldTargetOffset = Wire(Vec(ITTageNTables, new ITTageOffset))
  updateCorrect := DontCare
  updateAlloc := DontCare
  updateOldCtr := DontCare
  updateU := DontCare
  updateTargetOffset := DontCare
  updateOldTargetOffset := DontCare

  val updateMisPred = update.mispred_mask(numBr) // the last one indicates jmp results

  // Predict
  tables.map { t =>
    t.io.req.valid := io.s1_fire(3) && s1_isIndirect
    t.io.req.bits.pc := s1_pc_dup(3)
    t.io.req.bits.folded_hist := io.in.bits.s1_folded_hist(3)
  }

  // access tag tables and output meta info
  // Response enriched with the table index and a one-hot full-width target mask,
  // so the selected target can be built by OR-reduction after selection.
  class ITTageTableInfo(implicit p: Parameters) extends ITTageResp {
    val tableIdx = UInt(log2Ceil(ITTageNTables).W)
    val maskTarget = Vec(ITTageNTables, UInt(VAddrBits.W))
  }

  val inputRes = VecInit(s2_resps.zipWithIndex.map {
    case (r, i) =>
      val tableInfo = Wire(new ITTageTableInfo)
      tableInfo.u := r.bits.u
      tableInfo.ctr := r.bits.ctr
      tableInfo.target_offset := r.bits.target_offset
      tableInfo.tableIdx := i.U(log2Ceil(ITTageNTables).W)
      tableInfo.maskTarget := VecInit(Seq.fill(ITTageNTables)(0.U(VAddrBits.W)))
      tableInfo.maskTarget(i) := "hffff_ffff_ffff_ffff".U
      SelectTwoInterRes(r.valid, tableInfo)
  })

  // reversed so the longest-history hitting table wins: first = provider, second = altProvider
  val selectedInfo = ParallelSelectTwo(inputRes.reverse)
  val provided = selectedInfo.hasOne
  val altProvided = selectedInfo.hasTwo

  val providerInfo = selectedInfo.first
  val altProviderInfo = selectedInfo.second
  val providerNull = providerInfo.ctr === 0.U

  val baseTarget = io.in.bits.resp_in(0).s2.full_pred(3).jalr_target // use ftb pred as base target
  // Reconstruct full-width targets: splice each table's stored offset with either the
  // region cache entry its pointer names, or (on miss / usePCRegion) the fetch PC's own region.
  val region_r_target_offset = VecInit(s2_resps.map(r => r.bits.target_offset))

  rTable.io.req_pointer.zipWithIndex.map { case (req_pointer, i) =>
    req_pointer := region_r_target_offset(i).pointer
  }
  // When the entry corresponding to the pointer is valid and does not use PCRegion, use rTable region.
  val region_targets = Wire(Vec(ITTageNTables, UInt(VAddrBits.W)))
  for (i <- 0 until ITTageNTables) {
    region_targets(i) := Mux(
      rTable.io.resp_hit(i) && !region_r_target_offset(i).usePCRegion,
      Cat(rTable.io.resp_region(i), region_r_target_offset(i).offset),
      Cat(targetGetRegion(s2_pc_dup(0).getAddr()), region_r_target_offset(i).offset)
    )
  }

  // maskTarget is one-hot per table, so AND + OR-reduce picks out the provider's target
  val providerCatTarget = providerInfo.maskTarget.zipWithIndex.map {
    case (mask, i) => mask & region_targets(i)
  }.reduce(_ | _)

  val altproviderCatTarget = altProviderInfo.maskTarget.zipWithIndex.map {
    case (mask, i) => mask & region_targets(i)
  }.reduce(_ | _)

  // Final choice: provider unless its ctr is null and an altProvider exists;
  // fall back to the FTB target when no table hit at all.
  s2_tageTarget := Mux1H(Seq(
    (provided && !(providerNull && altProvided), providerCatTarget),
    (altProvided && providerNull, altproviderCatTarget),
    (!provided, baseTarget)
  ))
  s2_provided := provided
  s2_provider := providerInfo.tableIdx
  s2_altProvided := altProvided
  s2_altProvider := altProviderInfo.tableIdx
  s2_providerU := providerInfo.u
  s2_providerCtr := providerInfo.ctr
  s2_altProviderCtr := altProviderInfo.ctr
  s2_providerTarget := providerCatTarget
  s2_altProviderTarget := altproviderCatTarget

  XSDebug(io.s2_fire(3), p"hit_taken_jalr:")

  // `&` is the Tuple2 alias imported above, used here as an infix pattern extractor
  for (
    fp & s3_tageTarget <-
      io.out.s3.full_pred zip s3_tageTarget_dup
  )
    yield fp.jalr_target := s3_tageTarget

  resp_meta.provider.valid := s3_provided
  resp_meta.provider.bits := s3_provider
  resp_meta.altProvider.valid := s3_altProvided
  resp_meta.altProvider.bits := s3_altProvider
  resp_meta.altDiffers := s3_providerTarget =/= s3_altProviderTarget
  resp_meta.providerU := s3_providerU
  resp_meta.providerCtr := s3_providerCtr
  resp_meta.altProviderCtr := s3_altProviderCtr
  resp_meta.providerTarget := s3_providerTarget
  resp_meta.altProviderTarget := s3_altProviderTarget
  resp_meta.pred_cycle.foreach(_ := GTimer())
  // TODO: adjust for ITTAGE
  // Create a mask of tables which did not hit our query, and also contain useless entries
  // and also uses a longer history than the provider
  val s2_allocatableSlots = VecInit(s2_resps.map(r => !r.valid && !r.bits.u)).asUInt &
    (~(LowerMask(UIntToOH(s2_provider), ITTageNTables) & Fill(ITTageNTables, s2_provided.asUInt))).asUInt
  // randomize the allocation victim with an LFSR to avoid pathological table pressure
  val s2_allocLFSR = random.LFSR(width = 15)(ITTageNTables - 1, 0)
  val s2_firstEntry = PriorityEncoder(s2_allocatableSlots)
  val s2_maskedEntry = PriorityEncoder(s2_allocatableSlots & s2_allocLFSR)
  val s2_allocEntry = Mux(s2_allocatableSlots(s2_maskedEntry), s2_maskedEntry, s2_firstEntry)
  resp_meta.allocate.valid := RegEnable(s2_allocatableSlots =/= 0.U, io.s2_fire(3))
  resp_meta.allocate.bits := RegEnable(s2_allocEntry, io.s2_fire(3))

  // Update in loop
  val updateRealTarget = update.full_target
  val updatePCRegion = targetGetRegion(update.pc)
  val updateRealTargetRegion = targetGetRegion(updateRealTarget)
  val metaProviderTargetOffset, metaAltProviderTargetOffset, updateRealTargetOffset =
    WireInit(0.U.asTypeOf(new ITTageOffset))
  updateRealTargetOffset.offset := targetGetOffset(updateRealTarget)
  val updateRealUsePCRegion = updateRealTargetRegion === updatePCRegion
  // If rTable is not written in Region, the pointer value will be invalid.
  // At this time, it is necessary to raise usePCRegion.
  // The update mechanism of the usePCRegion bit requires further consideration.
  updateRealTargetOffset.usePCRegion := updateRealUsePCRegion || !updateAlloc.reduce(_ || _)
  // only allocate a region cache entry when the target actually leaves the PC's region
  rTable.io.write_valid := !updateRealUsePCRegion && updateAlloc.reduce(_ || _)
  rTable.io.write_region := updateRealTargetRegion
  updateRealTargetOffset.pointer := rTable.io.write_pointer

  // Reverse-lookup the regions of the recorded provider/altProvider targets so their
  // old target offsets can be re-encoded for the tables.
  val metaProviderTargetRegion = targetGetRegion(updateMeta.providerTarget)
  val metaAltProviderTargetRegion = targetGetRegion(updateMeta.altProviderTarget)

  rTable.io.update_region := VecInit(metaProviderTargetRegion, metaAltProviderTargetRegion)
  metaProviderTargetOffset.offset := targetGetOffset(updateMeta.providerTarget)
  metaProviderTargetOffset.pointer := rTable.io.update_pointer(0)
  metaProviderTargetOffset.usePCRegion := !rTable.io.update_hit(0)

  metaAltProviderTargetOffset.offset := targetGetOffset(updateMeta.altProviderTarget)
  metaAltProviderTargetOffset.pointer := rTable.io.update_pointer(1)
  metaAltProviderTargetOffset.usePCRegion := !rTable.io.update_hit(1)

  val provider = updateMeta.provider.bits
  val altProvider = updateMeta.altProvider.bits
  // the altProvider's prediction was actually used when the provider's ctr was null
  val usedAltpred = updateMeta.altProvider.valid && updateMeta.providerCtr === 0.U
  when(updateValid) {
    when(updateMeta.provider.valid) {
      when(usedAltpred && updateMisPred) { // update altpred if used as pred
        updateMask(altProvider) := true.B
        updateUMask(altProvider) := false.B
        updateCorrect(altProvider) := false.B
        updateOldCtr(altProvider) := updateMeta.altProviderCtr
        updateAlloc(altProvider) := false.B
        updateTargetOffset(altProvider) := updateRealTargetOffset
        updateOldTargetOffset(altProvider) := metaAltProviderTargetOffset
      }

      // the provider is always trained; its useful bit is refreshed only
      // when provider and altProvider disagreed
      updateMask(provider) := true.B
      updateUMask(provider) := true.B

      updateU(provider) := Mux(
        !updateMeta.altDiffers,
        updateMeta.providerU,
        updateMeta.providerTarget === updateRealTarget
      )
      updateCorrect(provider) := updateMeta.providerTarget === updateRealTarget
      updateOldCtr(provider) := updateMeta.providerCtr
      updateAlloc(provider) := false.B
      updateTargetOffset(provider) := updateRealTargetOffset
      updateOldTargetOffset(provider) := metaProviderTargetOffset
    }
  }
  XSDebug(
    updateValid && updateMeta.provider.valid,
    p"update provider $provider, pred cycle ${updateMeta.pred_cycle.getOrElse(0.U)}\n"
  )
  XSDebug(
    updateValid && updateMeta.provider.valid && usedAltpred && updateMisPred,
    p"update altprovider $altProvider, pred cycle ${updateMeta.pred_cycle.getOrElse(0.U)}\n"
  )

  // if mispredicted and not the case that
  // provider offered correct target but used altpred due to unconfident
  val providerCorrect = updateMeta.provider.valid && updateMeta.providerTarget === updateRealTarget
  val providerUnconf = updateMeta.providerCtr === 0.U
  val allocate = updateMeta.allocate
  when(updateValid && updateMisPred && !(providerCorrect && providerUnconf)) {
    // tick up when allocation fails (no free slot), down when it succeeds;
    // saturation triggers a global useful-bit reset below
    tickCtr := satUpdate(tickCtr, TickWidth, !allocate.valid)
    when(allocate.valid) {
      updateMask(allocate.bits) := true.B
      updateCorrect(allocate.bits) := DontCare // useless for alloc
      updateAlloc(allocate.bits) := true.B
      updateUMask(allocate.bits) := true.B
      updateU(allocate.bits) := false.B
      updateTargetOffset(allocate.bits) := updateRealTargetOffset
    }
  }
  XSDebug(
    updateValid && updateMisPred && !(providerCorrect && providerUnconf) && allocate.valid,
    p"allocate new table entry, pred cycle ${updateMeta.pred_cycle.getOrElse(0.U)}\n"
  )

  when(tickCtr === ((1 << TickWidth) - 1).U) {
    tickCtr := 0.U
    updateResetU := true.B
  }

  // Register the per-table update commands for one cycle before driving the tables,
  // gating each payload register with its own mask bit.
  for (i <- 0 until ITTageNTables) {
    tables(i).io.update.valid := RegNext(updateMask(i), init = false.B)
    tables(i).io.update.reset_u := RegNext(updateResetU, init = false.B)
    tables(i).io.update.correct := RegEnable(updateCorrect(i), updateMask(i))
    tables(i).io.update.alloc := RegEnable(updateAlloc(i), updateMask(i))
    tables(i).io.update.oldCtr := RegEnable(updateOldCtr(i), updateMask(i))
    tables(i).io.update.target_offset := RegEnable(updateTargetOffset(i), updateMask(i))
    tables(i).io.update.old_target_offset := RegEnable(updateOldTargetOffset(i), updateMask(i))

    tables(i).io.update.uValid := RegEnable(updateUMask(i), false.B, updateMask(i))
    tables(i).io.update.u := RegEnable(updateU(i), updateMask(i))
    tables(i).io.update.pc := RegEnable(update_pc, updateMask(i))
    tables(i).io.update.ghist := RegEnable(update.ghist, updateMask(i))
  }

  // all should be ready for req
  io.s1_ready := tables.map(_.io.req.ready).reduce(_ && _)

  // Debug and perf info
  XSPerfAccumulate("ittage_reset_u", updateResetU)
  XSPerfAccumulate("ittage_used", io.s1_fire(0) && s1_isIndirect)
  XSPerfAccumulate("ittage_closed_due_to_uftb_info", io.s1_fire(0) && !s1_isIndirect)
  XSPerfAccumulate("ittage_allocate", updateAlloc.reduce(_ || _))

  private def pred_perf(name: String, cond: Bool) = XSPerfAccumulate(s"${name}_at_pred", cond && io.s2_fire(3))
  private def commit_perf(name: String, cond: Bool) = XSPerfAccumulate(s"${name}_at_commit", cond && updateValid)
  // paired counters: same event observed at prediction time and at commit time
  private def ittage_perf(name: String, pred_cond: Bool, commit_cond: Bool) = {
    pred_perf(s"ittage_${name}", pred_cond)
    commit_perf(s"ittage_${name}", commit_cond)
  }
  val pred_use_provider = s2_provided && !ctrNull(s2_providerCtr)
  val pred_use_altpred = s2_provided && ctrNull(s2_providerCtr)
  val pred_use_ht_as_altpred = pred_use_altpred && s2_altProvided
  val pred_use_bim_as_altpred = pred_use_altpred && !s2_altProvided
  val pred_use_bim_as_pred = !s2_provided

  val commit_use_provider = updateMeta.provider.valid && !ctrNull(updateMeta.providerCtr)
  val commit_use_altpred = updateMeta.provider.valid && ctrNull(updateMeta.providerCtr)
  val commit_use_ht_as_altpred = commit_use_altpred && updateMeta.altProvider.valid
  val commit_use_ftb_as_altpred = commit_use_altpred && !updateMeta.altProvider.valid
  val commit_use_ftb_as_pred = !updateMeta.provider.valid

  for (i <- 0 until ITTageNTables) {
    val pred_this_is_provider = s2_provider === i.U
    val pred_this_is_altpred = s2_altProvider === i.U
    val commit_this_is_provider = updateMeta.provider.bits === i.U
    val commit_this_is_altpred = updateMeta.altProvider.bits === i.U
    ittage_perf(
      s"table_${i}_final_provided",
      pred_use_provider && pred_this_is_provider,
      commit_use_provider && commit_this_is_provider
    )
    ittage_perf(
      s"table_${i}_provided_not_used",
      pred_use_altpred && pred_this_is_provider,
      commit_use_altpred && commit_this_is_provider
    )
    ittage_perf(
      s"table_${i}_alt_provider_as_final_pred",
      pred_use_ht_as_altpred && pred_this_is_altpred,
      commit_use_ht_as_altpred && commit_this_is_altpred
    )
    ittage_perf(
      s"table_${i}_alt_provider_not_used",
      pred_use_provider && pred_this_is_altpred,
      commit_use_provider && commit_this_is_altpred
    )
  }

  ittage_perf("provided", s2_provided, updateMeta.provider.valid)
  ittage_perf("use_provider", pred_use_provider, commit_use_provider)
  ittage_perf("use_altpred", pred_use_altpred, commit_use_altpred)
  ittage_perf("use_ht_as_altpred", pred_use_ht_as_altpred, commit_use_ht_as_altpred)
  ittage_perf("use_ftb_when_no_provider", pred_use_bim_as_pred, commit_use_ftb_as_pred)
  ittage_perf("use_ftb_as_alt_provider", pred_use_bim_as_altpred, commit_use_ftb_as_altpred)
  XSPerfAccumulate("updated", updateValid)

  if (debug) {
    val s2_resps_regs = RegEnable(s2_resps, io.s2_fire(3))
    XSDebug("req: v=%d, pc=0x%x\n", io.s0_fire(3), s0_pc_dup(3))
    XSDebug("s1_fire:%d, resp: pc=%x\n", io.s1_fire(3), debug_pc_s1)
    XSDebug("s2_fireOnLastCycle: resp: pc=%x, target=%x, hit=%b\n", debug_pc_s2, io.out.s2.getTarget(3), s2_provided)
    for (i <- 0 until ITTageNTables) {
      XSDebug(
        "TageTable(%d): valids:%b, resp_ctrs:%b, resp_us:%b, target:%x\n",
        i.U,
        VecInit(s2_resps_regs(i).valid).asUInt,
        s2_resps_regs(i).bits.ctr,
        s2_resps_regs(i).bits.u,
        s2_resps_regs(i).bits.target_offset.offset
      )
    }
  }
  XSDebug(updateValid, p"pc: ${Hexadecimal(update_pc)}, target: ${Hexadecimal(update.full_target)}\n")
  XSDebug(updateValid, updateMeta.toPrintable + p"\n")
  XSDebug(updateValid, p"correct(${!updateMisPred})\n")

  generatePerfEvent()
}