/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.frontend

import chisel3._
import chisel3.util._
import org.chipsalliance.cde.config.Parameters
import scala.math.min
import utility._
import xiangshan._

/** Compile-time constants and small replication helpers shared by the branch prediction unit.
  *
  * Names such as `env`, `HasHExtension`, `numBr` and `VAddrBits` are not defined here; they are
  * presumably supplied by `HasXSParameter` (NOTE(review): confirm against that trait).
  */
trait HasBPUConst extends HasXSParameter {
  // Base meta length: 512 unless env.FPGAPlatform is set, in which case 256.
  val MaxMetaBaseLength = if (!env.FPGAPlatform) 512 else 256 // TODO: Reduce meta length
  // The base length is extended by 4 when HasHExtension is set.
  val MaxMetaLength     = if (HasHExtension) MaxMetaBaseLength + 4 else MaxMetaBaseLength
  val MaxBasicBlockSize = 32
  val LHistoryLength    = 32
  // val numBr = 2
  val useBPD    = true
  val useLHist  = true
  val numBrSlot = numBr - 1
  // numBrSlot + 1, i.e. numerically equal to numBr.
  val totalSlot = numBrSlot + 1

  // Default replication factor used by the dup* helpers below.
  val numDup = 4

  // Used to gate PC higher parts
  // Three segment widths that add up to VAddrBits.
  val pcSegments = Seq(VAddrBits - 24, 12, 12)

  // Stage identifiers: the 2-bit UInt literals 0, 1 and 2.
  def BP_STAGES = (0 until 3).map(_.U(2.W))
  def BP_S1     = BP_STAGES(0)
  def BP_S2     = BP_STAGES(1)
  def BP_S3     = BP_STAGES(2)

  /** Returns a `Seq` holding `num` references to the one already-evaluated `src` value.
    *
    * `src` is a by-value parameter, so it is evaluated once at the call site; the elements are
    * not independent copies.
    */
  def dup_seq[T](src: T, num: Int = numDup) = Seq.tabulate(num)(n => src)

  /** Builds a `Vec` via `VecInit` whose `num` elements are all initialised from `src`. */
  def dup[T <: Data](src: T, num: Int = numDup) = VecInit(Seq.tabulate(num)(n => src))

  /** Declares a `Wire` of `Vec(num, ...)` with the element type of `src`.
    *
    * Only `src.cloneType` is used; the wire is returned without any connection being made here.
    */
  def dup_wire[T <: Data](src: T, num: Int = numDup) = Wire(Vec(num, src.cloneType))

  /** The replica indices `0 until numDup`, rendered as strings. */
  def dup_idx = Seq.tabulate(numDup)(n => n.toString())

  // Number of entries in BP_STAGES.
  val numBpStages = BP_STAGES.length

  val debug = true
  // TODO: Replace log2Up by log2Ceil
}

/** Debug and logging switches for the BPU, layered on top of [[HasBPUConst]]. */
trait HasBPUParameter extends HasXSParameter with HasBPUConst {
  // True only when not on an FPGA platform and env.EnablePerfDebug is set.
  val BPUDebug           = true && !env.FPGAPlatform && env.EnablePerfDebug
  val EnableCFICommitLog = true
  // (sic) The misspelled identifier is a public member name and is kept unchanged.
  val EnbaleCFIPredLog    = true
  // Enabled when either CFI log switch is on and the target is not an FPGA platform.
  val EnableBPUTimeRecord = (EnableCFICommitLog || EnbaleCFIPredLog) && !env.FPGAPlatform
  val EnableCommit        = false
}

/** Bundle of seven Boolean control fields, one `*_enable` flag per name below.
  *
  * NOTE(review): judging by the names, each flag presumably enables one predictor component;
  * how the flags are consumed is not visible in this part of the file.
  */
class BPUCtrl(implicit p: Parameters) extends XSBundle {
  val ubtb_enable = Bool()
  val btb_enable  = Bool()
  val bim_enable  = Bool()
  val tage_enable = Bool()
  val sc_enable   = Bool()
  val ras_enable  = Bool()
  val loop_enable = Bool()
}

7509c6f1ddSLingrui98trait BPUUtils extends HasXSParameter { 7609c6f1ddSLingrui98 // circular shifting 7709c6f1ddSLingrui98 def circularShiftLeft(source: UInt, len: Int, shamt: UInt): UInt = { 7809c6f1ddSLingrui98 val res = Wire(UInt(len.W)) 7909c6f1ddSLingrui98 val higher = source << shamt 8009c6f1ddSLingrui98 val lower = source >> (len.U - shamt) 8109c6f1ddSLingrui98 res := higher | lower 8209c6f1ddSLingrui98 res 8309c6f1ddSLingrui98 } 8409c6f1ddSLingrui98 8509c6f1ddSLingrui98 def circularShiftRight(source: UInt, len: Int, shamt: UInt): UInt = { 8609c6f1ddSLingrui98 val res = Wire(UInt(len.W)) 8709c6f1ddSLingrui98 val higher = source << (len.U - shamt) 8809c6f1ddSLingrui98 val lower = source >> shamt 8909c6f1ddSLingrui98 res := higher | lower 9009c6f1ddSLingrui98 res 9109c6f1ddSLingrui98 } 9209c6f1ddSLingrui98 9309c6f1ddSLingrui98 // To be verified 9409c6f1ddSLingrui98 def satUpdate(old: UInt, len: Int, taken: Bool): UInt = { 9509c6f1ddSLingrui98 val oldSatTaken = old === ((1 << len) - 1).U 9609c6f1ddSLingrui98 val oldSatNotTaken = old === 0.U 97cf7d6b7aSMuzi Mux(oldSatTaken && taken, ((1 << len) - 1).U, Mux(oldSatNotTaken && !taken, 0.U, Mux(taken, old + 1.U, old - 1.U))) 9809c6f1ddSLingrui98 } 9909c6f1ddSLingrui98 10009c6f1ddSLingrui98 def signedSatUpdate(old: SInt, len: Int, taken: Bool): SInt = { 10109c6f1ddSLingrui98 val oldSatTaken = old === ((1 << (len - 1)) - 1).S 10209c6f1ddSLingrui98 val oldSatNotTaken = old === (-(1 << (len - 1))).S 103cf7d6b7aSMuzi Mux( 104cf7d6b7aSMuzi oldSatTaken && taken, 105cf7d6b7aSMuzi ((1 << (len - 1)) - 1).S, 106cf7d6b7aSMuzi Mux(oldSatNotTaken && !taken, (-(1 << (len - 1))).S, Mux(taken, old + 1.S, old - 1.S)) 107cf7d6b7aSMuzi ) 10809c6f1ddSLingrui98 } 10909c6f1ddSLingrui98 11009c6f1ddSLingrui98 def getFallThroughAddr(start: UInt, carry: Bool, pft: UInt) = { 111a60a2901SLingrui98 val higher = start.head(VAddrBits - log2Ceil(PredictWidth) - instOffsetBits) 11209c6f1ddSLingrui98 Cat(Mux(carry, higher + 1.U, higher), pft, 
0.U(instOffsetBits.W)) 11309c6f1ddSLingrui98 } 11409c6f1ddSLingrui98 11509c6f1ddSLingrui98 def foldTag(tag: UInt, l: Int): UInt = { 11609c6f1ddSLingrui98 val nChunks = (tag.getWidth + l - 1) / l 117cf7d6b7aSMuzi val chunks = (0 until nChunks).map(i => tag(min((i + 1) * l, tag.getWidth) - 1, i * l)) 11809c6f1ddSLingrui98 ParallelXOR(chunks) 11909c6f1ddSLingrui98 } 12009c6f1ddSLingrui98} 12109c6f1ddSLingrui98 12209c6f1ddSLingrui98class BasePredictorInput(implicit p: Parameters) extends XSBundle with HasBPUConst { 12309c6f1ddSLingrui98 def nInputs = 1 12409c6f1ddSLingrui98 125adc0b8dfSGuokai Chen val s0_pc = Vec(numDup, UInt(VAddrBits.W)) 12609c6f1ddSLingrui98 127adc0b8dfSGuokai Chen val folded_hist = Vec(numDup, new AllFoldedHistories(foldedGHistInfos)) 128c4a59f19SYuandongliang val s1_folded_hist = Vec(numDup, new AllFoldedHistories(foldedGHistInfos)) 12986d9c530SLingrui98 val ghist = UInt(HistoryLength.W) 13009c6f1ddSLingrui98 13109c6f1ddSLingrui98 val resp_in = Vec(nInputs, new BranchPredictionResp) 132dd6c0695SLingrui98 133dd6c0695SLingrui98 // val final_preds = Vec(numBpStages, new) 13409c6f1ddSLingrui98 // val toFtq_fire = Bool() 13509c6f1ddSLingrui98 13609c6f1ddSLingrui98 // val s0_all_ready = Bool() 13709c6f1ddSLingrui98} 13809c6f1ddSLingrui98 139c2d1ec7dSLingrui98class BasePredictorOutput(implicit p: Parameters) extends BranchPredictionResp {} 14009c6f1ddSLingrui98 14109c6f1ddSLingrui98class BasePredictorIO(implicit p: Parameters) extends XSBundle with HasBPUConst { 142c4b44470SGuokai Chen val reset_vector = Input(UInt(PAddrBits.W)) 14309c6f1ddSLingrui98 val in = Flipped(DecoupledIO(new BasePredictorInput)) // TODO: Remove DecoupledIO 14409c6f1ddSLingrui98 // val out = DecoupledIO(new BasePredictorOutput) 14509c6f1ddSLingrui98 val out = Output(new BasePredictorOutput) 14609c6f1ddSLingrui98 // val flush_out = Valid(UInt(VAddrBits.W)) 14709c6f1ddSLingrui98 148fd3aa057SYuandongliang val fauftb_entry_in = Input(new FTBEntry) 149fd3aa057SYuandongliang val 
fauftb_entry_hit_in = Input(Bool()) 150fd3aa057SYuandongliang val fauftb_entry_out = Output(new FTBEntry) 151fd3aa057SYuandongliang val fauftb_entry_hit_out = Output(Bool()) 152fd3aa057SYuandongliang 1536ee06c7aSSteve Gou val ctrl = Input(new BPUCtrl) 15409c6f1ddSLingrui98 155adc0b8dfSGuokai Chen val s0_fire = Input(Vec(numDup, Bool())) 156adc0b8dfSGuokai Chen val s1_fire = Input(Vec(numDup, Bool())) 157adc0b8dfSGuokai Chen val s2_fire = Input(Vec(numDup, Bool())) 158adc0b8dfSGuokai Chen val s3_fire = Input(Vec(numDup, Bool())) 15909c6f1ddSLingrui98 160adc0b8dfSGuokai Chen val s2_redirect = Input(Vec(numDup, Bool())) 161adc0b8dfSGuokai Chen val s3_redirect = Input(Vec(numDup, Bool())) 16285670bacSLingrui98 16309c6f1ddSLingrui98 val s1_ready = Output(Bool()) 16409c6f1ddSLingrui98 val s2_ready = Output(Bool()) 165cb4f77ceSLingrui98 val s3_ready = Output(Bool()) 16609c6f1ddSLingrui98 16709c6f1ddSLingrui98 val update = Flipped(Valid(new BranchPredictionUpdate)) 16809c6f1ddSLingrui98 val redirect = Flipped(Valid(new BranchPredictionRedirect)) 169fd3aa057SYuandongliang val redirectFromIFU = Input(Bool()) 17009c6f1ddSLingrui98} 17109c6f1ddSLingrui98 1724813e060SLingrui98abstract class BasePredictor(implicit p: Parameters) extends XSModule 1734813e060SLingrui98 with HasBPUConst with BPUUtils with HasPerfEvents { 17409c6f1ddSLingrui98 val meta_size = 0 17509c6f1ddSLingrui98 val spec_meta_size = 0 176b60e4b0bSLingrui98 val is_fast_pred = false 17709c6f1ddSLingrui98 val io = IO(new BasePredictorIO()) 17809c6f1ddSLingrui98 179c2d1ec7dSLingrui98 io.out := io.in.bits.resp_in(0) 18009c6f1ddSLingrui98 181fd3aa057SYuandongliang io.fauftb_entry_out := io.fauftb_entry_in 182fd3aa057SYuandongliang io.fauftb_entry_hit_out := io.fauftb_entry_hit_in 183fd3aa057SYuandongliang 1843e52bed1SLingrui98 io.out.last_stage_meta := 0.U 18509c6f1ddSLingrui98 18609c6f1ddSLingrui98 io.in.ready := !io.redirect.valid 18709c6f1ddSLingrui98 18809c6f1ddSLingrui98 io.s1_ready := true.B 
18909c6f1ddSLingrui98 io.s2_ready := true.B 190cb4f77ceSLingrui98 io.s3_ready := true.B 19109c6f1ddSLingrui98 192adc0b8dfSGuokai Chen val s0_pc_dup = WireInit(io.in.bits.s0_pc) // fetchIdx(io.f0_pc) 193adc0b8dfSGuokai Chen val s1_pc_dup = s0_pc_dup.zip(io.s0_fire).map { case (s0_pc, s0_fire) => RegEnable(s0_pc, s0_fire) } 194cf7d6b7aSMuzi val s2_pc_dup = s1_pc_dup.zip(io.s1_fire).map { case (s1_pc, s1_fire) => 195cf7d6b7aSMuzi SegmentedAddrNext(s1_pc, pcSegments, s1_fire, Some("s2_pc")) 196cf7d6b7aSMuzi } 197cf7d6b7aSMuzi val s3_pc_dup = s2_pc_dup.zip(io.s2_fire).map { case (s2_pc, s2_fire) => 198cf7d6b7aSMuzi SegmentedAddrNext(s2_pc, s2_fire, Some("s3_pc")) 199cf7d6b7aSMuzi } 200dd6c0695SLingrui98 201c4b44470SGuokai Chen when(RegNext(RegNext(reset.asBool) && !reset.asBool)) { 2025f119905STang Haojin s1_pc_dup.map { case s1_pc => s1_pc := io.reset_vector } 203c4b44470SGuokai Chen } 204c4b44470SGuokai Chen 205adc0b8dfSGuokai Chen io.out.s1.pc := s1_pc_dup 206ae21bd31SEaston Man io.out.s2.pc := s2_pc_dup.map(_.getAddr()) 207ae21bd31SEaston Man io.out.s3.pc := s3_pc_dup.map(_.getAddr()) 208b37e4b45SLingrui98 2094813e060SLingrui98 val perfEvents: Seq[(String, UInt)] = Seq() 2104813e060SLingrui98 211dd6c0695SLingrui98 def getFoldedHistoryInfo: Option[Set[FoldedHistoryInfo]] = None 21209c6f1ddSLingrui98} 21309c6f1ddSLingrui98 21409c6f1ddSLingrui98class FakePredictor(implicit p: Parameters) extends BasePredictor { 21509c6f1ddSLingrui98 io.in.ready := true.B 2163e52bed1SLingrui98 io.out.last_stage_meta := 0.U 217c2d1ec7dSLingrui98 io.out := io.in.bits.resp_in(0) 21809c6f1ddSLingrui98} 21909c6f1ddSLingrui98 22009c6f1ddSLingrui98class BpuToFtqIO(implicit p: Parameters) extends XSBundle { 22109c6f1ddSLingrui98 val resp = DecoupledIO(new BpuToFtqBundle()) 22209c6f1ddSLingrui98} 22309c6f1ddSLingrui98 22409c6f1ddSLingrui98class PredictorIO(implicit p: Parameters) extends XSBundle { 22509c6f1ddSLingrui98 val bpu_to_ftq = new BpuToFtqIO() 226935edac4STang Haojin val ftq_to_bpu = 
Flipped(new FtqToBpuIO) 2276ee06c7aSSteve Gou val ctrl = Input(new BPUCtrl) 228c4b44470SGuokai Chen val reset_vector = Input(UInt(PAddrBits.W)) 22909c6f1ddSLingrui98} 23009c6f1ddSLingrui98 231cf7d6b7aSMuziclass Predictor(implicit p: Parameters) extends XSModule with HasBPUConst with HasPerfEvents 232cf7d6b7aSMuzi with HasCircularQueuePtrHelper { 23309c6f1ddSLingrui98 val io = IO(new PredictorIO) 23409c6f1ddSLingrui98 2356ee06c7aSSteve Gou val ctrl = DelayN(io.ctrl, 1) 23609c6f1ddSLingrui98 val predictors = Module(if (useBPD) new Composer else new FakePredictor) 23709c6f1ddSLingrui98 238d2b20d1aSTang Haojin def numOfStage = 3 239d2b20d1aSTang Haojin require(numOfStage > 1, "BPU numOfStage must be greater than 1") 240d2b20d1aSTang Haojin val topdown_stages = RegInit(VecInit(Seq.fill(numOfStage)(0.U.asTypeOf(new FrontendTopDownBundle)))) 241d2b20d1aSTang Haojin 242d2b20d1aSTang Haojin // following can only happen on s1 243d2b20d1aSTang Haojin val controlRedirectBubble = Wire(Bool()) 244d2b20d1aSTang Haojin val ControlBTBMissBubble = Wire(Bool()) 245d2b20d1aSTang Haojin val TAGEMissBubble = Wire(Bool()) 246d2b20d1aSTang Haojin val SCMissBubble = Wire(Bool()) 247d2b20d1aSTang Haojin val ITTAGEMissBubble = Wire(Bool()) 248d2b20d1aSTang Haojin val RASMissBubble = Wire(Bool()) 249d2b20d1aSTang Haojin 250d2b20d1aSTang Haojin val memVioRedirectBubble = Wire(Bool()) 251d2b20d1aSTang Haojin val otherRedirectBubble = Wire(Bool()) 252d2b20d1aSTang Haojin val btbMissBubble = Wire(Bool()) 253d2b20d1aSTang Haojin otherRedirectBubble := false.B 254d2b20d1aSTang Haojin memVioRedirectBubble := false.B 255d2b20d1aSTang Haojin 256d2b20d1aSTang Haojin // override can happen between s1-s2 and s2-s3 257d2b20d1aSTang Haojin val overrideBubble = Wire(Vec(numOfStage - 1, Bool())) 258d2b20d1aSTang Haojin def overrideStage = 1 259d2b20d1aSTang Haojin // ftq update block can happen on s1, s2 and s3 260d2b20d1aSTang Haojin val ftqUpdateBubble = Wire(Vec(numOfStage, Bool())) 261d2b20d1aSTang 
Haojin def ftqUpdateStage = 0 262d2b20d1aSTang Haojin // ftq full stall only happens on s3 (last stage) 263d2b20d1aSTang Haojin val ftqFullStall = Wire(Bool()) 264d2b20d1aSTang Haojin 265d2b20d1aSTang Haojin // by default, no bubble event 266d2b20d1aSTang Haojin topdown_stages(0) := 0.U.asTypeOf(new FrontendTopDownBundle) 267d2b20d1aSTang Haojin // event movement driven by clock only 268d2b20d1aSTang Haojin for (i <- 0 until numOfStage - 1) { 269d2b20d1aSTang Haojin topdown_stages(i + 1) := topdown_stages(i) 270d2b20d1aSTang Haojin } 271d2b20d1aSTang Haojin 2726ee06c7aSSteve Gou // ctrl signal 2736ee06c7aSSteve Gou predictors.io.ctrl := ctrl 274c4b44470SGuokai Chen predictors.io.reset_vector := io.reset_vector 2756ee06c7aSSteve Gou 2762abaf615SEaston Man val s0_stall_dup = dup_wire(Bool()) // For some reason s0 stalled, usually FTQ Full 277adc0b8dfSGuokai Chen val s0_fire_dup, s1_fire_dup, s2_fire_dup, s3_fire_dup = dup_wire(Bool()) 278adc0b8dfSGuokai Chen val s1_valid_dup, s2_valid_dup, s3_valid_dup = dup_seq(RegInit(false.B)) 279adc0b8dfSGuokai Chen val s1_ready_dup, s2_ready_dup, s3_ready_dup = dup_wire(Bool()) 280adc0b8dfSGuokai Chen val s1_components_ready_dup, s2_components_ready_dup, s3_components_ready_dup = dup_wire(Bool()) 281adc0b8dfSGuokai Chen 282adc0b8dfSGuokai Chen val s0_pc_dup = dup(WireInit(0.U.asTypeOf(UInt(VAddrBits.W)))) 2832abaf615SEaston Man val s0_pc_reg_dup = s0_pc_dup.zip(s0_stall_dup).map { case (s0_pc, s0_stall) => RegEnable(s0_pc, !s0_stall) } 284c4b44470SGuokai Chen when(RegNext(RegNext(reset.asBool) && !reset.asBool)) { 2855f119905STang Haojin s0_pc_reg_dup.map { case s0_pc => s0_pc := io.reset_vector } 286c4b44470SGuokai Chen } 287adc0b8dfSGuokai Chen val s1_pc = RegEnable(s0_pc_dup(0), s0_fire_dup(0)) 288adc0b8dfSGuokai Chen val s2_pc = RegEnable(s1_pc, s1_fire_dup(0)) 289adc0b8dfSGuokai Chen val s3_pc = RegEnable(s2_pc, s2_fire_dup(0)) 29009c6f1ddSLingrui98 291adc0b8dfSGuokai Chen val s0_folded_gh_dup = dup_wire(new 
AllFoldedHistories(foldedGHistInfos)) 2922abaf615SEaston Man val s0_folded_gh_reg_dup = s0_folded_gh_dup.zip(s0_stall_dup).map { 2932abaf615SEaston Man case (x, s0_stall) => RegEnable(x, 0.U.asTypeOf(s0_folded_gh_dup(0)), !s0_stall) 2942abaf615SEaston Man } 295adc0b8dfSGuokai Chen val s1_folded_gh_dup = RegEnable(s0_folded_gh_dup, 0.U.asTypeOf(s0_folded_gh_dup), s0_fire_dup(1)) 296adc0b8dfSGuokai Chen val s2_folded_gh_dup = RegEnable(s1_folded_gh_dup, 0.U.asTypeOf(s0_folded_gh_dup), s1_fire_dup(1)) 297adc0b8dfSGuokai Chen val s3_folded_gh_dup = RegEnable(s2_folded_gh_dup, 0.U.asTypeOf(s0_folded_gh_dup), s2_fire_dup(1)) 298c2ad24ebSLingrui98 299adc0b8dfSGuokai Chen val s0_last_br_num_oh_dup = dup_wire(UInt((numBr + 1).W)) 3002abaf615SEaston Man val s0_last_br_num_oh_reg_dup = s0_last_br_num_oh_dup.zip(s0_stall_dup).map { 3012abaf615SEaston Man case (x, s0_stall) => RegEnable(x, 0.U, !s0_stall) 3022abaf615SEaston Man } 303adc0b8dfSGuokai Chen val s1_last_br_num_oh_dup = RegEnable(s0_last_br_num_oh_dup, 0.U.asTypeOf(s0_last_br_num_oh_dup), s0_fire_dup(1)) 304adc0b8dfSGuokai Chen val s2_last_br_num_oh_dup = RegEnable(s1_last_br_num_oh_dup, 0.U.asTypeOf(s0_last_br_num_oh_dup), s1_fire_dup(1)) 305adc0b8dfSGuokai Chen val s3_last_br_num_oh_dup = RegEnable(s2_last_br_num_oh_dup, 0.U.asTypeOf(s0_last_br_num_oh_dup), s2_fire_dup(1)) 30667402d75SLingrui98 307adc0b8dfSGuokai Chen val s0_ahead_fh_oldest_bits_dup = dup_wire(new AllAheadFoldedHistoryOldestBits(foldedGHistInfos)) 3082abaf615SEaston Man val s0_ahead_fh_oldest_bits_reg_dup = s0_ahead_fh_oldest_bits_dup.zip(s0_stall_dup).map { 3092abaf615SEaston Man case (x, s0_stall) => RegEnable(x, 0.U.asTypeOf(s0_ahead_fh_oldest_bits_dup(0)), !s0_stall) 3102abaf615SEaston Man } 311cf7d6b7aSMuzi val s1_ahead_fh_oldest_bits_dup = 312cf7d6b7aSMuzi RegEnable(s0_ahead_fh_oldest_bits_dup, 0.U.asTypeOf(s0_ahead_fh_oldest_bits_dup), s0_fire_dup(1)) 313cf7d6b7aSMuzi val s2_ahead_fh_oldest_bits_dup = 314cf7d6b7aSMuzi 
RegEnable(s1_ahead_fh_oldest_bits_dup, 0.U.asTypeOf(s0_ahead_fh_oldest_bits_dup), s1_fire_dup(1)) 315cf7d6b7aSMuzi val s3_ahead_fh_oldest_bits_dup = 316cf7d6b7aSMuzi RegEnable(s2_ahead_fh_oldest_bits_dup, 0.U.asTypeOf(s0_ahead_fh_oldest_bits_dup), s2_fire_dup(1)) 31767402d75SLingrui98 318adc0b8dfSGuokai Chen val npcGen_dup = Seq.tabulate(numDup)(n => new PhyPriorityMuxGenerator[UInt]) 319adc0b8dfSGuokai Chen val foldedGhGen_dup = Seq.tabulate(numDup)(n => new PhyPriorityMuxGenerator[AllFoldedHistories]) 320adc0b8dfSGuokai Chen val ghistPtrGen_dup = Seq.tabulate(numDup)(n => new PhyPriorityMuxGenerator[CGHPtr]) 321adc0b8dfSGuokai Chen val lastBrNumOHGen_dup = Seq.tabulate(numDup)(n => new PhyPriorityMuxGenerator[UInt]) 322adc0b8dfSGuokai Chen val aheadFhObGen_dup = Seq.tabulate(numDup)(n => new PhyPriorityMuxGenerator[AllAheadFoldedHistoryOldestBits]) 32367402d75SLingrui98 324b37e4b45SLingrui98 val ghvBitWriteGens = Seq.tabulate(HistoryLength)(n => new PhyPriorityMuxGenerator[Bool]) 32586d9c530SLingrui98 // val ghistGen = new PhyPriorityMuxGenerator[UInt] 326ae8ed1a3Szoujr 327b37e4b45SLingrui98 val ghv = RegInit(0.U.asTypeOf(Vec(HistoryLength, Bool()))) 328b37e4b45SLingrui98 val ghv_wire = WireInit(ghv) 329c2ad24ebSLingrui98 33086d9c530SLingrui98 val s0_ghist = WireInit(0.U.asTypeOf(UInt(HistoryLength.W))) 33186d9c530SLingrui98 332c7fabd05SSteve Gou println(f"history buffer length ${HistoryLength}") 333b37e4b45SLingrui98 val ghv_write_datas = Wire(Vec(HistoryLength, Bool())) 334b37e4b45SLingrui98 val ghv_wens = Wire(Vec(HistoryLength, Bool())) 335c2ad24ebSLingrui98 336adc0b8dfSGuokai Chen val s0_ghist_ptr_dup = dup_wire(new CGHPtr) 3372abaf615SEaston Man val s0_ghist_ptr_reg_dup = s0_ghist_ptr_dup.zip(s0_stall_dup).map { 3382abaf615SEaston Man case (x, s0_stall) => RegEnable(x, 0.U.asTypeOf(new CGHPtr), !s0_stall) 3392abaf615SEaston Man } 340adc0b8dfSGuokai Chen val s1_ghist_ptr_dup = RegEnable(s0_ghist_ptr_dup, 0.U.asTypeOf(s0_ghist_ptr_dup), s0_fire_dup(1)) 
341adc0b8dfSGuokai Chen val s2_ghist_ptr_dup = RegEnable(s1_ghist_ptr_dup, 0.U.asTypeOf(s0_ghist_ptr_dup), s1_fire_dup(1)) 342adc0b8dfSGuokai Chen val s3_ghist_ptr_dup = RegEnable(s2_ghist_ptr_dup, 0.U.asTypeOf(s0_ghist_ptr_dup), s2_fire_dup(1)) 343c2ad24ebSLingrui98 34486d9c530SLingrui98 def getHist(ptr: CGHPtr): UInt = (Cat(ghv_wire.asUInt, ghv_wire.asUInt) >> (ptr.value + 1.U))(HistoryLength - 1, 0) 345adc0b8dfSGuokai Chen s0_ghist := getHist(s0_ghist_ptr_dup(0)) 34686d9c530SLingrui98 347c2d1ec7dSLingrui98 val resp = predictors.io.out 34809c6f1ddSLingrui98 34909c6f1ddSLingrui98 val toFtq_fire = io.bpu_to_ftq.resp.valid && io.bpu_to_ftq.resp.ready 35009c6f1ddSLingrui98 351adc0b8dfSGuokai Chen val s1_flush_dup, s2_flush_dup, s3_flush_dup = dup_wire(Bool()) 352adc0b8dfSGuokai Chen val s2_redirect_dup, s3_redirect_dup = dup_wire(Bool()) 35309c6f1ddSLingrui98 35409c6f1ddSLingrui98 // predictors.io := DontCare 355adc0b8dfSGuokai Chen predictors.io.in.valid := s0_fire_dup(0) 356adc0b8dfSGuokai Chen predictors.io.in.bits.s0_pc := s0_pc_dup 35786d9c530SLingrui98 predictors.io.in.bits.ghist := s0_ghist 358adc0b8dfSGuokai Chen predictors.io.in.bits.folded_hist := s0_folded_gh_dup 359c4a59f19SYuandongliang predictors.io.in.bits.s1_folded_hist := s1_folded_gh_dup 360cf7d6b7aSMuzi predictors.io.in.bits.resp_in(0) := 0.U.asTypeOf(new BranchPredictionResp) 361cf7d6b7aSMuzi predictors.io.fauftb_entry_in := 0.U.asTypeOf(new FTBEntry) 362fd3aa057SYuandongliang predictors.io.fauftb_entry_hit_in := false.B 363fd3aa057SYuandongliang predictors.io.redirectFromIFU := RegNext(io.ftq_to_bpu.redirctFromIFU, init = false.B) 36409c6f1ddSLingrui98 // predictors.io.in.bits.resp_in(0).s1.pc := s0_pc 36509c6f1ddSLingrui98 // predictors.io.in.bits.toFtq_fire := toFtq_fire 36609c6f1ddSLingrui98 36709c6f1ddSLingrui98 // predictors.io.out.ready := io.bpu_to_ftq.resp.ready 36809c6f1ddSLingrui98 36953bac374SLingrui98 val redirect_req = io.ftq_to_bpu.redirect 3707af6acb0SEaston Man val do_redirect_dup 
= dup_seq(RegNextWithEnable(redirect_req)) 37153bac374SLingrui98 37209c6f1ddSLingrui98 // Pipeline logic 373adc0b8dfSGuokai Chen s2_redirect_dup.map(_ := false.B) 374adc0b8dfSGuokai Chen s3_redirect_dup.map(_ := false.B) 37509c6f1ddSLingrui98 376adc0b8dfSGuokai Chen s3_flush_dup.map(_ := redirect_req.valid) // flush when redirect comes 377adc0b8dfSGuokai Chen for (((s2_flush, s3_flush), s3_redirect) <- s2_flush_dup zip s3_flush_dup zip s3_redirect_dup) 378cb4f77ceSLingrui98 s2_flush := s3_flush || s3_redirect 379adc0b8dfSGuokai Chen for (((s1_flush, s2_flush), s2_redirect) <- s1_flush_dup zip s2_flush_dup zip s2_redirect_dup) 38009c6f1ddSLingrui98 s1_flush := s2_flush || s2_redirect 38109c6f1ddSLingrui98 382adc0b8dfSGuokai Chen s1_components_ready_dup.map(_ := predictors.io.s1_ready) 383adc0b8dfSGuokai Chen for (((s1_ready, s1_fire), s1_valid) <- s1_ready_dup zip s1_fire_dup zip s1_valid_dup) 384adc0b8dfSGuokai Chen s1_ready := s1_fire || !s1_valid 385adc0b8dfSGuokai Chen for (((s0_fire, s1_components_ready), s1_ready) <- s0_fire_dup zip s1_components_ready_dup zip s1_ready_dup) 386adc0b8dfSGuokai Chen s0_fire := s1_components_ready && s1_ready 387adc0b8dfSGuokai Chen predictors.io.s0_fire := s0_fire_dup 388adc0b8dfSGuokai Chen 389adc0b8dfSGuokai Chen s2_components_ready_dup.map(_ := predictors.io.s2_ready) 390adc0b8dfSGuokai Chen for (((s2_ready, s2_fire), s2_valid) <- s2_ready_dup zip s2_fire_dup zip s2_valid_dup) 39109c6f1ddSLingrui98 s2_ready := s2_fire || !s2_valid 392cf7d6b7aSMuzi for ( 393cf7d6b7aSMuzi (((s1_fire, s2_components_ready), s2_ready), s1_valid) <- 394cf7d6b7aSMuzi s1_fire_dup zip s2_components_ready_dup zip s2_ready_dup zip s1_valid_dup 395cf7d6b7aSMuzi ) 39609c6f1ddSLingrui98 s1_fire := s1_valid && s2_components_ready && s2_ready && io.bpu_to_ftq.resp.ready 39709c6f1ddSLingrui98 398adc0b8dfSGuokai Chen s3_components_ready_dup.map(_ := predictors.io.s3_ready) 399adc0b8dfSGuokai Chen for (((s3_ready, s3_fire), s3_valid) <- s3_ready_dup zip 
s3_fire_dup zip s3_valid_dup) 400cb4f77ceSLingrui98 s3_ready := s3_fire || !s3_valid 401cf7d6b7aSMuzi for ( 402cf7d6b7aSMuzi (((s2_fire, s3_components_ready), s3_ready), s2_valid) <- 403cf7d6b7aSMuzi s2_fire_dup zip s3_components_ready_dup zip s3_ready_dup zip s2_valid_dup 404cf7d6b7aSMuzi ) 405cb4f77ceSLingrui98 s2_fire := s2_valid && s3_components_ready && s3_ready 406cb4f77ceSLingrui98 407adc0b8dfSGuokai Chen for ((((s0_fire, s1_flush), s1_fire), s1_valid) <- s0_fire_dup zip s1_flush_dup zip s1_fire_dup zip s1_valid_dup) { 408cf7d6b7aSMuzi when(redirect_req.valid)(s1_valid := false.B) 409cf7d6b7aSMuzi .elsewhen(s0_fire)(s1_valid := true.B) 410cf7d6b7aSMuzi .elsewhen(s1_flush)(s1_valid := false.B) 411cf7d6b7aSMuzi .elsewhen(s1_fire)(s1_valid := false.B) 412adc0b8dfSGuokai Chen } 413adc0b8dfSGuokai Chen predictors.io.s1_fire := s1_fire_dup 41409c6f1ddSLingrui98 415cf7d6b7aSMuzi for ( 416cf7d6b7aSMuzi ((((s1_fire, s2_flush), s2_fire), s2_valid), s1_flush) <- 417cf7d6b7aSMuzi s1_fire_dup zip s2_flush_dup zip s2_fire_dup zip s2_valid_dup zip s1_flush_dup 418cf7d6b7aSMuzi ) { 41909c6f1ddSLingrui98 420cf7d6b7aSMuzi when(s2_flush)(s2_valid := false.B) 421cf7d6b7aSMuzi .elsewhen(s1_fire)(s2_valid := !s1_flush) 422cf7d6b7aSMuzi .elsewhen(s2_fire)(s2_valid := false.B) 423adc0b8dfSGuokai Chen } 42409c6f1ddSLingrui98 425adc0b8dfSGuokai Chen predictors.io.s2_fire := s2_fire_dup 426adc0b8dfSGuokai Chen predictors.io.s2_redirect := s2_redirect_dup 42709c6f1ddSLingrui98 428adc0b8dfSGuokai Chen s3_fire_dup := s3_valid_dup 429adc0b8dfSGuokai Chen 430cf7d6b7aSMuzi for ( 431cf7d6b7aSMuzi ((((s2_fire, s3_flush), s3_fire), s3_valid), s2_flush) <- 432cf7d6b7aSMuzi s2_fire_dup zip s3_flush_dup zip s3_fire_dup zip s3_valid_dup zip s2_flush_dup 433cf7d6b7aSMuzi ) { 434cb4f77ceSLingrui98 435cf7d6b7aSMuzi when(s3_flush)(s3_valid := false.B) 436cf7d6b7aSMuzi .elsewhen(s2_fire)(s3_valid := !s2_flush) 437cf7d6b7aSMuzi .elsewhen(s3_fire)(s3_valid := false.B) 438adc0b8dfSGuokai Chen } 
439cb4f77ceSLingrui98 440adc0b8dfSGuokai Chen predictors.io.s3_fire := s3_fire_dup 441adc0b8dfSGuokai Chen predictors.io.s3_redirect := s3_redirect_dup 442cb4f77ceSLingrui98 44309c6f1ddSLingrui98 io.bpu_to_ftq.resp.valid := 444adc0b8dfSGuokai Chen s1_valid_dup(2) && s2_components_ready_dup(2) && s2_ready_dup(2) || 445adc0b8dfSGuokai Chen s2_fire_dup(2) && s2_redirect_dup(2) || 446adc0b8dfSGuokai Chen s3_fire_dup(2) && s3_redirect_dup(2) 447c2d1ec7dSLingrui98 io.bpu_to_ftq.resp.bits := predictors.io.out 448adc0b8dfSGuokai Chen io.bpu_to_ftq.resp.bits.last_stage_spec_info.histPtr := s3_ghist_ptr_dup(2) 44909c6f1ddSLingrui98 450c89b4642SGuokai Chen val full_pred_diff = WireInit(false.B) 451c89b4642SGuokai Chen val full_pred_diff_stage = WireInit(0.U) 452c89b4642SGuokai Chen val full_pred_diff_offset = WireInit(0.U) 453c89b4642SGuokai Chen for (i <- 0 until numDup - 1) { 454c89b4642SGuokai Chen when(io.bpu_to_ftq.resp.valid && 455cf7d6b7aSMuzi ((io.bpu_to_ftq.resp.bits.s1.full_pred(i).asTypeOf(UInt()) =/= io.bpu_to_ftq.resp.bits.s1.full_pred( 456cf7d6b7aSMuzi i + 1 457cf7d6b7aSMuzi ).asTypeOf(UInt()) && io.bpu_to_ftq.resp.bits.s1.full_pred(i).hit) || 458cf7d6b7aSMuzi (io.bpu_to_ftq.resp.bits.s2.full_pred(i).asTypeOf(UInt()) =/= io.bpu_to_ftq.resp.bits.s2.full_pred( 459cf7d6b7aSMuzi i + 1 460cf7d6b7aSMuzi ).asTypeOf(UInt()) && io.bpu_to_ftq.resp.bits.s2.full_pred(i).hit) || 461cf7d6b7aSMuzi (io.bpu_to_ftq.resp.bits.s3.full_pred(i).asTypeOf(UInt()) =/= io.bpu_to_ftq.resp.bits.s3.full_pred( 462cf7d6b7aSMuzi i + 1 463cf7d6b7aSMuzi ).asTypeOf(UInt()) && io.bpu_to_ftq.resp.bits.s3.full_pred(i).hit))) { 464c89b4642SGuokai Chen full_pred_diff := true.B 465c89b4642SGuokai Chen full_pred_diff_offset := i.U 466cf7d6b7aSMuzi when(io.bpu_to_ftq.resp.bits.s1.full_pred(i).asTypeOf(UInt()) =/= io.bpu_to_ftq.resp.bits.s1.full_pred( 467cf7d6b7aSMuzi i + 1 468cf7d6b7aSMuzi ).asTypeOf(UInt())) { 469c89b4642SGuokai Chen full_pred_diff_stage := 1.U 470cf7d6b7aSMuzi 
}.elsewhen(io.bpu_to_ftq.resp.bits.s2.full_pred(i).asTypeOf(UInt()) =/= io.bpu_to_ftq.resp.bits.s2.full_pred( 471cf7d6b7aSMuzi i + 1 472cf7d6b7aSMuzi ).asTypeOf(UInt())) { 473c89b4642SGuokai Chen full_pred_diff_stage := 2.U 474c89b4642SGuokai Chen }.otherwise { 475c89b4642SGuokai Chen full_pred_diff_stage := 3.U 476c89b4642SGuokai Chen } 477c89b4642SGuokai Chen } 478c89b4642SGuokai Chen } 479c89b4642SGuokai Chen XSError(full_pred_diff, "Full prediction difference detected!") 480c89b4642SGuokai Chen 4812abaf615SEaston Man // s0_stall should be exclusive with any other PC source 4822abaf615SEaston Man s0_stall_dup.zip(s1_valid_dup).zip(s2_redirect_dup).zip(s3_redirect_dup).zip(do_redirect_dup).foreach { 4832abaf615SEaston Man case ((((s0_stall, s1_valid), s2_redirect), s3_redirect), do_redirect) => { 4842abaf615SEaston Man s0_stall := !(s1_valid || s2_redirect || s3_redirect || do_redirect.valid) 4852abaf615SEaston Man } 4862abaf615SEaston Man } 4874f23201bSEaston Man // Power-on reset 4884f23201bSEaston Man val powerOnResetState = RegInit(true.B) 4894f23201bSEaston Man when(s0_fire_dup(0)) { 4904f23201bSEaston Man // When BPU pipeline first time fire, we consider power-on reset is done 4914f23201bSEaston Man powerOnResetState := false.B 4924f23201bSEaston Man } 493cf7d6b7aSMuzi XSError( 494cf7d6b7aSMuzi !powerOnResetState && s0_stall_dup(0) && s0_pc_dup(0) =/= s0_pc_reg_dup(0), 495cf7d6b7aSMuzi "s0_stall but s0_pc is differenct from s0_pc_reg" 496cf7d6b7aSMuzi ) 4972abaf615SEaston Man 498adc0b8dfSGuokai Chen npcGen_dup.zip(s0_pc_reg_dup).map { case (gen, reg) => 499cf7d6b7aSMuzi gen.register(true.B, reg, Some("stallPC"), 0) 500cf7d6b7aSMuzi } 501adc0b8dfSGuokai Chen foldedGhGen_dup.zip(s0_folded_gh_reg_dup).map { case (gen, reg) => 502cf7d6b7aSMuzi gen.register(true.B, reg, Some("stallFGH"), 0) 503cf7d6b7aSMuzi } 504adc0b8dfSGuokai Chen ghistPtrGen_dup.zip(s0_ghist_ptr_reg_dup).map { case (gen, reg) => 505cf7d6b7aSMuzi gen.register(true.B, reg, Some("stallGHPtr"), 
0)
}
// Always-enabled (true.B) candidates that feed each generator the held s0 register value.
// NOTE(review): the trailing integer given to register(...) is presumably a selection priority
// (0 here, 4 for s1, 5 for s2) — confirm against the generator's definition.
lastBrNumOHGen_dup.zip(s0_last_br_num_oh_reg_dup).foreach { case (muxGen, held) =>
  muxGen.register(true.B, held, Some("stallBrNumOH"), 0)
}
aheadFhObGen_dup.zip(s0_ahead_fh_oldest_bits_reg_dup).foreach { case (muxGen, held) =>
  muxGen.register(true.B, held, Some("stallAFHOB"), 0)
}

// Stamp every stage's prediction with the current GTimer() value (used for profiling).
io.bpu_to_ftq.resp.bits.s1.full_pred.foreach(_.predCycle.foreach(_ := GTimer()))
io.bpu_to_ftq.resp.bits.s2.full_pred.foreach(_.predCycle.foreach(_ := GTimer()))
io.bpu_to_ftq.resp.bits.s3.full_pred.foreach(_.predCycle.foreach(_ := GTimer()))

// ---------------------------------------------------------------------------
// History management — stage 1
// ---------------------------------------------------------------------------
// For each duplicate: the pointer moved back by 0..numBr positions, then one-hot
// selected by lastBrPosOH.
val s1_possible_predicted_ghist_ptrs_dup = s1_ghist_ptr_dup.map(ptr => (0 to numBr).map(ptr - _.U))
val s1_predicted_ghist_ptr_dup =
  s1_possible_predicted_ghist_ptrs_dup.zip(resp.s1.lastBrPosOH).map { case (candidates, sel) =>
    Mux1H(sel, candidates)
  }
// Folded-history candidates for 0..numBr shifted branches; the inserted bit is
// brTaken & lastBrPosOH(k).
val s1_possible_predicted_fhs_dup =
  s1_folded_gh_dup
    .zip(s1_ahead_fh_oldest_bits_dup)
    .zip(s1_last_br_num_oh_dup)
    .zip(resp.s1.brTaken)
    .zip(resp.s1.lastBrPosOH)
    .map { case ((((foldedHist, aheadOldest), lastNumOH), brTk), posOH) =>
      (0 to numBr).map(k => foldedHist.update(aheadOldest, lastNumOH, k, brTk & posOH(k)))
    }
val s1_predicted_fh_dup = resp.s1.lastBrPosOH.zip(s1_possible_predicted_fhs_dup).map { case (sel, candidates) =>
  Mux1H(sel, candidates)
}

val s1_ahead_fh_ob_src_dup = dup_wire(new AllAheadFoldedHistoryOldestBits(foldedGHistInfos))
s1_ahead_fh_ob_src_dup.zip(s1_ghist_ptr_dup).foreach { case (dst, ptr) => dst.read(ghv, ptr) }

// Only elaborated when EnableGHistDiff is set: rebuild the predicted history vector
// and drive s0_ghist with it while s1 is valid.
if (EnableGHistDiff) {
  val s1_predicted_ghist = WireInit(getHist(s1_predicted_ghist_ptr_dup(0)).asTypeOf(Vec(HistoryLength, Bool())))
  (0 until numBr).foreach { k =>
    when(resp.s1.shouldShiftVec(0)(k)) {
      s1_predicted_ghist(k) := resp.s1.brTaken(0) && (k == 0).B
    }
  }
  when(s1_valid_dup(0)) {
    s0_ghist := s1_predicted_ghist.asUInt
  }
}

// Per history bit n and branch slot b: write-enable and write-data for the global
// history vector, derived from duplicate 0 only.
val s1_ghv_wens = (0 until HistoryLength).map { n =>
  (0 until numBr).map { b =>
    s1_ghist_ptr_dup(0).value === (CGHPtr(false.B, n.U) + b.U).value &&
    resp.s1.shouldShiftVec(0)(b) && s1_valid_dup(0)
  }
}
val s1_ghv_wdatas = (0 until HistoryLength).map { n =>
  Mux1H(
    (0 until numBr).map { b =>
      (
        s1_ghist_ptr_dup(0).value === (CGHPtr(false.B, n.U) + b.U).value && resp.s1.shouldShiftVec(0)(b),
        resp.s1.brTaken(0) && resp.s1.lastBrPosOH(0)(b + 1)
      )
    }
  )
}

// Offer the s1 results to every s0 generator, enabled by s1_valid.
npcGen_dup.zip(s1_valid_dup).zip(resp.s1.getTarget).foreach { case ((muxGen, valid), target) =>
  muxGen.register(valid, target, Some("s1_target"), 4)
}
foldedGhGen_dup.zip(s1_valid_dup).zip(s1_predicted_fh_dup).foreach { case ((muxGen, valid), fh) =>
  muxGen.register(valid, fh, Some("s1_FGH"), 4)
}
ghistPtrGen_dup.zip(s1_valid_dup).zip(s1_predicted_ghist_ptr_dup).foreach { case ((muxGen, valid), ptr) =>
  muxGen.register(valid, ptr, Some("s1_GHPtr"), 4)
}
lastBrNumOHGen_dup.zip(s1_valid_dup).zip(resp.s1.lastBrPosOH.map(_.asUInt)).foreach {
  case ((muxGen, valid), posOH) =>
    muxGen.register(valid, posOH, Some("s1_BrNumOH"), 4)
}
aheadFhObGen_dup.zip(s1_valid_dup).zip(s1_ahead_fh_ob_src_dup).foreach { case ((muxGen, valid), src) =>
  muxGen.register(valid, src, Some("s1_AFHOB"), 4)
}
ghvBitWriteGens.zip(s1_ghv_wens).zipWithIndex.foreach { case ((bitGen, enables), bitIdx) =>
  bitGen.register(enables.reduce(_ || _), s1_ghv_wdatas(bitIdx), Some(s"s1_new_bit_$bitIdx"), 4)
}

/** Snapshot of the fields of an earlier-stage prediction that a later stage is
  * compared against. Every field is replicated numDup times.
  */
class PreviousPredInfo extends Bundle {
  val hit         = Vec(numDup, Bool())
  val target      = Vec(numDup, UInt(VAddrBits.W))
  val lastBrPosOH = Vec(numDup, Vec(numBr + 1, Bool()))
  val taken       = Vec(numDup, Bool())
  val takenMask   = Vec(numDup, Vec(numBr, Bool()))
  val cfiIndex    = Vec(numDup, UInt(log2Ceil(PredictWidth).W))
}

/** Compare a saved earlier prediction `x` with a later-stage prediction `y`.
  *
  * @param x saved information of the earlier prediction
  * @param y prediction bundle of the later stage
  * @return for every duplicate, a Vec of four mismatch flags in this order:
  *         target, lastBrPosOH, taken, taken-offset (cfiIndex differs while both are taken)
  */
def preds_needs_redirect_vec_dup(x: PreviousPredInfo, y: BranchPredictionBundle) = {
  // Timing optimization:
  // all candidate targets of y are compared with the previous target first, and the
  // relevant comparison is picked afterwards by taken & hit. The target is usually
  // available earlier than taken, so comparing before selecting helps timing.
  val targetDiffVec: IndexedSeq[Vec[Bool]] =
    x.target.zip(y.getAllTargets).map { case (prevTarget, candidates) =>
      VecInit(candidates.map(_ =/= prevTarget))
    } // [numDup][all Target comparison]
  val targetDiff: IndexedSeq[Bool] =
    targetDiffVec.zip(x.hit).zip(x.takenMask).map { case ((cmp, prevHit), prevTakenMask) =>
      selectByTaken(prevTakenMask, prevHit, cmp)
    } // [numDup]

  val lastBrPosOHDiff: IndexedSeq[Bool] =
    x.lastBrPosOH.zip(y.lastBrPosOH).map { case (prevOH, newOH) => prevOH.asUInt =/= newOH.asUInt }
  val takenDiff: IndexedSeq[Bool] =
    x.taken.zip(y.taken).map { case (prevTk, newTk) => prevTk =/= newTk }
  val takenOffsetDiff: IndexedSeq[Bool] =
    x.cfiIndex.zip(y.cfiIndex).zip(x.taken).zip(y.taken).map { case (((prevIdx, newIdx), prevTk), newTk) =>
      prevTk && newTk && prevIdx =/= newIdx.bits
    }
  VecInit(
    targetDiff.zip(lastBrPosOHDiff).zip(takenDiff).zip(takenOffsetDiff).map {
      case (((tgtD, posD), tkD), offD) => VecInit(tgtD, posD, tkD, offD)
    }
    // x.shouldShiftVec.asUInt =/= y.shouldShiftVec.asUInt,
    // x.brTaken =/= y.brTaken
  )
}

// ---------------------------------------------------------------------------
// History management — stage 2
// ---------------------------------------------------------------------------
val s2_possible_predicted_ghist_ptrs_dup = s2_ghist_ptr_dup.map(ptr => (0 to numBr).map(ptr - _.U))
val s2_predicted_ghist_ptr_dup =
  s2_possible_predicted_ghist_ptrs_dup.zip(resp.s2.lastBrPosOH).map { case (candidates, sel) =>
    Mux1H(sel, candidates)
  }

// Candidate k inserts br_taken_mask(k - 1); candidate 0 (no branch shifted) inserts false.
val s2_possible_predicted_fhs_dup =
  s2_folded_gh_dup
    .zip(s2_ahead_fh_oldest_bits_dup)
    .zip(s2_last_br_num_oh_dup)
    .zip(resp.s2.full_pred)
    .map { case (((foldedHist, aheadOldest), lastNumOH), fp) =>
      (0 to numBr).map(k =>
        foldedHist.update(aheadOldest, lastNumOH, k, if (k > 0) fp.br_taken_mask(k - 1) else false.B)
      )
    }
val s2_predicted_fh_dup = resp.s2.lastBrPosOH.zip(s2_possible_predicted_fhs_dup).map { case (sel, candidates) =>
  Mux1H(sel, candidates)
}

val s2_ahead_fh_ob_src_dup = dup_wire(new AllAheadFoldedHistoryOldestBits(foldedGHistInfos))
s2_ahead_fh_ob_src_dup.zip(s2_ghist_ptr_dup).foreach { case (dst, ptr) => dst.read(ghv, ptr) }

if (EnableGHistDiff) {
  val s2_predicted_ghist = WireInit(getHist(s2_predicted_ghist_ptr_dup(0)).asTypeOf(Vec(HistoryLength, Bool())))
  (0 until numBr).foreach { k =>
    when(resp.s2.shouldShiftVec(0)(k)) {
      s2_predicted_ghist(k) := resp.s2.brTaken(0) && (k == 0).B
    }
  }
  when(s2_redirect_dup(0)) {
    s0_ghist := s2_predicted_ghist.asUInt
  }
}

val s2_ghv_wens = (0 until HistoryLength).map { n =>
  (0 until numBr).map { b =>
    s2_ghist_ptr_dup(0).value === (CGHPtr(false.B, n.U) + b.U).value &&
    resp.s2.shouldShiftVec(0)(b) && s2_redirect_dup(0)
  }
}
val s2_ghv_wdatas = (0 until HistoryLength).map { n =>
  Mux1H(
    (0 until numBr).map { b =>
      (
        s2_ghist_ptr_dup(0).value === (CGHPtr(false.B, n.U) + b.U).value && resp.s2.shouldShiftVec(0)(b),
        resp.s2.full_pred(0).real_br_taken_mask()(b)
      )
    }
  )
}

// Capture the s1 prediction (on s1_fire of duplicate 0) so that s2 can be compared with it.
val s1_pred_info = Wire(new PreviousPredInfo)
s1_pred_info.hit         := resp.s1.full_pred.map(_.hit)
s1_pred_info.target      := resp.s1.getTarget
s1_pred_info.lastBrPosOH := resp.s1.lastBrPosOH
s1_pred_info.taken       := resp.s1.taken
s1_pred_info.takenMask   := resp.s1.full_pred.map(_.taken_mask_on_slot)
s1_pred_info.cfiIndex    := resp.s1.cfiIndex.map(_.bits)

val previous_s1_pred_info = RegEnable(s1_pred_info, 0.U.asTypeOf(new PreviousPredInfo), s1_fire_dup(0))

val s2_redirect_s1_last_pred_vec_dup = preds_needs_redirect_vec_dup(previous_s1_pred_info, resp.s2)

// s2 redirects when it fires and any of the four mismatch flags is set.
s2_redirect_dup.zip(s2_fire_dup).zip(s2_redirect_s1_last_pred_vec_dup).foreach { case ((redirect, fire), diffs) =>
  redirect := fire && diffs.reduce(_ || _)
}

// Offer the s2 results to every s0 generator, enabled by s2_redirect.
npcGen_dup.zip(s2_redirect_dup).zip(resp.s2.getTarget).foreach { case ((muxGen, redirect), target) =>
  muxGen.register(redirect, target, Some("s2_target"), 5)
}
foldedGhGen_dup.zip(s2_redirect_dup).zip(s2_predicted_fh_dup).foreach { case ((muxGen, redirect), fh) =>
  muxGen.register(redirect, fh, Some("s2_FGH"), 5)
}
ghistPtrGen_dup.zip(s2_redirect_dup).zip(s2_predicted_ghist_ptr_dup).foreach { case ((muxGen, redirect), ptr) =>
  muxGen.register(redirect, ptr, Some("s2_GHPtr"), 5)
}
lastBrNumOHGen_dup.zip(s2_redirect_dup).zip(resp.s2.lastBrPosOH.map(_.asUInt)).foreach {
  case ((muxGen, redirect), posOH) =>
    muxGen.register(redirect, posOH, Some("s2_BrNumOH"), 5)
}
aheadFhObGen_dup.zip(s2_redirect_dup).zip(s2_ahead_fh_ob_src_dup).foreach { case ((muxGen, redirect), src) =>
  muxGen.register(redirect, src, Some("s2_AFHOB"), 5)
}
ghvBitWriteGens.zip(s2_ghv_wens).zipWithIndex.foreach { case ((bitGen, enables), bitIdx) =>
  bitGen.register(enables.reduce(_ || _), s2_ghv_wdatas(bitIdx), Some(s"s2_new_bit_$bitIdx"), 5)
}

// Per-cause counters; the index matches the flag order returned by preds_needs_redirect_vec_dup.
XSPerfAccumulate("s2_redirect_because_target_diff", s2_fire_dup(0) && s2_redirect_s1_last_pred_vec_dup(0)(0))
XSPerfAccumulate("s2_redirect_because_branch_num_diff", s2_fire_dup(0) && s2_redirect_s1_last_pred_vec_dup(0)(1))
XSPerfAccumulate("s2_redirect_because_direction_diff", s2_fire_dup(0) && s2_redirect_s1_last_pred_vec_dup(0)(2))
XSPerfAccumulate("s2_redirect_because_cfi_idx_diff", s2_fire_dup(0) && s2_redirect_s1_last_pred_vec_dup(0)(3))
// XSPerfAccumulate("s2_redirect_because_shouldShiftVec_diff", s2_fire && s2_redirect_s1_last_pred_vec(4))
// XSPerfAccumulate("s2_redirect_because_brTaken_diff", s2_fire && s2_redirect_s1_last_pred_vec(5))
XSPerfAccumulate("s2_redirect_because_fallThroughError", s2_fire_dup(0) && resp.s2.fallThruError(0))

XSPerfAccumulate("s2_redirect_when_taken", s2_redirect_dup(0) && resp.s2.taken(0) && resp.s2.full_pred(0).hit)
XSPerfAccumulate("s2_redirect_when_not_taken", s2_redirect_dup(0) && !resp.s2.taken(0) && resp.s2.full_pred(0).hit)
XSPerfAccumulate("s2_redirect_when_not_hit", s2_redirect_dup(0) && !resp.s2.full_pred(0).hit)

// ---------------------------------------------------------------------------
// History management — stage 3
// ---------------------------------------------------------------------------
// For each duplicate: the pointer moved back by 0..numBr positions, then one-hot
// selected by lastBrPosOH.
val s3_possible_predicted_ghist_ptrs_dup = s3_ghist_ptr_dup.map(ptr => (0 to numBr).map(ptr - _.U))
val s3_predicted_ghist_ptr_dup =
  s3_possible_predicted_ghist_ptrs_dup.zip(resp.s3.lastBrPosOH).map { case (candidates, sel) =>
    Mux1H(sel, candidates)
  }

// Candidate k inserts br_taken_mask(k - 1); candidate 0 (no branch shifted) inserts false.
val s3_possible_predicted_fhs_dup =
  s3_folded_gh_dup
    .zip(s3_ahead_fh_oldest_bits_dup)
    .zip(s3_last_br_num_oh_dup)
    .zip(resp.s3.full_pred)
    .map { case (((foldedHist, aheadOldest), lastNumOH), fp) =>
      (0 to numBr).map(k =>
        foldedHist.update(aheadOldest, lastNumOH, k, if (k > 0) fp.br_taken_mask(k - 1) else false.B)
      )
    }
val s3_predicted_fh_dup = resp.s3.lastBrPosOH.zip(s3_possible_predicted_fhs_dup).map { case (sel, candidates) =>
  Mux1H(sel, candidates)
}

val s3_ahead_fh_ob_src_dup = dup_wire(new AllAheadFoldedHistoryOldestBits(foldedGHistInfos))
s3_ahead_fh_ob_src_dup.zip(s3_ghist_ptr_dup).foreach { case (dst, ptr) => dst.read(ghv, ptr) }

// Only elaborated when EnableGHistDiff is set: rebuild the predicted history vector
// and drive s0_ghist with it when s3 redirects.
if (EnableGHistDiff) {
  val s3_predicted_ghist = WireInit(getHist(s3_predicted_ghist_ptr_dup(0)).asTypeOf(Vec(HistoryLength, Bool())))
  (0 until numBr).foreach { k =>
    when(resp.s3.shouldShiftVec(0)(k)) {
      s3_predicted_ghist(k) := resp.s3.brTaken(0) && (k == 0).B
    }
  }
  when(s3_redirect_dup(0)) {
    s0_ghist := s3_predicted_ghist.asUInt
  }
}

// Per history bit n and branch slot b: write-enable and write-data for the global
// history vector when s3 redirects (derived from duplicate 0 only).
val s3_ghv_wens = (0 until HistoryLength).map { n =>
  (0 until numBr).map { b =>
    s3_ghist_ptr_dup(0).value === (CGHPtr(false.B, n.U) + b.U).value &&
    resp.s3.shouldShiftVec(0)(b) && s3_redirect_dup(0)
  }
}
val s3_ghv_wdatas = (0 until HistoryLength).map { n =>
  Mux1H(
    (0 until numBr).map { b =>
      (
        s3_ghist_ptr_dup(0).value === (CGHPtr(false.B, n.U) + b.U).value && resp.s3.shouldShiftVec(0)(b),
        resp.s3.full_pred(0).real_br_taken_mask()(b)
      )
    }
  )
}

// To optimize Clock Gating Efficiency of previous_s2_*:
// the saved s2 prediction is built from separately enabled registers instead of one
// register over the whole bundle. The connections below rely on last-connect order:
// full_pred is connected as a whole first, then targets / fallThroughAddr are
// re-driven from registers with narrower enables.
val previous_s2_pred = Wire(new BranchPredictionBundle(isNotS3 = true))
previous_s2_pred.pc := RegEnable(resp.s2.pc, 0.U.asTypeOf(resp.s2.pc), s2_fire_dup(0))
  .suggestName("previous_s2_pred_pc")
previous_s2_pred.valid := RegEnable(resp.s2.valid, 0.U.asTypeOf(resp.s2.valid), s2_fire_dup(0))
  .suggestName("previous_s2_pred_valid")
previous_s2_pred.hasRedirect := RegEnable(resp.s2.hasRedirect, 0.U.asTypeOf(resp.s2.hasRedirect), s2_fire_dup(0))
  .suggestName("previous_s2_pred_hasRedirect")
previous_s2_pred.ftq_idx := RegEnable(resp.s2.ftq_idx, 0.U.asTypeOf(resp.s2.ftq_idx), s2_fire_dup(0))
  .suggestName("previous_s2_pred_ftq_idx")
previous_s2_pred.full_pred := RegEnable(resp.s2.full_pred, 0.U.asTypeOf(resp.s2.full_pred), s2_fire_dup(0))
  .suggestName("previous_s2_pred_full_pred")
previous_s2_pred.full_pred.zip(resp.s2.full_pred.zipWithIndex).foreach { case (savedFp, (curFp, _)) =>
  savedFp.targets.zip(curFp.taken_mask_on_slot.zipWithIndex).foreach { case (savedTarget, (slotTaken, slotIdx)) =>
    // This enable signal can better improve CGE, but it may lead to timing violations:
    //    s2_fire_dup(0) && !new_fp.taken_mask_on_slot.take(slotIdx).fold(false.B)(_||_) && taken_mask && new_fp.hit
    savedTarget := RegEnable(
      curFp.targets(slotIdx),
      0.U.asTypeOf(curFp.targets(slotIdx)),
      s2_fire_dup(0) && slotTaken
    )
  }
  // This enable signal can better improve CGE, but it may lead to timing violations:
  //    s2_fire_dup(0) && new_fp.hit && !new_fp.taken_mask_on_slot.reduce(_||_)
  // Note that the enable actually used reads duplicate 0 for every duplicate.
  savedFp.fallThroughAddr := RegEnable(
    curFp.fallThroughAddr,
    0.U.asTypeOf(curFp.fallThroughAddr),
    s2_fire_dup(0) && resp.s2.full_pred(0).hit && !resp.s2.full_pred(0).taken_mask_on_slot(0)
  )
}

// s3-versus-saved-s2 comparison terms, one entry per duplicate.
val s3_redirect_on_br_taken_dup = resp.s3.full_pred.zip(previous_s2_pred.full_pred).map { case (s3Fp, s2Fp) =>
  s3Fp.real_br_taken_mask().asUInt =/= s2Fp.real_br_taken_mask().asUInt
}
val s3_both_first_taken_dup = resp.s3.full_pred.zip(previous_s2_pred.full_pred).map { case (s3Fp, s2Fp) =>
  s3Fp.real_br_taken_mask()(0) && s2Fp.real_br_taken_mask()(0)
}
val s3_redirect_on_target_dup =
  resp.s3.getTarget.zip(previous_s2_pred.getTarget).map { case (s3Tgt, s2Tgt) => s3Tgt =/= s2Tgt }
// Only referenced by the perf counters below, not by the redirect decision itself.
val s3_redirect_on_jalr_target_dup = resp.s3.full_pred.zip(previous_s2_pred.full_pred).map { case (s3Fp, s2Fp) =>
  s3Fp.hit_taken_on_jalr && s3Fp.jalr_target =/= s2Fp.jalr_target
}
val s3_redirect_on_fall_thru_error_dup = resp.s3.fallThruError
val s3_redirect_on_ftb_multi_hit_dup   = resp.s3.ftbMultiHit

// s3 redirects when it fires and one of these holds: the taken masks differ (unless both
// stages take the first branch), the target differs, a fall-through error, or an FTB multi-hit.
s3_redirect_dup
  .zip(s3_fire_dup)
  .zip(s3_redirect_on_br_taken_dup)
  .zip(s3_redirect_on_target_dup)
  .zip(s3_redirect_on_fall_thru_error_dup)
  .zip(s3_redirect_on_ftb_multi_hit_dup)
  .zip(s3_both_first_taken_dup)
  .foreach { case ((((((redirect, fire), onBrTaken), onTarget), onFallThruErr), onFtbMultiHit), bothFirstTaken) =>
    redirect := fire && (
      (onBrTaken && !bothFirstTaken) || onTarget || onFallThruErr || onFtbMultiHit
    )
  }

XSPerfAccumulate("s3_redirect_on_br_taken", s3_fire_dup(0) && s3_redirect_on_br_taken_dup(0))
XSPerfAccumulate("s3_redirect_on_jalr_target", s3_fire_dup(0) && s3_redirect_on_jalr_target_dup(0))
XSPerfAccumulate(
  "s3_redirect_on_others",
  s3_redirect_dup(0) && !(s3_redirect_on_br_taken_dup(0) || s3_redirect_on_jalr_target_dup(0))
)

// Offer the s3 results to every s0 generator, enabled by s3_redirect.
// NOTE(review): the trailing integer is presumably a selection priority — confirm
// against the generator's definition.
npcGen_dup.zip(s3_redirect_dup).zip(resp.s3.getTarget).foreach { case ((muxGen, redirect), target) =>
  muxGen.register(redirect, target, Some("s3_target"), 3)
}
foldedGhGen_dup.zip(s3_redirect_dup).zip(s3_predicted_fh_dup).foreach { case ((muxGen, redirect), fh) =>
  muxGen.register(redirect, fh, Some("s3_FGH"), 3)
}
ghistPtrGen_dup.zip(s3_redirect_dup).zip(s3_predicted_ghist_ptr_dup).foreach { case ((muxGen, redirect), ptr) =>
  muxGen.register(redirect, ptr, Some("s3_GHPtr"), 3)
}
lastBrNumOHGen_dup.zip(s3_redirect_dup).zip(resp.s3.lastBrPosOH.map(_.asUInt)).foreach {
  case ((muxGen, redirect), posOH) =>
    muxGen.register(redirect, posOH, Some("s3_BrNumOH"), 3)
}
aheadFhObGen_dup.zip(s3_redirect_dup).zip(s3_ahead_fh_ob_src_dup).foreach { case ((muxGen, redirect), src) =>
  muxGen.register(redirect, src, Some("s3_AFHOB"), 3)
}
ghvBitWriteGens.zip(s3_ghv_wens).zipWithIndex.foreach { case ((bitGen, enables), bitIdx) =>
  bitGen.register(enables.reduce(_ || _), s3_ghv_wdatas(bitIdx), Some(s"s3_new_bit_$bitIdx"), 3)
}

// Tell the Ftq about overrides: the enqueue pointer is carried along with the
// prediction through s2 and s3.
val s2_ftq_idx = RegEnable(io.ftq_to_bpu.enq_ptr, s1_fire_dup(0))
val s3_ftq_idx = RegEnable(s2_ftq_idx, s2_fire_dup(0))

// Stage valid towards the Ftq = stage fired and was not flushed; s1 never reports a redirect.
io.bpu_to_ftq.resp.bits.s1.valid.zip(s1_fire_dup).zip(s1_flush_dup).foreach { case ((valid, fire), flush) =>
  valid := fire && !flush
}
io.bpu_to_ftq.resp.bits.s1.hasRedirect.foreach(_ := false.B)
io.bpu_to_ftq.resp.bits.s1.ftq_idx := DontCare
io.bpu_to_ftq.resp.bits.s2.valid.zip(s2_fire_dup).zip(s2_flush_dup).foreach { case ((valid, fire), flush) =>
  valid := fire && !flush
}
io.bpu_to_ftq.resp.bits.s2.hasRedirect.zip(s2_redirect_dup).foreach { case (dst, redirect) => dst := redirect }
io.bpu_to_ftq.resp.bits.s2.ftq_idx := s2_ftq_idx
io.bpu_to_ftq.resp.bits.s3.valid.zip(s3_fire_dup).zip(s3_flush_dup).foreach { case ((valid, fire), flush) =>
  valid := fire && !flush
}
io.bpu_to_ftq.resp.bits.s3.hasRedirect.zip(s3_redirect_dup).foreach { case (dst, redirect) => dst := redirect }
io.bpu_to_ftq.resp.bits.s3.ftq_idx := s3_ftq_idx

// Forward the Ftq update to the predictors; ghist and pc are re-driven after the bulk connect.
predictors.io.update            := io.ftq_to_bpu.update
predictors.io.update.bits.ghist := getHist(io.ftq_to_bpu.update.bits.spec_info.histPtr)
// Move the update pc registers out of predictors.
predictors.io.update.bits.pc := SegmentedAddrNext(
  io.ftq_to_bpu.update.bits.pc,
  pcSegments,
  io.ftq_to_bpu.update.valid,
  Some("predictors_io_update_pc")
).getAddr()

val redirect_dup = do_redirect_dup.map(_.bits)
predictors.io.redirect := do_redirect_dup(0)

// Redirect logic
val shift_dup       = redirect_dup.map(_.cfiUpdate.shift)
val addIntoHist_dup = redirect_dup.map(_.cfiUpdate.addIntoHist)
// TODO: remove these below
val shouldShiftVec_dup = shift_dup.map { sh =>
  Mux(
    sh === 0.U,
    VecInit(0.U((1 << (log2Ceil(numBr) + 1)).W).asBools),
    VecInit(LowerMask(1.U << (sh - 1.U)).asBools)
  )
}
// TODO end
val afhob_dup       = redirect_dup.map(_.cfiUpdate.afhob)
val lastBrNumOH_dup = redirect_dup.map(_.cfiUpdate.lastBrNumOH)

val isBr_dup  = redirect_dup.map(_.cfiUpdate.pd.isBr)
val taken_dup = redirect_dup.map(_.cfiUpdate.taken)
// Slot k is marked when the redirect shifts exactly k + 1 bits, is taken and is added to the history.
val real_br_taken_mask_dup =
  shift_dup.zip(taken_dup).zip(addIntoHist_dup).map { case ((sh, tk), add) =>
    (0 until numBr).map(k => sh === (k + 1).U && tk && add)
  }

val oldPtr_dup      = redirect_dup.map(_.cfiUpdate.histPtr)
val updated_ptr_dup = oldPtr_dup.zip(shift_dup).map { case (ptr, sh) => ptr - sh }

/** Fold the low `histLen` bits of `hist` into at most `compLen` bits.
  *
  * The history is cut into ceil(histLen / compLen) chunks of `compLen` bits (the
  * last one may be shorter) and the chunks are combined with ParallelXOR.
  *
  * @param hist    history bits to fold
  * @param compLen width of one chunk
  * @param histLen number of history bits that take part in the fold
  * @return the folded value, or 0.U when histLen is not positive
  */
def computeFoldedHist(hist: UInt, compLen: Int)(histLen: Int): UInt =
  if (histLen <= 0) {
    0.U
  } else {
    val nChunks     = (histLen + compLen - 1) / compLen
    val hist_chunks = (0 until nChunks).map(c => hist(min((c + 1) * compLen, histLen) - 1, c * compLen))
    ParallelXOR(hist_chunks)
  }

// Recompute every folded history at the redirect's history pointer.
val oldFh_dup = dup_seq(WireInit(0.U.asTypeOf(new AllFoldedHistories(foldedGHistInfos))))
oldFh_dup.zip(oldPtr_dup).foreach { case (fh, ptr) =>
  foldedGHistInfos.foreach { case (histLen, compLen) =>
    fh.getHistWithInfo((histLen, compLen)).folded_hist := computeFoldedHist(getHist(ptr), compLen)(histLen)
  }
}

// Build the 0..numBr shift candidates and pick the one indexed by the redirect's shift.
val updated_fh_dup =
  oldFh_dup.zip(oldPtr_dup).zip(taken_dup).zip(addIntoHist_dup).zip(shift_dup).map {
    case ((((fh, ptr), tk), add), sh) =>
      VecInit((0 to numBr).map(k => fh.update(ghv, ptr, k, tk && add)))(sh)
  }
val thisBrNumOH_dup   = shift_dup.map(sh => UIntToOH(sh, numBr + 1))
val thisAheadFhOb_dup = dup_wire(new AllAheadFoldedHistoryOldestBits(foldedGHistInfos))
thisAheadFhOb_dup.zip(oldPtr_dup).foreach { case (dst, ptr) => dst.read(ghv, ptr) }
val redirect_ghv_wens = (0 until HistoryLength).map { n =>
  (0 until numBr).map { b =>
    oldPtr_dup(0).value === (CGHPtr(false.B, n.U) + b.U).value && shouldShiftVec_dup(0)(b) && do_redirect_dup(0).valid
  }
}
val redirect_ghv_wdatas = (0 until HistoryLength).map { n =>
  Mux1H(
    (0 until numBr).map(b => oldPtr_dup(0).value === (CGHPtr(false.B, n.U) + b.U).value && shouldShiftVec_dup(0)(b)),
    real_br_taken_mask_dup(0)
  )
}

// Only elaborated when EnableGHistDiff is set: drive s0_ghist with the history
// rebuilt at the redirect.
if (EnableGHistDiff) {
  val updated_ghist = WireInit(getHist(updated_ptr_dup(0)).asTypeOf(Vec(HistoryLength, Bool())))
  (0 until numBr).foreach { k =>
    when(shift_dup(0) >= (k + 1).U) {
      updated_ghist(k) := taken_dup(0) && addIntoHist_dup(0) && (k == 0).B
    }
  }
  when(do_redirect_dup(0).valid) {
    s0_ghist := updated_ghist.asUInt
  }
}

// Commit time history checker: keeps a history built from Ftq updates and warns
// whenever its folded value differs from the one folded from the predict-time pointer.
if (EnableCommitGHistDiff) {
  val commitGHist    = RegInit(0.U.asTypeOf(Vec(HistoryLength, Bool())))
  val commitGHistPtr = RegInit(0.U.asTypeOf(new CGHPtr))
  // View of commitGHist starting one past `ptr`, obtained by shifting the doubled vector.
  def getCommitHist(ptr: CGHPtr): UInt =
    (Cat(commitGHist.asUInt, commitGHist.asUInt) >> (ptr.value + 1.U))(HistoryLength - 1, 0)

  val updateValid:         Bool      = io.ftq_to_bpu.update.valid
  val branchValidMask:     UInt      = io.ftq_to_bpu.update.bits.ftb_entry.brValids.asUInt
  val branchCommittedMask: Vec[Bool] = io.ftq_to_bpu.update.bits.br_committed
  val misPredictMask:      UInt      = io.ftq_to_bpu.update.bits.mispred_mask.asUInt
  val takenMask: UInt =
    io.ftq_to_bpu.update.bits.br_taken_mask.asUInt |
      io.ftq_to_bpu.update.bits.ftb_entry.strong_bias.asUInt // Always taken branch is recorded in history
  val takenIdx:      UInt = (PriorityEncoder(takenMask) + 1.U((log2Ceil(numBr) + 1).W)).asUInt
  val misPredictIdx: UInt = (PriorityEncoder(misPredictMask) + 1.U((log2Ceil(numBr) + 1).W)).asUInt
  // Branches up to and including the first taken / first mispredicted one, limited to committed branches.
  val shouldShiftMask: UInt =
    Mux(takenMask.orR, LowerMask(takenIdx).asUInt, ((1 << numBr) - 1).asUInt) &
      Mux(misPredictMask.orR, LowerMask(misPredictIdx).asUInt, ((1 << numBr) - 1).asUInt) &
      branchCommittedMask.asUInt
  val updateShift: UInt =
    Mux(updateValid && branchValidMask.orR, PopCount(branchValidMask & shouldShiftMask), 0.U)

  // Maintain the commitGHist
  (0 until numBr).foreach { i =>
    when(updateShift >= (i + 1).U) {
      val ptr: CGHPtr = commitGHistPtr - i.asUInt
      commitGHist(ptr.value) := takenMask(i)
    }
  }
  when(updateValid) {
    commitGHistPtr := commitGHistPtr - updateShift
  }

  // Calculate true history using Parallel XOR
  // Do differential
  TageTableInfos.foreach { case (nRows, histLen, _) =>
    val nRowsPerBr      = nRows / numBr
    val predictGHistPtr = io.ftq_to_bpu.update.bits.spec_info.histPtr
    val commitTrueHist: UInt = computeFoldedHist(getCommitHist(commitGHistPtr), log2Ceil(nRowsPerBr))(histLen)
    val predictFHist:   UInt = computeFoldedHist(getHist(predictGHistPtr), log2Ceil(nRowsPerBr))(histLen)
    XSWarn(
      updateValid && predictFHist =/= commitTrueHist,
      p"predict time ghist: ${predictFHist} is different from commit time: ${commitTrueHist}\n"
    )
  }
}

// val updatedGh = oldGh.update(shift, taken && addIntoHist)
// Offer the backend-redirect results to every s0 generator, enabled by do_redirect.valid.
// NOTE(review): the trailing integer is presumably a selection priority — confirm
// against the generator's definition.
npcGen_dup.zip(do_redirect_dup).foreach { case (muxGen, redirect) =>
  muxGen.register(redirect.valid, redirect.bits.cfiUpdate.target, Some("redirect_target"), 2)
}
foldedGhGen_dup.zip(do_redirect_dup).zip(updated_fh_dup).foreach { case ((muxGen, redirect), fh) =>
  muxGen.register(redirect.valid, fh, Some("redirect_FGHT"), 2)
}
ghistPtrGen_dup.zip(do_redirect_dup).zip(updated_ptr_dup).foreach { case ((muxGen, redirect), ptr) =>
  muxGen.register(redirect.valid, ptr, Some("redirect_GHPtr"), 2)
}
lastBrNumOHGen_dup.zip(do_redirect_dup).zip(thisBrNumOH_dup).foreach { case ((muxGen, redirect), numOH) =>
  muxGen.register(redirect.valid, numOH, Some("redirect_BrNumOH"), 2)
}
aheadFhObGen_dup.zip(do_redirect_dup).zip(thisAheadFhOb_dup).foreach { case ((muxGen, redirect), src) =>
  muxGen.register(redirect.valid, src, Some("redirect_AFHOB"), 2)
}
ghvBitWriteGens.zip(redirect_ghv_wens).zipWithIndex.foreach { case ((bitGen, enables), bitIdx) =>
  bitGen.register(enables.reduce(_ || _), redirect_ghv_wdatas(bitIdx), Some(s"redirect_new_bit_$bitIdx"), 2)
}
// no need to assign s0_last_pred

// val need_reset = RegNext(reset.asBool) && !reset.asBool

// Reset
// npcGen.register(need_reset, resetVector.U, Some("reset_pc"), 1)
// foldedGhGen.register(need_reset, 0.U.asTypeOf(s0_folded_gh), Some("reset_FGH"), 1)
// ghistPtrGen.register(need_reset, 0.U.asTypeOf(new CGHPtr), Some("reset_GHPtr"), 1)

// Drive every s0 signal from the output of its generator.
s0_pc_dup.zip(npcGen_dup).foreach { case (dst, muxGen) => dst := muxGen() }
s0_folded_gh_dup.zip(foldedGhGen_dup).foreach { case (dst, muxGen) => dst := muxGen() }
s0_ghist_ptr_dup.zip(ghistPtrGen_dup).foreach { case (dst, muxGen) => dst := muxGen() }
s0_ahead_fh_oldest_bits_dup.zip(aheadFhObGen_dup).foreach { case (dst, muxGen) => dst := muxGen() }
s0_last_br_num_oh_dup.zip(lastBrNumOHGen_dup).foreach { case (dst, muxGen) => dst := muxGen() }
ghv_write_datas.zip(ghvBitWriteGens).foreach { case (dst, bitGen) => dst := bitGen() }
// A history bit is written when any source (s1, s2, s3, redirect) enables it.
(0 until HistoryLength).foreach { i =>
  ghv_wens(i) := Seq(s1_ghv_wens, s2_ghv_wens, s3_ghv_wens, redirect_ghv_wens).map(_(i).reduce(_ || _)).reduce(_ || _)
  when(ghv_wens(i)) {
    ghv(i) := ghv_write_datas(i)
  }
}

// TODO: signals for memVio and other Redirects
controlRedirectBubble := do_redirect_dup(0).valid && do_redirect_dup(0).bits.ControlRedirectBubble
ControlBTBMissBubble  := do_redirect_dup(0).bits.ControlBTBMissBubble
TAGEMissBubble        := do_redirect_dup(0).bits.TAGEMissBubble
SCMissBubble          := do_redirect_dup(0).bits.SCMissBubble
ITTAGEMissBubble      := do_redirect_dup(0).bits.ITTAGEMissBubble
RASMissBubble         := do_redirect_dup(0).bits.RASMissBubble

memVioRedirectBubble                 := do_redirect_dup(0).valid && do_redirect_dup(0).bits.MemVioRedirectBubble
otherRedirectBubble                  := do_redirect_dup(0).valid && do_redirect_dup(0).bits.OtherRedirectBubble
btbMissBubble                        := do_redirect_dup(0).valid && do_redirect_dup(0).bits.BTBMissBubble
overrideBubble(0)                    := s2_redirect_dup(0)
overrideBubble(1)                    := s3_redirect_dup(0)
ftqUpdateBubble(0)                   := !s1_components_ready_dup(0)
ftqUpdateBubble(1)                   := !s2_components_ready_dup(0)
ftqUpdateBubble(2)                   := !s3_components_ready_dup(0)
ftqFullStall                         := !io.bpu_to_ftq.resp.ready
io.bpu_to_ftq.resp.bits.topdown_info := topdown_stages(numOfStage - 1)

// Top-down handling: on a control redirect, the first matching miss cause in the chain
// below is flagged in every pipeline stage entry and in the response going to the Ftq.
when(controlRedirectBubble) {
  /*
  for (i <- 0 until numOfStage)
    topdown_stages(i).reasons(TopDownCounters.ControlRedirectBubble.id) := true.B
  io.bpu_to_ftq.resp.bits.topdown_info.reasons(TopDownCounters.ControlRedirectBubble.id) := true.B
   */
  when(ControlBTBMissBubble) {
    for (i <- 0 until numOfStage)
      topdown_stages(i).reasons(TopDownCounters.BTBMissBubble.id) := true.B
    io.bpu_to_ftq.resp.bits.topdown_info.reasons(TopDownCounters.BTBMissBubble.id) := true.B
  }.elsewhen(TAGEMissBubble) {
    for (i <- 0 until numOfStage)
      topdown_stages(i).reasons(TopDownCounters.TAGEMissBubble.id) := true.B
    io.bpu_to_ftq.resp.bits.topdown_info.reasons(TopDownCounters.TAGEMissBubble.id) := true.B
  }.elsewhen(SCMissBubble) {
    for (i <- 0 until numOfStage)
      topdown_stages(i).reasons(TopDownCounters.SCMissBubble.id) := true.B
    io.bpu_to_ftq.resp.bits.topdown_info.reasons(TopDownCounters.SCMissBubble.id) := true.B
  }.elsewhen(ITTAGEMissBubble) {
    for (i <- 0 until numOfStage)
      topdown_stages(i).reasons(TopDownCounters.ITTAGEMissBubble.id) := true.B
    io.bpu_to_ftq.resp.bits.topdown_info.reasons(TopDownCounters.ITTAGEMissBubble.id) := true.B
  }.elsewhen(RASMissBubble) {
    for (i <- 0 until numOfStage)
      topdown_stages(i).reasons(TopDownCounters.RASMissBubble.id) := true.B
    io.bpu_to_ftq.resp.bits.topdown_info.reasons(TopDownCounters.RASMissBubble.id) := true.B
  }
}
when(memVioRedirectBubble) {
  for (i <- 0 until numOfStage)
    topdown_stages(i).reasons(TopDownCounters.MemVioRedirectBubble.id)
:= true.B 1119d2b20d1aSTang Haojin io.bpu_to_ftq.resp.bits.topdown_info.reasons(TopDownCounters.MemVioRedirectBubble.id) := true.B 1120d2b20d1aSTang Haojin } 1121d2b20d1aSTang Haojin when(otherRedirectBubble) { 1122d2b20d1aSTang Haojin for (i <- 0 until numOfStage) 1123d2b20d1aSTang Haojin topdown_stages(i).reasons(TopDownCounters.OtherRedirectBubble.id) := true.B 1124d2b20d1aSTang Haojin io.bpu_to_ftq.resp.bits.topdown_info.reasons(TopDownCounters.OtherRedirectBubble.id) := true.B 1125d2b20d1aSTang Haojin } 1126d2b20d1aSTang Haojin when(btbMissBubble) { 1127d2b20d1aSTang Haojin for (i <- 0 until numOfStage) 1128d2b20d1aSTang Haojin topdown_stages(i).reasons(TopDownCounters.BTBMissBubble.id) := true.B 1129d2b20d1aSTang Haojin io.bpu_to_ftq.resp.bits.topdown_info.reasons(TopDownCounters.BTBMissBubble.id) := true.B 1130d2b20d1aSTang Haojin } 1131d2b20d1aSTang Haojin 1132d2b20d1aSTang Haojin for (i <- 0 until numOfStage) { 1133d2b20d1aSTang Haojin if (i < numOfStage - overrideStage) { 1134d2b20d1aSTang Haojin when(overrideBubble(i)) { 1135d2b20d1aSTang Haojin for (j <- 0 to i) 1136d2b20d1aSTang Haojin topdown_stages(j).reasons(TopDownCounters.OverrideBubble.id) := true.B 1137d2b20d1aSTang Haojin } 1138d2b20d1aSTang Haojin } 1139d2b20d1aSTang Haojin if (i < numOfStage - ftqUpdateStage) { 1140d2b20d1aSTang Haojin when(ftqUpdateBubble(i)) { 1141d2b20d1aSTang Haojin topdown_stages(i).reasons(TopDownCounters.FtqUpdateBubble.id) := true.B 1142d2b20d1aSTang Haojin } 1143d2b20d1aSTang Haojin } 1144d2b20d1aSTang Haojin } 1145d2b20d1aSTang Haojin when(ftqFullStall) { 1146d2b20d1aSTang Haojin topdown_stages(0).reasons(TopDownCounters.FtqFullStall.id) := true.B 1147d2b20d1aSTang Haojin } 1148d2b20d1aSTang Haojin 1149cf7d6b7aSMuzi XSError( 1150cf7d6b7aSMuzi isBefore(redirect_dup(0).cfiUpdate.histPtr, s3_ghist_ptr_dup(0)) && do_redirect_dup(0).valid, 1151cf7d6b7aSMuzi p"s3_ghist_ptr ${s3_ghist_ptr_dup(0)} exceeds redirect histPtr ${redirect_dup(0).cfiUpdate.histPtr}\n" 
1152cf7d6b7aSMuzi ) 1153cf7d6b7aSMuzi XSError( 1154cf7d6b7aSMuzi isBefore(redirect_dup(0).cfiUpdate.histPtr, s2_ghist_ptr_dup(0)) && do_redirect_dup(0).valid, 1155cf7d6b7aSMuzi p"s2_ghist_ptr ${s2_ghist_ptr_dup(0)} exceeds redirect histPtr ${redirect_dup(0).cfiUpdate.histPtr}\n" 1156cf7d6b7aSMuzi ) 1157cf7d6b7aSMuzi XSError( 1158cf7d6b7aSMuzi isBefore(redirect_dup(0).cfiUpdate.histPtr, s1_ghist_ptr_dup(0)) && do_redirect_dup(0).valid, 1159cf7d6b7aSMuzi p"s1_ghist_ptr ${s1_ghist_ptr_dup(0)} exceeds redirect histPtr ${redirect_dup(0).cfiUpdate.histPtr}\n" 1160cf7d6b7aSMuzi ) 1161c7fabd05SSteve Gou 116209c6f1ddSLingrui98 XSDebug(RegNext(reset.asBool) && !reset.asBool, "Reseting...\n") 116309c6f1ddSLingrui98 XSDebug(io.ftq_to_bpu.update.valid, p"Update from ftq\n") 116409c6f1ddSLingrui98 XSDebug(io.ftq_to_bpu.redirect.valid, p"Redirect from ftq\n") 116509c6f1ddSLingrui98 1166adc0b8dfSGuokai Chen XSDebug("[BP0] fire=%d pc=%x\n", s0_fire_dup(0), s0_pc_dup(0)) 1167cf7d6b7aSMuzi XSDebug( 1168cf7d6b7aSMuzi "[BP1] v=%d r=%d cr=%d fire=%d flush=%d pc=%x\n", 1169cf7d6b7aSMuzi s1_valid_dup(0), 1170cf7d6b7aSMuzi s1_ready_dup(0), 1171cf7d6b7aSMuzi s1_components_ready_dup(0), 1172cf7d6b7aSMuzi s1_fire_dup(0), 1173cf7d6b7aSMuzi s1_flush_dup(0), 1174cf7d6b7aSMuzi s1_pc 1175cf7d6b7aSMuzi ) 1176cf7d6b7aSMuzi XSDebug( 1177cf7d6b7aSMuzi "[BP2] v=%d r=%d cr=%d fire=%d redirect=%d flush=%d pc=%x\n", 1178cf7d6b7aSMuzi s2_valid_dup(0), 1179cf7d6b7aSMuzi s2_ready_dup(0), 1180cf7d6b7aSMuzi s2_components_ready_dup(0), 1181cf7d6b7aSMuzi s2_fire_dup(0), 1182cf7d6b7aSMuzi s2_redirect_dup(0), 1183cf7d6b7aSMuzi s2_flush_dup(0), 1184cf7d6b7aSMuzi s2_pc 1185cf7d6b7aSMuzi ) 1186cf7d6b7aSMuzi XSDebug( 1187cf7d6b7aSMuzi "[BP3] v=%d r=%d cr=%d fire=%d redirect=%d flush=%d pc=%x\n", 1188cf7d6b7aSMuzi s3_valid_dup(0), 1189cf7d6b7aSMuzi s3_ready_dup(0), 1190cf7d6b7aSMuzi s3_components_ready_dup(0), 1191cf7d6b7aSMuzi s3_fire_dup(0), 1192cf7d6b7aSMuzi s3_redirect_dup(0), 1193cf7d6b7aSMuzi s3_flush_dup(0), 
1194cf7d6b7aSMuzi s3_pc 1195cf7d6b7aSMuzi ) 119609c6f1ddSLingrui98 XSDebug("[FTQ] ready=%d\n", io.bpu_to_ftq.resp.ready) 1197adc0b8dfSGuokai Chen XSDebug("resp.s1.target=%x\n", resp.s1.getTarget(0)) 1198adc0b8dfSGuokai Chen XSDebug("resp.s2.target=%x\n", resp.s2.getTarget(0)) 1199c2ad24ebSLingrui98 // XSDebug("s0_ghist: %b\n", s0_ghist.predHist) 1200c2ad24ebSLingrui98 // XSDebug("s1_ghist: %b\n", s1_ghist.predHist) 1201c2ad24ebSLingrui98 // XSDebug("s2_ghist: %b\n", s2_ghist.predHist) 1202c2ad24ebSLingrui98 // XSDebug("s2_predicted_ghist: %b\n", s2_predicted_ghist.predHist) 1203adc0b8dfSGuokai Chen XSDebug(p"s0_ghist_ptr: ${s0_ghist_ptr_dup(0)}\n") 1204adc0b8dfSGuokai Chen XSDebug(p"s1_ghist_ptr: ${s1_ghist_ptr_dup(0)}\n") 1205adc0b8dfSGuokai Chen XSDebug(p"s2_ghist_ptr: ${s2_ghist_ptr_dup(0)}\n") 1206adc0b8dfSGuokai Chen XSDebug(p"s3_ghist_ptr: ${s3_ghist_ptr_dup(0)}\n") 120709c6f1ddSLingrui98 120809c6f1ddSLingrui98 io.ftq_to_bpu.update.bits.display(io.ftq_to_bpu.update.valid) 120909c6f1ddSLingrui98 io.ftq_to_bpu.redirect.bits.display(io.ftq_to_bpu.redirect.valid) 121009c6f1ddSLingrui98 1211adc0b8dfSGuokai Chen XSPerfAccumulate("s2_redirect", s2_redirect_dup(0)) 1212adc0b8dfSGuokai Chen XSPerfAccumulate("s3_redirect", s3_redirect_dup(0)) 1213adc0b8dfSGuokai Chen XSPerfAccumulate("s1_not_valid", !s1_valid_dup(0)) 121409c6f1ddSLingrui98 12151ca0e4f3SYinan Xu val perfEvents = predictors.asInstanceOf[Composer].getPerfEvents 12161ca0e4f3SYinan Xu generatePerfEvent() 121709c6f1ddSLingrui98} 1218