xref: /XiangShan/src/main/scala/xiangshan/frontend/NewFtq.scala (revision a4e57ea3a91431261d57a58df4810c0d9f0366ef)
/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.frontend

import chipsalliance.rocketchip.config.Parameters
import chisel3._
import chisel3.util._
import utils._
import xiangshan._
import xiangshan.backend.CtrlToFtqIO

class FtqPtr(implicit p: Parameters) extends CircularQueuePtr[FtqPtr](
  p => p(XSCoreParamsKey).FtqSize
){
  override def cloneType = (new FtqPtr).asInstanceOf[this.type]
}

object FtqPtr {
  def apply(f: Bool, v: UInt)(implicit p: Parameters): FtqPtr = {
    val ptr = Wire(new FtqPtr)
    ptr.flag := f
    ptr.value := v
    ptr
  }
  def inverse(ptr: FtqPtr)(implicit p: Parameters): FtqPtr = {
    apply(!ptr.flag, ptr.value)
  }
}
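
// FtqPtr is a circular queue pointer: `value` indexes an FTQ entry and `flag`
// flips on every wrap-around, so two pointers exactly one lap apart compare
// unequal even though their values match. A minimal usage sketch:
//   val head = FtqPtr(false.B, 0.U)    // entry 0, even lap
//   val far  = FtqPtr.inverse(head)    // same entry, one full lap away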

class FtqNRSRAM[T <: Data](gen: T, numRead: Int)(implicit p: Parameters) extends XSModule {

  val io = IO(new Bundle() {
    val raddr = Input(Vec(numRead, UInt(log2Up(FtqSize).W)))
    val ren = Input(Vec(numRead, Bool()))
    val rdata = Output(Vec(numRead, gen))
    val waddr = Input(UInt(log2Up(FtqSize).W))
    val wen = Input(Bool())
    val wdata = Input(gen)
  })

  for(i <- 0 until numRead){
    val sram = Module(new SRAMTemplate(gen, FtqSize))
    sram.io.r.req.valid := io.ren(i)
    sram.io.r.req.bits.setIdx := io.raddr(i)
    io.rdata(i) := sram.io.r.resp.data(0)
    sram.io.w.req.valid := io.wen
    sram.io.w.req.bits.setIdx := io.waddr
    sram.io.w.req.bits.data := VecInit(io.wdata)
  }

}
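
// FtqNRSRAM provides numRead read ports on a 1-write memory by replication:
// each read port owns a private SRAMTemplate copy and every write is broadcast
// to all copies, trading area for ports at the (small) FtqSize depth.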

class Ftq_RF_Components(implicit p: Parameters) extends XSBundle with BPUUtils {
  // TODO: move pftAddr, oversize, carry to another mem
  val startAddr = UInt(VAddrBits.W)
  val nextRangeAddr = UInt(VAddrBits.W)
  val pftAddr = UInt((log2Ceil(PredictWidth)+1).W)
  val isNextMask = Vec(PredictWidth, Bool())
  val oversize = Bool()
  val carry = Bool()
  def getPc(offset: UInt) = {
    def getHigher(pc: UInt) = pc(VAddrBits-1, log2Ceil(PredictWidth)+instOffsetBits+1)
    def getOffset(pc: UInt) = pc(log2Ceil(PredictWidth)+instOffsetBits, instOffsetBits)
    Cat(getHigher(Mux(isNextMask(offset) && startAddr(log2Ceil(PredictWidth)+instOffsetBits), nextRangeAddr, startAddr)),
        getOffset(startAddr)+offset, 0.U(instOffsetBits.W))
  }
  def getFallThrough() = {
    def getHigher(pc: UInt) = pc.head(VAddrBits-log2Ceil(PredictWidth)-instOffsetBits-1)
    val startHigher = getHigher(startAddr)
    val nextHigher  = getHigher(nextRangeAddr)
    val higher = Mux(carry, nextHigher, startHigher)
    Cat(higher, pftAddr, 0.U(instOffsetBits.W))
  }
  def fallThroughError() = {
    val startLower        = Cat(0.U(1.W), startAddr(instOffsetBits+log2Ceil(PredictWidth), instOffsetBits))
    val endLowerwithCarry = Cat(carry,    pftAddr)
    require(startLower.getWidth == log2Ceil(PredictWidth)+2)
    require(endLowerwithCarry.getWidth == log2Ceil(PredictWidth)+2)
    startLower >= endLowerwithCarry || (endLowerwithCarry - startLower) > (PredictWidth+1).U
  }
  def fromBranchPrediction(resp: BranchPredictionBundle) = {
    this.startAddr := resp.pc
    this.nextRangeAddr := resp.pc + (FetchWidth * 4 * 2).U
    this.pftAddr :=
      Mux(resp.preds.hit, resp.ftb_entry.pftAddr,
        resp.pc(instOffsetBits + log2Ceil(PredictWidth), instOffsetBits) ^ (1 << log2Ceil(PredictWidth)).U)
    this.isNextMask := VecInit((0 until PredictWidth).map(i =>
      (resp.pc(log2Ceil(PredictWidth), 1) +& i.U)(log2Ceil(PredictWidth)).asBool()
    ))
    this.oversize := Mux(resp.preds.hit, resp.ftb_entry.oversize, false.B)
    this.carry := Mux(resp.preds.hit, resp.ftb_entry.carry, resp.pc(instOffsetBits + log2Ceil(PredictWidth)).asBool)
    this
  }
  override def toPrintable: Printable = {
    p"startAddr:${Hexadecimal(startAddr)}, fallThru:${Hexadecimal(getFallThrough())}"
  }
}
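
// Note on the compressed PC storage above: only startAddr, nextRangeAddr and the
// short pftAddr/carry pair are kept per entry. getPc rebuilds an instruction's PC
// by splicing the high bits (startAddr, or nextRangeAddr once isNextMask marks the
// slot as having crossed into the next aligned region) with the low offset bits.
// fallThroughError flags entries whose fall-through is at or before startAddr, or
// roughly more than a fetch block beyond it, which can only come from a bad FTB hit.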

class Ftq_pd_Entry(implicit p: Parameters) extends XSBundle {
  val brMask = Vec(PredictWidth, Bool())
  val jmpInfo = ValidUndirectioned(Vec(3, Bool()))
  val jmpOffset = UInt(log2Ceil(PredictWidth).W)
  val jalTarget = UInt(VAddrBits.W)
  val rvcMask = Vec(PredictWidth, Bool())
  def hasJal  = jmpInfo.valid && !jmpInfo.bits(0)
  def hasJalr = jmpInfo.valid && jmpInfo.bits(0)
  def hasCall = jmpInfo.valid && jmpInfo.bits(1)
  def hasRet  = jmpInfo.valid && jmpInfo.bits(2)

  def fromPdWb(pdWb: PredecodeWritebackBundle) = {
    val pds = pdWb.pd
    this.brMask := VecInit(pds.map(pd => pd.isBr && pd.valid))
    this.jmpInfo.valid := VecInit(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid)).asUInt.orR
    this.jmpInfo.bits := ParallelPriorityMux(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid),
                                             pds.map(pd => VecInit(pd.isJalr, pd.isCall, pd.isRet)))
    this.jmpOffset := ParallelPriorityEncoder(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid))
    this.rvcMask := VecInit(pds.map(pd => pd.isRVC))
    this.jalTarget := pdWb.jalTarget
  }

  def toPd(offset: UInt) = {
    require(offset.getWidth == log2Ceil(PredictWidth))
    val pd = Wire(new PreDecodeInfo)
    pd.valid := true.B
    pd.isRVC := rvcMask(offset)
    val isBr = brMask(offset)
    val isJalr = offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(0)
    pd.brType := Cat(offset === jmpOffset && jmpInfo.valid, isJalr || isBr)
    pd.isCall := offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(1)
    pd.isRet  := offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(2)
    pd
  }
}
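
// Predecode results are stored compressed: one br bit and one rvc bit per slot,
// plus at most one jump per fetch block, encoded as jmpOffset with
// (isJalr, isCall, isRet) flags (the first valid jump wins the priority mux).
// toPd re-expands this into a full PreDecodeInfo for one offset at commit time.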



class Ftq_Redirect_SRAMEntry(implicit p: Parameters) extends XSBundle with HasBPUConst {
  val rasSp = UInt(log2Ceil(RasSize).W)
  val rasEntry = new RASEntry
  val specCnt = Vec(numBr, UInt(10.W))
  // val ghist = new ShiftingGlobalHistory
  val folded_hist = new AllFoldedHistories(foldedGHistInfos)
  val histPtr = new CGHPtr
  val phist = UInt(PathHistoryLength.W)
  val phNewBit = UInt(1.W)

  def fromBranchPrediction(resp: BranchPredictionBundle) = {
    this.rasSp := resp.rasSp
    this.rasEntry := resp.rasTop
    this.specCnt := resp.specCnt
    // this.ghist := resp.ghist
    this.folded_hist := resp.folded_hist
    this.histPtr := resp.histPtr
    this.phist := resp.phist
    this.phNewBit := resp.pc(instOffsetBits)
    this
  }
}

class Ftq_1R_SRAMEntry(implicit p: Parameters) extends XSBundle with HasBPUConst {
  val meta = UInt(MaxMetaLength.W)
}

class Ftq_Pred_Info(implicit p: Parameters) extends XSBundle {
  val target = UInt(VAddrBits.W)
  val cfiIndex = ValidUndirectioned(UInt(log2Ceil(PredictWidth).W))
}

// class FtqEntry(implicit p: Parameters) extends XSBundle with HasBPUConst {
//   val startAddr = UInt(VAddrBits.W)
//   val fallThruAddr = UInt(VAddrBits.W)
//   val isNextMask = Vec(PredictWidth, Bool())

//   val meta = UInt(MaxMetaLength.W)

//   val rasSp = UInt(log2Ceil(RasSize).W)
//   val rasEntry = new RASEntry
//   val hist = new ShiftingGlobalHistory
//   val specCnt = Vec(numBr, UInt(10.W))

//   val valids = Vec(PredictWidth, Bool())
//   val brMask = Vec(PredictWidth, Bool())
//   // isJalr, isCall, isRet
//   val jmpInfo = ValidUndirectioned(Vec(3, Bool()))
//   val jmpOffset = UInt(log2Ceil(PredictWidth).W)

//   val mispredVec = Vec(PredictWidth, Bool())
//   val cfiIndex = ValidUndirectioned(UInt(log2Ceil(PredictWidth).W))
//   val target = UInt(VAddrBits.W)
// }

class FtqRead[T <: Data](private val gen: T)(implicit p: Parameters) extends XSBundle {
  val ptr = Output(new FtqPtr)
  val offset = Output(UInt(log2Ceil(PredictWidth).W))
  val data = Input(gen)
  def apply(ptr: FtqPtr, offset: UInt) = {
    this.ptr := ptr
    this.offset := offset
    this.data
  }
  override def cloneType = (new FtqRead(gen)).asInstanceOf[this.type]
}
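
// FtqRead models a read port from the consumer's side: the consumer drives
// (ptr, offset) and samples data. The backing memories in Ftq are synchronous-read,
// so the returned data corresponds to the address driven one cycle earlier.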


class FtqToBpuIO(implicit p: Parameters) extends XSBundle {
  val redirect = Valid(new BranchPredictionRedirect)
  val update = Valid(new BranchPredictionUpdate)
  val enq_ptr = Output(new FtqPtr)
}

class FtqToIfuIO(implicit p: Parameters) extends XSBundle with HasCircularQueuePtrHelper {
  val req = Decoupled(new FetchRequestBundle)
  val redirect = Valid(new Redirect)
  val flushFromBpu = new Bundle {
    // when the ifu pipeline is not stalled,
    // a packet from bpu s3 can have reached f1 at most
    val s2 = Valid(new FtqPtr)
    val s3 = Valid(new FtqPtr)
    def shouldFlushBy(src: Valid[FtqPtr], idx_to_flush: FtqPtr) = {
      src.valid && !isAfter(src.bits, idx_to_flush)
    }
    def shouldFlushByStage2(idx: FtqPtr) = shouldFlushBy(s2, idx)
    def shouldFlushByStage3(idx: FtqPtr) = shouldFlushBy(s3, idx)
  }
}

trait HasBackendRedirectInfo extends HasXSParameter {
  def numRedirect = exuParameters.JmpCnt + exuParameters.AluCnt + 1
  def isLoadReplay(r: Valid[Redirect]) = r.bits.flushItself()
}

class FtqToCtrlIO(implicit p: Parameters) extends XSBundle with HasBackendRedirectInfo {
  val pc_reads = Vec(1 + numRedirect + 1 + 1, Flipped(new FtqRead(UInt(VAddrBits.W))))
  val target_read = Flipped(new FtqRead(UInt(VAddrBits.W)))
  def getJumpPcRead = pc_reads.head
  def getRedirectPcRead = VecInit(pc_reads.tail.dropRight(2))
  def getMemPredPcRead = pc_reads.init.last
  def getRobFlushPcRead = pc_reads.last
}
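
// pc_reads port layout, as carved up by the getters above: head serves jump-PC
// reads, the next numRedirect ports serve backend redirects, the second-to-last
// serves memory dependence prediction, and the last serves rob flush.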


class FTBEntryGen(implicit p: Parameters) extends XSModule with HasBackendRedirectInfo with HasBPUParameter {
  val io = IO(new Bundle {
    val start_addr = Input(UInt(VAddrBits.W))
    val old_entry = Input(new FTBEntry)
    val pd = Input(new Ftq_pd_Entry)
    val cfiIndex = Flipped(Valid(UInt(log2Ceil(PredictWidth).W)))
    val target = Input(UInt(VAddrBits.W))
    val hit = Input(Bool())
    val mispredict_vec = Input(Vec(PredictWidth, Bool()))

    val new_entry = Output(new FTBEntry)
    val new_br_insert_pos = Output(Vec(numBr, Bool()))
    val taken_mask = Output(Vec(numBr, Bool()))
    val mispred_mask = Output(Vec(numBr+1, Bool()))

    // for perf counters
    val is_init_entry = Output(Bool())
    val is_old_entry = Output(Bool())
    val is_new_br = Output(Bool())
    val is_jalr_target_modified = Output(Bool())
    val is_always_taken_modified = Output(Bool())
    val is_br_full = Output(Bool())
  })
  // hit implies no misprediction was detected at predecode
  val hit = io.hit
  val pd = io.pd

  val init_entry = WireInit(0.U.asTypeOf(new FTBEntry))


  val cfi_is_br = pd.brMask(io.cfiIndex.bits) && io.cfiIndex.valid
  val entry_has_jmp = pd.jmpInfo.valid
  val new_jmp_is_jal  = entry_has_jmp && !pd.jmpInfo.bits(0) && io.cfiIndex.valid
  val new_jmp_is_jalr = entry_has_jmp &&  pd.jmpInfo.bits(0) && io.cfiIndex.valid
  val new_jmp_is_call = entry_has_jmp &&  pd.jmpInfo.bits(1) && io.cfiIndex.valid
  val new_jmp_is_ret  = entry_has_jmp &&  pd.jmpInfo.bits(2) && io.cfiIndex.valid
  val last_jmp_rvi = entry_has_jmp && pd.jmpOffset === (PredictWidth-1).U && !pd.rvcMask.last
  val last_br_rvi = cfi_is_br && io.cfiIndex.bits === (PredictWidth-1).U && !pd.rvcMask.last

  val cfi_is_jal = io.cfiIndex.bits === pd.jmpOffset && new_jmp_is_jal
  val cfi_is_jalr = io.cfiIndex.bits === pd.jmpOffset && new_jmp_is_jalr

  def carryPos = log2Ceil(PredictWidth)+instOffsetBits+1
  def getLower(pc: UInt) = pc(carryPos-1, instOffsetBits)
  // if not hit, establish a new entry
  init_entry.valid := true.B
  // tag is left for ftb to assign

  // case br
  val init_br_slot = init_entry.getSlotForBr(0)
  when (cfi_is_br) {
    init_br_slot.valid := true.B
    init_br_slot.offset := io.cfiIndex.bits
    init_br_slot.setLowerStatByTarget(io.start_addr, io.target, shareTailSlot && numBr == 1)
    init_entry.always_taken(0) := true.B // set to always taken on init
  }
  // init_entry.isBrSharing := shareTailSlot.B && (numBr == 1).B && cfi_is_br

  // case jmp
  when (entry_has_jmp) {
    init_entry.tailSlot.offset := pd.jmpOffset
    init_entry.tailSlot.valid := new_jmp_is_jal || new_jmp_is_jalr
    init_entry.tailSlot.setLowerStatByTarget(io.start_addr, Mux(cfi_is_jalr, io.target, pd.jalTarget), isShare=false)
  }

  val jmpPft = getLower(io.start_addr) +& pd.jmpOffset +& Mux(pd.rvcMask(pd.jmpOffset), 1.U, 2.U)
  init_entry.pftAddr := Mux(entry_has_jmp, jmpPft, getLower(io.start_addr) + ((FetchWidth*4)>>instOffsetBits).U + Mux(last_br_rvi, 1.U, 0.U))
  init_entry.carry   := Mux(entry_has_jmp, jmpPft(carryPos-instOffsetBits), io.start_addr(carryPos-1) || (io.start_addr(carryPos-2, instOffsetBits).andR && last_br_rvi))
  init_entry.isJalr := new_jmp_is_jalr
  init_entry.isCall := new_jmp_is_call
  init_entry.isRet  := new_jmp_is_ret
  init_entry.last_is_rvc := Mux(entry_has_jmp, pd.rvcMask(pd.jmpOffset), pd.rvcMask.last)

  init_entry.oversize := last_br_rvi || last_jmp_rvi

  // if hit, check whether a new cfi(only br is possible) is detected
  val oe = io.old_entry
  val br_recorded_vec = oe.getBrRecordedVec(io.cfiIndex.bits)
  val br_recorded = br_recorded_vec.asUInt.orR
  val is_new_br = cfi_is_br && !br_recorded
  val new_br_offset = io.cfiIndex.bits
  // vec(i) means new br will be inserted BEFORE old br(i)
  val allBrSlotsVec = oe.allSlotsForBr
  val new_br_insert_onehot = VecInit((0 until numBr).map{
    i => i match {
      case 0 =>
        !allBrSlotsVec(0).valid || new_br_offset < allBrSlotsVec(0).offset
      case idx =>
        allBrSlotsVec(idx-1).valid && new_br_offset > allBrSlotsVec(idx-1).offset &&
        (!allBrSlotsVec(idx).valid || new_br_offset < allBrSlotsVec(idx).offset)
    }
  })

  val old_entry_modified = WireInit(io.old_entry)
  for (i <- 0 until numBr) {
    val slot = old_entry_modified.allSlotsForBr(i)
    when (new_br_insert_onehot(i)) {
      slot.valid := true.B
      slot.offset := new_br_offset
      slot.setLowerStatByTarget(io.start_addr, io.target, shareTailSlot && i == numBr-1)
      old_entry_modified.always_taken(i) := true.B
    }.elsewhen (new_br_offset > oe.allSlotsForBr(i).offset) {
      old_entry_modified.always_taken(i) := false.B
      // all other fields remain unchanged
    }.otherwise {
      // case i == 0, remain unchanged
      if (i != 0) {
        val noNeedToMoveFromFormerSlot = (shareTailSlot && i == numBr-1).B && !oe.brSlots.last.valid
        when (!noNeedToMoveFromFormerSlot) {
          slot.fromAnotherSlot(oe.allSlotsForBr(i-1))
          old_entry_modified.always_taken(i) := oe.always_taken(i)
        }
      }
    }
  }

  // two circumstances:
  // 1. oe: | br | j  |, new br should be in front of j, thus addr of j should be new pft
  // 2. oe: | br | br |, new br could be anywhere between, thus new pft is the addr of either
  //        the previous last br or the new br
  val may_have_to_replace = oe.noEmptySlotForNewBr
  val pft_need_to_change = is_new_br && may_have_to_replace
  // it should either be the given last br or the new br
  when (pft_need_to_change) {
    val new_pft_offset =
      Mux(!new_br_insert_onehot.asUInt.orR,
        new_br_offset, oe.allSlotsForBr.last.offset)

    // set jmp to invalid
    if (!shareTailSlot) {
      old_entry_modified.tailSlot.valid := false.B
    }
    old_entry_modified.pftAddr := getLower(io.start_addr) + new_pft_offset
    old_entry_modified.last_is_rvc := pd.rvcMask(new_pft_offset - 1.U) // TODO: fix this
    old_entry_modified.carry := (getLower(io.start_addr) +& new_pft_offset).head(1).asBool
    old_entry_modified.oversize := false.B
    old_entry_modified.isCall := false.B
    old_entry_modified.isRet := false.B
    old_entry_modified.isJalr := false.B
  }

  val old_entry_jmp_target_modified = WireInit(oe)
  val old_target = oe.tailSlot.getTarget(io.start_addr) // may be wrong because we store only 20 lowest bits
  val old_tail_is_jmp = !oe.tailSlot.sharing || !shareTailSlot.B
  val jalr_target_modified = cfi_is_jalr && (old_target =/= io.target) && old_tail_is_jmp // TODO: pass full jalr target
  when (jalr_target_modified) {
    old_entry_jmp_target_modified.setByJmpTarget(io.start_addr, io.target)
    old_entry_jmp_target_modified.always_taken := 0.U.asTypeOf(Vec(numBr, Bool()))
  }

  val old_entry_always_taken = WireInit(oe)
  val always_taken_modified_vec = Wire(Vec(numBr, Bool())) // whether modified or not
  for (i <- 0 until numBr) {
    old_entry_always_taken.always_taken(i) :=
      oe.always_taken(i) && io.cfiIndex.valid && oe.brValids(i) && io.cfiIndex.bits === oe.brOffset(i)
    always_taken_modified_vec(i) := oe.always_taken(i) && !old_entry_always_taken.always_taken(i)
  }
  val always_taken_modified = always_taken_modified_vec.reduce(_||_)



  val derived_from_old_entry =
    Mux(is_new_br, old_entry_modified,
      Mux(jalr_target_modified, old_entry_jmp_target_modified, old_entry_always_taken))


  io.new_entry := Mux(!hit, init_entry, derived_from_old_entry)

  io.new_br_insert_pos := new_br_insert_onehot
  io.taken_mask := VecInit((io.new_entry.brOffset zip io.new_entry.brValids).map{
    case (off, v) => io.cfiIndex.bits === off && io.cfiIndex.valid && v
  })
  for (i <- 0 until numBr) {
    io.mispred_mask(i) := io.new_entry.brValids(i) && io.mispredict_vec(io.new_entry.brOffset(i))
  }
  io.mispred_mask.last := io.new_entry.jmpValid && io.mispredict_vec(pd.jmpOffset)

  // for perf counters
  io.is_init_entry := !hit
  io.is_old_entry := hit && !is_new_br && !jalr_target_modified && !always_taken_modified
  io.is_new_br := hit && is_new_br
  io.is_jalr_target_modified := hit && jalr_target_modified
  io.is_always_taken_modified := hit && always_taken_modified
  io.is_br_full := hit && is_new_br && may_have_to_replace
}
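
// In summary, FTBEntryGen produces: a fresh init_entry from predecode facts when
// the old prediction missed; otherwise one of three derived entries, in priority
// order: the old entry with a newly found branch inserted (possibly evicting the
// last slot and shrinking pftAddr), the old entry with a corrected jalr target,
// or the old entry with its always_taken bits cleared by a not-taken commit.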

class Ftq(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelper
  with HasBackendRedirectInfo with BPUUtils with HasBPUConst with HasPerfEvents {
  val io = IO(new Bundle {
    val fromBpu = Flipped(new BpuToFtqIO)
    val fromIfu = Flipped(new IfuToFtqIO)
    val fromBackend = Flipped(new CtrlToFtqIO)

    val toBpu = new FtqToBpuIO
    val toIfu = new FtqToIfuIO
    val toBackend = new FtqToCtrlIO

    val bpuInfo = new Bundle {
      val bpRight = Output(UInt(XLEN.W))
      val bpWrong = Output(UInt(XLEN.W))
    }
  })
  io.bpuInfo := DontCare

  val backendRedirect = io.fromBackend.redirect
  val backendRedirectReg = RegNext(io.fromBackend.redirect)

  val stage2Flush = backendRedirect.valid
  val backendFlush = stage2Flush || RegNext(stage2Flush)
  val ifuFlush = Wire(Bool())

  val flush = stage2Flush || RegNext(stage2Flush)

  val allowBpuIn, allowToIfu = WireInit(false.B)
  val flushToIfu = !allowToIfu
  allowBpuIn := !ifuFlush && !backendRedirect.valid && !backendRedirectReg.valid
  allowToIfu := !ifuFlush && !backendRedirect.valid && !backendRedirectReg.valid

  val bpuPtr, ifuPtr, ifuWbPtr, commPtr = RegInit(FtqPtr(false.B, 0.U))
  val validEntries = distanceBetween(bpuPtr, commPtr)

  // **********************************************************************
  // **************************** enq from bpu ****************************
  // **********************************************************************
  val new_entry_ready = validEntries < FtqSize.U
  io.fromBpu.resp.ready := new_entry_ready

  val bpu_s2_resp = io.fromBpu.resp.bits.s2
  val bpu_s3_resp = io.fromBpu.resp.bits.s3
  val bpu_s2_redirect = bpu_s2_resp.valid && bpu_s2_resp.hasRedirect
  val bpu_s3_redirect = bpu_s3_resp.valid && bpu_s3_resp.hasRedirect

  io.toBpu.enq_ptr := bpuPtr
  val enq_fire = io.fromBpu.resp.fire() && allowBpuIn // from bpu s1
  val bpu_in_fire = (io.fromBpu.resp.fire() || bpu_s2_redirect || bpu_s3_redirect) && allowBpuIn

  val bpu_in_resp = WireInit(io.fromBpu.resp.bits.selectedResp)
  val bpu_in_stage = WireInit(io.fromBpu.resp.bits.selectedRespIdx)
  val bpu_in_resp_ptr = Mux(bpu_in_stage === BP_S1, bpuPtr, bpu_in_resp.ftq_idx)
  val bpu_in_resp_idx = bpu_in_resp_ptr.value

  // read ports:                            jumpPc + redirects + loadPred + robFlush + ifuReq1 + ifuReq2 + commitUpdate
  val ftq_pc_mem = Module(new SyncDataModuleTemplate(new Ftq_RF_Components, FtqSize, 1+numRedirect+2+1+1+1, 1))
  // enqueue write, from the selected bpu response
  ftq_pc_mem.io.wen(0) := bpu_in_fire
  ftq_pc_mem.io.waddr(0) := bpu_in_resp_idx
  ftq_pc_mem.io.wdata(0).fromBranchPrediction(bpu_in_resp)

  //                                                            ifuRedirect + backendRedirect + commit
  val ftq_redirect_sram = Module(new FtqNRSRAM(new Ftq_Redirect_SRAMEntry, 1+1+1))
  // this info is intended to be enqueued at the last stage of bpu
  ftq_redirect_sram.io.wen := io.fromBpu.resp.bits.lastStage.valid
  ftq_redirect_sram.io.waddr := io.fromBpu.resp.bits.lastStage.ftq_idx.value
  ftq_redirect_sram.io.wdata.fromBranchPrediction(io.fromBpu.resp.bits.lastStage)

  val ftq_meta_1r_sram = Module(new FtqNRSRAM(new Ftq_1R_SRAMEntry, 1))
  // this info is intended to be enqueued at the last stage of bpu
  ftq_meta_1r_sram.io.wen := io.fromBpu.resp.bits.lastStage.valid
  ftq_meta_1r_sram.io.waddr := io.fromBpu.resp.bits.lastStage.ftq_idx.value
  ftq_meta_1r_sram.io.wdata.meta := io.fromBpu.resp.bits.meta
  //                                                            ifuRedirect + backendRedirect + commit
  val ftb_entry_mem = Module(new SyncDataModuleTemplate(new FTBEntry, FtqSize, 1+1+1, 1))
  ftb_entry_mem.io.wen(0) := io.fromBpu.resp.bits.lastStage.valid
  ftb_entry_mem.io.waddr(0) := io.fromBpu.resp.bits.lastStage.ftq_idx.value
  ftb_entry_mem.io.wdata(0) := io.fromBpu.resp.bits.lastStage.ftb_entry
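
  // Per-entry state is split across memories by producer: ftq_pc_mem is written
  // whenever any bpu stage's selected response fires, while the redirect sram,
  // meta sram and ftb_entry_mem are written only by the last bpu stage, whose
  // ftq_idx points back at the entry allocated at s1.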


  // multi-write
  val update_target = Reg(Vec(FtqSize, UInt(VAddrBits.W)))
  val cfiIndex_vec = Reg(Vec(FtqSize, ValidUndirectioned(UInt(log2Ceil(PredictWidth).W))))
  val mispredict_vec = Reg(Vec(FtqSize, Vec(PredictWidth, Bool())))
  val pred_stage = Reg(Vec(FtqSize, UInt(2.W)))

  val c_invalid :: c_valid :: c_commited :: Nil = Enum(3)
  val commitStateQueue = RegInit(VecInit(Seq.fill(FtqSize) {
    VecInit(Seq.fill(PredictWidth)(c_invalid))
  }))

  val f_to_send :: f_sent :: Nil = Enum(2)
  val entry_fetch_status = RegInit(VecInit(Seq.fill(FtqSize)(f_sent)))

  val h_not_hit :: h_false_hit :: h_hit :: Nil = Enum(3)
  val entry_hit_status = RegInit(VecInit(Seq.fill(FtqSize)(h_not_hit)))


  when (bpu_in_fire) {
    entry_fetch_status(bpu_in_resp_idx) := f_to_send
    commitStateQueue(bpu_in_resp_idx) := VecInit(Seq.fill(PredictWidth)(c_invalid))
    cfiIndex_vec(bpu_in_resp_idx) := bpu_in_resp.genCfiIndex
    mispredict_vec(bpu_in_resp_idx) := WireInit(VecInit(Seq.fill(PredictWidth)(false.B)))
    update_target(bpu_in_resp_idx) := bpu_in_resp.target
    pred_stage(bpu_in_resp_idx) := bpu_in_stage
  }

  bpuPtr := bpuPtr + enq_fire
  ifuPtr := ifuPtr + io.toIfu.req.fire

  // only use ftb result to assign hit status
  when (bpu_s2_resp.valid) {
    entry_hit_status(bpu_s2_resp.ftq_idx.value) := Mux(bpu_s2_resp.preds.hit, h_hit, h_not_hit)
  }


  io.toIfu.flushFromBpu.s2.valid := bpu_s2_redirect
  io.toIfu.flushFromBpu.s2.bits := bpu_s2_resp.ftq_idx
  when (bpu_s2_resp.valid && bpu_s2_resp.hasRedirect) {
    bpuPtr := bpu_s2_resp.ftq_idx + 1.U
    // only when ifuPtr runs ahead of bpu s2 resp should we recover it
    when (!isBefore(ifuPtr, bpu_s2_resp.ftq_idx)) {
      ifuPtr := bpu_s2_resp.ftq_idx
    }
  }

  io.toIfu.flushFromBpu.s3.valid := bpu_s3_redirect
  io.toIfu.flushFromBpu.s3.bits := bpu_s3_resp.ftq_idx
  when (bpu_s3_resp.valid && bpu_s3_resp.hasRedirect) {
    bpuPtr := bpu_s3_resp.ftq_idx + 1.U
    // only when ifuPtr runs ahead of bpu s3 resp should we recover it
    when (!isBefore(ifuPtr, bpu_s3_resp.ftq_idx)) {
      ifuPtr := bpu_s3_resp.ftq_idx
    }
    XSError(true.B, "\ns3_redirect mechanism not implemented!\n")
  }
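
  // On a bpu s2/s3 redirect, bpuPtr rolls back to just past the redirecting entry;
  // ifuPtr is pulled back only if it has already reached or passed that entry,
  // since fetches for earlier entries are still on the correct path.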

  XSError(isBefore(bpuPtr, ifuPtr) && !isFull(bpuPtr, ifuPtr), "\nifuPtr is before bpuPtr!\n")

  // ****************************************************************
  // **************************** to ifu ****************************
  // ****************************************************************
  val bpu_in_bypass_buf = RegEnable(ftq_pc_mem.io.wdata(0), enable=bpu_in_fire)
  val bpu_in_bypass_ptr = RegNext(bpu_in_resp_ptr)
  val last_cycle_bpu_in = RegNext(bpu_in_fire)
  val last_cycle_to_ifu_fire = RegNext(io.toIfu.req.fire)

  // read pc and target
  ftq_pc_mem.io.raddr.init.init.last := ifuPtr.value
  ftq_pc_mem.io.raddr.init.last := (ifuPtr+1.U).value

  io.toIfu.req.valid := allowToIfu && entry_fetch_status(ifuPtr.value) === f_to_send && ifuPtr =/= bpuPtr
  io.toIfu.req.bits.ftqIdx := ifuPtr
  io.toIfu.req.bits.target := update_target(ifuPtr.value)
  io.toIfu.req.bits.ftqOffset := cfiIndex_vec(ifuPtr.value)

  when (last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr) {
    io.toIfu.req.bits.fromFtqPcBundle(bpu_in_bypass_buf)
  }.elsewhen (last_cycle_to_ifu_fire) {
    io.toIfu.req.bits.fromFtqPcBundle(ftq_pc_mem.io.rdata.init.last)
  }.otherwise {
    io.toIfu.req.bits.fromFtqPcBundle(ftq_pc_mem.io.rdata.init.init.last)
  }

  // when fall through is smaller in value than start address, there must be a false hit
  when (io.toIfu.req.bits.fallThruError && entry_hit_status(ifuPtr.value) === h_hit) {
    when (io.toIfu.req.fire &&
      !(bpu_s2_redirect && bpu_s2_resp.ftq_idx === ifuPtr) &&
      !(bpu_s3_redirect && bpu_s3_resp.ftq_idx === ifuPtr)
    ) {
      entry_hit_status(ifuPtr.value) := h_false_hit
      XSDebug(true.B, "FTB false hit by fallThroughError, startAddr: %x, fallThru: %x\n", io.toIfu.req.bits.startAddr, io.toIfu.req.bits.fallThruAddr)
    }
    XSDebug(true.B, "fallThruError! start:%x, fallThru:%x\n", io.toIfu.req.bits.startAddr, io.toIfu.req.bits.fallThruAddr)
  }

  val ifu_req_should_be_flushed =
    io.toIfu.flushFromBpu.shouldFlushByStage2(io.toIfu.req.bits.ftqIdx) ||
    io.toIfu.flushFromBpu.shouldFlushByStage3(io.toIfu.req.bits.ftqIdx)

  when (io.toIfu.req.fire && !ifu_req_should_be_flushed) {
    entry_fetch_status(ifuPtr.value) := f_sent
  }
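
  // The pc bundle of the to-IFU request is muxed from three sources: the bypass
  // buffer when the entry was enqueued only last cycle (pc mem read data is not
  // ready yet), the ifuPtr+1 read port right after a fire (ifuPtr has advanced),
  // and the ifuPtr read port otherwise; both ports are read speculatively above.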


  // *********************************************************************
  // **************************** wb from ifu ****************************
  // *********************************************************************
  val pdWb = io.fromIfu.pdWb
  val pds = pdWb.bits.pd
  val ifu_wb_valid = pdWb.valid
  val ifu_wb_idx = pdWb.bits.ftqIdx.value
  // read ports:                                                         commit update
  val ftq_pd_mem = Module(new SyncDataModuleTemplate(new Ftq_pd_Entry, FtqSize, 1, 1))
  ftq_pd_mem.io.wen(0) := ifu_wb_valid
  ftq_pd_mem.io.waddr(0) := pdWb.bits.ftqIdx.value
  ftq_pd_mem.io.wdata(0).fromPdWb(pdWb.bits)

  val hit_pd_valid = entry_hit_status(ifu_wb_idx) === h_hit && ifu_wb_valid
  val hit_pd_mispred = hit_pd_valid && pdWb.bits.misOffset.valid
  val hit_pd_mispred_reg = RegNext(hit_pd_mispred, init=false.B)
  val pd_reg       = RegEnable(pds,             enable = pdWb.valid)
  val start_pc_reg = RegEnable(pdWb.bits.pc(0), enable = pdWb.valid)
  val wb_idx_reg   = RegEnable(ifu_wb_idx,      enable = pdWb.valid)

  when (ifu_wb_valid) {
    val comm_stq_wen = VecInit(pds.map(_.valid).zip(pdWb.bits.instrRange).map{
      case (v, inRange) => v && inRange
    })
    (commitStateQueue(ifu_wb_idx) zip comm_stq_wen).map{
      case (qe, v) => when (v) { qe := c_valid }
    }
  }

  ifuWbPtr := ifuWbPtr + ifu_wb_valid

  ftb_entry_mem.io.raddr.head := ifu_wb_idx
  val has_false_hit = WireInit(false.B)
  when (RegNext(hit_pd_valid)) {
    // check for false hit
    val pred_ftb_entry = ftb_entry_mem.io.rdata.head
    val brSlots = pred_ftb_entry.brSlots
    val tailSlot = pred_ftb_entry.tailSlot
    // we check cfis that bpu predicted

    // bpu predicted branches but denied by predecode
    val br_false_hit =
      brSlots.map{
        s => s.valid && !(pd_reg(s.offset).valid && pd_reg(s.offset).isBr)
      }.reduce(_||_) ||
      (shareTailSlot.B && tailSlot.valid && pred_ftb_entry.tailSlot.sharing &&
        !(pd_reg(tailSlot.offset).valid && pd_reg(tailSlot.offset).isBr))

    val jmpOffset = tailSlot.offset
    val jmp_pd = pd_reg(jmpOffset)
    val jal_false_hit = pred_ftb_entry.jmpValid &&
      ((pred_ftb_entry.isJal  && !(jmp_pd.valid && jmp_pd.isJal)) ||
       (pred_ftb_entry.isJalr && !(jmp_pd.valid && jmp_pd.isJalr)) ||
       (pred_ftb_entry.isCall && !(jmp_pd.valid && jmp_pd.isCall)) ||
       (pred_ftb_entry.isRet  && !(jmp_pd.valid && jmp_pd.isRet))
      )

    has_false_hit := br_false_hit || jal_false_hit || hit_pd_mispred_reg
    XSDebug(has_false_hit, "FTB false hit by br or jal or hit_pd, startAddr: %x\n", pdWb.bits.pc(0))

    // assert(!has_false_hit)
  }

  when (has_false_hit) {
    entry_hit_status(wb_idx_reg) := h_false_hit
  }
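
  // A "false hit" is an FTB hit whose content disagrees with predecode: a recorded
  // branch slot that predecode does not decode as a branch, a jump whose kind
  // (jal/jalr/call/ret) mismatches, or a misprediction reported despite the hit.
  // Demoting such entries to h_false_hit lets the commit-time update rebuild them.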


  // **********************************************************************
  // **************************** backend read ****************************
  // **********************************************************************

  // pc reads
  for ((req, i) <- io.toBackend.pc_reads.zipWithIndex) {
    ftq_pc_mem.io.raddr(i) := req.ptr.value
    req.data := ftq_pc_mem.io.rdata(i).getPc(RegNext(req.offset))
  }
  // target read
  io.toBackend.target_read.data := RegNext(update_target(io.toBackend.target_read.ptr.value))

  // *******************************************************************************
  // **************************** redirect from backend ****************************
  // *******************************************************************************

  // redirect read cfiInfo, couples to redirectGen s2
  ftq_redirect_sram.io.ren.init.last := io.fromBackend.redirect.valid
  ftq_redirect_sram.io.raddr.init.last := io.fromBackend.redirect.bits.ftqIdx.value

  ftb_entry_mem.io.raddr.init.last := io.fromBackend.redirect.bits.ftqIdx.value

  val stage3CfiInfo = ftq_redirect_sram.io.rdata.init.last
  val fromBackendRedirect = WireInit(backendRedirectReg)
  val backendRedirectCfi = fromBackendRedirect.bits.cfiUpdate
  backendRedirectCfi.fromFtqRedirectSram(stage3CfiInfo)

  val r_ftb_entry = ftb_entry_mem.io.rdata.init.last
  val r_ftqOffset = fromBackendRedirect.bits.ftqOffset

  when (entry_hit_status(fromBackendRedirect.bits.ftqIdx.value) === h_hit) {
    backendRedirectCfi.shift := PopCount(r_ftb_entry.getBrMaskByOffset(r_ftqOffset)) +&
      (backendRedirectCfi.pd.isBr && !r_ftb_entry.brIsSaved(r_ftqOffset) &&
      !r_ftb_entry.newBrCanNotInsert(r_ftqOffset))

    backendRedirectCfi.addIntoHist := backendRedirectCfi.pd.isBr && (r_ftb_entry.brIsSaved(r_ftqOffset) ||
        !r_ftb_entry.newBrCanNotInsert(r_ftqOffset))
  }.otherwise {
    backendRedirectCfi.shift := (backendRedirectCfi.pd.isBr && backendRedirectCfi.taken).asUInt
    backendRedirectCfi.addIntoHist := backendRedirectCfi.pd.isBr.asUInt
  }
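
  // shift/addIntoHist tell the bpu how much speculative branch history to repair
  // at the redirect point: on an ftb hit, the branches recorded before the
  // redirected offset, plus the redirected branch itself if it is saved in (or
  // could still be inserted into) the entry; on a miss, a single bit for a taken
  // branch. This presumably mirrors what the bpu pushed into the folded history.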


  // ***************************************************************************
  // **************************** redirect from ifu ****************************
  // ***************************************************************************
  val fromIfuRedirect = WireInit(0.U.asTypeOf(Valid(new Redirect)))
  fromIfuRedirect.valid := pdWb.valid && pdWb.bits.misOffset.valid && !backendFlush
  fromIfuRedirect.bits.ftqIdx := pdWb.bits.ftqIdx
  fromIfuRedirect.bits.ftqOffset := pdWb.bits.misOffset.bits
  fromIfuRedirect.bits.level := RedirectLevel.flushAfter

  val ifuRedirectCfiUpdate = fromIfuRedirect.bits.cfiUpdate
  ifuRedirectCfiUpdate.pc := pdWb.bits.pc(pdWb.bits.misOffset.bits)
  ifuRedirectCfiUpdate.pd := pdWb.bits.pd(pdWb.bits.misOffset.bits)
  ifuRedirectCfiUpdate.predTaken := cfiIndex_vec(pdWb.bits.ftqIdx.value).valid
  ifuRedirectCfiUpdate.target := pdWb.bits.target
  ifuRedirectCfiUpdate.taken := pdWb.bits.cfiOffset.valid
  ifuRedirectCfiUpdate.isMisPred := pdWb.bits.misOffset.valid

  val ifuRedirectReg = RegNext(fromIfuRedirect, init=0.U.asTypeOf(Valid(new Redirect)))
  val ifuRedirectToBpu = WireInit(ifuRedirectReg)
  ifuFlush := fromIfuRedirect.valid || ifuRedirectToBpu.valid

  ftq_redirect_sram.io.ren.head := fromIfuRedirect.valid
  ftq_redirect_sram.io.raddr.head := fromIfuRedirect.bits.ftqIdx.value

  ftb_entry_mem.io.raddr.head := fromIfuRedirect.bits.ftqIdx.value

  val toBpuCfi = ifuRedirectToBpu.bits.cfiUpdate
  toBpuCfi.fromFtqRedirectSram(ftq_redirect_sram.io.rdata.head)
  when (ifuRedirectReg.bits.cfiUpdate.pd.isRet) {
    toBpuCfi.target := toBpuCfi.rasEntry.retAddr
  }

  // *********************************************************************
  // **************************** wb from exu ****************************
  // *********************************************************************

  def extractRedirectInfo(wb: Valid[Redirect]) = {
    val ftqIdx = wb.bits.ftqIdx.value
    val ftqOffset = wb.bits.ftqOffset
    val taken = wb.bits.cfiUpdate.taken
    val mispred = wb.bits.cfiUpdate.isMisPred
    (wb.valid, ftqIdx, ftqOffset, taken, mispred)
  }

  // fix mispredict entry
  val lastIsMispredict = RegNext(
    backendRedirect.valid && backendRedirect.bits.level === RedirectLevel.flushAfter, init = false.B
  )

  def updateCfiInfo(redirect: Valid[Redirect], isBackend: Boolean = true) = {
    val (r_valid, r_idx, r_offset, r_taken, r_mispred) = extractRedirectInfo(redirect)
    val cfiIndex_bits_wen = r_valid && r_taken && r_offset < cfiIndex_vec(r_idx).bits
    val cfiIndex_valid_wen = r_valid && r_offset === cfiIndex_vec(r_idx).bits
    when (cfiIndex_bits_wen || cfiIndex_valid_wen) {
      cfiIndex_vec(r_idx).valid := cfiIndex_bits_wen || cfiIndex_valid_wen && r_taken
    }
    when (cfiIndex_bits_wen) {
      cfiIndex_vec(r_idx).bits := r_offset
    }
    update_target(r_idx) := redirect.bits.cfiUpdate.target
    if (isBackend) {
      mispredict_vec(r_idx)(r_offset) := r_mispred
    }
  }

  when(backendRedirectReg.valid && lastIsMispredict) {
    updateCfiInfo(backendRedirectReg)
  }.elsewhen (ifuRedirectToBpu.valid) {
    updateCfiInfo(ifuRedirectToBpu, isBackend=false)
  }

  // ***********************************************************************************
  // **************************** flush ptr and state queue ****************************
  // ***********************************************************************************

  val redirectVec = VecInit(backendRedirect, fromIfuRedirect)

  // when redirect, we should reset ptrs and status queues
  when(redirectVec.map(r => r.valid).reduce(_||_)){
    val r = PriorityMux(redirectVec.map(r => (r.valid -> r.bits)))
    val notIfu = redirectVec.dropRight(1).map(r => r.valid).reduce(_||_)
    val (idx, offset, flushItSelf) = (r.ftqIdx, r.ftqOffset, RedirectLevel.flushItself(r.level))
    val next = idx + 1.U
    bpuPtr := next
    ifuPtr := next
    ifuWbPtr := next
    when (notIfu) {
      commitStateQueue(idx.value).zipWithIndex.foreach({ case (s, i) =>
        when(i.U > offset || i.U === offset && flushItSelf){
          s := c_invalid
        }
      })
    }
  }

  // only the valid bit is actually needed
  io.toIfu.redirect.bits    := backendRedirect.bits
  io.toIfu.redirect.valid   := stage2Flush

  // commit
  for (c <- io.fromBackend.rob_commits) {
    when(c.valid) {
      commitStateQueue(c.bits.ftqIdx.value)(c.bits.ftqOffset) := c_commited
      // TODO: remove this
      // For instruction fusions, we also update the next instruction
      when (c.bits.commitType === 4.U) {
        commitStateQueue(c.bits.ftqIdx.value)(c.bits.ftqOffset + 1.U) := c_commited
      }.elsewhen(c.bits.commitType === 5.U) {
        commitStateQueue(c.bits.ftqIdx.value)(c.bits.ftqOffset + 2.U) := c_commited
      }.elsewhen(c.bits.commitType === 6.U) {
        val index = (c.bits.ftqIdx + 1.U).value
        commitStateQueue(index)(0) := c_commited
      }.elsewhen(c.bits.commitType === 7.U) {
        val index = (c.bits.ftqIdx + 1.U).value
        commitStateQueue(index)(1) := c_commited
      }
    }
  }
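
  // The commitType values 4..7 above presumably encode where the second half of a
  // fused pair sits: the next slot, the slot after next, or slot 0/1 of the next
  // ftq entry. Marking that slot committed as well keeps the entry drainable,
  // since canCommit below requires every slot to be c_invalid or c_commited.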

  // ****************************************************************
  // **************************** to bpu ****************************
  // ****************************************************************

  io.toBpu.redirect <> Mux(fromBackendRedirect.valid, fromBackendRedirect, ifuRedirectToBpu)

  val may_have_stall_from_bpu = RegInit(false.B)
  val canCommit = commPtr =/= ifuWbPtr && !may_have_stall_from_bpu &&
    Cat(commitStateQueue(commPtr.value).map(s => {
      s === c_invalid || s === c_commited
    })).andR()

  // commit reads
  ftq_pc_mem.io.raddr.last := commPtr.value
  val commit_pc_bundle = ftq_pc_mem.io.rdata.last
  ftq_pd_mem.io.raddr.last := commPtr.value
  val commit_pd = ftq_pd_mem.io.rdata.last
  ftq_redirect_sram.io.ren.last := canCommit
  ftq_redirect_sram.io.raddr.last := commPtr.value
  val commit_spec_meta = ftq_redirect_sram.io.rdata.last
  ftq_meta_1r_sram.io.ren(0) := canCommit
  ftq_meta_1r_sram.io.raddr(0) := commPtr.value
  val commit_meta = ftq_meta_1r_sram.io.rdata(0)
  ftb_entry_mem.io.raddr.last := commPtr.value
  val commit_ftb_entry = ftb_entry_mem.io.rdata.last

  // need one cycle to read mem and srams
  val do_commit_ptr = RegNext(commPtr)
  val do_commit = RegNext(canCommit, init=false.B)
  when (canCommit) { commPtr := commPtr + 1.U }
  val commit_state = RegNext(commitStateQueue(commPtr.value))
  val can_commit_cfi = WireInit(cfiIndex_vec(commPtr.value))
  when (commitStateQueue(commPtr.value)(can_commit_cfi.bits) =/= c_commited) {
    can_commit_cfi.valid := false.B
  }
  val commit_cfi = RegNext(can_commit_cfi)

  val commit_mispredict = VecInit((RegNext(mispredict_vec(commPtr.value)) zip commit_state).map {
    case (mis, state) => mis && state === c_commited
  })
  val can_commit_hit = entry_hit_status(commPtr.value)
  val commit_hit = RegNext(can_commit_hit)
  val commit_target = RegNext(update_target(commPtr.value))
  val commit_valid = commit_hit === h_hit || commit_cfi.valid // hit or taken

  val to_bpu_hit = can_commit_hit === h_hit || can_commit_hit === h_false_hit
  may_have_stall_from_bpu := can_commit_cfi.valid && !to_bpu_hit && !may_have_stall_from_bpu

  io.toBpu.update := DontCare
  io.toBpu.update.valid := commit_valid && do_commit
  val update = io.toBpu.update.bits
  update.false_hit   := commit_hit === h_false_hit
  update.pc          := commit_pc_bundle.startAddr
  update.preds.hit   := commit_hit === h_hit || commit_hit === h_false_hit
  update.meta        := commit_meta.meta
  update.full_target := commit_target
  update.fromFtqRedirectSram(commit_spec_meta)

  val commit_real_hit = commit_hit === h_hit
  val update_ftb_entry = update.ftb_entry

  val ftbEntryGen = Module(new FTBEntryGen).io
  ftbEntryGen.start_addr     := commit_pc_bundle.startAddr
  ftbEntryGen.old_entry      := commit_ftb_entry
  ftbEntryGen.pd             := commit_pd
  ftbEntryGen.cfiIndex       := commit_cfi
  ftbEntryGen.target         := commit_target
  ftbEntryGen.hit            := commit_real_hit
  ftbEntryGen.mispredict_vec := commit_mispredict

  update_ftb_entry         := ftbEntryGen.new_entry
  update.new_br_insert_pos := ftbEntryGen.new_br_insert_pos
  update.mispred_mask      := ftbEntryGen.mispred_mask
  update.old_entry         := ftbEntryGen.is_old_entry
  update.preds.br_taken_mask  := ftbEntryGen.taken_mask
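
  // The bpu update is assembled over two cycles: read addresses are driven while
  // canCommit holds, the synchronous memories return data one cycle later at
  // do_commit, and FTBEntryGen combinationally derives the trained ftb entry and
  // the taken/mispredict masks from that data.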

  // ******************************************************************************
  // **************************** commit perf counters ****************************
  // ******************************************************************************

  val commit_inst_mask    = VecInit(commit_state.map(c => c === c_commited && do_commit)).asUInt
  val commit_mispred_mask = commit_mispredict.asUInt
  val commit_not_mispred_mask = ~commit_mispred_mask

  val commit_br_mask = commit_pd.brMask.asUInt
  val commit_jmp_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.jmpInfo.valid.asTypeOf(UInt(1.W)))
  val commit_cfi_mask = (commit_br_mask | commit_jmp_mask)

  val mbpInstrs = commit_inst_mask & commit_cfi_mask

  val mbpRights = mbpInstrs & commit_not_mispred_mask
  val mbpWrongs = mbpInstrs & commit_mispred_mask

  io.bpuInfo.bpRight := PopCount(mbpRights)
  io.bpuInfo.bpWrong := PopCount(mbpWrongs)

  // Cfi Info
  for (i <- 0 until PredictWidth) {
    val pc = commit_pc_bundle.startAddr + (i * instBytes).U
    val v = commit_state(i) === c_commited
    val isBr = commit_pd.brMask(i)
    val isJmp = commit_pd.jmpInfo.valid && commit_pd.jmpOffset === i.U
    val isCfi = isBr || isJmp
    val isTaken = commit_cfi.valid && commit_cfi.bits === i.U
    val misPred = commit_mispredict(i)
    // val ghist = commit_spec_meta.ghist.predHist
    val histPtr = commit_spec_meta.histPtr
    val predCycle = commit_meta.meta(63, 0)
    val target = commit_target

    val brIdx = OHToUInt(Reverse(Cat(update_ftb_entry.brValids.zip(update_ftb_entry.brOffset).map{case(v, offset) => v && offset === i.U})))
    val inFtbEntry = update_ftb_entry.brValids.zip(update_ftb_entry.brOffset).map{case(v, offset) => v && offset === i.U}.reduce(_||_)
    val addIntoHist = ((commit_hit === h_hit) && inFtbEntry) || ((!(commit_hit === h_hit) && i.U === commit_cfi.bits && isBr && commit_cfi.valid))
    XSDebug(v && do_commit && isCfi, p"cfi_update: isBr(${isBr}) pc(${Hexadecimal(pc)}) " +
    p"taken(${isTaken}) mispred(${misPred}) cycle($predCycle) hist(${histPtr.value}) " +
    p"startAddr(${Hexadecimal(commit_pc_bundle.startAddr)}) AddIntoHist(${addIntoHist}) " +
    p"brInEntry(${inFtbEntry}) brIdx(${brIdx}) target(${Hexadecimal(target)})\n")
  }

  val enq = io.fromBpu.resp
  val perf_redirect = io.fromBackend.redirect

  XSPerfAccumulate("entry", validEntries)
  XSPerfAccumulate("bpu_to_ftq_stall", enq.valid && !enq.ready)
  XSPerfAccumulate("mispredictRedirect", perf_redirect.valid && RedirectLevel.flushAfter === perf_redirect.bits.level)
  XSPerfAccumulate("replayRedirect", perf_redirect.valid && RedirectLevel.flushItself(perf_redirect.bits.level))
  XSPerfAccumulate("predecodeRedirect", fromIfuRedirect.valid)

  XSPerfAccumulate("to_ifu_bubble", io.toIfu.req.ready && !io.toIfu.req.valid)

  XSPerfAccumulate("to_ifu_stall", io.toIfu.req.valid && !io.toIfu.req.ready)
  XSPerfAccumulate("from_bpu_real_bubble", !enq.valid && enq.ready && allowBpuIn)
  XSPerfAccumulate("bpu_to_ftq_bubble", bpuPtr === ifuPtr)

  val from_bpu = io.fromBpu.resp.bits
  def in_entry_len_map_gen(resp: BranchPredictionBundle)(stage: String) = {
    val entry_len = (resp.ftb_entry.getFallThrough(resp.pc) - resp.pc) >> instOffsetBits
    val entry_len_recording_vec = (1 to PredictWidth+1).map(i => entry_len === i.U)
    val entry_len_map = (1 to PredictWidth+1).map(i =>
      f"${stage}_ftb_entry_len_$i" -> (entry_len_recording_vec(i-1) && resp.valid)
    ).foldLeft(Map[String, UInt]())(_+_)
    entry_len_map
  }
  val s1_entry_len_map = in_entry_len_map_gen(from_bpu.s1)("s1")
  val s2_entry_len_map = in_entry_len_map_gen(from_bpu.s2)("s2")
  val s3_entry_len_map = in_entry_len_map_gen(from_bpu.s3)("s3")

  val to_ifu = io.toIfu.req.bits
  val to_ifu_entry_len = (to_ifu.fallThruAddr - to_ifu.startAddr) >> instOffsetBits
  val to_ifu_entry_len_recording_vec = (1 to PredictWidth+1).map(i => to_ifu_entry_len === i.U)
  val to_ifu_entry_len_map = (1 to PredictWidth+1).map(i =>
    f"to_ifu_ftb_entry_len_$i" -> (to_ifu_entry_len_recording_vec(i-1) && io.toIfu.req.fire)
  ).foldLeft(Map[String, UInt]())(_+_)



  val commit_num_inst_recording_vec = (1 to PredictWidth).map(i => PopCount(commit_inst_mask) === i.U)
  val commit_num_inst_map = (1 to PredictWidth).map(i =>
    f"commit_num_inst_$i" -> (commit_num_inst_recording_vec(i-1) && do_commit)
  ).foldLeft(Map[String, UInt]())(_+_)



  val commit_jal_mask  = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasJal.asTypeOf(UInt(1.W)))
  val commit_jalr_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasJalr.asTypeOf(UInt(1.W)))
  val commit_call_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasCall.asTypeOf(UInt(1.W)))
  val commit_ret_mask  = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasRet.asTypeOf(UInt(1.W)))


  val mbpBRights = mbpRights & commit_br_mask
  val mbpJRights = mbpRights & commit_jal_mask
  val mbpIRights = mbpRights & commit_jalr_mask
  val mbpCRights = mbpRights & commit_call_mask
  val mbpRRights = mbpRights & commit_ret_mask

  val mbpBWrongs = mbpWrongs & commit_br_mask
  val mbpJWrongs = mbpWrongs & commit_jal_mask
  val mbpIWrongs = mbpWrongs & commit_jalr_mask
  val mbpCWrongs = mbpWrongs & commit_call_mask
  val mbpRWrongs = mbpWrongs & commit_ret_mask

  val commit_pred_stage = RegNext(pred_stage(commPtr.value))

  def pred_stage_map(src: UInt, name: String) = {
    (0 until numBpStages).map(i =>
      f"${name}_stage_${i+1}" -> PopCount(src.asBools.map(_ && commit_pred_stage === BP_STAGES(i)))
    ).foldLeft(Map[String, UInt]())(_+_)
  }

  val mispred_stage_map      = pred_stage_map(mbpWrongs,  "mispredict")
  val br_mispred_stage_map   = pred_stage_map(mbpBWrongs, "br_mispredict")
  val jalr_mispred_stage_map = pred_stage_map(mbpIWrongs, "jalr_mispredict")
  val correct_stage_map      = pred_stage_map(mbpRights,  "correct")
  val br_correct_stage_map   = pred_stage_map(mbpBRights, "br_correct")
  val jalr_correct_stage_map = pred_stage_map(mbpIRights, "jalr_correct")

  val update_valid = io.toBpu.update.valid
  def u(cond: Bool) = update_valid && cond
  val ftb_false_hit = u(update.false_hit)
  // assert(!ftb_false_hit)
  val ftb_hit = u(commit_hit === h_hit)

  val ftb_new_entry = u(ftbEntryGen.is_init_entry)
  val ftb_new_entry_only_br = ftb_new_entry && !update.ftb_entry.jmpValid
  val ftb_new_entry_only_jmp = ftb_new_entry && !update.ftb_entry.brValids(0)
  val ftb_new_entry_has_br_and_jmp = ftb_new_entry && update.ftb_entry.brValids(0) && update.ftb_entry.jmpValid

  val ftb_old_entry = u(ftbEntryGen.is_old_entry)

  val ftb_modified_entry = u(ftbEntryGen.is_new_br || ftbEntryGen.is_jalr_target_modified || ftbEntryGen.is_always_taken_modified)
  val ftb_modified_entry_new_br = u(ftbEntryGen.is_new_br)
  val ftb_modified_entry_jalr_target_modified = u(ftbEntryGen.is_jalr_target_modified)
  val ftb_modified_entry_br_full = ftb_modified_entry && ftbEntryGen.is_br_full
  val ftb_modified_entry_always_taken = ftb_modified_entry && ftbEntryGen.is_always_taken_modified

  val ftb_entry_len = (ftbEntryGen.new_entry.getFallThrough(update.pc) - update.pc) >> instOffsetBits
  val ftb_entry_len_recording_vec = (1 to PredictWidth+1).map(i => ftb_entry_len === i.U)
  val ftb_init_entry_len_map = (1 to PredictWidth+1).map(i =>
    f"ftb_init_entry_len_$i" -> (ftb_entry_len_recording_vec(i-1) && ftb_new_entry)
  ).foldLeft(Map[String, UInt]())(_+_)
  val ftb_modified_entry_len_map = (1 to PredictWidth+1).map(i =>
    f"ftb_modified_entry_len_$i" -> (ftb_entry_len_recording_vec(i-1) && ftb_modified_entry)
  ).foldLeft(Map[String, UInt]())(_+_)

  val ftq_occupancy_map = (0 to FtqSize).map(i =>
    f"ftq_has_entry_$i" -> (validEntries === i.U)
  ).foldLeft(Map[String, UInt]())(_+_)

  val perfCountsMap = Map(
    "BpInstr" -> PopCount(mbpInstrs),
    "BpBInstr" -> PopCount(mbpBRights | mbpBWrongs),
    "BpRight"  -> PopCount(mbpRights),
    "BpWrong"  -> PopCount(mbpWrongs),
    "BpBRight" -> PopCount(mbpBRights),
    "BpBWrong" -> PopCount(mbpBWrongs),
    "BpJRight" -> PopCount(mbpJRights),
    "BpJWrong" -> PopCount(mbpJWrongs),
    "BpIRight" -> PopCount(mbpIRights),
    "BpIWrong" -> PopCount(mbpIWrongs),
    "BpCRight" -> PopCount(mbpCRights),
    "BpCWrong" -> PopCount(mbpCWrongs),
    "BpRRight" -> PopCount(mbpRRights),
    "BpRWrong" -> PopCount(mbpRWrongs),

    "ftb_false_hit"                -> PopCount(ftb_false_hit),
    "ftb_hit"                      -> PopCount(ftb_hit),
    "ftb_new_entry"                -> PopCount(ftb_new_entry),
    "ftb_new_entry_only_br"        -> PopCount(ftb_new_entry_only_br),
    "ftb_new_entry_only_jmp"       -> PopCount(ftb_new_entry_only_jmp),
    "ftb_new_entry_has_br_and_jmp" -> PopCount(ftb_new_entry_has_br_and_jmp),
    "ftb_old_entry"                -> PopCount(ftb_old_entry),
    "ftb_modified_entry"           -> PopCount(ftb_modified_entry),
    "ftb_modified_entry_new_br"    -> PopCount(ftb_modified_entry_new_br),
    "ftb_jalr_target_modified"     -> PopCount(ftb_modified_entry_jalr_target_modified),
    "ftb_modified_entry_br_full"   -> PopCount(ftb_modified_entry_br_full),
    "ftb_modified_entry_always_taken" -> PopCount(ftb_modified_entry_always_taken)
  ) ++ ftb_init_entry_len_map ++ ftb_modified_entry_len_map ++ s1_entry_len_map ++
  s2_entry_len_map ++ s3_entry_len_map ++
  to_ifu_entry_len_map ++ commit_num_inst_map ++ ftq_occupancy_map ++
  mispred_stage_map ++ br_mispred_stage_map ++ jalr_mispred_stage_map ++
  correct_stage_map ++ br_correct_stage_map ++ jalr_correct_stage_map

  for((key, value) <- perfCountsMap) {
    XSPerfAccumulate(key, value)
  }

  // --------------------------- Debug --------------------------------
  // XSDebug(enq_fire, p"enq! " + io.fromBpu.resp.bits.toPrintable)
  XSDebug(io.toIfu.req.fire, p"fire to ifu " + io.toIfu.req.bits.toPrintable)
  XSDebug(do_commit, p"deq! [ptr] $do_commit_ptr\n")
  XSDebug(true.B, p"[bpuPtr] $bpuPtr, [ifuPtr] $ifuPtr, [ifuWbPtr] $ifuWbPtr, [commPtr] $commPtr\n")
  XSDebug(true.B, p"[in] v:${io.fromBpu.resp.valid} r:${io.fromBpu.resp.ready} " +
    p"[out] v:${io.toIfu.req.valid} r:${io.toIfu.req.ready}\n")
  XSDebug(do_commit, p"[deq info] cfiIndex: $commit_cfi, $commit_pc_bundle, target: ${Hexadecimal(commit_target)}\n")

  //   def ubtbCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
  //       case (((valid, pd), ans), taken) =>
  //       Mux(valid && pd.isBr,
  //         isWrong ^ Mux(ans.hit.asBool,
  //           Mux(ans.taken.asBool, taken && ans.target === commitEntry.target,
  //           !taken),
  //         !taken),
  //       false.B)
  //     }
  //   }

  //   def btbCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
  //       case (((valid, pd), ans), taken) =>
  //       Mux(valid && pd.isBr,
  //         isWrong ^ Mux(ans.hit.asBool,
  //           Mux(ans.taken.asBool, taken && ans.target === commitEntry.target,
  //           !taken),
  //         !taken),
  //       false.B)
  //     }
  //   }

  //   def tageCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
  //       case (((valid, pd), ans), taken) =>
  //       Mux(valid && pd.isBr,
  //         isWrong ^ (ans.taken.asBool === taken),
  //       false.B)
  //     }
  //   }

  //   def loopCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
  //       case (((valid, pd), ans), taken) =>
  //       Mux(valid && (pd.isBr) && ans.hit.asBool,
  //         isWrong ^ (!taken),
  //           false.B)
  //     }
  //   }

  //   def rasCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
  //       case (((valid, pd), ans), taken) =>
  //       Mux(valid && pd.isRet.asBool /*&& taken*/ && ans.hit.asBool,
  //         isWrong ^ (ans.target === commitEntry.target),
  //           false.B)
  //     }
  //   }

  //   val ubtbRights = ubtbCheck(commitEntry, commitEntry.metas.map(_.ubtbAns), false.B)
  //   val ubtbWrongs = ubtbCheck(commitEntry, commitEntry.metas.map(_.ubtbAns), true.B)
  //   // btb and ubtb pred jal and jalr as well
  //   val btbRights = btbCheck(commitEntry, commitEntry.metas.map(_.btbAns), false.B)
  //   val btbWrongs = btbCheck(commitEntry, commitEntry.metas.map(_.btbAns), true.B)
  //   val tageRights = tageCheck(commitEntry, commitEntry.metas.map(_.tageAns), false.B)
  //   val tageWrongs = tageCheck(commitEntry, commitEntry.metas.map(_.tageAns), true.B)

  //   val loopRights = loopCheck(commitEntry, commitEntry.metas.map(_.loopAns), false.B)
  //   val loopWrongs = loopCheck(commitEntry, commitEntry.metas.map(_.loopAns), true.B)

  //   val rasRights = rasCheck(commitEntry, commitEntry.metas.map(_.rasAns), false.B)
  //   val rasWrongs = rasCheck(commitEntry, commitEntry.metas.map(_.rasAns), true.B)

  val perfEvents = Seq(
    ("bpu_s2_redirect        ", bpu_s2_redirect                                                             ),
    ("bpu_s3_redirect        ", bpu_s3_redirect                                                             ),
    ("bpu_to_ftq_stall       ", enq.valid && ~enq.ready                                                     ),
    ("mispredictRedirect     ", perf_redirect.valid && RedirectLevel.flushAfter === perf_redirect.bits.level),
    ("replayRedirect         ", perf_redirect.valid && RedirectLevel.flushItself(perf_redirect.bits.level)  ),
    ("predecodeRedirect      ", fromIfuRedirect.valid                                                       ),
    ("to_ifu_bubble          ", io.toIfu.req.ready && !io.toIfu.req.valid                                   ),
    ("from_bpu_real_bubble   ", !enq.valid && enq.ready && allowBpuIn                                       ),
    ("BpInstr                ", PopCount(mbpInstrs)                                                         ),
    ("BpBInstr               ", PopCount(mbpBRights | mbpBWrongs)                                           ),
    ("BpRight                ", PopCount(mbpRights)                                                         ),
    ("BpWrong                ", PopCount(mbpWrongs)                                                         ),
    ("BpBRight               ", PopCount(mbpBRights)                                                        ),
    ("BpBWrong               ", PopCount(mbpBWrongs)                                                        ),
    ("BpJRight               ", PopCount(mbpJRights)                                                        ),
    ("BpJWrong               ", PopCount(mbpJWrongs)                                                        ),
    ("BpIRight               ", PopCount(mbpIRights)                                                        ),
    ("BpIWrong               ", PopCount(mbpIWrongs)                                                        ),
    ("BpCRight               ", PopCount(mbpCRights)                                                        ),
    ("BpCWrong               ", PopCount(mbpCWrongs)                                                        ),
    ("BpRRight               ", PopCount(mbpRRights)                                                        ),
    ("BpRWrong               ", PopCount(mbpRWrongs)                                                        ),
    ("ftb_false_hit          ", PopCount(ftb_false_hit)                                                     ),
    ("ftb_hit                ", PopCount(ftb_hit)                                                           ),
  )
  generatePerfEvent()
}