xref: /XiangShan/src/main/scala/xiangshan/mem/prefetch/SMSPrefetcher.scala (revision 30f35717e23156cb95b30a36db530384545b48a4)
1c49ebec8SHaoyuan Feng/***************************************************************************************
2c49ebec8SHaoyuan Feng* Copyright (c) 2024 Beijing Institute of Open Source Chip (BOSC)
3c49ebec8SHaoyuan Feng* Copyright (c) 2020-2024 Institute of Computing Technology, Chinese Academy of Sciences
4c49ebec8SHaoyuan Feng* Copyright (c) 2020-2021 Peng Cheng Laboratory
5c49ebec8SHaoyuan Feng*
6c49ebec8SHaoyuan Feng* XiangShan is licensed under Mulan PSL v2.
7c49ebec8SHaoyuan Feng* You can use this software according to the terms and conditions of the Mulan PSL v2.
8c49ebec8SHaoyuan Feng* You may obtain a copy of Mulan PSL v2 at:
9c49ebec8SHaoyuan Feng*          http://license.coscl.org.cn/MulanPSL2
10c49ebec8SHaoyuan Feng*
11c49ebec8SHaoyuan Feng* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
12c49ebec8SHaoyuan Feng* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
13c49ebec8SHaoyuan Feng* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
14c49ebec8SHaoyuan Feng*
15c49ebec8SHaoyuan Feng* See the Mulan PSL v2 for more details.
16c49ebec8SHaoyuan Feng*
17c49ebec8SHaoyuan Feng*
18c49ebec8SHaoyuan Feng* Acknowledgement
19c49ebec8SHaoyuan Feng*
20c49ebec8SHaoyuan Feng* This implementation is inspired by several key papers:
21c49ebec8SHaoyuan Feng* [1] Stephen Somogyi, Thomas F. Wenisch, Anastassia Ailamaki, Babak Falsafi and Andreas Moshovos. "[Spatial memory
22c49ebec8SHaoyuan Feng* streaming.](https://doi.org/10.1109/ISCA.2006.38)" 33rd International Symposium on Computer Architecture (ISCA).
23c49ebec8SHaoyuan Feng* 2006.
24c49ebec8SHaoyuan Feng***************************************************************************************/
25c49ebec8SHaoyuan Feng
26289fc2f9SLinJiaweipackage xiangshan.mem.prefetch
27289fc2f9SLinJiawei
288891a219SYinan Xuimport org.chipsalliance.cde.config.Parameters
29289fc2f9SLinJiaweiimport chisel3._
30289fc2f9SLinJiaweiimport chisel3.util._
31289fc2f9SLinJiaweiimport utils._
324b6d4d13SWilliam Wangimport utility._
339e12e8edScz4eimport xiangshan._
3425a80bceSYanqin Liimport xiangshan.backend.fu.PMPRespBundle
359e12e8edScz4eimport xiangshan.mem.L1PrefetchReq
3699ce5576Scz4eimport xiangshan.mem.Bundles.LsPrefetchTrainBundle
374b6d4d13SWilliam Wangimport xiangshan.mem.trace._
380d32f713Shappy-lximport xiangshan.mem.HasL1PrefetchSourceParameter
399e12e8edScz4eimport xiangshan.cache.HasDCacheParameters
409e12e8edScz4eimport xiangshan.cache.mmu._
41289fc2f9SLinJiawei
/**
  * Configuration knobs for the SMS (spatial memory streaming) prefetcher.
  *
  * @param region_size           size of one spatial region; divided by the dcache `blockBytes`
  *                              to obtain the number of blocks per region
  * @param vaddr_hash_width      width of each chunk that is XOR-folded when hashing the upper
  *                              address bits into a tag
  * @param block_addr_raw_width  number of low block-address bits kept un-hashed in a tag
  * @param stride_pc_bits        width of the PC field used by the stride prefetcher
  * @param max_stride            `log2Up(max_stride)` is the number of block-address bits
  *                              tracked per stride entry
  * @param stride_entries        number of entries in the stride prefetcher table
  * @param active_gen_table_size number of entries in the active generation table
  * @param pht_size              total number of pattern history table entries
  * @param pht_ways              PHT associativity; `pht_size / pht_ways` sets the index width
  * @param pht_hist_bits         width exposed as `PHT_HIST_BITS`
  *                              (NOTE(review): consumer not visible in this part of the file)
  * @param pht_tag_bits          width of the PC-derived PHT tag
  * @param pht_lookup_queue_size NOTE(review): presumably the depth of the PHT lookup queue;
  *                              consumer not visible here - confirm
  * @param pf_filter_size        NOTE(review): presumably the prefetch filter entry count;
  *                              consumer not visible here - confirm
  * @param train_filter_size     NOTE(review): presumably the training filter entry count;
  *                              consumer not visible here - confirm
  */
case class SMSParams(
  region_size: Int = 1024,
  vaddr_hash_width: Int = 5,
  block_addr_raw_width: Int = 10,
  stride_pc_bits: Int = 10,
  max_stride: Int = 1024,
  stride_entries: Int = 16,
  active_gen_table_size: Int = 16,
  pht_size: Int = 64,
  pht_ways: Int = 2,
  pht_hist_bits: Int = 2,
  pht_tag_bits: Int = 13,
  pht_lookup_queue_size: Int = 4,
  pf_filter_size: Int = 16,
  train_filter_size: Int = 8
) extends PrefetcherParams
59289fc2f9SLinJiawei
/**
  * Derived constants and address/tag helper functions shared by the SMS prefetcher
  * modules and bundles. Everything here is computed from [[SMSParams]] and the
  * dcache parameters of the enclosing core.
  */
trait HasSMSModuleHelper extends HasCircularQueuePtrHelper with HasDCacheParameters
{ this: HasXSParameter =>
  val smsParams = coreParams.prefetcher.get.asInstanceOf[SMSParams]
  // width of a virtual address once the in-block offset is dropped
  val BLK_ADDR_WIDTH = VAddrBits - log2Up(dcacheParameters.blockBytes)
  val REGION_SIZE = smsParams.region_size
  // number of cache blocks covered by one region
  val REGION_BLKS = smsParams.region_size / dcacheParameters.blockBytes
  // width of a virtual address once the in-region offset is dropped
  val REGION_ADDR_BITS = VAddrBits - log2Up(REGION_SIZE)
  // width of a block index inside a region
  val REGION_OFFSET = log2Up(REGION_BLKS)
  val VADDR_HASH_WIDTH = smsParams.vaddr_hash_width
  val BLK_ADDR_RAW_WIDTH = smsParams.block_addr_raw_width
  val REGION_ADDR_RAW_WIDTH = BLK_ADDR_RAW_WIDTH - REGION_OFFSET
  // tag = un-hashed low bits ++ one hash chunk
  val BLK_TAG_WIDTH = BLK_ADDR_RAW_WIDTH + VADDR_HASH_WIDTH
  val REGION_TAG_WIDTH = REGION_ADDR_RAW_WIDTH + VADDR_HASH_WIDTH
  val PHT_INDEX_BITS = log2Up(smsParams.pht_size / smsParams.pht_ways)
  val PHT_TAG_BITS = smsParams.pht_tag_bits
  val PHT_HIST_BITS = smsParams.pht_hist_bits
  // position of the lowest page-number bit inside a block address
  val BLOCK_ADDR_PAGE_BIT = log2Up(dcacheParameters.pageSize / dcacheParameters.blockBytes)
  // position of the lowest page-number bit inside a region address
  val REGION_ADDR_PAGE_BIT = log2Up(dcacheParameters.pageSize / smsParams.region_size)
  val STRIDE_PC_BITS = smsParams.stride_pc_bits
  val STRIDE_BLK_ADDR_BITS = log2Up(smsParams.max_stride)

  /** Drops the in-block offset bits of a byte address. */
  def block_addr(x: UInt): UInt =
    x(x.getWidth - 1, log2Up(dcacheParameters.blockBytes))

  /** Drops the in-region offset bits of a byte address. */
  def region_addr(x: UInt): UInt =
    x(x.getWidth - 1, log2Up(REGION_SIZE))

  /** Converts a block index inside a region into a one-hot bit vector. */
  def region_offset_to_bits(off: UInt): UInt = {
    (1.U << off).asUInt
  }

  /**
    * Builds a region tag: the low `REGION_ADDR_RAW_WIDTH` bits are kept as-is and the
    * next `3 * VADDR_HASH_WIDTH` bits are XOR-folded by [[vaddr_hash]] and placed on top.
    */
  def region_hash_tag(rg_addr: UInt): UInt = {
    val foldedBits = 3 * VADDR_HASH_WIDTH
    val rawPart = rg_addr(REGION_ADDR_RAW_WIDTH - 1, 0)
    val foldedPart = vaddr_hash(
      rg_addr(REGION_ADDR_RAW_WIDTH + foldedBits - 1, REGION_ADDR_RAW_WIDTH)
    )
    Cat(foldedPart, rawPart)
  }

  /** Selects the lowest page-number bit of a region address. */
  def page_bit(region_addr: UInt): UInt = {
    // REGION_ADDR_PAGE_BIT == log2Up(pageSize / REGION_SIZE)
    region_addr(REGION_ADDR_PAGE_BIT)
  }

  /**
    * Builds a block tag from a byte address: the low `BLK_ADDR_RAW_WIDTH` bits of the
    * block address are kept as-is and the next `3 * VADDR_HASH_WIDTH` bits are
    * XOR-folded by [[vaddr_hash]] and placed on top.
    */
  def block_hash_tag(x: UInt): UInt = {
    val foldedBits = 3 * VADDR_HASH_WIDTH
    val blk = block_addr(x)
    val rawPart = blk(BLK_ADDR_RAW_WIDTH - 1, 0)
    val foldedPart = vaddr_hash(
      blk(BLK_ADDR_RAW_WIDTH - 1 + foldedBits, BLK_ADDR_RAW_WIDTH)
    )
    Cat(foldedPart, rawPart)
  }

  /** XORs the three lowest `VADDR_HASH_WIDTH`-bit chunks of `x` together. */
  def vaddr_hash(x: UInt): UInt = {
    val w = VADDR_HASH_WIDTH
    // chunk 0 ^ chunk 1 ^ chunk 2, folded left to right
    Seq.tabulate(3)(k => x((k + 1) * w - 1, k * w)).reduce(_ ^ _)
  }

  /**
    * PHT set index derived from the PC: bits `[PHT_INDEX_BITS:2]` form the low part and
    * `pc(1) ^ pc(PHT_INDEX_BITS + 1)` is used as the top bit.
    */
  def pht_index(pc: UInt): UInt =
    Cat(pc(1) ^ pc(PHT_INDEX_BITS + 1), pc(PHT_INDEX_BITS, 2))

  /** PHT tag: the `PHT_TAG_BITS` PC bits starting at bit `PHT_INDEX_BITS + 2`. */
  def pht_tag(pc: UInt): UInt = {
    val tagLsb = PHT_INDEX_BITS + 2
    pc(tagLsb + PHT_TAG_BITS - 1, tagLsb)
  }

  /** Re-expands a region address to a byte address and returns its `get_alias` bits. */
  def get_alias_bits(region_vaddr: UInt): UInt =
    get_alias(Cat(region_vaddr, 0.U(log2Up(REGION_SIZE).W)))
}
138289fc2f9SLinJiawei
/**
  * PC-indexed stride prefetcher.
  *
  * Pipeline:
  *  - s0: look up the table by PC (back-to-back lookups with the same PC are ignored).
  *  - s1: on a hit, compute the new stride against the stored last address, update the
  *        confidence counter and, when the new stride equals the stored non-zero stride,
  *        form the prefetch block address; on a miss, allocate the PLRU victim entry.
  *  - s2: emit a [[PfGenReq]] for the predicted block when `stride_en` is set.
  *
  * If the predicted block stays in the same virtual page as the trigger access, the
  * request carries a physical region address built from the trigger's physical page
  * (`paddr_valid` = true); otherwise it carries the virtual address and
  * `paddr_valid` = false.
  */
class StridePF()(implicit p: Parameters) extends XSModule with HasSMSModuleHelper {
  val io = IO(new Bundle() {
    // gates s2_gen_req.valid
    val stride_en = Input(Bool())
    val s0_lookup = Flipped(new ValidIO(new Bundle() {
      val pc = UInt(STRIDE_PC_BITS.W)
      val vaddr = UInt(VAddrBits.W)
      val paddr = UInt(PAddrBits.W)
    }))
    // qualifies the s1 hit/alloc derived from the previous cycle's s0 lookup
    val s1_valid = Input(Bool())
    val s2_gen_req = ValidIO(new PfGenReq())
  })

  val prev_valid = GatedValidRegNext(io.s0_lookup.valid, false.B)
  val prev_pc = RegEnable(io.s0_lookup.bits.pc, io.s0_lookup.valid)

  // drop a lookup whose PC equals the one seen in the immediately preceding cycle
  val s0_valid = io.s0_lookup.valid && !(prev_valid && prev_pc === io.s0_lookup.bits.pc)

  def entry_map[T](fn: Int => T) = (0 until smsParams.stride_entries).map(fn)

  val replacement = ReplacementPolicy.fromString("plru", smsParams.stride_entries)
  val valids = entry_map(_ => RegInit(false.B))
  val entries_pc = entry_map(_ => Reg(UInt(STRIDE_PC_BITS.W)) )
  val entries_conf = entry_map(_ => RegInit(1.U(2.W)))
  val entries_last_addr = entry_map(_ => Reg(UInt(STRIDE_BLK_ADDR_BITS.W)) )
  // one extra bit: the stride is a signed difference of two unsigned block addresses
  val entries_stride = entry_map(_ => Reg(SInt((STRIDE_BLK_ADDR_BITS+1).W)))


  val s0_match_vec = valids.zip(entries_pc).map({
    case (v, pc) => v && pc === io.s0_lookup.bits.pc
  })

  val s0_hit = s0_valid && Cat(s0_match_vec).orR
  val s0_miss = s0_valid && !s0_hit
  val s0_matched_conf = Mux1H(s0_match_vec, entries_conf)
  val s0_matched_last_addr = Mux1H(s0_match_vec, entries_last_addr)
  val s0_matched_last_stride = Mux1H(s0_match_vec, entries_stride)

  val s1_hit = GatedValidRegNext(s0_hit) && io.s1_valid
  val s1_alloc = GatedValidRegNext(s0_miss) && io.s1_valid
  val s1_vaddr = RegEnable(io.s0_lookup.bits.vaddr, s0_valid)
  val s1_paddr = RegEnable(io.s0_lookup.bits.paddr, s0_valid)
  val s1_conf = RegEnable(s0_matched_conf, s0_valid)
  val s1_last_addr = RegEnable(s0_matched_last_addr, s0_valid)
  val s1_last_stride = RegEnable(s0_matched_last_stride, s0_valid)
  val s1_match_vec = RegEnable(VecInit(s0_match_vec), s0_valid)

  val BLOCK_OFFSET = log2Up(dcacheParameters.blockBytes)
  // low block-address bits of the current access, as tracked per entry
  val s1_new_stride_vaddr = s1_vaddr(BLOCK_OFFSET + STRIDE_BLK_ADDR_BITS - 1, BLOCK_OFFSET)
  // zero-extend both operands so the subtraction yields a signed stride
  val s1_new_stride = (0.U(1.W) ## s1_new_stride_vaddr).asSInt - (0.U(1.W) ## s1_last_addr).asSInt
  val s1_stride_non_zero = s1_last_stride =/= 0.S
  val s1_stride_match = s1_new_stride === s1_last_stride && s1_stride_non_zero
  val s1_replace_idx = replacement.way

  for(i <- 0 until smsParams.stride_entries){
    val alloc = s1_alloc && i.U === s1_replace_idx
    val update = s1_hit && s1_match_vec(i)
    when(update){
      assert(valids(i))
      // 2-bit saturating confidence: up on a stride match, down otherwise
      entries_conf(i) := Mux(s1_stride_match,
        Mux(s1_conf === 3.U, 3.U, s1_conf + 1.U),
        Mux(s1_conf === 0.U, 0.U, s1_conf - 1.U)
      )
      entries_last_addr(i) := s1_new_stride_vaddr
      // only retrain the stride while confidence is low (conf < 2)
      when(!s1_conf(1)){
        entries_stride(i) := s1_new_stride
      }
    }
    when(alloc){
      valids(i) := true.B
      // prev_pc holds the PC of the lookup that missed in the previous cycle
      entries_pc(i) := prev_pc
      entries_conf(i) := 0.U
      entries_last_addr(i) := s1_new_stride_vaddr
      entries_stride(i) := 0.S
    }
    assert(!(update && alloc))
  }
  when(s1_hit){
    replacement.access(OHToUInt(s1_match_vec.asUInt))
  }.elsewhen(s1_alloc){
    replacement.access(s1_replace_idx)
  }

  val s1_block_vaddr = block_addr(s1_vaddr)
  val s1_pf_block_vaddr = (s1_block_vaddr.asSInt + s1_last_stride).asUInt
  // Compare the whole virtual page number, not just its lowest bit: the stride can span
  // several pages (it is a signed STRIDE_BLK_ADDR_BITS+1 bit block delta), and a jump by
  // an even number of pages leaves bit BLOCK_ADDR_PAGE_BIT unchanged. Missing such a
  // crossing would splice the trigger's physical page onto an offset from another page.
  val s1_pf_cross_page =
    (s1_pf_block_vaddr >> BLOCK_ADDR_PAGE_BIT) =/= (s1_block_vaddr >> BLOCK_ADDR_PAGE_BIT)

  val s2_pf_gen_valid = GatedValidRegNext(s1_hit && s1_stride_match, false.B)
  val s2_pf_gen_paddr_valid = RegEnable(!s1_pf_cross_page, s1_hit && s1_stride_match)
  val s2_pf_block_vaddr = RegEnable(s1_pf_block_vaddr, s1_hit && s1_stride_match)
  val s2_block_paddr = RegEnable(block_addr(s1_paddr), s1_hit && s1_stride_match)

  // same page: physical page number of the trigger ++ in-page block offset of the target;
  // different page: fall back to the virtual block address
  val s2_pf_block_addr = Mux(s2_pf_gen_paddr_valid,
    Cat(
      s2_block_paddr(PAddrBits - BLOCK_OFFSET - 1, BLOCK_ADDR_PAGE_BIT),
      s2_pf_block_vaddr(BLOCK_ADDR_PAGE_BIT - 1, 0)
    ),
    s2_pf_block_vaddr
  )
  val s2_pf_full_addr = Wire(UInt(VAddrBits.W))
  s2_pf_full_addr := s2_pf_block_addr ## 0.U(BLOCK_OFFSET.W)

  val s2_pf_region_addr = region_addr(s2_pf_full_addr)
  val s2_pf_region_offset = s2_pf_block_addr(REGION_OFFSET - 1, 0)

  val s2_full_vaddr = Wire(UInt(VAddrBits.W))
  s2_full_vaddr := s2_pf_block_vaddr ## 0.U(BLOCK_OFFSET.W)

  // tag and alias bits are always derived from the virtual address
  val s2_region_tag = region_hash_tag(region_addr(s2_full_vaddr))

  io.s2_gen_req.valid := s2_pf_gen_valid && io.stride_en
  io.s2_gen_req.bits.region_tag := s2_region_tag
  io.s2_gen_req.bits.region_addr := s2_pf_region_addr
  io.s2_gen_req.bits.alias_bits := get_alias_bits(region_addr(s2_full_vaddr))
  // a stride request targets exactly one block of the region
  io.s2_gen_req.bits.region_bits := region_offset_to_bits(s2_pf_region_offset)
  io.s2_gen_req.bits.paddr_valid := s2_pf_gen_paddr_valid
  io.s2_gen_req.bits.decr_mode := false.B
  io.s2_gen_req.bits.debug_source_type := HW_PREFETCH_STRIDE.U

}
258289fc2f9SLinJiawei
/**
  * One entry of the active generation table; also the payload sent out when an
  * entry is evicted or a single-access update is forwarded.
  */
class AGTEntry()(implicit p: Parameters) extends XSBundle with HasSMSModuleHelper {
  // PHT index/tag captured from the lookup that allocated the entry
  val pht_index = UInt(PHT_INDEX_BITS.W)
  val pht_tag = UInt(PHT_TAG_BITS.W)
  // accumulated bitmap of blocks touched in this region (one bit per block)
  val region_bits = UInt(REGION_BLKS.W)
  // one-hot bit of the block touched by the most recent lookup only
  val region_bit_single = UInt(REGION_BLKS.W)
  val region_tag = UInt(REGION_TAG_WIDTH.W)
  // block index, inside the region, of the allocating lookup
  val region_offset = UInt(REGION_OFFSET.W)
  // starts at 1; bumped when a new block is touched, saturating at REGION_BLKS - 1
  val access_cnt = UInt((REGION_BLKS-1).U.getWidth.W)
  // set at allocation when only the (region + 1) neighbour was present in the table
  val decr_mode = Bool()
  // marks this payload as a single-update request rather than a real eviction
  val single_update = Bool()
  // sticky flag: the entry has already been updated by a single-update at least once
  val has_been_signal_updated = Bool()
}
271289fc2f9SLinJiawei
/** Prefetch generation request describing which blocks of one region to prefetch. */
class PfGenReq()(implicit p: Parameters) extends XSBundle with HasSMSModuleHelper {
  // hashed tag of the target region, derived from its virtual address
  val region_tag = UInt(REGION_TAG_WIDTH.W)
  // region address: physical when paddr_valid is set, virtual otherwise
  val region_addr = UInt(REGION_ADDR_BITS.W)
  // one bit per block of the region that should be prefetched
  val region_bits = UInt(REGION_BLKS.W)
  // true when region_addr already holds a physical address
  val paddr_valid = Bool()
  val decr_mode = Bool()
  // result of get_alias_bits on the target region's virtual address
  val alias_bits = UInt(2.W)
  // identifies which prefetcher produced the request (e.g. HW_PREFETCH_STRIDE)
  val debug_source_type = UInt(log2Up(nSourceType).W)
}
281289fc2f9SLinJiawei
/** Notification that the dcache released a block, so the matching AGT entry can be evicted. */
class AGTEvictReq()(implicit p: Parameters) extends XSBundle {
  // virtual address of the released block
  val vaddr = UInt(VAddrBits.W)
}
2856005a7e2Shappy-lx
286289fc2f9SLinJiaweiclass ActiveGenerationTable()(implicit p: Parameters) extends XSModule with HasSMSModuleHelper {
287289fc2f9SLinJiawei  val io = IO(new Bundle() {
288967327d8SLinJiawei    val agt_en = Input(Bool())
289289fc2f9SLinJiawei    val s0_lookup = Flipped(ValidIO(new Bundle() {
290289fc2f9SLinJiawei      val region_tag = UInt(REGION_TAG_WIDTH.W)
291289fc2f9SLinJiawei      val region_p1_tag = UInt(REGION_TAG_WIDTH.W)
292289fc2f9SLinJiawei      val region_m1_tag = UInt(REGION_TAG_WIDTH.W)
293289fc2f9SLinJiawei      val region_offset = UInt(REGION_OFFSET.W)
294289fc2f9SLinJiawei      val pht_index = UInt(PHT_INDEX_BITS.W)
295289fc2f9SLinJiawei      val pht_tag = UInt(PHT_TAG_BITS.W)
296289fc2f9SLinJiawei      val allow_cross_region_p1 = Bool()
297289fc2f9SLinJiawei      val allow_cross_region_m1 = Bool()
298289fc2f9SLinJiawei      val region_p1_cross_page = Bool()
299289fc2f9SLinJiawei      val region_m1_cross_page = Bool()
300289fc2f9SLinJiawei      val region_paddr = UInt(REGION_ADDR_BITS.W)
301289fc2f9SLinJiawei      val region_vaddr = UInt(REGION_ADDR_BITS.W)
302289fc2f9SLinJiawei    }))
3036005a7e2Shappy-lx    // dcache has released a block, evict it from agt
3046005a7e2Shappy-lx    val s0_dcache_evict = Flipped(DecoupledIO(new AGTEvictReq))
305967327d8SLinJiawei    val s1_sel_stride = Output(Bool())
306967327d8SLinJiawei    val s2_stride_hit = Input(Bool())
307967327d8SLinJiawei    // if agt/stride missed, try lookup pht
30885de5caeSLinJiawei    val s2_pht_lookup = ValidIO(new PhtLookup())
309289fc2f9SLinJiawei    // evict entry to pht
310289fc2f9SLinJiawei    val s2_evict = ValidIO(new AGTEntry())
311289fc2f9SLinJiawei    val s2_pf_gen_req = ValidIO(new PfGenReq())
3125d13017eSLinJiawei    val act_threshold = Input(UInt(REGION_OFFSET.W))
3135d13017eSLinJiawei    val act_stride = Input(UInt(6.W))
314289fc2f9SLinJiawei  })
315289fc2f9SLinJiawei
316289fc2f9SLinJiawei  val entries = Seq.fill(smsParams.active_gen_table_size){ Reg(new AGTEntry()) }
317289fc2f9SLinJiawei  val valids = Seq.fill(smsParams.active_gen_table_size){ RegInit(false.B) }
318289fc2f9SLinJiawei  val replacement = ReplacementPolicy.fromString("plru", smsParams.active_gen_table_size)
319289fc2f9SLinJiawei
320967327d8SLinJiawei  val s1_replace_mask_w = Wire(UInt(smsParams.active_gen_table_size.W))
321967327d8SLinJiawei
322289fc2f9SLinJiawei  val s0_lookup = io.s0_lookup.bits
323289fc2f9SLinJiawei  val s0_lookup_valid = io.s0_lookup.valid
324289fc2f9SLinJiawei
3256005a7e2Shappy-lx  val s0_dcache_evict = io.s0_dcache_evict.bits
3266005a7e2Shappy-lx  val s0_dcache_evict_valid = io.s0_dcache_evict.valid
3276005a7e2Shappy-lx  val s0_dcache_evict_tag = block_hash_tag(s0_dcache_evict.vaddr).head(REGION_TAG_WIDTH)
3286005a7e2Shappy-lx
329289fc2f9SLinJiawei  val prev_lookup = RegEnable(s0_lookup, s0_lookup_valid)
3304ccb2e8bSYanqin Li  val prev_lookup_valid = GatedValidRegNext(s0_lookup_valid, false.B)
331289fc2f9SLinJiawei
332289fc2f9SLinJiawei  val s0_match_prev = prev_lookup_valid && s0_lookup.region_tag === prev_lookup.region_tag
333289fc2f9SLinJiawei
334289fc2f9SLinJiawei  def gen_match_vec(region_tag: UInt): Seq[Bool] = {
335289fc2f9SLinJiawei    entries.zip(valids).map({
336289fc2f9SLinJiawei      case (ent, v) => v && ent.region_tag === region_tag
337289fc2f9SLinJiawei    })
338289fc2f9SLinJiawei  }
339289fc2f9SLinJiawei
340289fc2f9SLinJiawei  val region_match_vec_s0 = gen_match_vec(s0_lookup.region_tag)
341289fc2f9SLinJiawei  val region_p1_match_vec_s0 = gen_match_vec(s0_lookup.region_p1_tag)
342289fc2f9SLinJiawei  val region_m1_match_vec_s0 = gen_match_vec(s0_lookup.region_m1_tag)
343289fc2f9SLinJiawei
344289fc2f9SLinJiawei  val any_region_match = Cat(region_match_vec_s0).orR
345289fc2f9SLinJiawei  val any_region_p1_match = Cat(region_p1_match_vec_s0).orR && s0_lookup.allow_cross_region_p1
346289fc2f9SLinJiawei  val any_region_m1_match = Cat(region_m1_match_vec_s0).orR && s0_lookup.allow_cross_region_m1
347289fc2f9SLinJiawei
3486005a7e2Shappy-lx  val region_match_vec_dcache_evict_s0 = gen_match_vec(s0_dcache_evict_tag)
3496005a7e2Shappy-lx  val any_region_dcache_evict_match = Cat(region_match_vec_dcache_evict_s0).orR
3506005a7e2Shappy-lx  // s0 dcache evict a entry that may be replaced in s1
3516005a7e2Shappy-lx  val s0_dcache_evict_conflict = Cat(VecInit(region_match_vec_dcache_evict_s0).asUInt & s1_replace_mask_w).orR
3526005a7e2Shappy-lx  val s0_do_dcache_evict = io.s0_dcache_evict.fire && any_region_dcache_evict_match
3536005a7e2Shappy-lx
3546005a7e2Shappy-lx  io.s0_dcache_evict.ready := !s0_lookup_valid && !s0_dcache_evict_conflict
3556005a7e2Shappy-lx
356289fc2f9SLinJiawei  val s0_region_hit = any_region_match
357967327d8SLinJiawei  val s0_cross_region_hit = any_region_m1_match || any_region_p1_match
358967327d8SLinJiawei  val s0_alloc = s0_lookup_valid && !s0_region_hit && !s0_match_prev
359289fc2f9SLinJiawei  val s0_pf_gen_match_vec = valids.indices.map(i => {
360289fc2f9SLinJiawei    Mux(any_region_match,
361289fc2f9SLinJiawei      region_match_vec_s0(i),
362289fc2f9SLinJiawei      Mux(any_region_m1_match,
363289fc2f9SLinJiawei        region_m1_match_vec_s0(i), region_p1_match_vec_s0(i)
364289fc2f9SLinJiawei      )
365289fc2f9SLinJiawei    )
366289fc2f9SLinJiawei  })
367289fc2f9SLinJiawei  val s0_agt_entry = Wire(new AGTEntry())
368289fc2f9SLinJiawei
369289fc2f9SLinJiawei  s0_agt_entry.pht_index := s0_lookup.pht_index
370289fc2f9SLinJiawei  s0_agt_entry.pht_tag := s0_lookup.pht_tag
371289fc2f9SLinJiawei  s0_agt_entry.region_bits := region_offset_to_bits(s0_lookup.region_offset)
372a982a3c9Shappy-lx  // update bits this time
373a982a3c9Shappy-lx  s0_agt_entry.region_bit_single := region_offset_to_bits(s0_lookup.region_offset)
374289fc2f9SLinJiawei  s0_agt_entry.region_tag := s0_lookup.region_tag
375967327d8SLinJiawei  s0_agt_entry.region_offset := s0_lookup.region_offset
376289fc2f9SLinJiawei  s0_agt_entry.access_cnt := 1.U
377a982a3c9Shappy-lx
378a982a3c9Shappy-lx  s0_agt_entry.has_been_signal_updated := false.B
379289fc2f9SLinJiawei  // lookup_region + 1 == entry_region
380289fc2f9SLinJiawei  // lookup_region = entry_region - 1 => decr mode
381289fc2f9SLinJiawei  s0_agt_entry.decr_mode := !s0_region_hit && !any_region_m1_match && any_region_p1_match
382967327d8SLinJiawei  val s0_replace_way = replacement.way
383967327d8SLinJiawei  val s0_replace_mask = UIntToOH(s0_replace_way)
384289fc2f9SLinJiawei  // s0 hit a entry that may be replaced in s1
385967327d8SLinJiawei  val s0_update_conflict = Cat(VecInit(region_match_vec_s0).asUInt & s1_replace_mask_w).orR
386967327d8SLinJiawei  val s0_update = s0_lookup_valid && s0_region_hit && !s0_update_conflict
387a982a3c9Shappy-lx  s0_agt_entry.single_update := s0_update
388967327d8SLinJiawei
389967327d8SLinJiawei  val s0_access_way = Mux1H(
390967327d8SLinJiawei    Seq(s0_update, s0_alloc),
391967327d8SLinJiawei    Seq(OHToUInt(region_match_vec_s0), s0_replace_way)
392967327d8SLinJiawei  )
393967327d8SLinJiawei  when(s0_update || s0_alloc) {
394967327d8SLinJiawei    replacement.access(s0_access_way)
395967327d8SLinJiawei  }
396289fc2f9SLinJiawei
397289fc2f9SLinJiawei  // stage1: update/alloc
398289fc2f9SLinJiawei  // region hit, update entry
3994ccb2e8bSYanqin Li  val s1_update = GatedValidRegNext(s0_update, false.B)
400967327d8SLinJiawei  val s1_update_mask = RegEnable(VecInit(region_match_vec_s0), s0_lookup_valid)
401f21b441aSLinJiawei  val s1_agt_entry = RegEnable(s0_agt_entry, s0_lookup_valid)
4024ccb2e8bSYanqin Li  val s1_cross_region_match = RegEnable(s0_cross_region_hit, s0_lookup_valid)
4034ccb2e8bSYanqin Li  val s1_alloc = GatedValidRegNext(s0_alloc, false.B)
404967327d8SLinJiawei  val s1_alloc_entry = s1_agt_entry
4054ccb2e8bSYanqin Li  val s1_do_dcache_evict = GatedValidRegNext(s0_do_dcache_evict, false.B)
4066005a7e2Shappy-lx  val s1_replace_mask = Mux(
4076005a7e2Shappy-lx    s1_do_dcache_evict,
4086005a7e2Shappy-lx    RegEnable(VecInit(region_match_vec_dcache_evict_s0).asUInt, s0_do_dcache_evict),
4096005a7e2Shappy-lx    RegEnable(s0_replace_mask, s0_lookup_valid)
4106005a7e2Shappy-lx  )
4116005a7e2Shappy-lx  s1_replace_mask_w := s1_replace_mask & Fill(smsParams.active_gen_table_size, s1_alloc || s1_do_dcache_evict)
412f21b441aSLinJiawei  val s1_evict_entry = Mux1H(s1_replace_mask, entries)
413f21b441aSLinJiawei  val s1_evict_valid = Mux1H(s1_replace_mask, valids)
414f21b441aSLinJiawei  // pf gen
415289fc2f9SLinJiawei  val s1_pf_gen_match_vec = RegEnable(VecInit(s0_pf_gen_match_vec), s0_lookup_valid)
416289fc2f9SLinJiawei  val s1_region_paddr = RegEnable(s0_lookup.region_paddr, s0_lookup_valid)
417289fc2f9SLinJiawei  val s1_region_vaddr = RegEnable(s0_lookup.region_vaddr, s0_lookup_valid)
418289fc2f9SLinJiawei  val s1_region_offset = RegEnable(s0_lookup.region_offset, s0_lookup_valid)
419a982a3c9Shappy-lx  val s1_bit_region_signal = RegEnable(region_offset_to_bits(s0_lookup.region_offset), s0_lookup_valid)
420a982a3c9Shappy-lx
421289fc2f9SLinJiawei  for(i <- entries.indices){
422289fc2f9SLinJiawei    val alloc = s1_replace_mask(i) && s1_alloc
423289fc2f9SLinJiawei    val update = s1_update_mask(i) && s1_update
424289fc2f9SLinJiawei    val update_entry = WireInit(entries(i))
425289fc2f9SLinJiawei    update_entry.region_bits := entries(i).region_bits | s1_agt_entry.region_bits
426289fc2f9SLinJiawei    update_entry.access_cnt := Mux(entries(i).access_cnt === (REGION_BLKS - 1).U,
427289fc2f9SLinJiawei      entries(i).access_cnt,
428289fc2f9SLinJiawei      entries(i).access_cnt + (s1_agt_entry.region_bits & (~entries(i).region_bits).asUInt).orR
429289fc2f9SLinJiawei    )
430a982a3c9Shappy-lx    update_entry.region_bit_single := s1_agt_entry.region_bit_single
431a982a3c9Shappy-lx    update_entry.has_been_signal_updated := entries(i).has_been_signal_updated || (!((s1_alloc || s1_do_dcache_evict) && s1_evict_valid)) && s1_update
432289fc2f9SLinJiawei    valids(i) := valids(i) || alloc
433289fc2f9SLinJiawei    entries(i) := Mux(alloc, s1_alloc_entry, Mux(update, update_entry, entries(i)))
434289fc2f9SLinJiawei  }
435289fc2f9SLinJiawei
436a982a3c9Shappy-lx  val s1_update_entry = Mux1H(s1_update_mask, entries)
437a982a3c9Shappy-lx  val s1_update_valid = Mux1H(s1_update_mask, valids)
438a982a3c9Shappy-lx
439a982a3c9Shappy-lx
440f21b441aSLinJiawei  when(s1_update){
441f21b441aSLinJiawei    assert(PopCount(s1_update_mask) === 1.U, "multi-agt-update")
442f21b441aSLinJiawei  }
443f21b441aSLinJiawei  when(s1_alloc){
444f21b441aSLinJiawei    assert(PopCount(s1_replace_mask) === 1.U, "multi-agt-alloc")
445f21b441aSLinJiawei  }
446289fc2f9SLinJiawei
447289fc2f9SLinJiawei  // pf_addr
448289fc2f9SLinJiawei  // 1.hit => pf_addr = lookup_addr + (decr ? -1 : 1)
449289fc2f9SLinJiawei  // 2.lookup region - 1 hit => lookup_addr + 1 (incr mode)
450289fc2f9SLinJiawei  // 3.lookup region + 1 hit => lookup_addr - 1 (decr mode)
451289fc2f9SLinJiawei  val s1_hited_entry_decr = Mux1H(s1_update_mask, entries.map(_.decr_mode))
452289fc2f9SLinJiawei  val s1_pf_gen_decr_mode = Mux(s1_update,
453289fc2f9SLinJiawei    s1_hited_entry_decr,
454289fc2f9SLinJiawei    s1_agt_entry.decr_mode
455289fc2f9SLinJiawei  )
45685de5caeSLinJiawei
4575d13017eSLinJiawei  val s1_pf_gen_vaddr_inc = Cat(0.U, s1_region_vaddr(REGION_TAG_WIDTH - 1, 0), s1_region_offset) + io.act_stride
4585d13017eSLinJiawei  val s1_pf_gen_vaddr_dec = Cat(0.U, s1_region_vaddr(REGION_TAG_WIDTH - 1, 0), s1_region_offset) - io.act_stride
459f21b441aSLinJiawei  val s1_vaddr_inc_cross_page = s1_pf_gen_vaddr_inc(BLOCK_ADDR_PAGE_BIT) =/= s1_region_vaddr(REGION_ADDR_PAGE_BIT)
460f21b441aSLinJiawei  val s1_vaddr_dec_cross_page = s1_pf_gen_vaddr_dec(BLOCK_ADDR_PAGE_BIT) =/= s1_region_vaddr(REGION_ADDR_PAGE_BIT)
46185de5caeSLinJiawei  val s1_vaddr_inc_cross_max_lim = s1_pf_gen_vaddr_inc.head(1).asBool
46285de5caeSLinJiawei  val s1_vaddr_dec_cross_max_lim = s1_pf_gen_vaddr_dec.head(1).asBool
46385de5caeSLinJiawei
46485de5caeSLinJiawei  //val s1_pf_gen_vaddr_p1 = s1_region_vaddr(REGION_TAG_WIDTH - 1, 0) + 1.U
46585de5caeSLinJiawei  //val s1_pf_gen_vaddr_m1 = s1_region_vaddr(REGION_TAG_WIDTH - 1, 0) - 1.U
466289fc2f9SLinJiawei  val s1_pf_gen_vaddr = Cat(
467289fc2f9SLinJiawei    s1_region_vaddr(REGION_ADDR_BITS - 1, REGION_TAG_WIDTH),
468289fc2f9SLinJiawei    Mux(s1_pf_gen_decr_mode,
46985de5caeSLinJiawei      s1_pf_gen_vaddr_dec.tail(1).head(REGION_TAG_WIDTH),
47085de5caeSLinJiawei      s1_pf_gen_vaddr_inc.tail(1).head(REGION_TAG_WIDTH)
471289fc2f9SLinJiawei    )
472289fc2f9SLinJiawei  )
47385de5caeSLinJiawei  val s1_pf_gen_offset = Mux(s1_pf_gen_decr_mode,
47485de5caeSLinJiawei    s1_pf_gen_vaddr_dec(REGION_OFFSET - 1, 0),
47585de5caeSLinJiawei    s1_pf_gen_vaddr_inc(REGION_OFFSET - 1, 0)
47685de5caeSLinJiawei  )
47785de5caeSLinJiawei  val s1_pf_gen_offset_mask = UIntToOH(s1_pf_gen_offset)
478289fc2f9SLinJiawei  val s1_pf_gen_access_cnt = Mux1H(s1_pf_gen_match_vec, entries.map(_.access_cnt))
479967327d8SLinJiawei  val s1_in_active_page = s1_pf_gen_access_cnt > io.act_threshold
480967327d8SLinJiawei  val s1_pf_gen_valid = prev_lookup_valid && (s1_alloc && s1_cross_region_match || s1_update) && Mux(s1_pf_gen_decr_mode,
481f21b441aSLinJiawei    !s1_vaddr_dec_cross_max_lim,
482f21b441aSLinJiawei    !s1_vaddr_inc_cross_max_lim
483967327d8SLinJiawei  ) && s1_in_active_page && io.agt_en
484f21b441aSLinJiawei  val s1_pf_gen_paddr_valid = Mux(s1_pf_gen_decr_mode, !s1_vaddr_dec_cross_page, !s1_vaddr_inc_cross_page)
485289fc2f9SLinJiawei  val s1_pf_gen_region_addr = Mux(s1_pf_gen_paddr_valid,
486f21b441aSLinJiawei    Cat(s1_region_paddr(REGION_ADDR_BITS - 1, REGION_ADDR_PAGE_BIT), s1_pf_gen_vaddr(REGION_ADDR_PAGE_BIT - 1, 0)),
487f21b441aSLinJiawei    s1_pf_gen_vaddr
488289fc2f9SLinJiawei  )
489f21b441aSLinJiawei  val s1_pf_gen_region_tag = region_hash_tag(s1_pf_gen_vaddr)
490289fc2f9SLinJiawei  val s1_pf_gen_incr_region_bits = VecInit((0 until REGION_BLKS).map(i => {
49185de5caeSLinJiawei    if(i == 0) true.B else !s1_pf_gen_offset_mask(i - 1, 0).orR
492289fc2f9SLinJiawei  })).asUInt
493289fc2f9SLinJiawei  val s1_pf_gen_decr_region_bits = VecInit((0 until REGION_BLKS).map(i => {
494289fc2f9SLinJiawei    if(i == REGION_BLKS - 1) true.B
49585de5caeSLinJiawei    else !s1_pf_gen_offset_mask(REGION_BLKS - 1, i + 1).orR
496289fc2f9SLinJiawei  })).asUInt
497289fc2f9SLinJiawei  val s1_pf_gen_region_bits = Mux(s1_pf_gen_decr_mode,
498289fc2f9SLinJiawei    s1_pf_gen_decr_region_bits,
499289fc2f9SLinJiawei    s1_pf_gen_incr_region_bits
500289fc2f9SLinJiawei  )
50185de5caeSLinJiawei  val s1_pht_lookup_valid = Wire(Bool())
50285de5caeSLinJiawei  val s1_pht_lookup = Wire(new PhtLookup())
503289fc2f9SLinJiawei
50485de5caeSLinJiawei  s1_pht_lookup_valid := !s1_pf_gen_valid && prev_lookup_valid
50585de5caeSLinJiawei  s1_pht_lookup.pht_index := s1_agt_entry.pht_index
50685de5caeSLinJiawei  s1_pht_lookup.pht_tag := s1_agt_entry.pht_tag
507f21b441aSLinJiawei  s1_pht_lookup.region_vaddr := s1_region_vaddr
508f21b441aSLinJiawei  s1_pht_lookup.region_paddr := s1_region_paddr
50985de5caeSLinJiawei  s1_pht_lookup.region_offset := s1_region_offset
510a982a3c9Shappy-lx  s1_pht_lookup.region_bit_single := s1_bit_region_signal
511289fc2f9SLinJiawei
512967327d8SLinJiawei  io.s1_sel_stride := prev_lookup_valid && (s1_alloc && s1_cross_region_match || s1_update) && !s1_in_active_page
513967327d8SLinJiawei
514289fc2f9SLinJiawei  // stage2: gen pf reg / evict entry to pht
515a982a3c9Shappy-lx  // if no evict, update this time region bits to pht
5164ccb2e8bSYanqin Li  val s2_do_dcache_evict = GatedValidRegNext(s1_do_dcache_evict, false.B)
517a982a3c9Shappy-lx  val s1_send_update_entry = Mux((s1_alloc || s1_do_dcache_evict) && s1_evict_valid, s1_evict_entry, s1_update_entry)
518a982a3c9Shappy-lx  val s2_evict_entry = RegEnable(s1_send_update_entry, s1_alloc || s1_do_dcache_evict || s1_update)
519a982a3c9Shappy-lx  val s2_evict_valid = GatedValidRegNext(((s1_alloc || s1_do_dcache_evict) && s1_evict_valid) || s1_update, false.B)
520a982a3c9Shappy-lx  val s2_update = RegNext(s1_update, false.B)
521a982a3c9Shappy-lx  val s2_real_update = RegNext(((s1_alloc || s1_do_dcache_evict) && s1_evict_valid), false.B)
522289fc2f9SLinJiawei  val s2_paddr_valid = RegEnable(s1_pf_gen_paddr_valid, s1_pf_gen_valid)
523289fc2f9SLinJiawei  val s2_pf_gen_region_tag = RegEnable(s1_pf_gen_region_tag, s1_pf_gen_valid)
524289fc2f9SLinJiawei  val s2_pf_gen_decr_mode = RegEnable(s1_pf_gen_decr_mode, s1_pf_gen_valid)
525289fc2f9SLinJiawei  val s2_pf_gen_region_paddr = RegEnable(s1_pf_gen_region_addr, s1_pf_gen_valid)
526967327d8SLinJiawei  val s2_pf_gen_alias_bits = RegEnable(get_alias_bits(s1_pf_gen_vaddr), s1_pf_gen_valid)
527289fc2f9SLinJiawei  val s2_pf_gen_region_bits = RegEnable(s1_pf_gen_region_bits, s1_pf_gen_valid)
5284ccb2e8bSYanqin Li  val s2_pf_gen_valid = GatedValidRegNext(s1_pf_gen_valid, false.B)
5294ccb2e8bSYanqin Li  val s2_pht_lookup_valid = GatedValidRegNext(s1_pht_lookup_valid, false.B) && !io.s2_stride_hit
53085de5caeSLinJiawei  val s2_pht_lookup = RegEnable(s1_pht_lookup, s1_pht_lookup_valid)
531289fc2f9SLinJiawei
532a982a3c9Shappy-lx  io.s2_evict.valid := Mux(s2_real_update, s2_evict_valid && (s2_evict_entry.access_cnt > 1.U), s2_evict_valid)
533289fc2f9SLinJiawei  io.s2_evict.bits := s2_evict_entry
534a982a3c9Shappy-lx  io.s2_evict.bits.single_update := s2_update && (!s2_real_update)
535289fc2f9SLinJiawei
536289fc2f9SLinJiawei  io.s2_pf_gen_req.bits.region_tag := s2_pf_gen_region_tag
537289fc2f9SLinJiawei  io.s2_pf_gen_req.bits.region_addr := s2_pf_gen_region_paddr
538967327d8SLinJiawei  io.s2_pf_gen_req.bits.alias_bits := s2_pf_gen_alias_bits
539289fc2f9SLinJiawei  io.s2_pf_gen_req.bits.region_bits := s2_pf_gen_region_bits
540289fc2f9SLinJiawei  io.s2_pf_gen_req.bits.paddr_valid := s2_paddr_valid
541289fc2f9SLinJiawei  io.s2_pf_gen_req.bits.decr_mode := s2_pf_gen_decr_mode
542d7fb6da3Shappy-lx  io.s2_pf_gen_req.valid := false.B
5432db9ec44SLinJiawei  io.s2_pf_gen_req.bits.debug_source_type := HW_PREFETCH_AGT.U
544289fc2f9SLinJiawei
54585de5caeSLinJiawei  io.s2_pht_lookup.valid := s2_pht_lookup_valid
54685de5caeSLinJiawei  io.s2_pht_lookup.bits := s2_pht_lookup
54785de5caeSLinJiawei
548967327d8SLinJiawei  XSPerfAccumulate("sms_agt_in", io.s0_lookup.valid)
549289fc2f9SLinJiawei  XSPerfAccumulate("sms_agt_alloc", s1_alloc) // cross region match or filter evict
550289fc2f9SLinJiawei  XSPerfAccumulate("sms_agt_update", s1_update) // entry hit
551289fc2f9SLinJiawei  XSPerfAccumulate("sms_agt_pf_gen", io.s2_pf_gen_req.valid)
552289fc2f9SLinJiawei  XSPerfAccumulate("sms_agt_pf_gen_paddr_valid",
553289fc2f9SLinJiawei    io.s2_pf_gen_req.valid && io.s2_pf_gen_req.bits.paddr_valid
554289fc2f9SLinJiawei  )
555289fc2f9SLinJiawei  XSPerfAccumulate("sms_agt_pf_gen_decr_mode",
556289fc2f9SLinJiawei    io.s2_pf_gen_req.valid && io.s2_pf_gen_req.bits.decr_mode
557289fc2f9SLinJiawei  )
558289fc2f9SLinJiawei  for(i <- 0 until smsParams.active_gen_table_size){
559289fc2f9SLinJiawei    XSPerfAccumulate(s"sms_agt_access_entry_$i",
560289fc2f9SLinJiawei      s1_alloc && s1_replace_mask(i) || s1_update && s1_update_mask(i)
561289fc2f9SLinJiawei    )
562289fc2f9SLinJiawei  }
563d7fb6da3Shappy-lx  XSPerfAccumulate("sms_agt_evict", s2_evict_valid)
5646005a7e2Shappy-lx  XSPerfAccumulate("sms_agt_evict_by_plru", s2_evict_valid && !s2_do_dcache_evict)
5656005a7e2Shappy-lx  XSPerfAccumulate("sms_agt_evict_by_dcache", s2_evict_valid && s2_do_dcache_evict)
566d7fb6da3Shappy-lx  XSPerfAccumulate("sms_agt_evict_one_hot_pattern", s2_evict_valid && (s2_evict_entry.access_cnt === 1.U))
567289fc2f9SLinJiawei}
568289fc2f9SLinJiawei
/**
  * Lookup request sent from the active generation table to the pattern history table.
  *
  * The fields are filled from the AGT stage-1 state and consumed by [[PatternHistoryTable]],
  * which uses `pht_index`/`pht_tag` to address and match a PHT entry and the region fields to
  * build prefetch requests.
  */
class PhtLookup()(implicit p: Parameters) extends XSBundle with HasSMSModuleHelper {
  // set index used as the PHT SRAM read address
  val pht_index = UInt(PHT_INDEX_BITS.W)
  // tag compared against the tags stored in the selected PHT set
  val pht_tag = UInt(PHT_TAG_BITS.W)
  // physical region address of the triggering access
  val region_paddr = UInt(REGION_ADDR_BITS.W)
  // virtual region address of the triggering access
  val region_vaddr = UInt(REGION_ADDR_BITS.W)
  // block offset of the triggering access inside its region
  val region_offset = UInt(REGION_OFFSET.W)
  // per-block bit vector for the region (one bit per block)
  // NOTE(review): presumably one-hot for the triggering block -- confirm against the AGT producer
  val region_bit_single = UInt(REGION_BLKS.W)
}
577289fc2f9SLinJiawei
/**
  * One entry of the pattern history table SRAM.
  *
  * `hist` holds `2 * (REGION_BLKS - 1)` counters of `PHT_HIST_BITS` bits each. In
  * [[PatternHistoryTable]] the counters are updated with saturating arithmetic and only their
  * most significant bit is used when generating prefetch requests.
  */
class PhtEntry()(implicit p: Parameters) extends XSBundle with HasSMSModuleHelper {
  // history counters, indexed relative to the trigger offset
  val hist = Vec(2 * (REGION_BLKS - 1), UInt(PHT_HIST_BITS.W))
  // tag matched against the lookup/evict tag
  val tag = UInt(PHT_TAG_BITS.W)
  // direction flag copied from the evicted AGT entry
  val decr_mode = Bool()
}
583289fc2f9SLinJiawei
/**
  * Pattern history table (PHT) of the SMS prefetcher.
  *
  * Entries sent by the active generation table arrive on `io.agt_update` and are merged into a
  * set-associative SRAM of [[PhtEntry]]. Lookups arrive on `io.s2_agt_lookup`; on a tag hit they
  * produce up to three prefetch requests (current region, region + 1, region - 1) that are
  * arbitrated onto `io.pf_gen_req`.
  *
  * Pipeline overview:
  *   - s0: select one operation (evict has priority over lookup) and form the SRAM read address
  *   - s1: issue the SRAM read; read the valid bits and the replacement way of the set
  *   - s2: compare tags and compute the updated history counters
  *   - s3: write the SRAM (evict operations) / derive prefetch bit vectors (lookup hits)
  *   - s4: registered prefetch requests feeding the output arbiter
  */
class PatternHistoryTable()(implicit p: Parameters) extends XSModule with HasSMSModuleHelper {
  val io = IO(new Bundle() {
    // receive agt evicted entry
    val agt_update = Flipped(ValidIO(new AGTEntry()))
    // at stage2, if we know agt missed, lookup pht
    val s2_agt_lookup = Flipped(ValidIO(new PhtLookup()))
    // pht-generated prefetch req
    val pf_gen_req = ValidIO(new PfGenReq())
  })

  val pht_ram = Module(new SRAMTemplate[PhtEntry](new PhtEntry,
    set = smsParams.pht_size / smsParams.pht_ways,
    way = smsParams.pht_ways,
    singlePort = true,
    withClockGate = true,
    hasMbist = hasMbist,
    hasSramCtl = hasSramCtl
  ))
  def PHT_SETS = smsParams.pht_size / smsParams.pht_ways
  // clockgated on pht_valids: a set's valid bits are only rewritten when its enable is raised
  val pht_valids_reg = RegInit(VecInit(Seq.fill(smsParams.pht_ways){
    VecInit(Seq.fill(PHT_SETS){false.B})
  }))
  val pht_valids_enable = WireInit(VecInit(Seq.fill(PHT_SETS) {false.B}))
  val pht_valids_next = WireInit(pht_valids_reg)
  for(j <- 0 until PHT_SETS){
    when(pht_valids_enable(j)){
      (0 until smsParams.pht_ways).foreach(i => pht_valids_reg(i)(j) := pht_valids_next(i)(j))
    }
  }

  // one PLRU state per set
  val replacement = Seq.fill(PHT_SETS) { ReplacementPolicy.fromString("plru", smsParams.pht_ways) }

  val lookup_queue = Module(new OverrideableQueue(new PhtLookup, smsParams.pht_lookup_queue_size))
  lookup_queue.io.in := io.s2_agt_lookup
  val lookup = lookup_queue.io.out

  val evict_queue = Module(new OverrideableQueue(new AGTEntry, smsParams.pht_lookup_queue_size))
  evict_queue.io.in := io.agt_update
  val evict = evict_queue.io.out

  XSPerfAccumulate("sms_pht_lookup_in", lookup_queue.io.in.fire)
  XSPerfAccumulate("sms_pht_lookup_out", lookup_queue.io.out.fire)
  XSPerfAccumulate("sms_pht_evict_in", evict_queue.io.in.fire)
  XSPerfAccumulate("sms_pht_evict_out", evict_queue.io.out.fire)

  val s3_ram_en = Wire(Bool())
  val s1_valid = Wire(Bool())
  // if s1.raddr == s2.waddr or s3 is using ram port, block s1
  val s1_wait = Wire(Bool())
  // pipe s0: select an op from [lookup, update], generate ram read addr
  val s0_valid = lookup.valid || evict.valid

  // evict has priority: a lookup is only dequeued when no evict is pending
  evict.ready := !s1_valid || !s1_wait
  lookup.ready := evict.ready && !evict.valid

  val s0_ram_raddr = Mux(evict.valid,
    evict.bits.pht_index,
    lookup.bits.pht_index
  )
  val s0_tag = Mux(evict.valid, evict.bits.pht_tag, lookup.bits.pht_tag)
  val s0_region_offset = Mux(evict.valid, evict.bits.region_offset, lookup.bits.region_offset)
  // region addresses are only taken from the lookup side
  val s0_region_paddr = lookup.bits.region_paddr
  val s0_region_vaddr = lookup.bits.region_vaddr
  // the remaining fields are only taken from the evict side
  val s0_region_bits = evict.bits.region_bits
  val s0_decr_mode = evict.bits.decr_mode
  val s0_evict = evict.valid
  val s0_access_cnt_signal = evict.bits.access_cnt
  val s0_single_update = evict.bits.single_update
  val s0_has_been_single_update = evict.bits.has_been_signal_updated
  val s0_region_bit_single = evict.bits.region_bit_single

  // pipe s1: send addr to ram
  val s1_valid_r = RegInit(false.B)
  // hold s1 while it is blocked, otherwise accept whatever s0 offers
  s1_valid_r := Mux(s1_valid && s1_wait, true.B, s0_valid)
  s1_valid := s1_valid_r
  val s1_reg_en = s0_valid && (!s1_wait || !s1_valid)
  val s1_ram_raddr = RegEnable(s0_ram_raddr, s1_reg_en)
  val s1_tag = RegEnable(s0_tag, s1_reg_en)
  val s1_access_cnt_signal = RegEnable(s0_access_cnt_signal, s1_reg_en)
  val s1_region_bits = RegEnable(s0_region_bits, s1_reg_en)
  val s1_decr_mode = RegEnable(s0_decr_mode, s1_reg_en)
  val s1_region_paddr = RegEnable(s0_region_paddr, s1_reg_en)
  val s1_region_vaddr = RegEnable(s0_region_vaddr, s1_reg_en)
  val s1_region_offset = RegEnable(s0_region_offset, s1_reg_en)
  val s1_single_update = RegEnable(s0_single_update, s1_reg_en)
  val s1_has_been_single_update = RegEnable(s0_has_been_single_update, s1_reg_en)
  val s1_region_bit_single = RegEnable(s0_region_bit_single, s1_reg_en)
  // per-way valid bit of the set being read
  val s1_pht_valids = pht_valids_reg.map(way => Mux1H(
    (0 until PHT_SETS).map(i => i.U === s1_ram_raddr),
    way
  ))
  val s1_evict = RegEnable(s0_evict, s1_reg_en)
  val s1_replace_way = Mux1H(
    (0 until PHT_SETS).map(i => i.U === s1_ram_raddr),
    replacement.map(_.way)
  )
  // window of (REGION_BLKS - 1) ones shifted by the trigger offset: marks the history slots
  // that fall inside the current region (the unmarked slots belong to the adjacent regions,
  // see the s3 *_region_valid signals below)
  val s1_hist_update_mask = Cat(
    Fill(REGION_BLKS - 1, true.B), 0.U((REGION_BLKS - 1).W)
  ) >> s1_region_offset
  // region bits re-aligned so that they are indexed relative to the trigger offset
  val s1_hist_bits = Cat(
    s1_region_bits.head(REGION_BLKS - 1) >> s1_region_offset,
    (Cat(
      s1_region_bits.tail(1), 0.U((REGION_BLKS - 1).W)
    ) >> s1_region_offset)(REGION_BLKS - 2, 0)
  )
  // same re-alignment for the single-update bit vector
  val s1_hist_single_bit = Cat(
    s1_region_bit_single.head(REGION_BLKS - 1) >> s1_region_offset,
    (Cat(
      s1_region_bit_single.tail(1), 0.U((REGION_BLKS - 1).W)
    ) >> s1_region_offset)(REGION_BLKS - 2, 0)
  )

  // pipe s2: generate ram write addr/data
  val s2_valid = GatedValidRegNext(s1_valid && !s1_wait, false.B)
  val s2_reg_en = s1_valid && !s1_wait
  val s2_hist_update_mask = RegEnable(s1_hist_update_mask, s2_reg_en)
  val s2_single_update = RegEnable(s1_single_update, s2_reg_en)
  val s2_has_been_single_update = RegEnable(s1_has_been_single_update, s2_reg_en)
  val s2_hist_bits = RegEnable(s1_hist_bits, s2_reg_en)
  val s2_hist_bit_single = RegEnable(s1_hist_single_bit, s2_reg_en)
  val s2_tag = RegEnable(s1_tag, s2_reg_en)
  val s2_region_bits = RegEnable(s1_region_bits, s2_reg_en)
  val s2_decr_mode = RegEnable(s1_decr_mode, s2_reg_en)
  val s2_region_paddr = RegEnable(s1_region_paddr, s2_reg_en)
  val s2_region_vaddr = RegEnable(s1_region_vaddr, s2_reg_en)
  val s2_region_offset = RegEnable(s1_region_offset, s2_reg_en)
  val s2_region_offset_mask = region_offset_to_bits(s2_region_offset)
  val s2_evict = RegEnable(s1_evict, s2_reg_en)
  val s2_pht_valids = s1_pht_valids.map(v => RegEnable(v, s2_reg_en))
  val s2_replace_way = RegEnable(s1_replace_way, s2_reg_en)
  val s2_ram_waddr = RegEnable(s1_ram_raddr, s2_reg_en)
  val s2_ram_rdata = pht_ram.io.r.resp.data
  val s2_ram_rtags = s2_ram_rdata.map(_.tag)
  val s2_tag_match_vec = s2_ram_rtags.map(t => t === s2_tag)
  val s2_access_cnt_signal = RegEnable(s1_access_cnt_signal, s2_reg_en)
  // a way hits only when it is valid and its tag matches
  val s2_hit_vec = s2_tag_match_vec.zip(s2_pht_valids).map({
    case (tag_match, v) => v && tag_match
  })

  // distinguish single update and evict update
  val s2_hist_update = s2_ram_rdata.map(way => VecInit(way.hist.zipWithIndex.map({
    case (h, i) =>
      val do_update = s2_hist_update_mask(i)
      // saturating decrement / increment of one history counter
      val hist_dec = Mux(h === 0.U, 0.U, h - 1.U)
      val hist_inc = Mux(h.andR, h, h + 1.U)
      // evict update: accessed blocks count up and untouched blocks count down; if the entry
      // was already single-updated, accessed blocks keep their value instead of counting up
      val evict_updated = Mux(s2_has_been_single_update,
        Mux(s2_hist_bits(i), h, hist_dec),
        Mux(s2_hist_bits(i), hist_inc, hist_dec)
      )
      // single update: only the marked block counts up (a zero counter jumps by 2),
      // everything else is left untouched
      val single_updated = Mux(s2_hist_bit_single(i),
        Mux(h.andR, h, Mux(h === 0.U, h + 2.U, h + 1.U)),
        h
      )
      val hist_updated = Mux(!s2_single_update, evict_updated, single_updated)
      Mux(do_update, hist_updated, h)
  })))

  // prefetch pattern of the hit way: most significant bit of every history counter
  val s2_hist_pf_gen = Mux1H(s2_hit_vec, s2_ram_rdata.map(way => VecInit(way.hist.map(_.head(1))).asUInt))
  // initial counter values used when a new entry is allocated
  val s2_new_hist = VecInit(s2_hist_bits.asBools.map(b => Cat(0.U((PHT_HIST_BITS - 1).W), b)))
  val s2_new_hist_single = VecInit(s2_hist_bit_single.asBools.map(b => Cat(0.U((PHT_HIST_BITS - 1).W), b)))
  val s2_new_hist_real = Mux(s2_single_update, s2_new_hist_single, s2_new_hist)
  val s2_pht_hit = Cat(s2_hit_vec).orR
  // non-single operations always proceed to s3; a single update proceeds only when it hits
  // or when the access count is over 4
  val signal_update_write = !s2_single_update || s2_pht_hit || (s2_access_cnt_signal > 4.U)
  val s2_hist = Mux(s2_pht_hit, Mux1H(s2_hit_vec, s2_hist_update), s2_new_hist_real)
  val s2_repl_way_mask = UIntToOH(s2_replace_way)
  val s2_incr_region_vaddr = s2_region_vaddr + 1.U
  val s2_decr_region_vaddr = s2_region_vaddr - 1.U

  // pipe s3: send addr/data to ram, gen pf_req
  val s3_valid = GatedValidRegNext(s2_valid && signal_update_write, false.B)
  val s3_evict = RegEnable(s2_evict, s2_valid)
  val s3_hist = RegEnable(s2_hist, s2_valid)
  val s3_hist_pf_gen = RegEnable(s2_hist_pf_gen, s2_valid)

  val s3_hist_update_mask = RegEnable(s2_hist_update_mask.asUInt, s2_valid)

  val s3_region_offset = RegEnable(s2_region_offset, s2_valid)
  val s3_region_offset_mask = RegEnable(s2_region_offset_mask, s2_valid)
  val s3_decr_mode = RegEnable(s2_decr_mode, s2_valid)
  val s3_region_paddr = RegEnable(s2_region_paddr, s2_valid)
  val s3_region_vaddr = RegEnable(s2_region_vaddr, s2_valid)
  val s3_pht_tag = RegEnable(s2_tag, s2_valid)
  val s3_hit_vec = s2_hit_vec.map(h => RegEnable(h, s2_valid))
  val s3_hit = Cat(s3_hit_vec).orR
  val s3_hit_way = OHToUInt(s3_hit_vec)
  val s3_repl_way = RegEnable(s2_replace_way, s2_valid)
  val s3_repl_way_mask = RegEnable(s2_repl_way_mask, s2_valid)
  val s3_repl_update_mask = RegEnable(VecInit((0 until PHT_SETS).map(i => i.U === s2_ram_waddr)), s2_valid)
  val s3_ram_waddr = RegEnable(s2_ram_waddr, s2_valid)
  val s3_incr_region_vaddr = RegEnable(s2_incr_region_vaddr, s2_valid)
  val s3_decr_region_vaddr = RegEnable(s2_decr_region_vaddr, s2_valid)
  // only evict operations write the SRAM
  s3_ram_en := s3_valid && s3_evict
  val s3_ram_wdata = Wire(new PhtEntry())
  s3_ram_wdata.hist := s3_hist
  s3_ram_wdata.tag := s3_pht_tag
  s3_ram_wdata.decr_mode := s3_decr_mode

  // block s1 when s2 is about to write the set s1 is reading, or when s3 occupies the ram port
  s1_wait := (s2_valid && s2_evict && s2_ram_waddr === s1_ram_raddr) || s3_ram_en

  // mark the replaced way valid when an evict op allocates a new entry
  for((valids, way_idx) <- pht_valids_next.zipWithIndex){
    val update_way = s3_repl_way_mask(way_idx)
    for((v, set_idx) <- valids.zipWithIndex){
      val update_set = s3_repl_update_mask(set_idx)
      when(s3_valid && s3_evict && !s3_hit && update_set && update_way){
        pht_valids_enable(set_idx) := true.B
        v := true.B
      }
    }
  }
  // touch the replacement state of the accessed set: the hit way on a hit,
  // the replaced way when an evict op allocates
  for((r, i) <- replacement.zipWithIndex){
    when(s3_valid && s3_repl_update_mask(i)){
      when(s3_hit){
        r.access(s3_hit_way)
      }.elsewhen(s3_evict){
        r.access(s3_repl_way)
      }
    }
  }

  val s3_way_mask = Mux(s3_hit,
    VecInit(s3_hit_vec).asUInt,
    s3_repl_way_mask
  ).asUInt

  pht_ram.io.r(
    s1_valid, s1_ram_raddr
  )
  pht_ram.io.w(
    s3_ram_en, s3_ram_wdata, s3_ram_waddr, s3_way_mask
  )
  when(s3_valid && s3_hit){
    // at most one way may hit; counting the hits keeps the check correct for any number of
    // ways (an all-ways-hit test is only a multi-hit test when there are exactly two ways)
    assert(PopCount(s3_hit_vec) <= 1.U, "sms_pht: multi-hit!")
  }

  // generate pf req if hit
  val s3_hist_hi = s3_hist_pf_gen.head(REGION_BLKS - 1)
  val s3_hist_lo = s3_hist_pf_gen.tail(REGION_BLKS - 1)
  // undo the trigger-offset alignment so that bits line up with absolute block positions
  val s3_hist_hi_shifted = (Cat(0.U((REGION_BLKS - 1).W), s3_hist_hi) << s3_region_offset)(2 * (REGION_BLKS - 1) - 1, 0)
  val s3_hist_lo_shifted = (Cat(0.U((REGION_BLKS - 1).W), s3_hist_lo) << s3_region_offset)(2 * (REGION_BLKS - 1) - 1, 0)
  val s3_cur_region_bits = Cat(s3_hist_hi_shifted.tail(REGION_BLKS - 1), 0.U(1.W)) |
    Cat(0.U(1.W), s3_hist_lo_shifted.head(REGION_BLKS - 1))
  val s3_incr_region_bits = Cat(0.U(1.W), s3_hist_hi_shifted.head(REGION_BLKS - 1))
  val s3_decr_region_bits = Cat(s3_hist_lo_shifted.tail(REGION_BLKS - 1), 0.U(1.W))
  // only lookups that hit generate prefetches
  val s3_pf_gen_valid = s3_valid && s3_hit && !s3_evict
  val s3_cur_region_valid =  s3_pf_gen_valid && (s3_hist_pf_gen & s3_hist_update_mask).orR
  val s3_incr_region_valid = s3_pf_gen_valid && (s3_hist_hi & (~s3_hist_update_mask.head(REGION_BLKS - 1)).asUInt).orR
  val s3_decr_region_valid = s3_pf_gen_valid && (s3_hist_lo & (~s3_hist_update_mask.tail(REGION_BLKS - 1)).asUInt).orR
  val s3_incr_alias_bits = get_alias_bits(s3_incr_region_vaddr)
  val s3_decr_alias_bits = get_alias_bits(s3_decr_region_vaddr)
  // adjacent regions reuse the physical page bits of the current region and take their
  // in-page bits from the adjusted virtual address
  val s3_incr_region_paddr = Cat(
    s3_region_paddr(REGION_ADDR_BITS - 1, REGION_ADDR_PAGE_BIT),
    s3_incr_region_vaddr(REGION_ADDR_PAGE_BIT - 1, 0)
  )
  val s3_decr_region_paddr = Cat(
    s3_region_paddr(REGION_ADDR_BITS - 1, REGION_ADDR_PAGE_BIT),
    s3_decr_region_vaddr(REGION_ADDR_PAGE_BIT - 1, 0)
  )
  // a change in the page bit means the adjacent region lies in another page
  val s3_incr_crosspage = s3_incr_region_vaddr(REGION_ADDR_PAGE_BIT) =/= s3_region_vaddr(REGION_ADDR_PAGE_BIT)
  val s3_decr_crosspage = s3_decr_region_vaddr(REGION_ADDR_PAGE_BIT) =/= s3_region_vaddr(REGION_ADDR_PAGE_BIT)
  val s3_cur_region_tag = region_hash_tag(s3_region_vaddr)
  val s3_incr_region_tag = region_hash_tag(s3_incr_region_vaddr)
  val s3_decr_region_tag = region_hash_tag(s3_decr_region_vaddr)

  val pf_gen_req_arb = Module(new Arbiter(new PfGenReq, 3))
  val s4_pf_gen_cur_region_valid = RegInit(false.B)
  val s4_pf_gen_cur_region = Reg(new PfGenReq)
  val s4_pf_gen_incr_region_valid = RegInit(false.B)
  val s4_pf_gen_incr_region = Reg(new PfGenReq)
  val s4_pf_gen_decr_region_valid = RegInit(false.B)
  val s4_pf_gen_decr_region = Reg(new PfGenReq)

  // NOTE(review): no hold term here, presumably because arbiter input 0 has the highest
  // priority and the arbiter output is always ready -- confirm
  s4_pf_gen_cur_region_valid := s3_cur_region_valid
  when(s3_cur_region_valid){
    s4_pf_gen_cur_region.region_addr := s3_region_paddr
    s4_pf_gen_cur_region.alias_bits := get_alias_bits(s3_region_vaddr)
    s4_pf_gen_cur_region.region_tag := s3_cur_region_tag
    s4_pf_gen_cur_region.region_bits := s3_cur_region_bits
    s4_pf_gen_cur_region.paddr_valid := true.B
    s4_pf_gen_cur_region.decr_mode := false.B
  }
  // keep the request pending until the arbiter accepts it, unless a newer one replaces it
  s4_pf_gen_incr_region_valid := s3_incr_region_valid ||
    (!pf_gen_req_arb.io.in(1).ready && s4_pf_gen_incr_region_valid)
  when(s3_incr_region_valid){
    // when crossing a page the physical address is unknown: pass the virtual address instead
    s4_pf_gen_incr_region.region_addr := Mux(s3_incr_crosspage, s3_incr_region_vaddr, s3_incr_region_paddr)
    s4_pf_gen_incr_region.alias_bits := s3_incr_alias_bits
    s4_pf_gen_incr_region.region_tag := s3_incr_region_tag
    s4_pf_gen_incr_region.region_bits := s3_incr_region_bits
    s4_pf_gen_incr_region.paddr_valid := !s3_incr_crosspage
    s4_pf_gen_incr_region.decr_mode := false.B
  }
  s4_pf_gen_decr_region_valid := s3_decr_region_valid ||
    (!pf_gen_req_arb.io.in(2).ready && s4_pf_gen_decr_region_valid)
  when(s3_decr_region_valid){
    s4_pf_gen_decr_region.region_addr := Mux(s3_decr_crosspage, s3_decr_region_vaddr, s3_decr_region_paddr)
    s4_pf_gen_decr_region.alias_bits := s3_decr_alias_bits
    s4_pf_gen_decr_region.region_tag := s3_decr_region_tag
    s4_pf_gen_decr_region.region_bits := s3_decr_region_bits
    s4_pf_gen_decr_region.paddr_valid := !s3_decr_crosspage
    s4_pf_gen_decr_region.decr_mode := true.B
  }

  pf_gen_req_arb.io.in.head.valid := s4_pf_gen_cur_region_valid
  pf_gen_req_arb.io.in.head.bits := s4_pf_gen_cur_region
  pf_gen_req_arb.io.in.head.bits.debug_source_type := HW_PREFETCH_PHT_CUR.U
  pf_gen_req_arb.io.in(1).valid := s4_pf_gen_incr_region_valid
  pf_gen_req_arb.io.in(1).bits := s4_pf_gen_incr_region
  pf_gen_req_arb.io.in(1).bits.debug_source_type := HW_PREFETCH_PHT_INC.U
  pf_gen_req_arb.io.in(2).valid := s4_pf_gen_decr_region_valid
  pf_gen_req_arb.io.in(2).bits := s4_pf_gen_decr_region
  pf_gen_req_arb.io.in(2).bits.debug_source_type := HW_PREFETCH_PHT_DEC.U
  pf_gen_req_arb.io.out.ready := true.B

  io.pf_gen_req.valid := pf_gen_req_arb.io.out.valid
  io.pf_gen_req.bits := pf_gen_req_arb.io.out.bits

  XSPerfAccumulate("sms_pht_update", io.agt_update.valid)
  XSPerfAccumulate("sms_pht_update_hit", s2_valid && s2_evict && s2_pht_hit)
  XSPerfAccumulate("sms_pht_lookup", io.s2_agt_lookup.valid)
  XSPerfAccumulate("sms_pht_lookup_hit", s2_valid && !s2_evict && s2_pht_hit)
  for(i <- 0 until smsParams.pht_ways){
    XSPerfAccumulate(s"sms_pht_write_way_$i", pht_ram.io.w.req.fire && pht_ram.io.w.req.bits.waymask.get(i))
  }
  for(i <- 0 until PHT_SETS){
    XSPerfAccumulate(s"sms_pht_write_set_$i", pht_ram.io.w.req.fire && pht_ram.io.w.req.bits.setIdx === i.U)
  }
  XSPerfAccumulate(s"sms_pht_pf_gen", io.pf_gen_req.valid)
}
910289fc2f9SLinJiawei
/**
  * One entry of the prefetch filter: a region with the set of blocks that should be
  * prefetched and the set of blocks that must no longer be requested.
  */
class PrefetchFilterEntry()(implicit p: Parameters) extends XSBundle with HasSMSModuleHelper {
  // tag used to match incoming generation requests against existing entries
  val region_tag = UInt(REGION_TAG_WIDTH.W)
  // region address; PrefetchFilter sends it to the TLB as a virtual address while
  // `paddr_valid` is low and uses it to build the prefetch address once it is high
  val region_addr = UInt(REGION_ADDR_BITS.W)
  // blocks of the region requested for prefetch (one bit per block)
  val region_bits = UInt(REGION_BLKS.W)
  // blocks masked out of `region_bits`; pending requests are `region_bits & ~filter_bits`
  val filter_bits = UInt(REGION_BLKS.W)
  // alias bits forwarded together with the chosen prefetch address
  val alias_bits = UInt(2.W)
  // whether `region_addr` already holds a physical address
  val paddr_valid = Bool()
  // when set, pending blocks are issued starting from the highest offset instead of the lowest
  val decr_mode = Bool()
  // source of the prefetch, forwarded for debugging
  val debug_source_type = UInt(log2Up(nSourceType).W)
}
921289fc2f9SLinJiawei
922289fc2f9SLinJiaweiclass PrefetchFilter()(implicit p: Parameters) extends XSModule with HasSMSModuleHelper {
923289fc2f9SLinJiawei  val io = IO(new Bundle() {
924289fc2f9SLinJiawei    val gen_req = Flipped(ValidIO(new PfGenReq()))
92585de5caeSLinJiawei    val tlb_req = new TlbRequestIO(2)
92625a80bceSYanqin Li    val pmp_resp = Flipped(new PMPRespBundle())
927289fc2f9SLinJiawei    val l2_pf_addr = ValidIO(UInt(PAddrBits.W))
928967327d8SLinJiawei    val pf_alias_bits = Output(UInt(2.W))
9292db9ec44SLinJiawei    val debug_source_type = Output(UInt(log2Up(nSourceType).W))
930289fc2f9SLinJiawei  })
931289fc2f9SLinJiawei  val entries = Seq.fill(smsParams.pf_filter_size){ Reg(new PrefetchFilterEntry()) }
932289fc2f9SLinJiawei  val valids = Seq.fill(smsParams.pf_filter_size){ RegInit(false.B) }
933289fc2f9SLinJiawei  val replacement = ReplacementPolicy.fromString("plru", smsParams.pf_filter_size)
934289fc2f9SLinJiawei
9354ccb2e8bSYanqin Li  val prev_valid = GatedValidRegNext(io.gen_req.valid, false.B)
93685de5caeSLinJiawei  val prev_gen_req = RegEnable(io.gen_req.bits, io.gen_req.valid)
93785de5caeSLinJiawei
938967327d8SLinJiawei  val tlb_req_arb = Module(new RRArbiterInit(new TlbReq, smsParams.pf_filter_size))
939967327d8SLinJiawei  val pf_req_arb = Module(new RRArbiterInit(UInt(PAddrBits.W), smsParams.pf_filter_size))
940289fc2f9SLinJiawei
941289fc2f9SLinJiawei  io.l2_pf_addr.valid := pf_req_arb.io.out.valid
942289fc2f9SLinJiawei  io.l2_pf_addr.bits := pf_req_arb.io.out.bits
943967327d8SLinJiawei  io.pf_alias_bits := Mux1H(entries.zipWithIndex.map({
944967327d8SLinJiawei    case (entry, i) => (i.U === pf_req_arb.io.chosen) -> entry.alias_bits
945967327d8SLinJiawei  }))
946289fc2f9SLinJiawei  pf_req_arb.io.out.ready := true.B
947289fc2f9SLinJiawei
9482db9ec44SLinJiawei  io.debug_source_type := VecInit(entries.map(_.debug_source_type))(pf_req_arb.io.chosen)
9492db9ec44SLinJiawei
950289fc2f9SLinJiawei  val s1_valid = Wire(Bool())
951f21b441aSLinJiawei  val s1_hit = Wire(Bool())
952289fc2f9SLinJiawei  val s1_replace_vec = Wire(UInt(smsParams.pf_filter_size.W))
953289fc2f9SLinJiawei  val s1_tlb_fire_vec = Wire(UInt(smsParams.pf_filter_size.W))
954375a3f86SHaoyuan Feng  val s2_tlb_fire_vec = Wire(UInt(smsParams.pf_filter_size.W))
95525a80bceSYanqin Li  val s3_tlb_fire_vec = Wire(UInt(smsParams.pf_filter_size.W))
95625a80bceSYanqin Li  val not_tlbing_vec = VecInit((0 until smsParams.pf_filter_size).map{case i =>
95725a80bceSYanqin Li    !s1_tlb_fire_vec(i) && !s2_tlb_fire_vec(i) && !s3_tlb_fire_vec(i)
95825a80bceSYanqin Li  })
959289fc2f9SLinJiawei
960289fc2f9SLinJiawei  // s0: entries lookup
961289fc2f9SLinJiawei  val s0_gen_req = io.gen_req.bits
96285de5caeSLinJiawei  val s0_match_prev = prev_valid && (s0_gen_req.region_tag === prev_gen_req.region_tag)
96385de5caeSLinJiawei  val s0_gen_req_valid = io.gen_req.valid && !s0_match_prev
964289fc2f9SLinJiawei  val s0_match_vec = valids.indices.map(i => {
965f21b441aSLinJiawei    valids(i) && entries(i).region_tag === s0_gen_req.region_tag && !(s1_valid && !s1_hit && s1_replace_vec(i))
966289fc2f9SLinJiawei  })
967289fc2f9SLinJiawei  val s0_any_matched = Cat(s0_match_vec).orR
968289fc2f9SLinJiawei  val s0_replace_vec = UIntToOH(replacement.way)
969289fc2f9SLinJiawei  val s0_hit = s0_gen_req_valid && s0_any_matched
970289fc2f9SLinJiawei
971289fc2f9SLinJiawei  for(((v, ent), i) <- valids.zip(entries).zipWithIndex){
972289fc2f9SLinJiawei    val is_evicted = s1_valid && s1_replace_vec(i)
97325a80bceSYanqin Li    tlb_req_arb.io.in(i).valid := v && not_tlbing_vec(i) && !ent.paddr_valid && !is_evicted
974289fc2f9SLinJiawei    tlb_req_arb.io.in(i).bits.vaddr := Cat(ent.region_addr, 0.U(log2Up(REGION_SIZE).W))
975289fc2f9SLinJiawei    tlb_req_arb.io.in(i).bits.cmd := TlbCmd.read
9768a4dab4dSHaoyuan Feng    tlb_req_arb.io.in(i).bits.isPrefetch := true.B
977289fc2f9SLinJiawei    tlb_req_arb.io.in(i).bits.size := 3.U
9787f111a00SWilliam Wang    tlb_req_arb.io.in(i).bits.kill := false.B
979967327d8SLinJiawei    tlb_req_arb.io.in(i).bits.no_translate := false.B
980db6cfb5aSHaoyuan Feng    tlb_req_arb.io.in(i).bits.fullva := 0.U
981db6cfb5aSHaoyuan Feng    tlb_req_arb.io.in(i).bits.checkfullva := false.B
9828744445eSMaxpicca-Li    tlb_req_arb.io.in(i).bits.memidx := DontCare
983289fc2f9SLinJiawei    tlb_req_arb.io.in(i).bits.debug := DontCare
984382a2ebdSpeixiaokun    tlb_req_arb.io.in(i).bits.hlvx := DontCare
985382a2ebdSpeixiaokun    tlb_req_arb.io.in(i).bits.hyperinst := DontCare
986149a2326Sweiding liu    tlb_req_arb.io.in(i).bits.pmp_addr := DontCare
987289fc2f9SLinJiawei
988289fc2f9SLinJiawei    val pending_req_vec = ent.region_bits & (~ent.filter_bits).asUInt
989289fc2f9SLinJiawei    val first_one_offset = PriorityMux(
990289fc2f9SLinJiawei      pending_req_vec.asBools,
991967327d8SLinJiawei      (0 until smsParams.pf_filter_size).map(_.U(REGION_OFFSET.W))
992289fc2f9SLinJiawei    )
993289fc2f9SLinJiawei    val last_one_offset = PriorityMux(
994289fc2f9SLinJiawei      pending_req_vec.asBools.reverse,
995967327d8SLinJiawei      (0 until smsParams.pf_filter_size).reverse.map(_.U(REGION_OFFSET.W))
996289fc2f9SLinJiawei    )
997289fc2f9SLinJiawei    val pf_addr = Cat(
998289fc2f9SLinJiawei      ent.region_addr,
999289fc2f9SLinJiawei      Mux(ent.decr_mode, last_one_offset, first_one_offset),
1000289fc2f9SLinJiawei      0.U(log2Up(dcacheParameters.blockBytes).W)
1001289fc2f9SLinJiawei    )
1002289fc2f9SLinJiawei    pf_req_arb.io.in(i).valid := v && Cat(pending_req_vec).orR && ent.paddr_valid && !is_evicted
1003289fc2f9SLinJiawei    pf_req_arb.io.in(i).bits := pf_addr
1004289fc2f9SLinJiawei  }
1005289fc2f9SLinJiawei
1006289fc2f9SLinJiawei  val s0_tlb_fire_vec = VecInit(tlb_req_arb.io.in.map(_.fire))
1007289fc2f9SLinJiawei  val s0_pf_fire_vec = VecInit(pf_req_arb.io.in.map(_.fire))
1008289fc2f9SLinJiawei
1009967327d8SLinJiawei  val s0_update_way = OHToUInt(s0_match_vec)
1010967327d8SLinJiawei  val s0_replace_way = replacement.way
1011967327d8SLinJiawei  val s0_access_way = Mux(s0_any_matched, s0_update_way, s0_replace_way)
1012967327d8SLinJiawei  when(s0_gen_req_valid){
1013967327d8SLinJiawei    replacement.access(s0_access_way)
1014967327d8SLinJiawei  }
1015967327d8SLinJiawei
1016289fc2f9SLinJiawei  // s1: update or alloc
10174ccb2e8bSYanqin Li  val s1_valid_r = GatedValidRegNext(s0_gen_req_valid, false.B)
1018967327d8SLinJiawei  val s1_hit_r = RegEnable(s0_hit, false.B, s0_gen_req_valid)
1019289fc2f9SLinJiawei  val s1_gen_req = RegEnable(s0_gen_req, s0_gen_req_valid)
1020289fc2f9SLinJiawei  val s1_replace_vec_r = RegEnable(s0_replace_vec, s0_gen_req_valid && !s0_hit)
1021289fc2f9SLinJiawei  val s1_update_vec = RegEnable(VecInit(s0_match_vec).asUInt, s0_gen_req_valid && s0_hit)
10224ccb2e8bSYanqin Li  val s1_tlb_fire_vec_r = GatedValidRegNext(s0_tlb_fire_vec)
1023cd2ff98bShappy-lx  // tlb req will latch one cycle after tlb_arb
10244ccb2e8bSYanqin Li  val s1_tlb_req_valid = GatedValidRegNext(tlb_req_arb.io.out.fire)
1025cd2ff98bShappy-lx  val s1_tlb_req_bits  = RegEnable(tlb_req_arb.io.out.bits, tlb_req_arb.io.out.fire)
1026289fc2f9SLinJiawei  val s1_alloc_entry = Wire(new PrefetchFilterEntry())
1027289fc2f9SLinJiawei  s1_valid := s1_valid_r
1028f21b441aSLinJiawei  s1_hit := s1_hit_r
1029289fc2f9SLinJiawei  s1_replace_vec := s1_replace_vec_r
1030289fc2f9SLinJiawei  s1_tlb_fire_vec := s1_tlb_fire_vec_r.asUInt
1031289fc2f9SLinJiawei  s1_alloc_entry.region_tag := s1_gen_req.region_tag
1032289fc2f9SLinJiawei  s1_alloc_entry.region_addr := s1_gen_req.region_addr
1033289fc2f9SLinJiawei  s1_alloc_entry.region_bits := s1_gen_req.region_bits
1034289fc2f9SLinJiawei  s1_alloc_entry.paddr_valid := s1_gen_req.paddr_valid
1035289fc2f9SLinJiawei  s1_alloc_entry.decr_mode := s1_gen_req.decr_mode
1036289fc2f9SLinJiawei  s1_alloc_entry.filter_bits := 0.U
1037967327d8SLinJiawei  s1_alloc_entry.alias_bits := s1_gen_req.alias_bits
10382db9ec44SLinJiawei  s1_alloc_entry.debug_source_type := s1_gen_req.debug_source_type
1039cd2ff98bShappy-lx  io.tlb_req.req.valid := s1_tlb_req_valid && !((s1_tlb_fire_vec & s1_replace_vec).orR && s1_valid && !s1_hit)
1040cd2ff98bShappy-lx  io.tlb_req.req.bits := s1_tlb_req_bits
1041cd2ff98bShappy-lx  io.tlb_req.resp.ready := true.B
1042cd2ff98bShappy-lx  io.tlb_req.req_kill := false.B
1043cd2ff98bShappy-lx  tlb_req_arb.io.out.ready := true.B
1044375a3f86SHaoyuan Feng
104525a80bceSYanqin Li  // s2: get response from tlb
10464ccb2e8bSYanqin Li  val s2_tlb_fire_vec_r = GatedValidRegNext(s1_tlb_fire_vec_r)
1047375a3f86SHaoyuan Feng  s2_tlb_fire_vec := s2_tlb_fire_vec_r.asUInt
1048375a3f86SHaoyuan Feng
  // s3: get pmp response from PMPChecker
105025a80bceSYanqin Li  val s3_tlb_fire_vec_r = GatedValidRegNext(s2_tlb_fire_vec_r)
105125a80bceSYanqin Li  val s3_tlb_resp_fire = RegNext(io.tlb_req.resp.fire)
105225a80bceSYanqin Li  val s3_tlb_resp = RegEnable(io.tlb_req.resp.bits, io.tlb_req.resp.valid)
105325a80bceSYanqin Li  val s3_pmp_resp = io.pmp_resp
105425a80bceSYanqin Li  val s3_update_valid = s3_tlb_resp_fire && !s3_tlb_resp.miss
105525a80bceSYanqin Li  val s3_drop = s3_update_valid && (
105625a80bceSYanqin Li    // page/access fault
105725a80bceSYanqin Li    s3_tlb_resp.excp.head.pf.ld || s3_tlb_resp.excp.head.gpf.ld || s3_tlb_resp.excp.head.af.ld ||
105825a80bceSYanqin Li    // uncache
105925a80bceSYanqin Li    s3_pmp_resp.mmio || Pbmt.isUncache(s3_tlb_resp.pbmt.head) ||
106025a80bceSYanqin Li    // pmp access fault
106125a80bceSYanqin Li    s3_pmp_resp.ld
106225a80bceSYanqin Li  )
106325a80bceSYanqin Li  s3_tlb_fire_vec := s3_tlb_fire_vec_r.asUInt
106425a80bceSYanqin Li
1065289fc2f9SLinJiawei  for(((v, ent), i) <- valids.zip(entries).zipWithIndex){
1066289fc2f9SLinJiawei    val alloc = s1_valid && !s1_hit && s1_replace_vec(i)
1067289fc2f9SLinJiawei    val update = s1_valid && s1_hit && s1_update_vec(i)
1068289fc2f9SLinJiawei    // for pf: use s0 data
1069289fc2f9SLinJiawei    val pf_fired = s0_pf_fire_vec(i)
107025a80bceSYanqin Li    val tlb_fired = s3_tlb_fire_vec(i) && s3_update_valid
1071289fc2f9SLinJiawei    when(tlb_fired){
107225a80bceSYanqin Li      when(s3_drop){
107325a80bceSYanqin Li        v := false.B
107425a80bceSYanqin Li      }.otherwise{
107525a80bceSYanqin Li        ent.paddr_valid := !s3_tlb_resp.miss
107625a80bceSYanqin Li        ent.region_addr := region_addr(s3_tlb_resp.paddr.head)
107725a80bceSYanqin Li      }
1078289fc2f9SLinJiawei    }
1079289fc2f9SLinJiawei    when(update){
1080289fc2f9SLinJiawei      ent.region_bits := ent.region_bits | s1_gen_req.region_bits
1081289fc2f9SLinJiawei    }
1082289fc2f9SLinJiawei    when(pf_fired){
1083289fc2f9SLinJiawei      val curr_bit = UIntToOH(block_addr(pf_req_arb.io.in(i).bits)(REGION_OFFSET - 1, 0))
1084289fc2f9SLinJiawei      ent.filter_bits := ent.filter_bits | curr_bit
1085289fc2f9SLinJiawei    }
1086289fc2f9SLinJiawei    when(alloc){
1087289fc2f9SLinJiawei      ent := s1_alloc_entry
1088289fc2f9SLinJiawei      v := true.B
1089289fc2f9SLinJiawei    }
1090289fc2f9SLinJiawei  }
1091f21b441aSLinJiawei  when(s1_valid && s1_hit){
1092f21b441aSLinJiawei    assert(PopCount(s1_update_vec) === 1.U, "sms_pf_filter: multi-hit")
1093f21b441aSLinJiawei  }
1094cd2ff98bShappy-lx  assert(!io.tlb_req.resp.fire || Cat(s2_tlb_fire_vec).orR, "sms_pf_filter: tlb resp fires, but no tlb req from tlb_req_arb 2 cycles ago")
1095289fc2f9SLinJiawei
1096289fc2f9SLinJiawei  XSPerfAccumulate("sms_pf_filter_recv_req", io.gen_req.valid)
1097289fc2f9SLinJiawei  XSPerfAccumulate("sms_pf_filter_hit", s1_valid && s1_hit)
1098289fc2f9SLinJiawei  XSPerfAccumulate("sms_pf_filter_tlb_req", io.tlb_req.req.fire)
1099289fc2f9SLinJiawei  XSPerfAccumulate("sms_pf_filter_tlb_resp_miss", io.tlb_req.resp.fire && io.tlb_req.resp.bits.miss)
110025a80bceSYanqin Li  XSPerfAccumulate("sms_pf_filter_tlb_resp_drop", s3_drop)
110125a80bceSYanqin Li  XSPerfAccumulate("sms_pf_filter_tlb_resp_drop_by_pf_or_af",
110225a80bceSYanqin Li    s3_update_valid && (s3_tlb_resp.excp.head.pf.ld || s3_tlb_resp.excp.head.gpf.ld || s3_tlb_resp.excp.head.af.ld)
110325a80bceSYanqin Li  )
110425a80bceSYanqin Li  XSPerfAccumulate("sms_pf_filter_tlb_resp_drop_by_uncache",
110525a80bceSYanqin Li    s3_update_valid && (s3_pmp_resp.mmio || Pbmt.isUncache(s3_tlb_resp.pbmt.head))
110625a80bceSYanqin Li  )
110725a80bceSYanqin Li  XSPerfAccumulate("sms_pf_filter_tlb_resp_drop_by_pmp_af",
110825a80bceSYanqin Li    s3_update_valid && (s3_pmp_resp.ld)
110925a80bceSYanqin Li  )
1110289fc2f9SLinJiawei  for(i <- 0 until smsParams.pf_filter_size){
1111967327d8SLinJiawei    XSPerfAccumulate(s"sms_pf_filter_access_way_$i", s0_gen_req_valid && s0_access_way === i.U)
1112289fc2f9SLinJiawei  }
1113289fc2f9SLinJiawei  XSPerfAccumulate("sms_pf_filter_l2_req", io.l2_pf_addr.valid)
1114289fc2f9SLinJiawei}
1115289fc2f9SLinJiawei
/**
  * Training-request filter placed in front of the SMS prefetcher.
  *
  * Each cycle it takes the load and store training inputs, drops every request whose
  * `block_hash_tag(vaddr)` equals that of a valid queued entry or of an earlier request
  * in the same cycle, writes the remaining requests into a circular queue of
  * `smsParams.train_filter_size` entries, and presents the entry at the dequeue pointer
  * on `io.train_req` (at most one per cycle).
  */
class SMSTrainFilter()(implicit p: Parameters) extends XSModule with HasSMSModuleHelper with HasTrainFilterHelper {
  val io = IO(new Bundle() {
    // train input
    // hybrid load store
    val ld_in = Flipped(Vec(backendParams.LdExuCnt, ValidIO(new LsPrefetchTrainBundle())))
    val st_in = Flipped(Vec(backendParams.StaExuCnt, ValidIO(new LsPrefetchTrainBundle())))
    // filter out
    val train_req = ValidIO(new PrefetchReqBundle())
  })

  /** Circular-queue pointer (flag + value) sized for the train filter queue. */
  class Ptr(implicit p: Parameters) extends CircularQueuePtr[Ptr](
    p => smsParams.train_filter_size
  ){
  }

  object Ptr {
    /** Builds a `Ptr` wire from an explicit wrap flag `f` and index `v`. */
    def apply(f: Bool, v: UInt)(implicit p: Parameters): Ptr = {
      val ptr = Wire(new Ptr)
      ptr.flag := f
      ptr.value := v
      ptr
    }
  }

  // Queue storage: payload registers plus one valid bit per slot, all reset to zero/false.
  val entries = RegInit(VecInit(Seq.fill(smsParams.train_filter_size){ (0.U.asTypeOf(new PrefetchReqBundle())) }))
  val valids = RegInit(VecInit(Seq.fill(smsParams.train_filter_size){ (false.B) }))

  // Number of enqueue slots processed per cycle.
  // NOTE(review): the input ports above are sized with LdExuCnt/StaExuCnt while this uses
  // LduCnt/StaCnt; only the first `enqLen` reordered requests are examined below. Confirm
  // the two sums are equal for every supported configuration.
  val enqLen = backendParams.LduCnt + backendParams.StaCnt
  // One enqueue pointer per slot, reset to 0, 1, ..., enqLen-1. They are always advanced
  // together (see below), so enqPtrExt(k) stays k positions past enqPtrExt(0).
  val enqPtrExt = RegInit(VecInit((0 until enqLen).map(_.U.asTypeOf(new Ptr))))
  val deqPtrExt = RegInit(0.U.asTypeOf(new Ptr))

  // Index part of the dequeue pointer (wrap flag dropped), used to address entries/valids.
  val deqPtr = WireInit(deqPtrExt.value)

  // The queue must be able to hold at least one cycle's worth of enqueues.
  require(smsParams.train_filter_size >= enqLen)

  // `reorder` is defined outside this block (presumably in HasTrainFilterHelper); its exact
  // ordering policy is not visible here.
  val ld_reorder = reorder(io.ld_in)
  val st_reorder = reorder(io.st_in)
  // Loads first, then stores: payloads and their valid bits, index-aligned.
  val reqs_ls = ld_reorder.map(_.bits.toPrefetchReqBundle()) ++ st_reorder.map(_.bits.toPrefetchReqBundle())
  val reqs_vls = ld_reorder.map(_.valid) ++ st_reorder.map(_.valid)
  // needAlloc(i): request i is valid and not filtered out as a duplicate.
  // canAlloc(i):  request i is actually written into the queue this cycle.
  val needAlloc = Wire(Vec(enqLen, Bool()))
  val canAlloc = Wire(Vec(enqLen, Bool()))

  for(i <- (0 until enqLen)) {
    val req = reqs_ls(i)
    val req_v = reqs_vls(i)
    // Requests are packed: slot i uses the pointer selected by how many earlier slots
    // of this cycle also need an entry.
    val index = PopCount(needAlloc.take(i))
    val allocPtr = enqPtrExt(index)
    // Duplicate of something already queued?
    val entry_match = Cat(entries.zip(valids).map {
      case(e, v) => v && block_hash_tag(e.vaddr) === block_hash_tag(req.vaddr)
    }).orR
    // Duplicate of an earlier valid request arriving in the same cycle?
    val prev_enq_match = if(i == 0) false.B else Cat(reqs_ls.zip(reqs_vls).take(i).map {
      case(pre, pre_v) => pre_v && block_hash_tag(pre.vaddr) === block_hash_tag(req.vaddr)
    }).orR

    needAlloc(i) := req_v && !entry_match && !prev_enq_match
    // NOTE(review): the `>=` ordering is provided by CircularQueuePtr (not visible here);
    // presumably this is the "queue not full at allocPtr" check -- confirm.
    canAlloc(i) := needAlloc(i) && allocPtr >= deqPtrExt

    when(canAlloc(i)) {
      valids(allocPtr.value) := true.B
      entries(allocPtr.value) := req
    }
  }
  val allocNum = PopCount(canAlloc)

  // Advance every enqueue pointer by the number of entries written this cycle.
  enqPtrExt.foreach{case x => when(canAlloc.asUInt.orR) {x := x + allocNum} }

  // Output defaults; overridden below by the slot that deqPtr selects.
  io.train_req.valid := false.B
  io.train_req.bits := DontCare
  valids.zip(entries).zipWithIndex.foreach {
    case((valid, entry), i) => {
      when(deqPtr === i.U) {
        io.train_req.valid := valid
        io.train_req.bits := entry
      }
    }
  }

  // `io.train_req` is a ValidIO (no ready), so a valid head entry is consumed in the cycle
  // it is presented. This connection comes after the allocation writes above, so it takes
  // priority over them for the same slot (Chisel last-connect semantics).
  when(io.train_req.valid) {
    valids(deqPtr) := false.B
    deqPtrExt := deqPtrExt + 1.U
  }

  // Occupancy counters.
  XSPerfAccumulate("sms_train_filter_full", PopCount(valids) === (smsParams.train_filter_size).U)
  XSPerfAccumulate("sms_train_filter_half", PopCount(valids) >= (smsParams.train_filter_size / 2).U)
  XSPerfAccumulate("sms_train_filter_empty", PopCount(valids) === 0.U)

  // Per-cycle bit patterns of: incoming valids, requests surviving duplicate filtering,
  // and requests actually enqueued.
  val raw_enq_pattern = Cat(reqs_vls)
  val filtered_enq_pattern = Cat(needAlloc)
  val actual_enq_pattern = Cat(canAlloc)
  XSPerfAccumulate("sms_train_filter_enq", allocNum > 0.U)
  XSPerfAccumulate("sms_train_filter_deq", io.train_req.fire)
  /** Renders `n` in binary without leading zeros; only called with `n >= 0` below. */
  def toBinary(n: Int): String = n match {
    case 0|1 => s"$n"
    case _   => s"${toBinary(n/2)}${n%2}"
  }
  // One counter per pattern value in [0, 2^enqLen) for each of the three pattern signals.
  for(i <- 0 until (1 << enqLen)) {
    XSPerfAccumulate(s"sms_train_filter_raw_enq_pattern_${toBinary(i)}", raw_enq_pattern === i.U)
    XSPerfAccumulate(s"sms_train_filter_filtered_enq_pattern_${toBinary(i)}", filtered_enq_pattern === i.U)
    XSPerfAccumulate(s"sms_train_filter_actual_enq_pattern_${toBinary(i)}", actual_enq_pattern === i.U)
  }
}
12170d32f713Shappy-lx
/**
  * Top level of the SMS prefetcher.
  *
  * Wires together the train filter, the active generation table (AGT), the stride
  * prefetcher, the pattern history table (PHT) and the prefetch filter:
  *  - training requests come out of `SMSTrainFilter` and are registered once ("s0");
  *  - AGT and stride look the registered request up; PHT is driven by the AGT;
  *  - one generated request per cycle is forwarded to the prefetch filter, whose output
  *    address drives `io.l2_req`.
  * `io.l1_req.valid` is tied low, so no L1 prefetch request is ever issued from here.
  */
class SMSPrefetcher()(implicit p: Parameters) extends BasePrefecher with HasSMSModuleHelper with HasL1PrefetchSourceParameter {
  import freechips.rocketchip.util._

  // Control inputs: enables for AGT / stride / PHT, and the two values forwarded to the
  // AGT as `act_threshold` / `act_stride`.
  val io_agt_en = IO(Input(Bool()))
  val io_stride_en = IO(Input(Bool()))
  val io_pht_en = IO(Input(Bool()))
  val io_act_threshold = IO(Input(UInt(REGION_OFFSET.W)))
  val io_act_stride = IO(Input(UInt(6.W)))
  // Evict requests, forwarded to the AGT's `s0_dcache_evict` port below.
  val io_dcache_evict = IO(Flipped(DecoupledIO(new AGTEvictReq)))

  val train_filter = Module(new SMSTrainFilter)

  train_filter.io.ld_in <> io.ld_in
  train_filter.io.st_in <> io.st_in

  // Payload of the filtered training request (its valid is `train_vld`, defined below).
  val train_ld = train_filter.io.train_req.bits

  val train_block_tag = block_hash_tag(train_ld.vaddr)
  val train_region_tag = train_block_tag.head(REGION_TAG_WIDTH)

  val train_region_addr_raw = region_addr(train_ld.vaddr)(REGION_TAG_WIDTH + 2 * VADDR_HASH_WIDTH - 1, 0)
  // Neighbouring regions (+1 / -1). A zero bit is prepended first so that overflow or
  // underflow of the raw region address shows up in the extra top bit.
  val train_region_addr_p1 = Cat(0.U(1.W), train_region_addr_raw) + 1.U
  val train_region_addr_m1 = Cat(0.U(1.W), train_region_addr_raw) - 1.U
  // A neighbour is usable only if the +1 / -1 did not carry into the extra top bit.
  val train_allow_cross_region_p1 = !train_region_addr_p1.head(1).asBool
  val train_allow_cross_region_m1 = !train_region_addr_m1.head(1).asBool

  // Tags of the neighbouring regions, computed without the extra top bit.
  val train_region_p1_tag = region_hash_tag(train_region_addr_p1.tail(1))
  val train_region_m1_tag = region_hash_tag(train_region_addr_m1.tail(1))

  // Set when `page_bit` of the neighbour differs from `page_bit` of the current region.
  val train_region_p1_cross_page = page_bit(train_region_addr_p1) ^ page_bit(train_region_addr_raw)
  val train_region_m1_cross_page = page_bit(train_region_addr_m1) ^ page_bit(train_region_addr_raw)

  val train_region_paddr = region_addr(train_ld.paddr)
  val train_region_vaddr = region_addr(train_ld.vaddr)
  val train_region_offset = train_block_tag(REGION_OFFSET - 1, 0)
  val train_vld = train_filter.io.train_req.valid


  // prefetch stage0
  val active_gen_table = Module(new ActiveGenerationTable())
  val stride = Module(new StridePF())
  val pht = Module(new PatternHistoryTable())
  val pf_filter = Module(new PrefetchFilter())

  // s0 pipeline registers: the valid bit is registered every cycle, the derived fields
  // are captured only when a training request is valid.
  val train_vld_s0 = GatedValidRegNext(train_vld, false.B)
  val train_s0 = RegEnable(train_ld, train_vld)
  val train_region_tag_s0 = RegEnable(train_region_tag, train_vld)
  val train_region_p1_tag_s0 = RegEnable(train_region_p1_tag, train_vld)
  val train_region_m1_tag_s0 = RegEnable(train_region_m1_tag, train_vld)
  val train_allow_cross_region_p1_s0 = RegEnable(train_allow_cross_region_p1, train_vld)
  val train_allow_cross_region_m1_s0 = RegEnable(train_allow_cross_region_m1, train_vld)
  val train_pht_tag_s0 = RegEnable(pht_tag(train_ld.pc), train_vld)
  val train_pht_index_s0 = RegEnable(pht_index(train_ld.pc), train_vld)
  val train_region_offset_s0 = RegEnable(train_region_offset, train_vld)
  val train_region_p1_cross_page_s0 = RegEnable(train_region_p1_cross_page, train_vld)
  val train_region_m1_cross_page_s0 = RegEnable(train_region_m1_cross_page, train_vld)
  val train_region_paddr_s0 = RegEnable(train_region_paddr, train_vld)
  val train_region_vaddr_s0 = RegEnable(train_region_vaddr, train_vld)

  // AGT: configuration plus the s0 lookup built from the registered training request.
  active_gen_table.io.agt_en := io_agt_en
  active_gen_table.io.act_threshold := io_act_threshold
  active_gen_table.io.act_stride := io_act_stride
  active_gen_table.io.s0_lookup.valid := train_vld_s0
  active_gen_table.io.s0_lookup.bits.region_tag := train_region_tag_s0
  active_gen_table.io.s0_lookup.bits.region_p1_tag := train_region_p1_tag_s0
  active_gen_table.io.s0_lookup.bits.region_m1_tag := train_region_m1_tag_s0
  active_gen_table.io.s0_lookup.bits.region_offset := train_region_offset_s0
  active_gen_table.io.s0_lookup.bits.pht_index := train_pht_index_s0
  active_gen_table.io.s0_lookup.bits.pht_tag := train_pht_tag_s0
  active_gen_table.io.s0_lookup.bits.allow_cross_region_p1 := train_allow_cross_region_p1_s0
  active_gen_table.io.s0_lookup.bits.allow_cross_region_m1 := train_allow_cross_region_m1_s0
  active_gen_table.io.s0_lookup.bits.region_p1_cross_page := train_region_p1_cross_page_s0
  active_gen_table.io.s0_lookup.bits.region_m1_cross_page := train_region_m1_cross_page_s0
  active_gen_table.io.s0_lookup.bits.region_paddr := train_region_paddr_s0
  active_gen_table.io.s0_lookup.bits.region_vaddr := train_region_vaddr_s0
  active_gen_table.io.s2_stride_hit := stride.io.s2_gen_req.valid
  active_gen_table.io.s0_dcache_evict <> io_dcache_evict

  // Stride prefetcher: same s0 request, with block-aligned addresses rebuilt from
  // region address, region offset and zeroed in-block offset bits.
  stride.io.stride_en := io_stride_en
  stride.io.s0_lookup.valid := train_vld_s0
  stride.io.s0_lookup.bits.pc := train_s0.pc(STRIDE_PC_BITS - 1, 0)
  stride.io.s0_lookup.bits.vaddr := Cat(
    train_region_vaddr_s0, train_region_offset_s0, 0.U(log2Up(dcacheParameters.blockBytes).W)
  )
  stride.io.s0_lookup.bits.paddr := Cat(
    train_region_paddr_s0, train_region_offset_s0, 0.U(log2Up(dcacheParameters.blockBytes).W)
  )
  stride.io.s1_valid := active_gen_table.io.s1_sel_stride

  // PHT is looked up and updated by the AGT.
  pht.io.s2_agt_lookup := active_gen_table.io.s2_pht_lookup
  pht.io.agt_update := active_gen_table.io.s2_evict

  // Select the generated request: AGT or stride (asserted mutually exclusive) win over
  // PHT; the PHT request only counts as valid when io_pht_en is set.
  val pht_gen_valid = pht.io.pf_gen_req.valid && io_pht_en
  val agt_gen_valid = active_gen_table.io.s2_pf_gen_req.valid
  val stride_gen_valid = stride.io.s2_gen_req.valid
  val pf_gen_req = Mux(agt_gen_valid || stride_gen_valid,
    Mux1H(Seq(
      agt_gen_valid -> active_gen_table.io.s2_pf_gen_req.bits,
      stride_gen_valid -> stride.io.s2_gen_req.bits
    )),
    pht.io.pf_gen_req.bits
  )
  assert(!(agt_gen_valid && stride_gen_valid))
  pf_filter.io.gen_req.valid := pht_gen_valid || agt_gen_valid || stride_gen_valid
  pf_filter.io.gen_req.bits := pf_gen_req
  io.tlb_req <> pf_filter.io.tlb_req
  pf_filter.io.pmp_resp := io.pmp_resp
  // Only addresses covered by at least one entry of PmemRanges may be issued to L2.
  val is_valid_address = PmemRanges.map(_.cover(pf_filter.io.l2_pf_addr.bits)).reduce(_ || _)

  io.l2_req.valid := pf_filter.io.l2_pf_addr.valid && io.enable && is_valid_address
  io.l2_req.bits.addr := pf_filter.io.l2_pf_addr.bits
  io.l2_req.bits.source := MemReqSource.Prefetch2L2SMS.id.U

  // for now, sms will not send l1 prefetch requests
  // (the payload is still driven, but valid is tied low)
  io.l1_req.bits.paddr := pf_filter.io.l2_pf_addr.bits
  io.l1_req.bits.alias := pf_filter.io.pf_alias_bits
  io.l1_req.bits.is_store := true.B
  io.l1_req.bits.confidence := 1.U
  io.l1_req.bits.pf_source.value := L1_HW_PREFETCH_NULL
  io.l1_req.valid := false.B

  // Per load-port training statistics, taken from the unfiltered inputs.
  for((train, i) <- io.ld_in.zipWithIndex){
    XSPerfAccumulate(s"pf_train_miss_${i}", train.valid && train.bits.miss)
    XSPerfAccumulate(s"pf_train_prefetched_${i}", train.valid && isFromL1Prefetch(train.bits.meta_prefetch))
  }
  // ChiselDB trace of issued L2 prefetches: only paddr and source are meaningful,
  // vaddr and pc are logged as zero.
  val trace = Wire(new L1MissTrace)
  trace.vaddr := 0.U
  trace.pc := 0.U
  trace.paddr := io.l2_req.bits.addr
  trace.source := pf_filter.io.debug_source_type
  val table = ChiselDB.createTable("L1SMSMissTrace_hart"+ p(XSCoreParamsKey).HartId.toString, new L1MissTrace)
  table.log(trace, io.l2_req.fire, "SMSPrefetcher", clock, reset)

  // PHT and AGT both produced a request in the same cycle (the AGT one is selected).
  XSPerfAccumulate("sms_pf_gen_conflict",
    pht_gen_valid && agt_gen_valid
  )
  XSPerfAccumulate("sms_pht_disabled", pht.io.pf_gen_req.valid && !io_pht_en)
  XSPerfAccumulate("sms_agt_disabled", active_gen_table.io.s2_pf_gen_req.valid && !io_agt_en)
  XSPerfAccumulate("sms_pf_real_issued", io.l2_req.valid)
  XSPerfAccumulate("sms_l1_req_valid", io.l1_req.valid)
  XSPerfAccumulate("sms_l1_req_fire", io.l1_req.fire)
}
1361