/***************************************************************************************
* Copyright (c) 2024 Beijing Institute of Open Source Chip (BOSC)
* Copyright (c) 2020-2024 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
* http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
*
*
* Acknowledgement
*
* This implementation is inspired by several key papers:
* [1] Stephen Somogyi, Thomas F. Wenisch, Anastassia Ailamaki, Babak Falsafi and Andreas Moshovos. "[Spatial memory
* streaming.](https://doi.org/10.1109/ISCA.2006.38)" 33rd International Symposium on Computer Architecture (ISCA).
* 2006.
***************************************************************************************/

package xiangshan.mem.prefetch

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import utils._
import utility._
import xiangshan._
import xiangshan.backend.fu.PMPRespBundle
import xiangshan.mem.L1PrefetchReq
import xiangshan.mem.Bundles.LsPrefetchTrainBundle
import xiangshan.mem.trace._
import xiangshan.mem.HasL1PrefetchSourceParameter
import xiangshan.cache.HasDCacheParameters
import xiangshan.cache.mmu._

/** Static configuration of the SMS prefetcher.
  *
  * The fields are read through `smsParams` in [[HasSMSModuleHelper]], which derives the
  * widths and table sizes used by the stride table, the active generation table and the
  * pattern history table from them.
  */
case class SMSParams
(
  region_size: Int = 1024,
  vaddr_hash_width: Int = 5,
  block_addr_raw_width: Int = 10,
  stride_pc_bits: Int = 10,
  max_stride: Int = 1024,
  stride_entries: Int = 16,
  active_gen_table_size: Int = 16,
  pht_size: Int = 64,
  pht_ways: Int = 2,
  pht_hist_bits: Int = 2,
  pht_tag_bits: Int = 13,
  pht_lookup_queue_size: Int = 4,
  pf_filter_size: Int = 16,
  train_filter_size: Int = 8
) extends PrefetcherParams

/** Derived constants and address-slicing / hashing helpers shared by the SMS modules and bundles.
  *
  * All widths are computed from [[SMSParams]] (obtained via `coreParams.prefetcher`) and from
  * `dcacheParameters`.
  */
trait HasSMSModuleHelper extends HasCircularQueuePtrHelper with HasDCacheParameters
{ this: HasXSParameter =>
  val smsParams = coreParams.prefetcher.get.asInstanceOf[SMSParams]
  val BLK_ADDR_WIDTH = VAddrBits - log2Up(dcacheParameters.blockBytes)
  val REGION_SIZE = smsParams.region_size
  // number of cache blocks in one region
  val REGION_BLKS = smsParams.region_size / dcacheParameters.blockBytes
  val REGION_ADDR_BITS = VAddrBits - log2Up(REGION_SIZE)
  // width of a block index inside a region
  val REGION_OFFSET = log2Up(REGION_BLKS)
  val VADDR_HASH_WIDTH = smsParams.vaddr_hash_width
  val BLK_ADDR_RAW_WIDTH = smsParams.block_addr_raw_width
  val REGION_ADDR_RAW_WIDTH = BLK_ADDR_RAW_WIDTH - REGION_OFFSET
  // tag = raw low address bits + hashed upper bits
  val BLK_TAG_WIDTH = BLK_ADDR_RAW_WIDTH + VADDR_HASH_WIDTH
  val REGION_TAG_WIDTH = REGION_ADDR_RAW_WIDTH + VADDR_HASH_WIDTH
  val PHT_INDEX_BITS = log2Up(smsParams.pht_size / smsParams.pht_ways)
  val PHT_TAG_BITS = smsParams.pht_tag_bits
  val PHT_HIST_BITS = smsParams.pht_hist_bits
  // page bit index in block addr
  val BLOCK_ADDR_PAGE_BIT = log2Up(dcacheParameters.pageSize / dcacheParameters.blockBytes)
  // page bit index in region addr
  val REGION_ADDR_PAGE_BIT = log2Up(dcacheParameters.pageSize / smsParams.region_size)
  val STRIDE_PC_BITS = smsParams.stride_pc_bits
  val STRIDE_BLK_ADDR_BITS = log2Up(smsParams.max_stride)

  /** Strip the in-block byte offset from `x`, keeping every bit above it. */
  def block_addr(x: UInt): UInt = {
    val offset = log2Up(dcacheParameters.blockBytes)
    x(x.getWidth - 1, offset)
  }

  /** Strip the in-region byte offset from `x`, keeping every bit above it. */
  def region_addr(x: UInt): UInt = {
    val offset = log2Up(REGION_SIZE)
    x(x.getWidth - 1, offset)
  }

  /** Convert a block index inside a region into a bit vector with only bit `off` set. */
  def region_offset_to_bits(off: UInt): UInt = {
    (1.U << off).asUInt
  }

  /** Build a region tag: the low `REGION_ADDR_RAW_WIDTH` bits of `rg_addr` kept verbatim,
    * with the next `3 * VADDR_HASH_WIDTH` bits XOR-folded by [[vaddr_hash]] placed above them.
    */
  def region_hash_tag(rg_addr: UInt): UInt = {
    val low = rg_addr(REGION_ADDR_RAW_WIDTH - 1, 0)
    val high = rg_addr(REGION_ADDR_RAW_WIDTH + 3 * VADDR_HASH_WIDTH - 1, REGION_ADDR_RAW_WIDTH)
    val high_hash = vaddr_hash(high)
    Cat(high_hash, low)
  }

  /** Select the bit of a region address at index `log2Up(pageSize / REGION_SIZE)`
    * (the same value as `REGION_ADDR_PAGE_BIT`).
    */
  def page_bit(region_addr: UInt): UInt = {
    region_addr(log2Up(dcacheParameters.pageSize/REGION_SIZE))
  }

  /** Build a block tag from a full address `x`: same scheme as [[region_hash_tag]] but
    * applied to the block address with `BLK_ADDR_RAW_WIDTH` raw low bits.
    */
  def block_hash_tag(x: UInt): UInt = {
    val blk_addr = block_addr(x)
    val low = blk_addr(BLK_ADDR_RAW_WIDTH - 1, 0)
    val high = blk_addr(BLK_ADDR_RAW_WIDTH - 1 + 3 * VADDR_HASH_WIDTH, BLK_ADDR_RAW_WIDTH)
    val high_hash = vaddr_hash(high)
    Cat(high_hash, low)
  }

  /** XOR-fold the low `3 * VADDR_HASH_WIDTH` bits of `x` into `VADDR_HASH_WIDTH` bits. */
  def vaddr_hash(x: UInt): UInt = {
    val width = VADDR_HASH_WIDTH
    val low = x(width - 1, 0)
    val mid = x(2 * width - 1, width)
    val high = x(3 * width - 1, 2 * width)
    low ^ mid ^ high
  }

  /** PHT set index: pc bits `[PHT_INDEX_BITS, 2]` with `pc(1) ^ pc(PHT_INDEX_BITS + 1)` as the top bit. */
  def pht_index(pc: UInt): UInt = {
    val low_bits = pc(PHT_INDEX_BITS, 2)
    val hi_bit = pc(1) ^ pc(PHT_INDEX_BITS+1)
    Cat(hi_bit, low_bits)
  }

  /** PHT tag: the `PHT_TAG_BITS` pc bits starting at bit `PHT_INDEX_BITS + 2`. */
  def pht_tag(pc: UInt): UInt = {
    pc(PHT_INDEX_BITS + 2 + PHT_TAG_BITS - 1, PHT_INDEX_BITS + 2)
  }

  /** Re-expand a region virtual address to a byte address (zero in-region offset) and pass it
    * to `get_alias`. `get_alias` is not defined in this file; presumably it comes from
    * `HasDCacheParameters` -- confirm there.
    */
  def get_alias_bits(region_vaddr: UInt): UInt = {
    val offset = log2Up(REGION_SIZE)
    get_alias(Cat(region_vaddr, 0.U(offset.W)))
  }
}

/** PC-indexed stride prefetcher with a three-stage pipeline (s0 lookup, s1 update/alloc, s2 request).
  *
  * Each entry keeps a pc, a 2-bit confidence counter, the last block address
  * (`STRIDE_BLK_ADDR_BITS` wide) and the last observed signed stride. A request is produced at s2
  * when an s1 hit sees the new stride equal to the stored, non-zero stride.
  *
  * IO:
  *  - `stride_en`  : gates `s2_gen_req.valid`.
  *  - `s0_lookup`  : training access (pc / vaddr / paddr).
  *  - `s1_valid`   : qualifies the s1 hit / alloc of the lookup issued one cycle earlier.
  *  - `s2_gen_req` : generated prefetch request.
  */
class StridePF()(implicit p: Parameters) extends XSModule with HasSMSModuleHelper {
  val io = IO(new Bundle() {
    val stride_en = Input(Bool())
    val s0_lookup = Flipped(new ValidIO(new Bundle() {
      val pc = UInt(STRIDE_PC_BITS.W)
      val vaddr = UInt(VAddrBits.W)
      val paddr = UInt(PAddrBits.W)
    }))
    val s1_valid = Input(Bool())
    val s2_gen_req = ValidIO(new PfGenReq())
  })

  val prev_valid = GatedValidRegNext(io.s0_lookup.valid, false.B)
  val prev_pc = RegEnable(io.s0_lookup.bits.pc, io.s0_lookup.valid)

  // ignore a lookup whose pc equals the pc of the lookup in the previous cycle
  val s0_valid = io.s0_lookup.valid && !(prev_valid && prev_pc === io.s0_lookup.bits.pc)

  def entry_map[T](fn: Int => T) = (0 until smsParams.stride_entries).map(fn)

  val replacement = ReplacementPolicy.fromString("plru", smsParams.stride_entries)
  val valids = entry_map(_ => RegInit(false.B))
  val entries_pc = entry_map(_ => Reg(UInt(STRIDE_PC_BITS.W)) )
  val entries_conf = entry_map(_ => RegInit(1.U(2.W)))
  val entries_last_addr = entry_map(_ => Reg(UInt(STRIDE_BLK_ADDR_BITS.W)) )
  // one extra bit: the stride is signed
  val entries_stride = entry_map(_ => Reg(SInt((STRIDE_BLK_ADDR_BITS+1).W)))


  // stage0: match the lookup pc against every valid entry
  val s0_match_vec = valids.zip(entries_pc).map({
    case (v, pc) => v && pc === io.s0_lookup.bits.pc
  })

  val s0_hit = s0_valid && Cat(s0_match_vec).orR
  val s0_miss = s0_valid && !s0_hit
  val s0_matched_conf = Mux1H(s0_match_vec, entries_conf)
  val s0_matched_last_addr = Mux1H(s0_match_vec, entries_last_addr)
  val s0_matched_last_stride = Mux1H(s0_match_vec, entries_stride)

  // stage1: update the hit entry or allocate a new one
  val s1_hit = GatedValidRegNext(s0_hit) && io.s1_valid
  val s1_alloc = GatedValidRegNext(s0_miss) && io.s1_valid
  val s1_vaddr = RegEnable(io.s0_lookup.bits.vaddr, s0_valid)
  val s1_paddr = RegEnable(io.s0_lookup.bits.paddr, s0_valid)
  val s1_conf = RegEnable(s0_matched_conf, s0_valid)
  val s1_last_addr = RegEnable(s0_matched_last_addr, s0_valid)
  val s1_last_stride = RegEnable(s0_matched_last_stride, s0_valid)
  val s1_match_vec = RegEnable(VecInit(s0_match_vec), s0_valid)

  val BLOCK_OFFSET = log2Up(dcacheParameters.blockBytes)
  // low STRIDE_BLK_ADDR_BITS bits of the block address
  val s1_new_stride_vaddr = s1_vaddr(BLOCK_OFFSET + STRIDE_BLK_ADDR_BITS - 1, BLOCK_OFFSET)
  // zero-extend both operands by one bit so the difference is a correctly signed value
  val s1_new_stride = (0.U(1.W) ## s1_new_stride_vaddr).asSInt - (0.U(1.W) ## s1_last_addr).asSInt
  val s1_stride_non_zero = s1_last_stride =/= 0.S
  val s1_stride_match = s1_new_stride === s1_last_stride && s1_stride_non_zero
  val s1_replace_idx = replacement.way

  for(i <- 0 until smsParams.stride_entries){
    val alloc = s1_alloc && i.U === s1_replace_idx
    val update = s1_hit && s1_match_vec(i)
    when(update){
      assert(valids(i))
      // saturating 2-bit counter: up on stride match, down otherwise
      entries_conf(i) := Mux(s1_stride_match,
        Mux(s1_conf === 3.U, 3.U, s1_conf + 1.U),
        Mux(s1_conf === 0.U, 0.U, s1_conf - 1.U)
      )
      entries_last_addr(i) := s1_new_stride_vaddr
      // only retrain the stride while the confidence MSB is clear
      when(!s1_conf(1)){
        entries_stride(i) := s1_new_stride
      }
    }
    when(alloc){
      valids(i) := true.B
      entries_pc(i) := prev_pc
      entries_conf(i) := 0.U
      entries_last_addr(i) := s1_new_stride_vaddr
      entries_stride(i) := 0.S
    }
    assert(!(update && alloc))
  }
  when(s1_hit){
    replacement.access(OHToUInt(s1_match_vec.asUInt))
  }.elsewhen(s1_alloc){
    replacement.access(s1_replace_idx)
  }

  val s1_block_vaddr = block_addr(s1_vaddr)
  val s1_pf_block_vaddr = (s1_block_vaddr.asSInt + s1_last_stride).asUInt
  // the prefetch target leaves the page when the page bit of the block address flips
  val s1_pf_cross_page = s1_pf_block_vaddr(BLOCK_ADDR_PAGE_BIT) =/= s1_block_vaddr(BLOCK_ADDR_PAGE_BIT)

  // stage2: build the prefetch request
  val s2_pf_gen_valid = GatedValidRegNext(s1_hit && s1_stride_match, false.B)
  val s2_pf_gen_paddr_valid = RegEnable(!s1_pf_cross_page, s1_hit && s1_stride_match)
  val s2_pf_block_vaddr = RegEnable(s1_pf_block_vaddr, s1_hit && s1_stride_match)
  val s2_block_paddr = RegEnable(block_addr(s1_paddr), s1_hit && s1_stride_match)

  // same page: physical page number of the trigger + in-page bits of the target;
  // otherwise fall back to the virtual block address
  val s2_pf_block_addr = Mux(s2_pf_gen_paddr_valid,
    Cat(
      s2_block_paddr(PAddrBits - BLOCK_OFFSET - 1, BLOCK_ADDR_PAGE_BIT),
      s2_pf_block_vaddr(BLOCK_ADDR_PAGE_BIT - 1, 0)
    ),
    s2_pf_block_vaddr
  )
  val s2_pf_full_addr = Wire(UInt(VAddrBits.W))
  s2_pf_full_addr := s2_pf_block_addr ## 0.U(BLOCK_OFFSET.W)

  val s2_pf_region_addr = region_addr(s2_pf_full_addr)
  val s2_pf_region_offset = s2_pf_block_addr(REGION_OFFSET - 1, 0)

  val s2_full_vaddr = Wire(UInt(VAddrBits.W))
  s2_full_vaddr := s2_pf_block_vaddr ## 0.U(BLOCK_OFFSET.W)

  val s2_region_tag = region_hash_tag(region_addr(s2_full_vaddr))

  io.s2_gen_req.valid := s2_pf_gen_valid && io.stride_en
  io.s2_gen_req.bits.region_tag := s2_region_tag
  io.s2_gen_req.bits.region_addr := s2_pf_region_addr
  io.s2_gen_req.bits.alias_bits := get_alias_bits(region_addr(s2_full_vaddr))
  io.s2_gen_req.bits.region_bits := region_offset_to_bits(s2_pf_region_offset)
  io.s2_gen_req.bits.paddr_valid := s2_pf_gen_paddr_valid
  io.s2_gen_req.bits.decr_mode := false.B
  io.s2_gen_req.bits.debug_source_type := HW_PREFETCH_STRIDE.U

}

/** One entry of the active generation table; also the payload sent towards the PHT. */
class AGTEntry()(implicit p: Parameters) extends XSBundle with HasSMSModuleHelper {
  val pht_index = UInt(PHT_INDEX_BITS.W)
  val pht_tag = UInt(PHT_TAG_BITS.W)
  // one bit per block of the region
  val region_bits = UInt(REGION_BLKS.W)
  // the single block bit carried by the most recent lookup
  val region_bit_single = UInt(REGION_BLKS.W)
  val region_tag = UInt(REGION_TAG_WIDTH.W)
  val region_offset = UInt(REGION_OFFSET.W)
  val access_cnt = UInt((REGION_BLKS-1).U.getWidth.W)
  val decr_mode = Bool()
  val single_update = Bool() // this is a single update request
  val has_been_signal_updated = Bool()
}
271289fc2f9SLinJiawei 272289fc2f9SLinJiaweiclass PfGenReq()(implicit p: Parameters) extends XSBundle with HasSMSModuleHelper { 273289fc2f9SLinJiawei val region_tag = UInt(REGION_TAG_WIDTH.W) 274289fc2f9SLinJiawei val region_addr = UInt(REGION_ADDR_BITS.W) 275289fc2f9SLinJiawei val region_bits = UInt(REGION_BLKS.W) 276289fc2f9SLinJiawei val paddr_valid = Bool() 277289fc2f9SLinJiawei val decr_mode = Bool() 278967327d8SLinJiawei val alias_bits = UInt(2.W) 2792db9ec44SLinJiawei val debug_source_type = UInt(log2Up(nSourceType).W) 280289fc2f9SLinJiawei} 281289fc2f9SLinJiawei 2826005a7e2Shappy-lxclass AGTEvictReq()(implicit p: Parameters) extends XSBundle { 2836005a7e2Shappy-lx val vaddr = UInt(VAddrBits.W) 2846005a7e2Shappy-lx} 2856005a7e2Shappy-lx 286289fc2f9SLinJiaweiclass ActiveGenerationTable()(implicit p: Parameters) extends XSModule with HasSMSModuleHelper { 287289fc2f9SLinJiawei val io = IO(new Bundle() { 288967327d8SLinJiawei val agt_en = Input(Bool()) 289289fc2f9SLinJiawei val s0_lookup = Flipped(ValidIO(new Bundle() { 290289fc2f9SLinJiawei val region_tag = UInt(REGION_TAG_WIDTH.W) 291289fc2f9SLinJiawei val region_p1_tag = UInt(REGION_TAG_WIDTH.W) 292289fc2f9SLinJiawei val region_m1_tag = UInt(REGION_TAG_WIDTH.W) 293289fc2f9SLinJiawei val region_offset = UInt(REGION_OFFSET.W) 294289fc2f9SLinJiawei val pht_index = UInt(PHT_INDEX_BITS.W) 295289fc2f9SLinJiawei val pht_tag = UInt(PHT_TAG_BITS.W) 296289fc2f9SLinJiawei val allow_cross_region_p1 = Bool() 297289fc2f9SLinJiawei val allow_cross_region_m1 = Bool() 298289fc2f9SLinJiawei val region_p1_cross_page = Bool() 299289fc2f9SLinJiawei val region_m1_cross_page = Bool() 300289fc2f9SLinJiawei val region_paddr = UInt(REGION_ADDR_BITS.W) 301289fc2f9SLinJiawei val region_vaddr = UInt(REGION_ADDR_BITS.W) 302289fc2f9SLinJiawei })) 3036005a7e2Shappy-lx // dcache has released a block, evict it from agt 3046005a7e2Shappy-lx val s0_dcache_evict = Flipped(DecoupledIO(new AGTEvictReq)) 305967327d8SLinJiawei val s1_sel_stride = 
Output(Bool()) 306967327d8SLinJiawei val s2_stride_hit = Input(Bool()) 307967327d8SLinJiawei // if agt/stride missed, try lookup pht 30885de5caeSLinJiawei val s2_pht_lookup = ValidIO(new PhtLookup()) 309289fc2f9SLinJiawei // evict entry to pht 310289fc2f9SLinJiawei val s2_evict = ValidIO(new AGTEntry()) 311289fc2f9SLinJiawei val s2_pf_gen_req = ValidIO(new PfGenReq()) 3125d13017eSLinJiawei val act_threshold = Input(UInt(REGION_OFFSET.W)) 3135d13017eSLinJiawei val act_stride = Input(UInt(6.W)) 314289fc2f9SLinJiawei }) 315289fc2f9SLinJiawei 316289fc2f9SLinJiawei val entries = Seq.fill(smsParams.active_gen_table_size){ Reg(new AGTEntry()) } 317289fc2f9SLinJiawei val valids = Seq.fill(smsParams.active_gen_table_size){ RegInit(false.B) } 318289fc2f9SLinJiawei val replacement = ReplacementPolicy.fromString("plru", smsParams.active_gen_table_size) 319289fc2f9SLinJiawei 320967327d8SLinJiawei val s1_replace_mask_w = Wire(UInt(smsParams.active_gen_table_size.W)) 321967327d8SLinJiawei 322289fc2f9SLinJiawei val s0_lookup = io.s0_lookup.bits 323289fc2f9SLinJiawei val s0_lookup_valid = io.s0_lookup.valid 324289fc2f9SLinJiawei 3256005a7e2Shappy-lx val s0_dcache_evict = io.s0_dcache_evict.bits 3266005a7e2Shappy-lx val s0_dcache_evict_valid = io.s0_dcache_evict.valid 3276005a7e2Shappy-lx val s0_dcache_evict_tag = block_hash_tag(s0_dcache_evict.vaddr).head(REGION_TAG_WIDTH) 3286005a7e2Shappy-lx 329289fc2f9SLinJiawei val prev_lookup = RegEnable(s0_lookup, s0_lookup_valid) 3304ccb2e8bSYanqin Li val prev_lookup_valid = GatedValidRegNext(s0_lookup_valid, false.B) 331289fc2f9SLinJiawei 332289fc2f9SLinJiawei val s0_match_prev = prev_lookup_valid && s0_lookup.region_tag === prev_lookup.region_tag 333289fc2f9SLinJiawei 334289fc2f9SLinJiawei def gen_match_vec(region_tag: UInt): Seq[Bool] = { 335289fc2f9SLinJiawei entries.zip(valids).map({ 336289fc2f9SLinJiawei case (ent, v) => v && ent.region_tag === region_tag 337289fc2f9SLinJiawei }) 338289fc2f9SLinJiawei } 339289fc2f9SLinJiawei 
340289fc2f9SLinJiawei val region_match_vec_s0 = gen_match_vec(s0_lookup.region_tag) 341289fc2f9SLinJiawei val region_p1_match_vec_s0 = gen_match_vec(s0_lookup.region_p1_tag) 342289fc2f9SLinJiawei val region_m1_match_vec_s0 = gen_match_vec(s0_lookup.region_m1_tag) 343289fc2f9SLinJiawei 344289fc2f9SLinJiawei val any_region_match = Cat(region_match_vec_s0).orR 345289fc2f9SLinJiawei val any_region_p1_match = Cat(region_p1_match_vec_s0).orR && s0_lookup.allow_cross_region_p1 346289fc2f9SLinJiawei val any_region_m1_match = Cat(region_m1_match_vec_s0).orR && s0_lookup.allow_cross_region_m1 347289fc2f9SLinJiawei 3486005a7e2Shappy-lx val region_match_vec_dcache_evict_s0 = gen_match_vec(s0_dcache_evict_tag) 3496005a7e2Shappy-lx val any_region_dcache_evict_match = Cat(region_match_vec_dcache_evict_s0).orR 3506005a7e2Shappy-lx // s0 dcache evict a entry that may be replaced in s1 3516005a7e2Shappy-lx val s0_dcache_evict_conflict = Cat(VecInit(region_match_vec_dcache_evict_s0).asUInt & s1_replace_mask_w).orR 3526005a7e2Shappy-lx val s0_do_dcache_evict = io.s0_dcache_evict.fire && any_region_dcache_evict_match 3536005a7e2Shappy-lx 3546005a7e2Shappy-lx io.s0_dcache_evict.ready := !s0_lookup_valid && !s0_dcache_evict_conflict 3556005a7e2Shappy-lx 356289fc2f9SLinJiawei val s0_region_hit = any_region_match 357967327d8SLinJiawei val s0_cross_region_hit = any_region_m1_match || any_region_p1_match 358967327d8SLinJiawei val s0_alloc = s0_lookup_valid && !s0_region_hit && !s0_match_prev 359289fc2f9SLinJiawei val s0_pf_gen_match_vec = valids.indices.map(i => { 360289fc2f9SLinJiawei Mux(any_region_match, 361289fc2f9SLinJiawei region_match_vec_s0(i), 362289fc2f9SLinJiawei Mux(any_region_m1_match, 363289fc2f9SLinJiawei region_m1_match_vec_s0(i), region_p1_match_vec_s0(i) 364289fc2f9SLinJiawei ) 365289fc2f9SLinJiawei ) 366289fc2f9SLinJiawei }) 367289fc2f9SLinJiawei val s0_agt_entry = Wire(new AGTEntry()) 368289fc2f9SLinJiawei 369289fc2f9SLinJiawei s0_agt_entry.pht_index := 
s0_lookup.pht_index 370289fc2f9SLinJiawei s0_agt_entry.pht_tag := s0_lookup.pht_tag 371289fc2f9SLinJiawei s0_agt_entry.region_bits := region_offset_to_bits(s0_lookup.region_offset) 372a982a3c9Shappy-lx // update bits this time 373a982a3c9Shappy-lx s0_agt_entry.region_bit_single := region_offset_to_bits(s0_lookup.region_offset) 374289fc2f9SLinJiawei s0_agt_entry.region_tag := s0_lookup.region_tag 375967327d8SLinJiawei s0_agt_entry.region_offset := s0_lookup.region_offset 376289fc2f9SLinJiawei s0_agt_entry.access_cnt := 1.U 377a982a3c9Shappy-lx 378a982a3c9Shappy-lx s0_agt_entry.has_been_signal_updated := false.B 379289fc2f9SLinJiawei // lookup_region + 1 == entry_region 380289fc2f9SLinJiawei // lookup_region = entry_region - 1 => decr mode 381289fc2f9SLinJiawei s0_agt_entry.decr_mode := !s0_region_hit && !any_region_m1_match && any_region_p1_match 382967327d8SLinJiawei val s0_replace_way = replacement.way 383967327d8SLinJiawei val s0_replace_mask = UIntToOH(s0_replace_way) 384289fc2f9SLinJiawei // s0 hit a entry that may be replaced in s1 385967327d8SLinJiawei val s0_update_conflict = Cat(VecInit(region_match_vec_s0).asUInt & s1_replace_mask_w).orR 386967327d8SLinJiawei val s0_update = s0_lookup_valid && s0_region_hit && !s0_update_conflict 387a982a3c9Shappy-lx s0_agt_entry.single_update := s0_update 388967327d8SLinJiawei 389967327d8SLinJiawei val s0_access_way = Mux1H( 390967327d8SLinJiawei Seq(s0_update, s0_alloc), 391967327d8SLinJiawei Seq(OHToUInt(region_match_vec_s0), s0_replace_way) 392967327d8SLinJiawei ) 393967327d8SLinJiawei when(s0_update || s0_alloc) { 394967327d8SLinJiawei replacement.access(s0_access_way) 395967327d8SLinJiawei } 396289fc2f9SLinJiawei 397289fc2f9SLinJiawei // stage1: update/alloc 398289fc2f9SLinJiawei // region hit, update entry 3994ccb2e8bSYanqin Li val s1_update = GatedValidRegNext(s0_update, false.B) 400967327d8SLinJiawei val s1_update_mask = RegEnable(VecInit(region_match_vec_s0), s0_lookup_valid) 401f21b441aSLinJiawei val 
s1_agt_entry = RegEnable(s0_agt_entry, s0_lookup_valid) 4024ccb2e8bSYanqin Li val s1_cross_region_match = RegEnable(s0_cross_region_hit, s0_lookup_valid) 4034ccb2e8bSYanqin Li val s1_alloc = GatedValidRegNext(s0_alloc, false.B) 404967327d8SLinJiawei val s1_alloc_entry = s1_agt_entry 4054ccb2e8bSYanqin Li val s1_do_dcache_evict = GatedValidRegNext(s0_do_dcache_evict, false.B) 4066005a7e2Shappy-lx val s1_replace_mask = Mux( 4076005a7e2Shappy-lx s1_do_dcache_evict, 4086005a7e2Shappy-lx RegEnable(VecInit(region_match_vec_dcache_evict_s0).asUInt, s0_do_dcache_evict), 4096005a7e2Shappy-lx RegEnable(s0_replace_mask, s0_lookup_valid) 4106005a7e2Shappy-lx ) 4116005a7e2Shappy-lx s1_replace_mask_w := s1_replace_mask & Fill(smsParams.active_gen_table_size, s1_alloc || s1_do_dcache_evict) 412f21b441aSLinJiawei val s1_evict_entry = Mux1H(s1_replace_mask, entries) 413f21b441aSLinJiawei val s1_evict_valid = Mux1H(s1_replace_mask, valids) 414f21b441aSLinJiawei // pf gen 415289fc2f9SLinJiawei val s1_pf_gen_match_vec = RegEnable(VecInit(s0_pf_gen_match_vec), s0_lookup_valid) 416289fc2f9SLinJiawei val s1_region_paddr = RegEnable(s0_lookup.region_paddr, s0_lookup_valid) 417289fc2f9SLinJiawei val s1_region_vaddr = RegEnable(s0_lookup.region_vaddr, s0_lookup_valid) 418289fc2f9SLinJiawei val s1_region_offset = RegEnable(s0_lookup.region_offset, s0_lookup_valid) 419a982a3c9Shappy-lx val s1_bit_region_signal = RegEnable(region_offset_to_bits(s0_lookup.region_offset), s0_lookup_valid) 420a982a3c9Shappy-lx 421289fc2f9SLinJiawei for(i <- entries.indices){ 422289fc2f9SLinJiawei val alloc = s1_replace_mask(i) && s1_alloc 423289fc2f9SLinJiawei val update = s1_update_mask(i) && s1_update 424289fc2f9SLinJiawei val update_entry = WireInit(entries(i)) 425289fc2f9SLinJiawei update_entry.region_bits := entries(i).region_bits | s1_agt_entry.region_bits 426289fc2f9SLinJiawei update_entry.access_cnt := Mux(entries(i).access_cnt === (REGION_BLKS - 1).U, 427289fc2f9SLinJiawei entries(i).access_cnt, 
428289fc2f9SLinJiawei entries(i).access_cnt + (s1_agt_entry.region_bits & (~entries(i).region_bits).asUInt).orR 429289fc2f9SLinJiawei ) 430a982a3c9Shappy-lx update_entry.region_bit_single := s1_agt_entry.region_bit_single 431a982a3c9Shappy-lx update_entry.has_been_signal_updated := entries(i).has_been_signal_updated || (!((s1_alloc || s1_do_dcache_evict) && s1_evict_valid)) && s1_update 432289fc2f9SLinJiawei valids(i) := valids(i) || alloc 433289fc2f9SLinJiawei entries(i) := Mux(alloc, s1_alloc_entry, Mux(update, update_entry, entries(i))) 434289fc2f9SLinJiawei } 435289fc2f9SLinJiawei 436a982a3c9Shappy-lx val s1_update_entry = Mux1H(s1_update_mask, entries) 437a982a3c9Shappy-lx val s1_update_valid = Mux1H(s1_update_mask, valids) 438a982a3c9Shappy-lx 439a982a3c9Shappy-lx 440f21b441aSLinJiawei when(s1_update){ 441f21b441aSLinJiawei assert(PopCount(s1_update_mask) === 1.U, "multi-agt-update") 442f21b441aSLinJiawei } 443f21b441aSLinJiawei when(s1_alloc){ 444f21b441aSLinJiawei assert(PopCount(s1_replace_mask) === 1.U, "multi-agt-alloc") 445f21b441aSLinJiawei } 446289fc2f9SLinJiawei 447289fc2f9SLinJiawei // pf_addr 448289fc2f9SLinJiawei // 1.hit => pf_addr = lookup_addr + (decr ? 
-1 : 1) 449289fc2f9SLinJiawei // 2.lookup region - 1 hit => lookup_addr + 1 (incr mode) 450289fc2f9SLinJiawei // 3.lookup region + 1 hit => lookup_addr - 1 (decr mode) 451289fc2f9SLinJiawei val s1_hited_entry_decr = Mux1H(s1_update_mask, entries.map(_.decr_mode)) 452289fc2f9SLinJiawei val s1_pf_gen_decr_mode = Mux(s1_update, 453289fc2f9SLinJiawei s1_hited_entry_decr, 454289fc2f9SLinJiawei s1_agt_entry.decr_mode 455289fc2f9SLinJiawei ) 45685de5caeSLinJiawei 4575d13017eSLinJiawei val s1_pf_gen_vaddr_inc = Cat(0.U, s1_region_vaddr(REGION_TAG_WIDTH - 1, 0), s1_region_offset) + io.act_stride 4585d13017eSLinJiawei val s1_pf_gen_vaddr_dec = Cat(0.U, s1_region_vaddr(REGION_TAG_WIDTH - 1, 0), s1_region_offset) - io.act_stride 459f21b441aSLinJiawei val s1_vaddr_inc_cross_page = s1_pf_gen_vaddr_inc(BLOCK_ADDR_PAGE_BIT) =/= s1_region_vaddr(REGION_ADDR_PAGE_BIT) 460f21b441aSLinJiawei val s1_vaddr_dec_cross_page = s1_pf_gen_vaddr_dec(BLOCK_ADDR_PAGE_BIT) =/= s1_region_vaddr(REGION_ADDR_PAGE_BIT) 46185de5caeSLinJiawei val s1_vaddr_inc_cross_max_lim = s1_pf_gen_vaddr_inc.head(1).asBool 46285de5caeSLinJiawei val s1_vaddr_dec_cross_max_lim = s1_pf_gen_vaddr_dec.head(1).asBool 46385de5caeSLinJiawei 46485de5caeSLinJiawei //val s1_pf_gen_vaddr_p1 = s1_region_vaddr(REGION_TAG_WIDTH - 1, 0) + 1.U 46585de5caeSLinJiawei //val s1_pf_gen_vaddr_m1 = s1_region_vaddr(REGION_TAG_WIDTH - 1, 0) - 1.U 466289fc2f9SLinJiawei val s1_pf_gen_vaddr = Cat( 467289fc2f9SLinJiawei s1_region_vaddr(REGION_ADDR_BITS - 1, REGION_TAG_WIDTH), 468289fc2f9SLinJiawei Mux(s1_pf_gen_decr_mode, 46985de5caeSLinJiawei s1_pf_gen_vaddr_dec.tail(1).head(REGION_TAG_WIDTH), 47085de5caeSLinJiawei s1_pf_gen_vaddr_inc.tail(1).head(REGION_TAG_WIDTH) 471289fc2f9SLinJiawei ) 472289fc2f9SLinJiawei ) 47385de5caeSLinJiawei val s1_pf_gen_offset = Mux(s1_pf_gen_decr_mode, 47485de5caeSLinJiawei s1_pf_gen_vaddr_dec(REGION_OFFSET - 1, 0), 47585de5caeSLinJiawei s1_pf_gen_vaddr_inc(REGION_OFFSET - 1, 0) 47685de5caeSLinJiawei ) 
47785de5caeSLinJiawei val s1_pf_gen_offset_mask = UIntToOH(s1_pf_gen_offset) 478289fc2f9SLinJiawei val s1_pf_gen_access_cnt = Mux1H(s1_pf_gen_match_vec, entries.map(_.access_cnt)) 479967327d8SLinJiawei val s1_in_active_page = s1_pf_gen_access_cnt > io.act_threshold 480967327d8SLinJiawei val s1_pf_gen_valid = prev_lookup_valid && (s1_alloc && s1_cross_region_match || s1_update) && Mux(s1_pf_gen_decr_mode, 481f21b441aSLinJiawei !s1_vaddr_dec_cross_max_lim, 482f21b441aSLinJiawei !s1_vaddr_inc_cross_max_lim 483967327d8SLinJiawei ) && s1_in_active_page && io.agt_en 484f21b441aSLinJiawei val s1_pf_gen_paddr_valid = Mux(s1_pf_gen_decr_mode, !s1_vaddr_dec_cross_page, !s1_vaddr_inc_cross_page) 485289fc2f9SLinJiawei val s1_pf_gen_region_addr = Mux(s1_pf_gen_paddr_valid, 486f21b441aSLinJiawei Cat(s1_region_paddr(REGION_ADDR_BITS - 1, REGION_ADDR_PAGE_BIT), s1_pf_gen_vaddr(REGION_ADDR_PAGE_BIT - 1, 0)), 487f21b441aSLinJiawei s1_pf_gen_vaddr 488289fc2f9SLinJiawei ) 489f21b441aSLinJiawei val s1_pf_gen_region_tag = region_hash_tag(s1_pf_gen_vaddr) 490289fc2f9SLinJiawei val s1_pf_gen_incr_region_bits = VecInit((0 until REGION_BLKS).map(i => { 49185de5caeSLinJiawei if(i == 0) true.B else !s1_pf_gen_offset_mask(i - 1, 0).orR 492289fc2f9SLinJiawei })).asUInt 493289fc2f9SLinJiawei val s1_pf_gen_decr_region_bits = VecInit((0 until REGION_BLKS).map(i => { 494289fc2f9SLinJiawei if(i == REGION_BLKS - 1) true.B 49585de5caeSLinJiawei else !s1_pf_gen_offset_mask(REGION_BLKS - 1, i + 1).orR 496289fc2f9SLinJiawei })).asUInt 497289fc2f9SLinJiawei val s1_pf_gen_region_bits = Mux(s1_pf_gen_decr_mode, 498289fc2f9SLinJiawei s1_pf_gen_decr_region_bits, 499289fc2f9SLinJiawei s1_pf_gen_incr_region_bits 500289fc2f9SLinJiawei ) 50185de5caeSLinJiawei val s1_pht_lookup_valid = Wire(Bool()) 50285de5caeSLinJiawei val s1_pht_lookup = Wire(new PhtLookup()) 503289fc2f9SLinJiawei 50485de5caeSLinJiawei s1_pht_lookup_valid := !s1_pf_gen_valid && prev_lookup_valid 50585de5caeSLinJiawei s1_pht_lookup.pht_index := 
s1_agt_entry.pht_index 50685de5caeSLinJiawei s1_pht_lookup.pht_tag := s1_agt_entry.pht_tag 507f21b441aSLinJiawei s1_pht_lookup.region_vaddr := s1_region_vaddr 508f21b441aSLinJiawei s1_pht_lookup.region_paddr := s1_region_paddr 50985de5caeSLinJiawei s1_pht_lookup.region_offset := s1_region_offset 510a982a3c9Shappy-lx s1_pht_lookup.region_bit_single := s1_bit_region_signal 511289fc2f9SLinJiawei 512967327d8SLinJiawei io.s1_sel_stride := prev_lookup_valid && (s1_alloc && s1_cross_region_match || s1_update) && !s1_in_active_page 513967327d8SLinJiawei 514289fc2f9SLinJiawei // stage2: gen pf reg / evict entry to pht 515a982a3c9Shappy-lx // if no evict, update this time region bits to pht 5164ccb2e8bSYanqin Li val s2_do_dcache_evict = GatedValidRegNext(s1_do_dcache_evict, false.B) 517a982a3c9Shappy-lx val s1_send_update_entry = Mux((s1_alloc || s1_do_dcache_evict) && s1_evict_valid, s1_evict_entry, s1_update_entry) 518a982a3c9Shappy-lx val s2_evict_entry = RegEnable(s1_send_update_entry, s1_alloc || s1_do_dcache_evict || s1_update) 519a982a3c9Shappy-lx val s2_evict_valid = GatedValidRegNext(((s1_alloc || s1_do_dcache_evict) && s1_evict_valid) || s1_update, false.B) 520a982a3c9Shappy-lx val s2_update = RegNext(s1_update, false.B) 521a982a3c9Shappy-lx val s2_real_update = RegNext(((s1_alloc || s1_do_dcache_evict) && s1_evict_valid), false.B) 522289fc2f9SLinJiawei val s2_paddr_valid = RegEnable(s1_pf_gen_paddr_valid, s1_pf_gen_valid) 523289fc2f9SLinJiawei val s2_pf_gen_region_tag = RegEnable(s1_pf_gen_region_tag, s1_pf_gen_valid) 524289fc2f9SLinJiawei val s2_pf_gen_decr_mode = RegEnable(s1_pf_gen_decr_mode, s1_pf_gen_valid) 525289fc2f9SLinJiawei val s2_pf_gen_region_paddr = RegEnable(s1_pf_gen_region_addr, s1_pf_gen_valid) 526967327d8SLinJiawei val s2_pf_gen_alias_bits = RegEnable(get_alias_bits(s1_pf_gen_vaddr), s1_pf_gen_valid) 527289fc2f9SLinJiawei val s2_pf_gen_region_bits = RegEnable(s1_pf_gen_region_bits, s1_pf_gen_valid) 5284ccb2e8bSYanqin Li val s2_pf_gen_valid = 
GatedValidRegNext(s1_pf_gen_valid, false.B) 5294ccb2e8bSYanqin Li val s2_pht_lookup_valid = GatedValidRegNext(s1_pht_lookup_valid, false.B) && !io.s2_stride_hit 53085de5caeSLinJiawei val s2_pht_lookup = RegEnable(s1_pht_lookup, s1_pht_lookup_valid) 531289fc2f9SLinJiawei 532a982a3c9Shappy-lx io.s2_evict.valid := Mux(s2_real_update, s2_evict_valid && (s2_evict_entry.access_cnt > 1.U), s2_evict_valid) 533289fc2f9SLinJiawei io.s2_evict.bits := s2_evict_entry 534a982a3c9Shappy-lx io.s2_evict.bits.single_update := s2_update && (!s2_real_update) 535289fc2f9SLinJiawei 536289fc2f9SLinJiawei io.s2_pf_gen_req.bits.region_tag := s2_pf_gen_region_tag 537289fc2f9SLinJiawei io.s2_pf_gen_req.bits.region_addr := s2_pf_gen_region_paddr 538967327d8SLinJiawei io.s2_pf_gen_req.bits.alias_bits := s2_pf_gen_alias_bits 539289fc2f9SLinJiawei io.s2_pf_gen_req.bits.region_bits := s2_pf_gen_region_bits 540289fc2f9SLinJiawei io.s2_pf_gen_req.bits.paddr_valid := s2_paddr_valid 541289fc2f9SLinJiawei io.s2_pf_gen_req.bits.decr_mode := s2_pf_gen_decr_mode 542d7fb6da3Shappy-lx io.s2_pf_gen_req.valid := false.B 5432db9ec44SLinJiawei io.s2_pf_gen_req.bits.debug_source_type := HW_PREFETCH_AGT.U 544289fc2f9SLinJiawei 54585de5caeSLinJiawei io.s2_pht_lookup.valid := s2_pht_lookup_valid 54685de5caeSLinJiawei io.s2_pht_lookup.bits := s2_pht_lookup 54785de5caeSLinJiawei 548967327d8SLinJiawei XSPerfAccumulate("sms_agt_in", io.s0_lookup.valid) 549289fc2f9SLinJiawei XSPerfAccumulate("sms_agt_alloc", s1_alloc) // cross region match or filter evict 550289fc2f9SLinJiawei XSPerfAccumulate("sms_agt_update", s1_update) // entry hit 551289fc2f9SLinJiawei XSPerfAccumulate("sms_agt_pf_gen", io.s2_pf_gen_req.valid) 552289fc2f9SLinJiawei XSPerfAccumulate("sms_agt_pf_gen_paddr_valid", 553289fc2f9SLinJiawei io.s2_pf_gen_req.valid && io.s2_pf_gen_req.bits.paddr_valid 554289fc2f9SLinJiawei ) 555289fc2f9SLinJiawei XSPerfAccumulate("sms_agt_pf_gen_decr_mode", 556289fc2f9SLinJiawei io.s2_pf_gen_req.valid && 
io.s2_pf_gen_req.bits.decr_mode
  )
  for(i <- 0 until smsParams.active_gen_table_size){
    XSPerfAccumulate(s"sms_agt_access_entry_$i",
      s1_alloc && s1_replace_mask(i) || s1_update && s1_update_mask(i)
    )
  }
  XSPerfAccumulate("sms_agt_evict", s2_evict_valid)
  XSPerfAccumulate("sms_agt_evict_by_plru", s2_evict_valid && !s2_do_dcache_evict)
  XSPerfAccumulate("sms_agt_evict_by_dcache", s2_evict_valid && s2_do_dcache_evict)
  XSPerfAccumulate("sms_agt_evict_one_hot_pattern", s2_evict_valid && (s2_evict_entry.access_cnt === 1.U))
}

/**
 * Lookup request handed to the pattern history table.
 *
 * Carries the PHT set index and tag to probe, the physical and virtual
 * region addresses and the block offset inside the region of the access that
 * triggered the lookup, plus a per-block bit vector (`region_bit_single`).
 */
class PhtLookup()(implicit p: Parameters) extends XSBundle with HasSMSModuleHelper {
  val pht_index = UInt(PHT_INDEX_BITS.W)
  val pht_tag = UInt(PHT_TAG_BITS.W)
  val region_paddr = UInt(REGION_ADDR_BITS.W)
  val region_vaddr = UInt(REGION_ADDR_BITS.W)
  val region_offset = UInt(REGION_OFFSET.W)
  val region_bit_single = UInt(REGION_BLKS.W)
}

/**
 * One entry of the pattern history table SRAM.
 *
 * `hist` holds `2 * (REGION_BLKS - 1)` counters of `PHT_HIST_BITS` bits each;
 * `tag` is compared against the request tag; `decr_mode` is stored alongside.
 */
class PhtEntry()(implicit p: Parameters) extends XSBundle with HasSMSModuleHelper {
  val hist = Vec(2 * (REGION_BLKS - 1), UInt(PHT_HIST_BITS.W))
  val tag = UInt(PHT_TAG_BITS.W)
  val decr_mode = Bool()
}

/**
 * Pattern history table (PHT) of the SMS prefetcher.
 *
 * Storage: a single-port SRAM of `PhtEntry` organised as
 * `pht_size / pht_ways` sets by `pht_ways` ways, per-entry valid bits kept in
 * registers, and one PLRU replacement state per set.
 *
 * Requests: AGT updates (`io.agt_update`) and lookups (`io.s2_agt_lookup`)
 * each enter through an `OverrideableQueue`; when both are pending the update
 * is selected first.
 *
 * Pipeline:
 *  - s0: select update or lookup, produce the RAM read address
 *  - s1: issue the RAM read (stalled by `s1_wait`)
 *  - s2: compare tags, compute the updated / freshly allocated history
 *  - s3: write the entry back (updates) or derive prefetch bit vectors (lookup hits)
 *  - s4: registered prefetch requests, arbitrated onto `io.pf_gen_req`
 */
class PatternHistoryTable()(implicit p: Parameters) extends XSModule with HasSMSModuleHelper {
  val io = IO(new Bundle() {
    // receive agt evicted entry
    val agt_update = Flipped(ValidIO(new AGTEntry()))
    // at stage2, if we know agt missed, lookup pht
    val s2_agt_lookup = Flipped(ValidIO(new PhtLookup()))
    // pht-generated prefetch req
    val pf_gen_req = ValidIO(new PfGenReq())
  })

  val pht_ram = Module(new SRAMTemplate[PhtEntry](new PhtEntry,
    set = smsParams.pht_size / smsParams.pht_ways,
    way = smsParams.pht_ways,
    singlePort = true,
    withClockGate = true,
    hasMbist = hasMbist,
    hasSramCtl = hasSramCtl
  ))
  def PHT_SETS = smsParams.pht_size / smsParams.pht_ways
  // clockgated on pht_valids
  // pht_valids_reg is indexed (way)(set); a set's valid bits are only rewritten
  // in cycles where pht_valids_enable(set) is raised.
  val pht_valids_reg = RegInit(VecInit(Seq.fill(smsParams.pht_ways){
    VecInit(Seq.fill(PHT_SETS){false.B})
  }))
  val pht_valids_enable = WireInit(VecInit(Seq.fill(PHT_SETS) {false.B}))
  val pht_valids_next = WireInit(pht_valids_reg)
  for(j <- 0 until PHT_SETS){
    when(pht_valids_enable(j)){
      (0 until smsParams.pht_ways).foreach(i => pht_valids_reg(i)(j) := pht_valids_next(i)(j))
    }
  }

  // one PLRU state per set
  val replacement = Seq.fill(PHT_SETS) { ReplacementPolicy.fromString("plru", smsParams.pht_ways) }

  val lookup_queue = Module(new OverrideableQueue(new PhtLookup, smsParams.pht_lookup_queue_size))
  lookup_queue.io.in := io.s2_agt_lookup
  val lookup = lookup_queue.io.out

  val evict_queue = Module(new OverrideableQueue(new AGTEntry, smsParams.pht_lookup_queue_size))
  evict_queue.io.in := io.agt_update
  val evict = evict_queue.io.out

  XSPerfAccumulate("sms_pht_lookup_in", lookup_queue.io.in.fire)
  XSPerfAccumulate("sms_pht_lookup_out", lookup_queue.io.out.fire)
  XSPerfAccumulate("sms_pht_evict_in", evict_queue.io.in.fire)
  XSPerfAccumulate("sms_pht_evict_out", evict_queue.io.out.fire)

  val s3_ram_en = Wire(Bool())
  val s1_valid = Wire(Bool())
  // if s1.raddr == s2.waddr or s3 is using ram port, block s1
  val s1_wait = Wire(Bool())
  // pipe s0: select an op from [lookup, update], generate ram read addr
  val s0_valid = lookup.valid || evict.valid

  // the update (evict) stream has priority: a lookup is only dequeued when no update is pending
  evict.ready := !s1_valid || !s1_wait
  lookup.ready := evict.ready && !evict.valid

  val s0_ram_raddr = Mux(evict.valid,
    evict.bits.pht_index,
    lookup.bits.pht_index
  )
  val s0_tag = Mux(evict.valid, evict.bits.pht_tag, lookup.bits.pht_tag)
  val s0_region_offset = Mux(evict.valid, evict.bits.region_offset, lookup.bits.region_offset)
  val s0_region_paddr = lookup.bits.region_paddr
  val s0_region_vaddr = lookup.bits.region_vaddr
  val s0_region_bits = evict.bits.region_bits
  val s0_decr_mode = evict.bits.decr_mode
  val s0_evict = evict.valid
  val s0_access_cnt_signal = evict.bits.access_cnt
  val s0_single_update = evict.bits.single_update
  val s0_has_been_single_update = evict.bits.has_been_signal_updated
  val s0_region_bit_single = evict.bits.region_bit_single

  // pipe s1: send addr to ram
  val s1_valid_r = RegInit(false.B)
  // hold the s1 request while it is stalled, otherwise take whatever s0 offers
  s1_valid_r := Mux(s1_valid && s1_wait, true.B, s0_valid)
  s1_valid := s1_valid_r
  val s1_reg_en = s0_valid && (!s1_wait || !s1_valid)
  val s1_ram_raddr = RegEnable(s0_ram_raddr, s1_reg_en)
  val s1_tag = RegEnable(s0_tag, s1_reg_en)
  val s1_access_cnt_signal = RegEnable(s0_access_cnt_signal, s1_reg_en)
  val s1_region_bits = RegEnable(s0_region_bits, s1_reg_en)
  val s1_decr_mode = RegEnable(s0_decr_mode, s1_reg_en)
  val s1_region_paddr = RegEnable(s0_region_paddr, s1_reg_en)
  val s1_region_vaddr = RegEnable(s0_region_vaddr, s1_reg_en)
  val s1_region_offset = RegEnable(s0_region_offset, s1_reg_en)
  val s1_single_update = RegEnable(s0_single_update, s1_reg_en)
  val s1_has_been_single_update = RegEnable(s0_has_been_single_update, s1_reg_en)
  val s1_region_bit_single = RegEnable(s0_region_bit_single, s1_reg_en)
  // per-way valid bit of the set being read
  val s1_pht_valids = pht_valids_reg.map(way => Mux1H(
    (0 until PHT_SETS).map(i => i.U === s1_ram_raddr),
    way
  ))
  val s1_evict = RegEnable(s0_evict, s1_reg_en)
  // PLRU victim of the set being read
  val s1_replace_way = Mux1H(
    (0 until PHT_SETS).map(i => i.U === s1_ram_raddr),
    replacement.map(_.way)
  )
  // region bits are re-aligned by the trigger offset to index the
  // 2 * (REGION_BLKS - 1) history counters
  val s1_hist_update_mask = Cat(
    Fill(REGION_BLKS - 1, true.B), 0.U((REGION_BLKS - 1).W)
  ) >> s1_region_offset
  val s1_hist_bits = Cat(
    s1_region_bits.head(REGION_BLKS - 1) >> s1_region_offset,
    (Cat(
      s1_region_bits.tail(1), 0.U((REGION_BLKS - 1).W)
    ) >> s1_region_offset)(REGION_BLKS - 2, 0)
  )
  val s1_hist_single_bit = Cat(
    s1_region_bit_single.head(REGION_BLKS - 1) >> s1_region_offset,
    (Cat(
      s1_region_bit_single.tail(1), 0.U((REGION_BLKS - 1).W)
    ) >> s1_region_offset)(REGION_BLKS - 2, 0)
  )

  // pipe s2: generate ram write addr/data
  val s2_valid = GatedValidRegNext(s1_valid && !s1_wait, false.B)
  val s2_reg_en = s1_valid && !s1_wait
  val s2_hist_update_mask = RegEnable(s1_hist_update_mask, s2_reg_en)
  val s2_single_update = RegEnable(s1_single_update, s2_reg_en)
  val s2_has_been_single_update = RegEnable(s1_has_been_single_update, s2_reg_en)
  val s2_hist_bits = RegEnable(s1_hist_bits, s2_reg_en)
  val s2_hist_bit_single = RegEnable(s1_hist_single_bit, s2_reg_en)
  val s2_tag = RegEnable(s1_tag, s2_reg_en)
  val s2_region_bits = RegEnable(s1_region_bits, s2_reg_en)
  val s2_decr_mode = RegEnable(s1_decr_mode, s2_reg_en)
  val s2_region_paddr = RegEnable(s1_region_paddr, s2_reg_en)
  val s2_region_vaddr = RegEnable(s1_region_vaddr, s2_reg_en)
  val s2_region_offset = RegEnable(s1_region_offset, s2_reg_en)
  val s2_region_offset_mask = region_offset_to_bits(s2_region_offset)
  val s2_evict = RegEnable(s1_evict, s2_reg_en)
  val s2_pht_valids = s1_pht_valids.map(v => RegEnable(v, s2_reg_en))
  val s2_replace_way = RegEnable(s1_replace_way, s2_reg_en)
  val s2_ram_waddr = RegEnable(s1_ram_raddr, s2_reg_en)
  val s2_ram_rdata = pht_ram.io.r.resp.data
  val s2_ram_rtags = s2_ram_rdata.map(_.tag)
  val s2_tag_match_vec = s2_ram_rtags.map(t => t === s2_tag)
  val s2_access_cnt_signal = RegEnable(s1_access_cnt_signal, s2_reg_en)
  // a way hits only if its valid bit is set and its tag matches
  val s2_hit_vec = s2_tag_match_vec.zip(s2_pht_valids).map({
    case (tag_match, v) => v && tag_match
  })

  //distinguish single update and evict update
  // Per counter h (only where s2_hist_update_mask is set):
  //  - evict update, entry already single-updated: keep h if the block was
  //    accessed, otherwise decrement (floor 0)
  //  - evict update, otherwise: increment (saturating) if accessed, else decrement (floor 0)
  //  - single update: if the block bit is set, increment (saturating), jumping
  //    from 0 straight to 2; untouched blocks keep h
  val s2_hist_update = s2_ram_rdata.map(way => VecInit(way.hist.zipWithIndex.map({
    case (h, i) =>
      val do_update = s2_hist_update_mask(i)
      val hist_updated = Mux(!s2_single_update,
        Mux(s2_has_been_single_update,
          Mux(s2_hist_bits(i), h, Mux(h === 0.U, 0.U, h - 1.U)),
          Mux(s2_hist_bits(i), Mux(h.andR, h, h + 1.U), Mux(h === 0.U, 0.U, h - 1.U))),
        Mux(s2_hist_bit_single(i), Mux(h.andR, h, Mux(h === 0.U, h + 2.U, h + 1.U)), h)
      )
      Mux(do_update, hist_updated, h)
  })))

  // prefetch pattern of the hit way: the top bit of every counter
  val s2_hist_pf_gen = Mux1H(s2_hit_vec, s2_ram_rdata.map(way => VecInit(way.hist.map(_.head(1))).asUInt))
  // history for a newly allocated entry: each counter starts at 0 or 1
  val s2_new_hist = VecInit(s2_hist_bits.asBools.map(b => Cat(0.U((PHT_HIST_BITS - 1).W), b)))
  val s2_new_hist_single = VecInit(s2_hist_bit_single.asBools.map(b => Cat(0.U((PHT_HIST_BITS - 1).W), b)))
  val s2_new_hist_real = Mux(s2_single_update, s2_new_hist_single, s2_new_hist)
  val s2_pht_hit = Cat(s2_hit_vec).orR
  // update when valid bits over 4
  // i.e. a single update only proceeds to s3 on a PHT hit or when access_cnt > 4;
  // every other request always proceeds
  val signal_update_write = Mux(!s2_single_update, true.B, s2_pht_hit || s2_single_update && (s2_access_cnt_signal > 4.U))
  val s2_hist = Mux(s2_pht_hit, Mux1H(s2_hit_vec, s2_hist_update), s2_new_hist_real)
  val s2_repl_way_mask = UIntToOH(s2_replace_way)
  val s2_incr_region_vaddr = s2_region_vaddr + 1.U
  val s2_decr_region_vaddr = s2_region_vaddr - 1.U

  // pipe s3: send addr/data to ram, gen pf_req
  val s3_valid = GatedValidRegNext(s2_valid && signal_update_write, false.B)
  val s3_evict = RegEnable(s2_evict, s2_valid)
  val s3_hist = RegEnable(s2_hist, s2_valid)
  val s3_hist_pf_gen = RegEnable(s2_hist_pf_gen, s2_valid)

  val s3_hist_update_mask = RegEnable(s2_hist_update_mask.asUInt, s2_valid)

  val s3_region_offset = RegEnable(s2_region_offset, s2_valid)
  val s3_region_offset_mask = RegEnable(s2_region_offset_mask, s2_valid)
  val s3_decr_mode = RegEnable(s2_decr_mode, s2_valid)
  val s3_region_paddr = RegEnable(s2_region_paddr, s2_valid)
  val s3_region_vaddr = RegEnable(s2_region_vaddr, s2_valid)
  val s3_pht_tag = RegEnable(s2_tag, s2_valid)
  val s3_hit_vec = s2_hit_vec.map(h => RegEnable(h, s2_valid))
  val s3_hit = Cat(s3_hit_vec).orR
  val s3_hit_way = OHToUInt(s3_hit_vec)
  val s3_repl_way = RegEnable(s2_replace_way, s2_valid)
  val s3_repl_way_mask = RegEnable(s2_repl_way_mask, s2_valid)
  val s3_repl_update_mask = RegEnable(VecInit((0 until PHT_SETS).map(i => i.U === s2_ram_waddr)), s2_valid)
  val s3_ram_waddr = RegEnable(s2_ram_waddr, s2_valid)
  val s3_incr_region_vaddr = RegEnable(s2_incr_region_vaddr, s2_valid)
  val s3_decr_region_vaddr = RegEnable(s2_decr_region_vaddr, s2_valid)
  // only updates write the RAM; lookups are read-only
  s3_ram_en := s3_valid && s3_evict
  val s3_ram_wdata = Wire(new PhtEntry())
  s3_ram_wdata.hist := s3_hist
  s3_ram_wdata.tag := s3_pht_tag
  s3_ram_wdata.decr_mode := s3_decr_mode

  s1_wait := (s2_valid && s2_evict && s2_ram_waddr === s1_ram_raddr) || s3_ram_en

  // an update that misses allocates the PLRU victim: mark that (way, set) valid
  for((valids, way_idx) <- pht_valids_next.zipWithIndex){
    val update_way = s3_repl_way_mask(way_idx)
    for((v, set_idx) <- valids.zipWithIndex){
      val update_set = s3_repl_update_mask(set_idx)
      when(s3_valid && s3_evict && !s3_hit && update_set && update_way){
        pht_valids_enable(set_idx) := true.B
        v := true.B
      }
    }
  }
  // touch PLRU state: the hit way on a hit, the allocated way on an update miss
  for((r, i) <- replacement.zipWithIndex){
    when(s3_valid && s3_repl_update_mask(i)){
      when(s3_hit){
        r.access(s3_hit_way)
      }.elsewhen(s3_evict){
        r.access(s3_repl_way)
      }
    }
  }

  val s3_way_mask = Mux(s3_hit,
    VecInit(s3_hit_vec).asUInt,
    s3_repl_way_mask,
  ).asUInt

  pht_ram.io.r(
    s1_valid, s1_ram_raddr
  )
  pht_ram.io.w(
    s3_ram_en, s3_ram_wdata, s3_ram_waddr, s3_way_mask
  )
  // At most one way of a set may hit. PopCount is used instead of `andR`
  // because "all ways hit" only equals "more than one way hit" when there are
  // exactly two ways: with a single way `andR` would fire on every legal hit,
  // and with more than two ways it would miss partial multi-hits.
  when(s3_valid && s3_hit){
    assert(PopCount(s3_hit_vec) <= 1.U, "sms_pht: multi-hit!")
  }

  // generate pf req if hit
  // the stored pattern is relative to the trigger offset; shifting it back by
  // the current offset splits it into bits for the current region and for the
  // next / previous region
  val s3_hist_hi = s3_hist_pf_gen.head(REGION_BLKS - 1)
  val s3_hist_lo = s3_hist_pf_gen.tail(REGION_BLKS - 1)
  val s3_hist_hi_shifted = (Cat(0.U((REGION_BLKS - 1).W), s3_hist_hi) << s3_region_offset)(2 * (REGION_BLKS - 1) - 1, 0)
  val s3_hist_lo_shifted = (Cat(0.U((REGION_BLKS - 1).W), s3_hist_lo) << s3_region_offset)(2 * (REGION_BLKS - 1) - 1, 0)
  val s3_cur_region_bits = Cat(s3_hist_hi_shifted.tail(REGION_BLKS - 1), 0.U(1.W)) |
    Cat(0.U(1.W), s3_hist_lo_shifted.head(REGION_BLKS - 1))
  val s3_incr_region_bits = Cat(0.U(1.W), s3_hist_hi_shifted.head(REGION_BLKS - 1))
  val s3_decr_region_bits = Cat(s3_hist_lo_shifted.tail(REGION_BLKS - 1), 0.U(1.W))
  // prefetches are generated by lookup hits only
  val s3_pf_gen_valid = s3_valid && s3_hit && !s3_evict
  val s3_cur_region_valid = s3_pf_gen_valid && (s3_hist_pf_gen & s3_hist_update_mask).orR
  val s3_incr_region_valid = s3_pf_gen_valid && (s3_hist_hi & (~s3_hist_update_mask.head(REGION_BLKS - 1)).asUInt).orR
  val s3_decr_region_valid = s3_pf_gen_valid && (s3_hist_lo & (~s3_hist_update_mask.tail(REGION_BLKS - 1)).asUInt).orR
  val s3_incr_alias_bits = get_alias_bits(s3_incr_region_vaddr)
  val s3_decr_alias_bits = get_alias_bits(s3_decr_region_vaddr)
  // neighbouring regions reuse the upper physical bits of the current region
  // and take the low bits from the adjusted virtual region address
  val s3_incr_region_paddr = Cat(
    s3_region_paddr(REGION_ADDR_BITS - 1, REGION_ADDR_PAGE_BIT),
    s3_incr_region_vaddr(REGION_ADDR_PAGE_BIT - 1, 0)
  )
  val s3_decr_region_paddr = Cat(
    s3_region_paddr(REGION_ADDR_BITS - 1, REGION_ADDR_PAGE_BIT),
    s3_decr_region_vaddr(REGION_ADDR_PAGE_BIT - 1, 0)
  )
  val s3_incr_crosspage = s3_incr_region_vaddr(REGION_ADDR_PAGE_BIT) =/= s3_region_vaddr(REGION_ADDR_PAGE_BIT)
  val s3_decr_crosspage = s3_decr_region_vaddr(REGION_ADDR_PAGE_BIT) =/= s3_region_vaddr(REGION_ADDR_PAGE_BIT)
  val s3_cur_region_tag = region_hash_tag(s3_region_vaddr)
  val s3_incr_region_tag = region_hash_tag(s3_incr_region_vaddr)
  val s3_decr_region_tag = region_hash_tag(s3_decr_region_vaddr)

  // s4: up to three requests (current / next / previous region) share one output port
  val pf_gen_req_arb = Module(new Arbiter(new PfGenReq, 3))
  val s4_pf_gen_cur_region_valid = RegInit(false.B)
  val s4_pf_gen_cur_region = Reg(new PfGenReq)
  val s4_pf_gen_incr_region_valid = RegInit(false.B)
  val s4_pf_gen_incr_region = Reg(new PfGenReq)
  val s4_pf_gen_decr_region_valid = RegInit(false.B)
  val s4_pf_gen_decr_region = Reg(new PfGenReq)

  s4_pf_gen_cur_region_valid := s3_cur_region_valid
  when(s3_cur_region_valid){
    s4_pf_gen_cur_region.region_addr := s3_region_paddr
    s4_pf_gen_cur_region.alias_bits := get_alias_bits(s3_region_vaddr)
    s4_pf_gen_cur_region.region_tag := s3_cur_region_tag
    s4_pf_gen_cur_region.region_bits := s3_cur_region_bits
    s4_pf_gen_cur_region.paddr_valid := true.B
    s4_pf_gen_cur_region.decr_mode := false.B
  }
  // the incr/decr requests stay pending until the arbiter accepts them
  s4_pf_gen_incr_region_valid := s3_incr_region_valid ||
    (!pf_gen_req_arb.io.in(1).ready && s4_pf_gen_incr_region_valid)
  when(s3_incr_region_valid){
    // when the neighbouring region lies in another page the address stays
    // virtual and paddr_valid is cleared
    s4_pf_gen_incr_region.region_addr := Mux(s3_incr_crosspage, s3_incr_region_vaddr, s3_incr_region_paddr)
    s4_pf_gen_incr_region.alias_bits := s3_incr_alias_bits
    s4_pf_gen_incr_region.region_tag := s3_incr_region_tag
    s4_pf_gen_incr_region.region_bits := s3_incr_region_bits
    s4_pf_gen_incr_region.paddr_valid := !s3_incr_crosspage
    s4_pf_gen_incr_region.decr_mode := false.B
  }
  s4_pf_gen_decr_region_valid := s3_decr_region_valid ||
    (!pf_gen_req_arb.io.in(2).ready && s4_pf_gen_decr_region_valid)
  when(s3_decr_region_valid){
    s4_pf_gen_decr_region.region_addr := Mux(s3_decr_crosspage, s3_decr_region_vaddr, s3_decr_region_paddr)
    s4_pf_gen_decr_region.alias_bits := s3_decr_alias_bits
    s4_pf_gen_decr_region.region_tag := s3_decr_region_tag
    s4_pf_gen_decr_region.region_bits := s3_decr_region_bits
    s4_pf_gen_decr_region.paddr_valid := !s3_decr_crosspage
    s4_pf_gen_decr_region.decr_mode := true.B
  }

  pf_gen_req_arb.io.in.head.valid := s4_pf_gen_cur_region_valid
  pf_gen_req_arb.io.in.head.bits := s4_pf_gen_cur_region
  pf_gen_req_arb.io.in.head.bits.debug_source_type := HW_PREFETCH_PHT_CUR.U
  pf_gen_req_arb.io.in(1).valid := s4_pf_gen_incr_region_valid
  pf_gen_req_arb.io.in(1).bits := s4_pf_gen_incr_region
  pf_gen_req_arb.io.in(1).bits.debug_source_type := HW_PREFETCH_PHT_INC.U
  pf_gen_req_arb.io.in(2).valid := s4_pf_gen_decr_region_valid
  pf_gen_req_arb.io.in(2).bits := s4_pf_gen_decr_region
  pf_gen_req_arb.io.in(2).bits.debug_source_type := HW_PREFETCH_PHT_DEC.U
  pf_gen_req_arb.io.out.ready := true.B

  io.pf_gen_req.valid := pf_gen_req_arb.io.out.valid
  io.pf_gen_req.bits := pf_gen_req_arb.io.out.bits

  XSPerfAccumulate("sms_pht_update", io.agt_update.valid)
  XSPerfAccumulate("sms_pht_update_hit", s2_valid && s2_evict && s2_pht_hit)
  XSPerfAccumulate("sms_pht_lookup", io.s2_agt_lookup.valid)
  XSPerfAccumulate("sms_pht_lookup_hit", s2_valid && !s2_evict && s2_pht_hit)
  for(i <- 0 until smsParams.pht_ways){
    XSPerfAccumulate(s"sms_pht_write_way_$i", pht_ram.io.w.req.fire && pht_ram.io.w.req.bits.waymask.get(i))
  }
  for(i <- 0 until PHT_SETS){
    XSPerfAccumulate(s"sms_pht_write_set_$i", pht_ram.io.w.req.fire && pht_ram.io.w.req.bits.setIdx === i.U)
  }
  XSPerfAccumulate(s"sms_pht_pf_gen", io.pf_gen_req.valid)
}

/**
 * One entry of the prefetch filter.
 *
 * `region_bits` and `filter_bits` each hold one bit per block of the region;
 * `region_addr` is accompanied by `paddr_valid`, and `alias_bits`,
 * `decr_mode` and `debug_source_type` travel with the entry.
 */
class PrefetchFilterEntry()(implicit p: Parameters) extends XSBundle with HasSMSModuleHelper {
  val region_tag = UInt(REGION_TAG_WIDTH.W)
  val region_addr = UInt(REGION_ADDR_BITS.W)
  val region_bits = UInt(REGION_BLKS.W)
  val filter_bits = UInt(REGION_BLKS.W)
  val alias_bits = UInt(2.W)
  val paddr_valid = Bool()
  val decr_mode = Bool()
  val debug_source_type = UInt(log2Up(nSourceType).W)
}

class PrefetchFilter()(implicit p: Parameters) extends XSModule with HasSMSModuleHelper {
  val io = IO(new Bundle() {
924289fc2f9SLinJiawei val gen_req = Flipped(ValidIO(new PfGenReq())) 92585de5caeSLinJiawei val tlb_req = new TlbRequestIO(2) 92625a80bceSYanqin Li val pmp_resp = Flipped(new PMPRespBundle()) 927289fc2f9SLinJiawei val l2_pf_addr = ValidIO(UInt(PAddrBits.W)) 928967327d8SLinJiawei val pf_alias_bits = Output(UInt(2.W)) 9292db9ec44SLinJiawei val debug_source_type = Output(UInt(log2Up(nSourceType).W)) 930289fc2f9SLinJiawei }) 931289fc2f9SLinJiawei val entries = Seq.fill(smsParams.pf_filter_size){ Reg(new PrefetchFilterEntry()) } 932289fc2f9SLinJiawei val valids = Seq.fill(smsParams.pf_filter_size){ RegInit(false.B) } 933289fc2f9SLinJiawei val replacement = ReplacementPolicy.fromString("plru", smsParams.pf_filter_size) 934289fc2f9SLinJiawei 9354ccb2e8bSYanqin Li val prev_valid = GatedValidRegNext(io.gen_req.valid, false.B) 93685de5caeSLinJiawei val prev_gen_req = RegEnable(io.gen_req.bits, io.gen_req.valid) 93785de5caeSLinJiawei 938967327d8SLinJiawei val tlb_req_arb = Module(new RRArbiterInit(new TlbReq, smsParams.pf_filter_size)) 939967327d8SLinJiawei val pf_req_arb = Module(new RRArbiterInit(UInt(PAddrBits.W), smsParams.pf_filter_size)) 940289fc2f9SLinJiawei 941289fc2f9SLinJiawei io.l2_pf_addr.valid := pf_req_arb.io.out.valid 942289fc2f9SLinJiawei io.l2_pf_addr.bits := pf_req_arb.io.out.bits 943967327d8SLinJiawei io.pf_alias_bits := Mux1H(entries.zipWithIndex.map({ 944967327d8SLinJiawei case (entry, i) => (i.U === pf_req_arb.io.chosen) -> entry.alias_bits 945967327d8SLinJiawei })) 946289fc2f9SLinJiawei pf_req_arb.io.out.ready := true.B 947289fc2f9SLinJiawei 9482db9ec44SLinJiawei io.debug_source_type := VecInit(entries.map(_.debug_source_type))(pf_req_arb.io.chosen) 9492db9ec44SLinJiawei 950289fc2f9SLinJiawei val s1_valid = Wire(Bool()) 951f21b441aSLinJiawei val s1_hit = Wire(Bool()) 952289fc2f9SLinJiawei val s1_replace_vec = Wire(UInt(smsParams.pf_filter_size.W)) 953289fc2f9SLinJiawei val s1_tlb_fire_vec = Wire(UInt(smsParams.pf_filter_size.W)) 954375a3f86SHaoyuan Feng 
val s2_tlb_fire_vec = Wire(UInt(smsParams.pf_filter_size.W)) 95525a80bceSYanqin Li val s3_tlb_fire_vec = Wire(UInt(smsParams.pf_filter_size.W)) 95625a80bceSYanqin Li val not_tlbing_vec = VecInit((0 until smsParams.pf_filter_size).map{case i => 95725a80bceSYanqin Li !s1_tlb_fire_vec(i) && !s2_tlb_fire_vec(i) && !s3_tlb_fire_vec(i) 95825a80bceSYanqin Li }) 959289fc2f9SLinJiawei 960289fc2f9SLinJiawei // s0: entries lookup 961289fc2f9SLinJiawei val s0_gen_req = io.gen_req.bits 96285de5caeSLinJiawei val s0_match_prev = prev_valid && (s0_gen_req.region_tag === prev_gen_req.region_tag) 96385de5caeSLinJiawei val s0_gen_req_valid = io.gen_req.valid && !s0_match_prev 964289fc2f9SLinJiawei val s0_match_vec = valids.indices.map(i => { 965f21b441aSLinJiawei valids(i) && entries(i).region_tag === s0_gen_req.region_tag && !(s1_valid && !s1_hit && s1_replace_vec(i)) 966289fc2f9SLinJiawei }) 967289fc2f9SLinJiawei val s0_any_matched = Cat(s0_match_vec).orR 968289fc2f9SLinJiawei val s0_replace_vec = UIntToOH(replacement.way) 969289fc2f9SLinJiawei val s0_hit = s0_gen_req_valid && s0_any_matched 970289fc2f9SLinJiawei 971289fc2f9SLinJiawei for(((v, ent), i) <- valids.zip(entries).zipWithIndex){ 972289fc2f9SLinJiawei val is_evicted = s1_valid && s1_replace_vec(i) 97325a80bceSYanqin Li tlb_req_arb.io.in(i).valid := v && not_tlbing_vec(i) && !ent.paddr_valid && !is_evicted 974289fc2f9SLinJiawei tlb_req_arb.io.in(i).bits.vaddr := Cat(ent.region_addr, 0.U(log2Up(REGION_SIZE).W)) 975289fc2f9SLinJiawei tlb_req_arb.io.in(i).bits.cmd := TlbCmd.read 9768a4dab4dSHaoyuan Feng tlb_req_arb.io.in(i).bits.isPrefetch := true.B 977289fc2f9SLinJiawei tlb_req_arb.io.in(i).bits.size := 3.U 9787f111a00SWilliam Wang tlb_req_arb.io.in(i).bits.kill := false.B 979967327d8SLinJiawei tlb_req_arb.io.in(i).bits.no_translate := false.B 980db6cfb5aSHaoyuan Feng tlb_req_arb.io.in(i).bits.fullva := 0.U 981db6cfb5aSHaoyuan Feng tlb_req_arb.io.in(i).bits.checkfullva := false.B 9828744445eSMaxpicca-Li 
tlb_req_arb.io.in(i).bits.memidx := DontCare 983289fc2f9SLinJiawei tlb_req_arb.io.in(i).bits.debug := DontCare 984382a2ebdSpeixiaokun tlb_req_arb.io.in(i).bits.hlvx := DontCare 985382a2ebdSpeixiaokun tlb_req_arb.io.in(i).bits.hyperinst := DontCare 986149a2326Sweiding liu tlb_req_arb.io.in(i).bits.pmp_addr := DontCare 987289fc2f9SLinJiawei 988289fc2f9SLinJiawei val pending_req_vec = ent.region_bits & (~ent.filter_bits).asUInt 989289fc2f9SLinJiawei val first_one_offset = PriorityMux( 990289fc2f9SLinJiawei pending_req_vec.asBools, 991967327d8SLinJiawei (0 until smsParams.pf_filter_size).map(_.U(REGION_OFFSET.W)) 992289fc2f9SLinJiawei ) 993289fc2f9SLinJiawei val last_one_offset = PriorityMux( 994289fc2f9SLinJiawei pending_req_vec.asBools.reverse, 995967327d8SLinJiawei (0 until smsParams.pf_filter_size).reverse.map(_.U(REGION_OFFSET.W)) 996289fc2f9SLinJiawei ) 997289fc2f9SLinJiawei val pf_addr = Cat( 998289fc2f9SLinJiawei ent.region_addr, 999289fc2f9SLinJiawei Mux(ent.decr_mode, last_one_offset, first_one_offset), 1000289fc2f9SLinJiawei 0.U(log2Up(dcacheParameters.blockBytes).W) 1001289fc2f9SLinJiawei ) 1002289fc2f9SLinJiawei pf_req_arb.io.in(i).valid := v && Cat(pending_req_vec).orR && ent.paddr_valid && !is_evicted 1003289fc2f9SLinJiawei pf_req_arb.io.in(i).bits := pf_addr 1004289fc2f9SLinJiawei } 1005289fc2f9SLinJiawei 1006289fc2f9SLinJiawei val s0_tlb_fire_vec = VecInit(tlb_req_arb.io.in.map(_.fire)) 1007289fc2f9SLinJiawei val s0_pf_fire_vec = VecInit(pf_req_arb.io.in.map(_.fire)) 1008289fc2f9SLinJiawei 1009967327d8SLinJiawei val s0_update_way = OHToUInt(s0_match_vec) 1010967327d8SLinJiawei val s0_replace_way = replacement.way 1011967327d8SLinJiawei val s0_access_way = Mux(s0_any_matched, s0_update_way, s0_replace_way) 1012967327d8SLinJiawei when(s0_gen_req_valid){ 1013967327d8SLinJiawei replacement.access(s0_access_way) 1014967327d8SLinJiawei } 1015967327d8SLinJiawei 1016289fc2f9SLinJiawei // s1: update or alloc 10174ccb2e8bSYanqin Li val s1_valid_r = 
GatedValidRegNext(s0_gen_req_valid, false.B) 1018967327d8SLinJiawei val s1_hit_r = RegEnable(s0_hit, false.B, s0_gen_req_valid) 1019289fc2f9SLinJiawei val s1_gen_req = RegEnable(s0_gen_req, s0_gen_req_valid) 1020289fc2f9SLinJiawei val s1_replace_vec_r = RegEnable(s0_replace_vec, s0_gen_req_valid && !s0_hit) 1021289fc2f9SLinJiawei val s1_update_vec = RegEnable(VecInit(s0_match_vec).asUInt, s0_gen_req_valid && s0_hit) 10224ccb2e8bSYanqin Li val s1_tlb_fire_vec_r = GatedValidRegNext(s0_tlb_fire_vec) 1023cd2ff98bShappy-lx // tlb req will latch one cycle after tlb_arb 10244ccb2e8bSYanqin Li val s1_tlb_req_valid = GatedValidRegNext(tlb_req_arb.io.out.fire) 1025cd2ff98bShappy-lx val s1_tlb_req_bits = RegEnable(tlb_req_arb.io.out.bits, tlb_req_arb.io.out.fire) 1026289fc2f9SLinJiawei val s1_alloc_entry = Wire(new PrefetchFilterEntry()) 1027289fc2f9SLinJiawei s1_valid := s1_valid_r 1028f21b441aSLinJiawei s1_hit := s1_hit_r 1029289fc2f9SLinJiawei s1_replace_vec := s1_replace_vec_r 1030289fc2f9SLinJiawei s1_tlb_fire_vec := s1_tlb_fire_vec_r.asUInt 1031289fc2f9SLinJiawei s1_alloc_entry.region_tag := s1_gen_req.region_tag 1032289fc2f9SLinJiawei s1_alloc_entry.region_addr := s1_gen_req.region_addr 1033289fc2f9SLinJiawei s1_alloc_entry.region_bits := s1_gen_req.region_bits 1034289fc2f9SLinJiawei s1_alloc_entry.paddr_valid := s1_gen_req.paddr_valid 1035289fc2f9SLinJiawei s1_alloc_entry.decr_mode := s1_gen_req.decr_mode 1036289fc2f9SLinJiawei s1_alloc_entry.filter_bits := 0.U 1037967327d8SLinJiawei s1_alloc_entry.alias_bits := s1_gen_req.alias_bits 10382db9ec44SLinJiawei s1_alloc_entry.debug_source_type := s1_gen_req.debug_source_type 1039cd2ff98bShappy-lx io.tlb_req.req.valid := s1_tlb_req_valid && !((s1_tlb_fire_vec & s1_replace_vec).orR && s1_valid && !s1_hit) 1040cd2ff98bShappy-lx io.tlb_req.req.bits := s1_tlb_req_bits 1041cd2ff98bShappy-lx io.tlb_req.resp.ready := true.B 1042cd2ff98bShappy-lx io.tlb_req.req_kill := false.B 1043cd2ff98bShappy-lx tlb_req_arb.io.out.ready := 
true.B 1044375a3f86SHaoyuan Feng 104525a80bceSYanqin Li // s2: get response from tlb 10464ccb2e8bSYanqin Li val s2_tlb_fire_vec_r = GatedValidRegNext(s1_tlb_fire_vec_r) 1047375a3f86SHaoyuan Feng s2_tlb_fire_vec := s2_tlb_fire_vec_r.asUInt 1048375a3f86SHaoyuan Feng 104925a80bceSYanqin Li // s3: get pmp response form PMPChecker 105025a80bceSYanqin Li val s3_tlb_fire_vec_r = GatedValidRegNext(s2_tlb_fire_vec_r) 105125a80bceSYanqin Li val s3_tlb_resp_fire = RegNext(io.tlb_req.resp.fire) 105225a80bceSYanqin Li val s3_tlb_resp = RegEnable(io.tlb_req.resp.bits, io.tlb_req.resp.valid) 105325a80bceSYanqin Li val s3_pmp_resp = io.pmp_resp 105425a80bceSYanqin Li val s3_update_valid = s3_tlb_resp_fire && !s3_tlb_resp.miss 105525a80bceSYanqin Li val s3_drop = s3_update_valid && ( 105625a80bceSYanqin Li // page/access fault 105725a80bceSYanqin Li s3_tlb_resp.excp.head.pf.ld || s3_tlb_resp.excp.head.gpf.ld || s3_tlb_resp.excp.head.af.ld || 105825a80bceSYanqin Li // uncache 105925a80bceSYanqin Li s3_pmp_resp.mmio || Pbmt.isUncache(s3_tlb_resp.pbmt.head) || 106025a80bceSYanqin Li // pmp access fault 106125a80bceSYanqin Li s3_pmp_resp.ld 106225a80bceSYanqin Li ) 106325a80bceSYanqin Li s3_tlb_fire_vec := s3_tlb_fire_vec_r.asUInt 106425a80bceSYanqin Li 1065289fc2f9SLinJiawei for(((v, ent), i) <- valids.zip(entries).zipWithIndex){ 1066289fc2f9SLinJiawei val alloc = s1_valid && !s1_hit && s1_replace_vec(i) 1067289fc2f9SLinJiawei val update = s1_valid && s1_hit && s1_update_vec(i) 1068289fc2f9SLinJiawei // for pf: use s0 data 1069289fc2f9SLinJiawei val pf_fired = s0_pf_fire_vec(i) 107025a80bceSYanqin Li val tlb_fired = s3_tlb_fire_vec(i) && s3_update_valid 1071289fc2f9SLinJiawei when(tlb_fired){ 107225a80bceSYanqin Li when(s3_drop){ 107325a80bceSYanqin Li v := false.B 107425a80bceSYanqin Li }.otherwise{ 107525a80bceSYanqin Li ent.paddr_valid := !s3_tlb_resp.miss 107625a80bceSYanqin Li ent.region_addr := region_addr(s3_tlb_resp.paddr.head) 107725a80bceSYanqin Li } 1078289fc2f9SLinJiawei } 
1079289fc2f9SLinJiawei when(update){ 1080289fc2f9SLinJiawei ent.region_bits := ent.region_bits | s1_gen_req.region_bits 1081289fc2f9SLinJiawei } 1082289fc2f9SLinJiawei when(pf_fired){ 1083289fc2f9SLinJiawei val curr_bit = UIntToOH(block_addr(pf_req_arb.io.in(i).bits)(REGION_OFFSET - 1, 0)) 1084289fc2f9SLinJiawei ent.filter_bits := ent.filter_bits | curr_bit 1085289fc2f9SLinJiawei } 1086289fc2f9SLinJiawei when(alloc){ 1087289fc2f9SLinJiawei ent := s1_alloc_entry 1088289fc2f9SLinJiawei v := true.B 1089289fc2f9SLinJiawei } 1090289fc2f9SLinJiawei } 1091f21b441aSLinJiawei when(s1_valid && s1_hit){ 1092f21b441aSLinJiawei assert(PopCount(s1_update_vec) === 1.U, "sms_pf_filter: multi-hit") 1093f21b441aSLinJiawei } 1094cd2ff98bShappy-lx assert(!io.tlb_req.resp.fire || Cat(s2_tlb_fire_vec).orR, "sms_pf_filter: tlb resp fires, but no tlb req from tlb_req_arb 2 cycles ago") 1095289fc2f9SLinJiawei 1096289fc2f9SLinJiawei XSPerfAccumulate("sms_pf_filter_recv_req", io.gen_req.valid) 1097289fc2f9SLinJiawei XSPerfAccumulate("sms_pf_filter_hit", s1_valid && s1_hit) 1098289fc2f9SLinJiawei XSPerfAccumulate("sms_pf_filter_tlb_req", io.tlb_req.req.fire) 1099289fc2f9SLinJiawei XSPerfAccumulate("sms_pf_filter_tlb_resp_miss", io.tlb_req.resp.fire && io.tlb_req.resp.bits.miss) 110025a80bceSYanqin Li XSPerfAccumulate("sms_pf_filter_tlb_resp_drop", s3_drop) 110125a80bceSYanqin Li XSPerfAccumulate("sms_pf_filter_tlb_resp_drop_by_pf_or_af", 110225a80bceSYanqin Li s3_update_valid && (s3_tlb_resp.excp.head.pf.ld || s3_tlb_resp.excp.head.gpf.ld || s3_tlb_resp.excp.head.af.ld) 110325a80bceSYanqin Li ) 110425a80bceSYanqin Li XSPerfAccumulate("sms_pf_filter_tlb_resp_drop_by_uncache", 110525a80bceSYanqin Li s3_update_valid && (s3_pmp_resp.mmio || Pbmt.isUncache(s3_tlb_resp.pbmt.head)) 110625a80bceSYanqin Li ) 110725a80bceSYanqin Li XSPerfAccumulate("sms_pf_filter_tlb_resp_drop_by_pmp_af", 110825a80bceSYanqin Li s3_update_valid && (s3_pmp_resp.ld) 110925a80bceSYanqin Li ) 1110289fc2f9SLinJiawei for(i <- 
0 until smsParams.pf_filter_size){
    XSPerfAccumulate(s"sms_pf_filter_access_way_$i", s0_gen_req_valid && s0_access_way === i.U)
  }
  XSPerfAccumulate("sms_pf_filter_l2_req", io.l2_pf_addr.valid)
}

/**
  * Small circular buffer placed in front of the SMS training pipeline.
  *
  * Every cycle it looks at all load/store training inputs, drops a request when
  * its `block_hash_tag(vaddr)` equals that of a valid buffered entry or of an
  * earlier valid request of the same cycle, and enqueues the survivors. The
  * entry at the dequeue pointer is presented on `io.train_req`; whenever that
  * entry is valid it is invalidated and the dequeue pointer advances by one.
  */
class SMSTrainFilter()(implicit p: Parameters) extends XSModule with HasSMSModuleHelper with HasTrainFilterHelper {
  val io = IO(new Bundle() {
    // train input
    // hybrid load store
    val ld_in = Flipped(Vec(backendParams.LdExuCnt, ValidIO(new LsPrefetchTrainBundle())))
    val st_in = Flipped(Vec(backendParams.StaExuCnt, ValidIO(new LsPrefetchTrainBundle())))
    // filter out
    val train_req = ValidIO(new PrefetchReqBundle())
  })

  // Circular-queue pointer over the `train_filter_size` buffer slots.
  class Ptr(implicit p: Parameters) extends CircularQueuePtr[Ptr](
    p => smsParams.train_filter_size
  ){
  }

  object Ptr {
    // Builds a pointer wire from an explicit wrap flag and slot index.
    def apply(f: Bool, v: UInt)(implicit p: Parameters): Ptr = {
      val ptr = Wire(new Ptr)
      ptr.flag := f
      ptr.value := v
      ptr
    }
  }

  val entries = RegInit(VecInit(Seq.fill(smsParams.train_filter_size){ (0.U.asTypeOf(new PrefetchReqBundle())) }))
  val valids = RegInit(VecInit(Seq.fill(smsParams.train_filter_size){ (false.B) }))

  // Candidate requests of this cycle: reordered loads first, then reordered stores.
  val ld_reorder = reorder(io.ld_in)
  val st_reorder = reorder(io.st_in)
  val reqs_ls = ld_reorder.map(_.bits.toPrefetchReqBundle()) ++ st_reorder.map(_.bits.toPrefetchReqBundle())
  val reqs_vls = ld_reorder.map(_.valid) ++ st_reorder.map(_.valid)

  // Number of requests that may be enqueued in one cycle. It is taken from the
  // request list itself so that the scan loop, the alloc vectors and the
  // enqueue-pointer bank always cover every input. Deriving it from
  // LduCnt + StaCnt instead would leave trailing requests unexamined whenever
  // that sum is smaller than the number of ld_in/st_in ports (which are sized
  // by LdExuCnt/StaExuCnt).
  // NOTE(review): presumably the *ExuCnt values additionally count hybrid
  // units - confirm against the backendParams definition.
  val enqLen = reqs_ls.length
  // enqPtrExt(k) is initialised to k, i.e. it points k slots past the first free slot.
  val enqPtrExt = RegInit(VecInit((0 until enqLen).map(_.U.asTypeOf(new Ptr))))
  val deqPtrExt = RegInit(0.U.asTypeOf(new Ptr))

  val deqPtr = WireInit(deqPtrExt.value)

  require(smsParams.train_filter_size >= enqLen)

  val needAlloc = Wire(Vec(enqLen, Bool()))
  val canAlloc = Wire(Vec(enqLen, Bool()))

  for(i <- (0 until enqLen)) {
    val req = reqs_ls(i)
    val req_v = reqs_vls(i)
    // Requests are packed densely: the k-th request needing a slot uses enqPtrExt(k).
    val index = PopCount(needAlloc.take(i))
    val allocPtr = enqPtrExt(index)
    // Same hashed block tag already buffered?
    val entry_match = Cat(entries.zip(valids).map {
      case(e, v) => v && block_hash_tag(e.vaddr) === block_hash_tag(req.vaddr)
    }).orR
    // Same hashed block tag carried by an earlier valid request of this cycle?
    val prev_enq_match = if(i == 0) false.B else Cat(reqs_ls.zip(reqs_vls).take(i).map {
      case(pre, pre_v) => pre_v && block_hash_tag(pre.vaddr) === block_hash_tag(req.vaddr)
    }).orR

    needAlloc(i) := req_v && !entry_match && !prev_enq_match
    // NOTE(review): the pointer comparison presumably rejects slots the dequeue
    // side has not released yet - confirm against CircularQueuePtr's `>=`.
    canAlloc(i) := needAlloc(i) && allocPtr >= deqPtrExt

    when(canAlloc(i)) {
      valids(allocPtr.value) := true.B
      entries(allocPtr.value) := req
    }
  }
  val allocNum = PopCount(canAlloc)

  // All enqueue pointers move together by the number of entries written this cycle.
  when(canAlloc.asUInt.orR) {
    enqPtrExt.foreach { ptr => ptr := ptr + allocNum }
  }

  // Read port: select the entry addressed by the dequeue pointer.
  io.train_req.valid := false.B
  io.train_req.bits := DontCare
  valids.zip(entries).zipWithIndex.foreach {
    case((valid, entry), i) => {
      when(deqPtr === i.U) {
        io.train_req.valid := valid
        io.train_req.bits := entry
      }
    }
  }

  // The output has no back-pressure: a valid head entry is consumed immediately.
  // This block comes after the enqueue writes, so on a same-slot collision the
  // clear of `valids` takes priority (last connection wins).
  when(io.train_req.valid) {
    valids(deqPtr) := false.B
    deqPtrExt := deqPtrExt + 1.U
  }

  XSPerfAccumulate("sms_train_filter_full", PopCount(valids) === (smsParams.train_filter_size).U)
  XSPerfAccumulate("sms_train_filter_half", PopCount(valids) >= (smsParams.train_filter_size / 2).U)
  XSPerfAccumulate("sms_train_filter_empty", PopCount(valids) === 0.U)

  val raw_enq_pattern = Cat(reqs_vls)
  val filtered_enq_pattern = Cat(needAlloc)
  val actual_enq_pattern = Cat(canAlloc)
  XSPerfAccumulate("sms_train_filter_enq", allocNum > 0.U)
  XSPerfAccumulate("sms_train_filter_deq", io.train_req.fire)
  // Binary rendering of n without leading zeros, used only in counter names.
  def toBinary(n: Int): String = n match {
    case 0|1 => s"$n"
    case _ => s"${toBinary(n/2)}${n%2}"
  }
  // One counter per possible valid/filtered/accepted bit pattern (2^enqLen each).
  for(i <- 0 until (1 << enqLen)) {
    XSPerfAccumulate(s"sms_train_filter_raw_enq_pattern_${toBinary(i)}", raw_enq_pattern === i.U)
    XSPerfAccumulate(s"sms_train_filter_filtered_enq_pattern_${toBinary(i)}", filtered_enq_pattern === i.U)
    XSPerfAccumulate(s"sms_train_filter_actual_enq_pattern_${toBinary(i)}", actual_enq_pattern === i.U)
  }
}
/**
  * Top level of the SMS prefetcher.
  *
  * Load/store training inputs pass through an [[SMSTrainFilter]]; the selected
  * request is split into region tag/offset information, registered for one
  * cycle and used to look up the active generation table (AGT) and the stride
  * prefetcher. The AGT drives pattern-history-table (PHT) lookups and updates.
  * Requests generated by AGT, stride and PHT are merged into a
  * [[PrefetchFilter]], whose output becomes the L2 prefetch request. The L1
  * request port is driven but never valid.
  */
class SMSPrefetcher()(implicit p: Parameters) extends BasePrefecher with HasSMSModuleHelper with HasL1PrefetchSourceParameter {
  import freechips.rocketchip.util._

  // ---------------------------------------------------------------------------
  // Additional control ports
  // ---------------------------------------------------------------------------
  val io_agt_en = IO(Input(Bool()))
  val io_stride_en = IO(Input(Bool()))
  val io_pht_en = IO(Input(Bool()))
  val io_act_threshold = IO(Input(UInt(REGION_OFFSET.W)))
  val io_act_stride = IO(Input(UInt(6.W)))
  val io_dcache_evict = IO(Flipped(DecoupledIO(new AGTEvictReq)))

  // ---------------------------------------------------------------------------
  // Training request selection
  // ---------------------------------------------------------------------------
  val train_filter = Module(new SMSTrainFilter)

  train_filter.io.ld_in <> io.ld_in
  train_filter.io.st_in <> io.st_in

  val train_ld = train_filter.io.train_req.bits

  val train_block_tag = block_hash_tag(train_ld.vaddr)
  val train_region_tag = train_block_tag.head(REGION_TAG_WIDTH)

  val train_region_addr_raw = region_addr(train_ld.vaddr)(REGION_TAG_WIDTH + 2 * VADDR_HASH_WIDTH - 1, 0)
  // Neighbouring regions, computed with one extra MSB so that wrapping past
  // either end of the raw range shows up in that bit.
  val train_region_addr_p1 = Cat(0.U(1.W), train_region_addr_raw) + 1.U
  val train_region_addr_m1 = Cat(0.U(1.W), train_region_addr_raw) - 1.U
  // A neighbour is usable only when the extra MSB stayed clear.
  val train_allow_cross_region_p1 = !train_region_addr_p1.head(1).asBool
  val train_allow_cross_region_m1 = !train_region_addr_m1.head(1).asBool

  val train_region_p1_tag = region_hash_tag(train_region_addr_p1.tail(1))
  val train_region_m1_tag = region_hash_tag(train_region_addr_m1.tail(1))

  // Set when the neighbour's page bit differs from the current region's.
  val train_region_p1_cross_page = page_bit(train_region_addr_p1) ^ page_bit(train_region_addr_raw)
  val train_region_m1_cross_page = page_bit(train_region_addr_m1) ^ page_bit(train_region_addr_raw)

  val train_region_paddr = region_addr(train_ld.paddr)
  val train_region_vaddr = region_addr(train_ld.vaddr)
  val train_region_offset = train_block_tag(REGION_OFFSET - 1, 0)
  val train_vld = train_filter.io.train_req.valid

  // ---------------------------------------------------------------------------
  // Prefetch stage 0: sub-modules and registered training information
  // ---------------------------------------------------------------------------
  val active_gen_table = Module(new ActiveGenerationTable())
  val stride = Module(new StridePF())
  val pht = Module(new PatternHistoryTable())
  val pf_filter = Module(new PrefetchFilter())

  val train_vld_s0 = GatedValidRegNext(train_vld, false.B)
  // Payload registers only capture while a training request is valid.
  val train_s0 = RegEnable(train_ld, train_vld)
  val train_region_tag_s0 = RegEnable(train_region_tag, train_vld)
  val train_region_p1_tag_s0 = RegEnable(train_region_p1_tag, train_vld)
  val train_region_m1_tag_s0 = RegEnable(train_region_m1_tag, train_vld)
  val train_allow_cross_region_p1_s0 = RegEnable(train_allow_cross_region_p1, train_vld)
  val train_allow_cross_region_m1_s0 = RegEnable(train_allow_cross_region_m1, train_vld)
  val train_pht_tag_s0 = RegEnable(pht_tag(train_ld.pc), train_vld)
  val train_pht_index_s0 = RegEnable(pht_index(train_ld.pc), train_vld)
  val train_region_offset_s0 = RegEnable(train_region_offset, train_vld)
  val train_region_p1_cross_page_s0 = RegEnable(train_region_p1_cross_page, train_vld)
  val train_region_m1_cross_page_s0 = RegEnable(train_region_m1_cross_page, train_vld)
  val train_region_paddr_s0 = RegEnable(train_region_paddr, train_vld)
  val train_region_vaddr_s0 = RegEnable(train_region_vaddr, train_vld)

  // Rebuilds a block-aligned address: region bits, registered in-region
  // offset, then zeroed block-offset bits.
  private def blockAlignedAddr(region: UInt): UInt =
    Cat(region, train_region_offset_s0, 0.U(log2Up(dcacheParameters.blockBytes).W))

  // ---------------------------------------------------------------------------
  // Active generation table
  // ---------------------------------------------------------------------------
  active_gen_table.io.agt_en := io_agt_en
  active_gen_table.io.act_threshold := io_act_threshold
  active_gen_table.io.act_stride := io_act_stride

  private val agt_lookup = active_gen_table.io.s0_lookup
  agt_lookup.valid := train_vld_s0
  agt_lookup.bits.region_tag := train_region_tag_s0
  agt_lookup.bits.region_p1_tag := train_region_p1_tag_s0
  agt_lookup.bits.region_m1_tag := train_region_m1_tag_s0
  agt_lookup.bits.region_offset := train_region_offset_s0
  agt_lookup.bits.pht_index := train_pht_index_s0
  agt_lookup.bits.pht_tag := train_pht_tag_s0
  agt_lookup.bits.allow_cross_region_p1 := train_allow_cross_region_p1_s0
  agt_lookup.bits.allow_cross_region_m1 := train_allow_cross_region_m1_s0
  agt_lookup.bits.region_p1_cross_page := train_region_p1_cross_page_s0
  agt_lookup.bits.region_m1_cross_page := train_region_m1_cross_page_s0
  agt_lookup.bits.region_paddr := train_region_paddr_s0
  agt_lookup.bits.region_vaddr := train_region_vaddr_s0

  active_gen_table.io.s2_stride_hit := stride.io.s2_gen_req.valid
  active_gen_table.io.s0_dcache_evict <> io_dcache_evict

  // ---------------------------------------------------------------------------
  // Stride prefetcher
  // ---------------------------------------------------------------------------
  stride.io.stride_en := io_stride_en
  stride.io.s0_lookup.valid := train_vld_s0
  stride.io.s0_lookup.bits.pc := train_s0.pc(STRIDE_PC_BITS - 1, 0)
  stride.io.s0_lookup.bits.vaddr := blockAlignedAddr(train_region_vaddr_s0)
  stride.io.s0_lookup.bits.paddr := blockAlignedAddr(train_region_paddr_s0)
  stride.io.s1_valid := active_gen_table.io.s1_sel_stride

  // ---------------------------------------------------------------------------
  // Pattern history table
  // ---------------------------------------------------------------------------
  pht.io.s2_agt_lookup := active_gen_table.io.s2_pht_lookup
  pht.io.agt_update := active_gen_table.io.s2_evict

  // ---------------------------------------------------------------------------
  // Merge generated requests: AGT / stride take priority over PHT
  // ---------------------------------------------------------------------------
  val pht_gen_valid = pht.io.pf_gen_req.valid && io_pht_en
  val agt_gen_valid = active_gen_table.io.s2_pf_gen_req.valid
  val stride_gen_valid = stride.io.s2_gen_req.valid
  val pf_gen_req = Mux(agt_gen_valid || stride_gen_valid,
    Mux1H(
      Seq(agt_gen_valid, stride_gen_valid),
      Seq(active_gen_table.io.s2_pf_gen_req.bits, stride.io.s2_gen_req.bits)
    ),
    pht.io.pf_gen_req.bits
  )
  // The one-hot select above relies on AGT and stride never firing together.
  assert(!(agt_gen_valid && stride_gen_valid))

  pf_filter.io.gen_req.valid := pht_gen_valid || agt_gen_valid || stride_gen_valid
  pf_filter.io.gen_req.bits := pf_gen_req
  io.tlb_req <> pf_filter.io.tlb_req
  pf_filter.io.pmp_resp := io.pmp_resp

  // ---------------------------------------------------------------------------
  // Outgoing prefetch requests
  // ---------------------------------------------------------------------------
  // Only addresses covered by one of the PmemRanges are forwarded to L2.
  val is_valid_address = PmemRanges.map(_.cover(pf_filter.io.l2_pf_addr.bits)).reduce(_ || _)

  io.l2_req.valid := pf_filter.io.l2_pf_addr.valid && io.enable && is_valid_address
  io.l2_req.bits.addr := pf_filter.io.l2_pf_addr.bits
  io.l2_req.bits.source := MemReqSource.Prefetch2L2SMS.id.U

  // for now, sms will not send l1 prefetch requests
  io.l1_req.valid := false.B
  io.l1_req.bits.paddr := pf_filter.io.l2_pf_addr.bits
  io.l1_req.bits.alias := pf_filter.io.pf_alias_bits
  io.l1_req.bits.is_store := true.B
  io.l1_req.bits.confidence := 1.U
  io.l1_req.bits.pf_source.value := L1_HW_PREFETCH_NULL

  // ---------------------------------------------------------------------------
  // Performance counters and trace database
  // ---------------------------------------------------------------------------
  io.ld_in.zipWithIndex.foreach { case (train, i) =>
    XSPerfAccumulate(s"pf_train_miss_${i}", train.valid && train.bits.miss)
    XSPerfAccumulate(s"pf_train_prefetched_${i}", train.valid && isFromL1Prefetch(train.bits.meta_prefetch))
  }

  // One record per fired L2 request; vaddr and pc are logged as zero.
  val trace = Wire(new L1MissTrace)
  trace.vaddr := 0.U
  trace.pc := 0.U
  trace.paddr := io.l2_req.bits.addr
  trace.source := pf_filter.io.debug_source_type
  val table = ChiselDB.createTable("L1SMSMissTrace_hart"+ p(XSCoreParamsKey).HartId.toString, new L1MissTrace)
  table.log(trace, io.l2_req.fire, "SMSPrefetcher", clock, reset)

  XSPerfAccumulate("sms_pf_gen_conflict",
    pht_gen_valid && agt_gen_valid
  )
  XSPerfAccumulate("sms_pht_disabled", pht.io.pf_gen_req.valid && !io_pht_en)
  XSPerfAccumulate("sms_agt_disabled", active_gen_table.io.s2_pf_gen_req.valid && !io_agt_en)
  XSPerfAccumulate("sms_pf_real_issued", io.l2_req.valid)
  XSPerfAccumulate("sms_l1_req_valid", io.l1_req.valid)
  XSPerfAccumulate("sms_l1_req_fire", io.l1_req.fire)
}