package xiangshan.mem.prefetch

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import utils._
import utility._
import xiangshan._
import xiangshan.mem.L1PrefetchReq
import xiangshan.mem.Bundles.LsPrefetchTrainBundle
import xiangshan.mem.trace._
import xiangshan.mem.L1PrefetchSource
import xiangshan.cache.HasDCacheParameters
import xiangshan.cache.mmu._

trait HasStreamPrefetchHelper extends HasL1PrefetchHelper {
  // capacity related
  val STREAM_FILTER_SIZE = 4
  val BIT_VEC_ARRAY_SIZE = 16
  val ACTIVE_THRESHOLD = BIT_VEC_WITDH - 4
  val INIT_DEC_MODE = false

  // bit_vector [StreamBitVectorBundle]:
  // `X`: valid; `.`: invalid; `H`: hit
  // [X X X X X X X X X . . H . X X X] [. . X X X X . . . . . . . . . .]
  //                        hit in 12th slot & active -----------------> prefetch bit_vector [StreamPrefetchReqBundle]
  //                        | <---------------------------- depth ---------------------------->
  //                                                                           | <-- width -- >
  val DEPTH_BYTES = 1024
  val DEPTH_CACHE_BLOCKS = DEPTH_BYTES / dcacheParameters.blockBytes
  val WIDTH_BYTES = 128
  val WIDTH_CACHE_BLOCKS = WIDTH_BYTES / dcacheParameters.blockBytes

  val L2_DEPTH_RATIO = 2
  val L2_WIDTH_BYTES = WIDTH_BYTES * 2
  val L2_WIDTH_CACHE_BLOCKS = L2_WIDTH_BYTES / dcacheParameters.blockBytes

  val L3_DEPTH_RATIO = 3
  val L3_WIDTH_BYTES = WIDTH_BYTES * 2 * 2
  val L3_WIDTH_CACHE_BLOCKS = L3_WIDTH_BYTES / dcacheParameters.blockBytes

  val DEPTH_LOOKAHEAD = 6
  val DEPTH_BITS = log2Up(DEPTH_CACHE_BLOCKS) + DEPTH_LOOKAHEAD

  val ENABLE_DECR_MODE = false
  val ENABLE_STRICT_ACTIVE_DETECTION = true

  // constraints
  require((DEPTH_BYTES >= REGION_SIZE) && ((DEPTH_BYTES % REGION_SIZE) == 0) && ((DEPTH_BYTES / REGION_SIZE) > 0))
  require(((VADDR_HASH_WIDTH * 3) + BLK_ADDR_RAW_WIDTH) <= REGION_TAG_BITS)
  require(WIDTH_BYTES >= dcacheParameters.blockBytes)
}

class StreamBitVectorBundle(implicit p: Parameters) extends XSBundle with HasStreamPrefetchHelper {
  val tag = UInt(REGION_TAG_BITS.W)
  val bit_vec = UInt(BIT_VEC_WITDH.W)
  val active = Bool()
  // cnt can be optimized
  val cnt = UInt((log2Up(BIT_VEC_WITDH) + 1).W)
  val decr_mode = Bool()

  // debug usage
  val trigger_full_va = UInt(VAddrBits.W)

  def reset(index: Int) = {
    tag := index.U
    bit_vec := 0.U
    active := false.B
    cnt := 0.U
    decr_mode := INIT_DEC_MODE.B
    trigger_full_va := 0xdeadbeefL.U
  }

  def tag_match(valid1: Bool, valid2: Bool, new_tag: UInt): Bool = {
    valid1 && valid2 && region_hash_tag(tag) === region_hash_tag(new_tag)
  }

  def alloc(alloc_tag: UInt, alloc_bit_vec: UInt, alloc_active: Bool, alloc_decr_mode: Bool, alloc_full_vaddr: UInt) = {
    tag := alloc_tag
    bit_vec := alloc_bit_vec
    active := alloc_active
    cnt := 1.U
    trigger_full_va := alloc_full_vaddr
    if(ENABLE_DECR_MODE) {
      decr_mode := alloc_decr_mode
    } else {
      decr_mode := INIT_DEC_MODE.B
    }

    assert(PopCount(alloc_bit_vec) === 1.U, "alloc vector should be one hot")
  }

  def update(update_bit_vec: UInt, update_active: Bool) = {
    // if the slot was 0 before, increment cnt
    val cnt_en = !((bit_vec & update_bit_vec).orR)
    val cnt_next = Mux(cnt_en, cnt + 1.U, cnt)

    bit_vec := bit_vec | update_bit_vec
    cnt := cnt_next
    when(cnt_next >= ACTIVE_THRESHOLD.U) {
      active := true.B
    }
    when(update_active) {
      active := true.B
    }

    assert(PopCount(update_bit_vec) === 1.U, "update vector should be one hot")
    assert(cnt <= BIT_VEC_WITDH.U, "cnt should never exceed the bit vector size")
  }
}
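
// Worked example of the bit-vector expansion done by getStreamPrefetchReqBundle below
// (a sketch only; it assumes 64 B cache blocks, so WIDTH_BYTES = 128 gives WIDTH_CACHE_BLOCKS = 2):
//   incr_mode, region_bits = 5, width = 2  =>  bit_vec = UIntToOH(5) | (UIntToOH(5) << 1) = ...0110_0000
//   decr_mode, region_bits = 5, width = 2  =>  bit_vec = UIntToOH(5) | (UIntToOH(5) >> 1) = ...0011_0000
// Blocks whose bits would shift past the region boundary are simply dropped, which is why the
// PopCount assertion checks `<= width` rather than `=== width`.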
class StreamPrefetchReqBundle(implicit p: Parameters) extends XSBundle with HasStreamPrefetchHelper {
  val region = UInt(REGION_TAG_BITS.W)
  val bit_vec = UInt(BIT_VEC_WITDH.W)
  val sink = UInt(SINK_BITS.W)
  val source = new L1PrefetchSource()
  // debug usage
  val trigger_pc = UInt(VAddrBits.W)
  val trigger_va = UInt(VAddrBits.W)

  // align prefetch vaddr and width to region
  def getStreamPrefetchReqBundle(valid: Bool, vaddr: UInt, width: Int, decr_mode: Bool, sink: UInt, source: UInt, t_pc: UInt, t_va: UInt): StreamPrefetchReqBundle = {
    val res = Wire(new StreamPrefetchReqBundle)
    res.region := get_region_tag(vaddr)
    res.sink := sink
    res.source.value := source

    res.trigger_pc := t_pc
    res.trigger_va := t_va

    val region_bits = get_region_bits(vaddr)
    val region_bit_vec = UIntToOH(region_bits)
    res.bit_vec := Mux(
      decr_mode,
      (0 until width).map{ case i => region_bit_vec >> i}.reduce(_ | _),
      (0 until width).map{ case i => region_bit_vec << i}.reduce(_ | _)
    )

    assert(!valid || PopCount(res.bit_vec) <= width.U, "the actual number of prefetched blocks should be less than or equal to the requested width")
    assert(!valid || PopCount(res.bit_vec) >= 1.U, "at least one block should be included")
    assert(sink <= SINK_L3, "invalid sink")
    for(i <- 0 until BIT_VEC_WITDH) {
      when(decr_mode) {
        when(i.U > region_bits) {
          assert(!valid || res.bit_vec(i) === 0.U, s"res.bit_vec(${i}) is not zero in decr_mode, prefetch vector is wrong!")
        }.elsewhen(i.U === region_bits) {
          assert(!valid || res.bit_vec(i) === 1.U, s"res.bit_vec(${i}) is zero in decr_mode, prefetch vector is wrong!")
        }
      }.otherwise {
        when(i.U < region_bits) {
          assert(!valid || res.bit_vec(i) === 0.U, s"res.bit_vec(${i}) is not zero in incr_mode, prefetch vector is wrong!")
        }.elsewhen(i.U === region_bits) {
          assert(!valid || res.bit_vec(i) === 1.U, s"res.bit_vec(${i}) is zero in incr_mode, prefetch vector is wrong!")
        }
      }
    }

    res
  }
}

class StreamBitVectorArray(implicit p: Parameters) extends XSModule with HasStreamPrefetchHelper {
  val io = IO(new XSBundle {
    val enable = Input(Bool())
    // TODO: flush all entries when a process change happens, or disable stream prefetching for a while
    val flush = Input(Bool())
    val dynamic_depth = Input(UInt(DEPTH_BITS.W))
    val train_req = Flipped(DecoupledIO(new PrefetchReqBundle))
    val l1_prefetch_req = ValidIO(new StreamPrefetchReqBundle)
    val l2_l3_prefetch_req = ValidIO(new StreamPrefetchReqBundle)

    // Stride sends lookup reqs here
    val stream_lookup_req = Flipped(ValidIO(new PrefetchReqBundle))
    val stream_lookup_resp = Output(Bool())
  })

  val array = Reg(Vec(BIT_VEC_ARRAY_SIZE, new StreamBitVectorBundle))
  val valids = RegInit(VecInit(Seq.fill(BIT_VEC_ARRAY_SIZE)(false.B)))

  def reset_array(i: Int): Unit = {
    valids(i) := false.B
    // only reset the control signals, to be area friendly
    // array(i).reset(i)
  }

  val replacement = ReplacementPolicy.fromString("plru", BIT_VEC_ARRAY_SIZE)
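
  // Train pipeline overview:
  //   s0: hash the train vaddr into a region tag and look it up (together with the regions
  //       directly above and below it) in all BIT_VEC_ARRAY_SIZE entries in parallel
  //   s1: allocate a new entry on a miss, or set the accessed block's bit on a hit
  //   s2: if the entry is active and this is the first touch of the block, build the
  //       L1 / L2 / L3 prefetch request bundles
  //   s3 - s5: register and send the L1, L2 and L3 requests out, one level per cycle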
  // s0: generate region tag, parallel match
  val s0_can_accept = Wire(Bool())
  val s0_valid = io.train_req.fire
  val s0_pc = io.train_req.bits.pc
  val s0_vaddr = io.train_req.bits.vaddr
  val s0_miss = io.train_req.bits.miss
  val s0_pfHit = io.train_req.bits.pfHitStream
  val s0_region_bits = get_region_bits(s0_vaddr)
  val s0_region_tag = get_region_tag(s0_vaddr)
  val s0_region_tag_plus_one = get_region_tag(s0_vaddr) + 1.U
  val s0_region_tag_minus_one = get_region_tag(s0_vaddr) - 1.U
  val s0_region_tag_match_vec = array zip valids map { case (e, v) => e.tag_match(v, s0_valid, s0_region_tag) }
  val s0_region_tag_plus_one_match_vec = array zip valids map { case (e, v) => e.tag_match(v, s0_valid, s0_region_tag_plus_one) }
  val s0_region_tag_minus_one_match_vec = array zip valids map { case (e, v) => e.tag_match(v, s0_valid, s0_region_tag_minus_one) }
  val s0_hit = Cat(s0_region_tag_match_vec).orR
  val s0_plus_one_hit = Cat(s0_region_tag_plus_one_match_vec).orR
  val s0_minus_one_hit = Cat(s0_region_tag_minus_one_match_vec).orR
  val s0_hit_vec = VecInit(s0_region_tag_match_vec).asUInt
  val s0_index = Mux(s0_hit, OHToUInt(s0_hit_vec), replacement.way)
  val s0_plus_one_index = OHToUInt(VecInit(s0_region_tag_plus_one_match_vec).asUInt)
  val s0_minus_one_index = OHToUInt(VecInit(s0_region_tag_minus_one_match_vec).asUInt)
  io.train_req.ready := s0_can_accept

  when(s0_valid) {
    replacement.access(s0_index)
  }

  val stream_pf_train_debug_table = ChiselDB.createTable("StreamTrainTraceTable" + p(XSCoreParamsKey).HartId.toString, new StreamTrainTraceEntry, basicDB = false)

  val spf_log_enable = s0_valid
  val spf_log_data = Wire(new StreamTrainTraceEntry)

  // WARNING: the type here only indicates that the request is triggered by stream; it says nothing about the sink
  spf_log_data.Type := MemReqSource.Prefetch2L2Stream.id.U
  spf_log_data.OldAddr := Mux(
    !s0_hit,
    s0_vaddr,
    array(s0_index).trigger_full_va
  )
  spf_log_data.CurAddr := s0_vaddr
  spf_log_data.Offset := DontCare
  spf_log_data.Score := DontCare
  spf_log_data.Miss := io.train_req.bits.miss

  stream_pf_train_debug_table.log(
    data = spf_log_data,
    en = spf_log_enable,
    site = "StreamTrainTraceTable",
    clock = clock,
    reset = reset
  )

  assert(!s0_valid || PopCount(VecInit(s0_region_tag_match_vec)) <= 1.U, "req region should match no more than 1 entry")
  assert(!s0_valid || PopCount(VecInit(s0_region_tag_plus_one_match_vec)) <= 1.U, "req region plus 1 should match no more than 1 entry")
  assert(!s0_valid || PopCount(VecInit(s0_region_tag_minus_one_match_vec)) <= 1.U, "req region minus 1 should match no more than 1 entry")
  assert(!s0_valid || !(s0_hit && s0_plus_one_hit && (s0_index === s0_plus_one_index)), "region and region plus 1 index match failed")
  assert(!s0_valid || !(s0_hit && s0_minus_one_hit && (s0_index === s0_minus_one_index)), "region and region minus 1 index match failed")
  assert(!s0_valid || !(s0_plus_one_hit && s0_minus_one_hit && (s0_minus_one_index === s0_plus_one_index)), "region plus 1 and region minus 1 index match failed")
  assert(!(s0_valid && RegNext(s0_valid) && !s0_hit && !RegEnable(s0_hit, s0_valid) && replacement.way === RegEnable(replacement.way, s0_valid)), "replacement error")

  XSPerfAccumulate("s0_valid_train_req", s0_valid)
  val s0_hit_pattern_vec = Seq(s0_hit, s0_plus_one_hit, s0_minus_one_hit)
  for(i <- 0 until (1 << s0_hit_pattern_vec.size)) {
    XSPerfAccumulate(s"s0_hit_pattern_${toBinary(i)}", (VecInit(s0_hit_pattern_vec).asUInt === i.U) && s0_valid)
  }
  XSPerfAccumulate("s0_replace_the_neighbor", s0_valid && !s0_hit && ((s0_plus_one_hit && (s0_index === s0_plus_one_index)) || (s0_minus_one_hit && (s0_index === s0_minus_one_index))))
  XSPerfAccumulate("s0_req_valid", io.train_req.valid)
  XSPerfAccumulate("s0_req_cannot_accept", io.train_req.valid && !io.train_req.ready)
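
  // Prefetch distance sketch (assuming 64 B cache blocks and io.dynamic_depth driven with
  // DEPTH_CACHE_BLOCKS = 16; both are assumptions, the actual depth is supplied from outside):
  //   L1 target block = trained block +/- dynamic_depth                -> ~1 KiB ahead
  //   L2 target block = trained block +/- (dynamic_depth << ratio)     -> ~4 KiB ahead with the default l2DepthRatio = 2
  //   L3 target block = trained block +/- (dynamic_depth << l3_ratio)  -> ~8 KiB ahead with the default l3DepthRatio = 3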
  val ratio_const = Constantin.createRecord(s"l2DepthRatio${p(XSCoreParamsKey).HartId}", initValue = L2_DEPTH_RATIO)
  val ratio = ratio_const(3, 0)

  val l3_ratio_const = Constantin.createRecord(s"l3DepthRatio${p(XSCoreParamsKey).HartId}", initValue = L3_DEPTH_RATIO)
  val l3_ratio = l3_ratio_const(3, 0)

  // s1: alloc or update
  val s1_valid = GatedValidRegNext(s0_valid)
  val s1_index = RegEnable(s0_index, s0_valid)
  val s1_pc = RegEnable(s0_pc, s0_valid)
  val s1_vaddr = RegEnable(s0_vaddr, s0_valid)
  val s1_miss = RegEnable(s0_miss, s0_valid)
  val s1_pfHit = RegEnable(s0_pfHit, s0_valid)
  val s1_plus_one_index = RegEnable(s0_plus_one_index, s0_valid)
  val s1_minus_one_index = RegEnable(s0_minus_one_index, s0_valid)
  val s1_hit = RegEnable(s0_hit, s0_valid)
  val s1_plus_one_hit = if(ENABLE_STRICT_ACTIVE_DETECTION)
    RegEnable(s0_plus_one_hit, s0_valid) && array(s1_plus_one_index).active && (array(s1_plus_one_index).cnt >= ACTIVE_THRESHOLD.U)
  else
    RegEnable(s0_plus_one_hit, s0_valid) && array(s1_plus_one_index).active
  val s1_minus_one_hit = if(ENABLE_STRICT_ACTIVE_DETECTION)
    RegEnable(s0_minus_one_hit, s0_valid) && array(s1_minus_one_index).active && (array(s1_minus_one_index).cnt >= ACTIVE_THRESHOLD.U)
  else
    RegEnable(s0_minus_one_hit, s0_valid) && array(s1_minus_one_index).active
  val s1_region_tag = RegEnable(s0_region_tag, s0_valid)
  val s1_region_bits = RegEnable(s0_region_bits, s0_valid)
  val s1_alloc = s1_valid && !s1_hit
  val s1_update = s1_valid && s1_hit
  val s1_pf_l1_incr_vaddr = Cat(region_to_block_addr(s1_region_tag, s1_region_bits) + io.dynamic_depth, 0.U(BLOCK_OFFSET.W))
  val s1_pf_l1_decr_vaddr = Cat(region_to_block_addr(s1_region_tag, s1_region_bits) - io.dynamic_depth, 0.U(BLOCK_OFFSET.W))
  val s1_pf_l2_incr_vaddr = Cat(region_to_block_addr(s1_region_tag, s1_region_bits) + (io.dynamic_depth << ratio), 0.U(BLOCK_OFFSET.W))
  val s1_pf_l2_decr_vaddr = Cat(region_to_block_addr(s1_region_tag, s1_region_bits) - (io.dynamic_depth << ratio), 0.U(BLOCK_OFFSET.W))
  val s1_pf_l3_incr_vaddr = Cat(region_to_block_addr(s1_region_tag, s1_region_bits) + (io.dynamic_depth << l3_ratio), 0.U(BLOCK_OFFSET.W))
  val s1_pf_l3_decr_vaddr = Cat(region_to_block_addr(s1_region_tag, s1_region_bits) - (io.dynamic_depth << l3_ratio), 0.U(BLOCK_OFFSET.W))
  // TODO: remove this
  val strict_trigger_const = Constantin.createRecord(s"StreamStrictTrigger_${p(XSCoreParamsKey).HartId}", initValue = 1)
  // In strict triggering mode, the stream prefetcher only triggers prefetches on a cache miss
  // or on a hit to a stream-prefetched block, but it still trains on the entire memory access trace.
  val s1_can_trigger = Mux(strict_trigger_const.orR, s1_miss || s1_pfHit, true.B)
  val s1_can_send_pf = Mux(s1_update, !((array(s1_index).bit_vec & UIntToOH(s1_region_bits)).orR), true.B) && s1_can_trigger
  s0_can_accept := !(s1_valid && (region_hash_tag(s1_region_tag) === region_hash_tag(s0_region_tag)))

  when(s1_alloc) {
    // alloc a new entry
    valids(s1_index) := true.B
    array(s1_index).alloc(
      alloc_tag = s1_region_tag,
      alloc_bit_vec = UIntToOH(s1_region_bits),
      alloc_active = s1_plus_one_hit || s1_minus_one_hit,
      alloc_decr_mode = RegEnable(s0_plus_one_hit, s0_valid),
      alloc_full_vaddr = RegEnable(s0_vaddr, s0_valid)
    )

  }.elsewhen(s1_update) {
    // update an existing entry
    assert(array(s1_index).cnt =/= 0.U || valids(s1_index), "entry should have been allocated before")
    array(s1_index).update(
      update_bit_vec = UIntToOH(s1_region_bits),
      update_active = s1_plus_one_hit || s1_minus_one_hit)
  }

  XSPerfAccumulate("s1_alloc", s1_alloc)
  XSPerfAccumulate("s1_update", s1_update)
  XSPerfAccumulate("s1_active_plus_one_hit", s1_valid && s1_plus_one_hit)
  XSPerfAccumulate("s1_active_minus_one_hit", s1_valid && s1_minus_one_hit)
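
  // Note on activation and direction: an entry becomes active either once ACTIVE_THRESHOLD
  // distinct blocks of its region have been touched, or immediately when a neighboring region
  // already holds an active stream (s1_plus_one_hit / s1_minus_one_hit). A plus-one-neighbor
  // hit at allocation time indicates a descending access pattern and would select decrement
  // mode, but only if ENABLE_DECR_MODE were set; it defaults to false, so new entries start
  // in INIT_DEC_MODE.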
  // s2: trigger prefetch if hit active bit vector, compute meta of prefetch req
  val s2_valid = GatedValidRegNext(s1_valid)
  val s2_index = RegEnable(s1_index, s1_valid)
  val s2_pc = RegEnable(s1_pc, s1_valid)
  val s2_vaddr = RegEnable(s1_vaddr, s1_valid)
  val s2_region_bits = RegEnable(s1_region_bits, s1_valid)
  val s2_region_tag = RegEnable(s1_region_tag, s1_valid)
  val s2_pf_l1_incr_vaddr = RegEnable(s1_pf_l1_incr_vaddr, s1_valid)
  val s2_pf_l1_decr_vaddr = RegEnable(s1_pf_l1_decr_vaddr, s1_valid)
  val s2_pf_l2_incr_vaddr = RegEnable(s1_pf_l2_incr_vaddr, s1_valid)
  val s2_pf_l2_decr_vaddr = RegEnable(s1_pf_l2_decr_vaddr, s1_valid)
  val s2_pf_l3_incr_vaddr = RegEnable(s1_pf_l3_incr_vaddr, s1_valid)
  val s2_pf_l3_decr_vaddr = RegEnable(s1_pf_l3_decr_vaddr, s1_valid)
  val s2_can_send_pf = RegEnable(s1_can_send_pf, s1_valid)
  val s2_active = array(s2_index).active
  val s2_decr_mode = array(s2_index).decr_mode
  val s2_l1_vaddr = Mux(s2_decr_mode, s2_pf_l1_decr_vaddr, s2_pf_l1_incr_vaddr)
  val s2_l2_vaddr = Mux(s2_decr_mode, s2_pf_l2_decr_vaddr, s2_pf_l2_incr_vaddr)
  val s2_l3_vaddr = Mux(s2_decr_mode, s2_pf_l3_decr_vaddr, s2_pf_l3_incr_vaddr)
  val s2_will_send_pf = s2_valid && s2_active && s2_can_send_pf
  val s2_pf_req_valid = s2_will_send_pf && io.enable
  val s2_pf_l1_req_bits = (new StreamPrefetchReqBundle).getStreamPrefetchReqBundle(
    valid = s2_valid,
    vaddr = s2_l1_vaddr,
    width = WIDTH_CACHE_BLOCKS,
    decr_mode = s2_decr_mode,
    sink = SINK_L1,
    source = L1_HW_PREFETCH_STREAM,
    t_pc = s2_pc,
    t_va = s2_vaddr
  )
  val s2_pf_l2_req_bits = (new StreamPrefetchReqBundle).getStreamPrefetchReqBundle(
    valid = s2_valid,
    vaddr = s2_l2_vaddr,
    width = L2_WIDTH_CACHE_BLOCKS,
    decr_mode = s2_decr_mode,
    sink = SINK_L2,
    source = L1_HW_PREFETCH_STREAM,
    t_pc = s2_pc,
    t_va = s2_vaddr
  )
  val s2_pf_l3_req_bits = (new StreamPrefetchReqBundle).getStreamPrefetchReqBundle(
    valid = s2_valid,
    vaddr = s2_l3_vaddr,
    width = L3_WIDTH_CACHE_BLOCKS,
    decr_mode = s2_decr_mode,
    sink = SINK_L3,
    source = L1_HW_PREFETCH_STREAM,
    t_pc = s2_pc,
    t_va = s2_vaddr
  )

  XSPerfAccumulate("s2_valid", s2_valid)
  XSPerfAccumulate("s2_will_not_send_pf", s2_valid && !s2_will_send_pf)
  XSPerfAccumulate("s2_will_send_decr_pf", s2_valid && s2_will_send_pf && s2_decr_mode)
  XSPerfAccumulate("s2_will_send_incr_pf", s2_valid && s2_will_send_pf && !s2_decr_mode)
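
  // Request issue is staggered over the next three cycles: the L1 request is sent in s3,
  // the L2 request in s4 and the L3 request in s5. L2 and L3 share io.l2_l3_prefetch_req,
  // with the s4 (L2) request taking priority when both are valid; the L3 request is only
  // issued when the enableL3StreamPrefetch Constantin record is set.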
  // s3: send the l1 prefetch req out
  val s3_pf_l1_valid = GatedValidRegNext(s2_pf_req_valid)
  val s3_pf_l1_bits = RegEnable(s2_pf_l1_req_bits, s2_pf_req_valid)
  val s3_pf_l2_valid = GatedValidRegNext(s2_pf_req_valid)
  val s3_pf_l2_bits = RegEnable(s2_pf_l2_req_bits, s2_pf_req_valid)
  val s3_pf_l3_bits = RegEnable(s2_pf_l3_req_bits, s2_pf_req_valid)

  XSPerfAccumulate("s3_pf_sent", s3_pf_l1_valid)

  // s4: send the l2 prefetch req out
  val s4_pf_l2_valid = GatedValidRegNext(s3_pf_l2_valid)
  val s4_pf_l2_bits = RegEnable(s3_pf_l2_bits, s3_pf_l2_valid)
  val s4_pf_l3_bits = RegEnable(s3_pf_l3_bits, s3_pf_l2_valid)

  val enable_l3_pf = Constantin.createRecord(s"enableL3StreamPrefetch${p(XSCoreParamsKey).HartId}", initValue = false)
  // s5: send the l3 prefetch req out
  val s5_pf_l3_valid = GatedValidRegNext(s4_pf_l2_valid) && enable_l3_pf
  val s5_pf_l3_bits = RegEnable(s4_pf_l3_bits, s4_pf_l2_valid)

  io.l1_prefetch_req.valid := s3_pf_l1_valid
  io.l1_prefetch_req.bits := s3_pf_l1_bits
  io.l2_l3_prefetch_req.valid := s4_pf_l2_valid || s5_pf_l3_valid
  io.l2_l3_prefetch_req.bits := Mux(s4_pf_l2_valid, s4_pf_l2_bits, s5_pf_l3_bits)

  XSPerfAccumulate("s4_pf_sent", s4_pf_l2_valid)
  XSPerfAccumulate("s5_pf_sent", !s4_pf_l2_valid && s5_pf_l3_valid)
  XSPerfAccumulate("pf_sent", PopCount(Seq(io.l1_prefetch_req.valid, io.l2_l3_prefetch_req.valid)))

  // Stride lookup starts here
  // S0: Stride sends the lookup req
  val s0_lookup_valid = io.stream_lookup_req.valid
  val s0_lookup_vaddr = io.stream_lookup_req.bits.vaddr
  val s0_lookup_tag = get_region_tag(s0_lookup_vaddr)
  // S1: match
  val s1_lookup_valid = GatedValidRegNext(s0_lookup_valid)
  val s1_lookup_tag = RegEnable(s0_lookup_tag, s0_lookup_valid)
  val s1_lookup_tag_match_vec = array zip valids map { case (e, v) => e.tag_match(v, s1_lookup_valid, s1_lookup_tag) }
  val s1_lookup_hit = VecInit(s1_lookup_tag_match_vec).asUInt.orR
  val s1_lookup_index = OHToUInt(VecInit(s1_lookup_tag_match_vec))
  // S2: read active out
  val s2_lookup_valid = GatedValidRegNext(s1_lookup_valid)
  val s2_lookup_hit = RegEnable(s1_lookup_hit, s1_lookup_valid)
  val s2_lookup_index = RegEnable(s1_lookup_index, s1_lookup_valid)
  val s2_lookup_active = array(s2_lookup_index).active
  // S3: send the result back to Stride
  val s3_lookup_valid = GatedValidRegNext(s2_lookup_valid)
  val s3_lookup_hit = RegEnable(s2_lookup_hit, s2_lookup_valid)
  val s3_lookup_active = RegEnable(s2_lookup_active, s2_lookup_valid)
  io.stream_lookup_resp := s3_lookup_valid && s3_lookup_hit && s3_lookup_active

  // reset meta to avoid the multi-hit problem
  for(i <- 0 until BIT_VEC_ARRAY_SIZE) {
    when(GatedValidRegNext(io.flush)) {
      reset_array(i)
    }
  }

  XSPerfHistogram("bit_vector_active", PopCount(VecInit(array.map(_.active)).asUInt), true.B, 0, BIT_VEC_ARRAY_SIZE, 1)
  XSPerfHistogram("bit_vector_decr_mode", PopCount(VecInit(array.map(_.decr_mode)).asUInt), true.B, 0, BIT_VEC_ARRAY_SIZE, 1)
  XSPerfAccumulate("hash_conflict", s0_valid && s2_valid && (s0_region_tag =/= s2_region_tag) && (region_hash_tag(s0_region_tag) === region_hash_tag(s2_region_tag)))
}