package xiangshan.mem.prefetch

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import freechips.rocketchip.util._
import utils._
import utility._
import xiangshan._
import xiangshan.backend.fu.PMPRespBundle
import xiangshan.mem.L1PrefetchReq
import xiangshan.mem.Bundles.LsPrefetchTrainBundle
import xiangshan.mem.trace._
import xiangshan.mem.L1PrefetchSource
import xiangshan.cache.HasDCacheParameters
import xiangshan.cache.mmu._

trait HasL1PrefetchHelper extends HasCircularQueuePtrHelper with HasDCacheParameters {
  // region related
  val REGION_SIZE = 1024
  val PAGE_OFFSET = 12
  val BLOCK_OFFSET = log2Up(dcacheParameters.blockBytes)
  val BIT_VEC_WITDH = REGION_SIZE / dcacheParameters.blockBytes
  val REGION_BITS = log2Up(BIT_VEC_WITDH)
  val REGION_TAG_OFFSET = BLOCK_OFFSET + REGION_BITS
  val REGION_TAG_BITS = VAddrBits - BLOCK_OFFSET - REGION_BITS

  // hash related
  val VADDR_HASH_WIDTH = 5
  val BLK_ADDR_RAW_WIDTH = 10
  val HASH_TAG_WIDTH = VADDR_HASH_WIDTH + BLK_ADDR_RAW_WIDTH

  // capacity related
  val MLP_SIZE = 32
  val MLP_L1_SIZE = 16
  val MLP_L2L3_SIZE = MLP_SIZE - MLP_L1_SIZE

  // prefetch sink related
  val SINK_BITS = 2
  def SINK_L1 = "b00".U
  def SINK_L2 = "b01".U
  def SINK_L3 = "b10".U

  // vaddr: | region tag | region bits | block offset |
  def get_region_tag(vaddr: UInt) = {
    require(vaddr.getWidth == VAddrBits)
    vaddr(vaddr.getWidth - 1, REGION_TAG_OFFSET)
  }

  def get_region_bits(vaddr: UInt) = {
    require(vaddr.getWidth == VAddrBits)
    vaddr(REGION_TAG_OFFSET - 1, BLOCK_OFFSET)
  }

  def block_addr(x: UInt): UInt = {
    x(x.getWidth - 1, BLOCK_OFFSET)
  }

  def vaddr_hash(x: UInt): UInt = {
    val width = VADDR_HASH_WIDTH
    val low = x(width - 1, 0)
    val mid = x(2 * width - 1, width)
    val high = x(3 * width - 1, 2 * width)
    low ^ mid ^ high
  }

  def pc_hash_tag(x: UInt): UInt = {
    val low = x(BLK_ADDR_RAW_WIDTH - 1, 0)
    val high = x(BLK_ADDR_RAW_WIDTH - 1 + 3 * VADDR_HASH_WIDTH, BLK_ADDR_RAW_WIDTH)
    val high_hash = vaddr_hash(high)
    Cat(high_hash, low)
  }

  def block_hash_tag(x: UInt): UInt = {
    val blk_addr = block_addr(x)
    val low = blk_addr(BLK_ADDR_RAW_WIDTH - 1, 0)
    val high = blk_addr(BLK_ADDR_RAW_WIDTH - 1 + 3 * VADDR_HASH_WIDTH, BLK_ADDR_RAW_WIDTH)
    val high_hash = vaddr_hash(high)
    Cat(high_hash, low)
  }

  def region_hash_tag(region_tag: UInt): UInt = {
    val low = region_tag(BLK_ADDR_RAW_WIDTH - 1, 0)
    val high = region_tag(BLK_ADDR_RAW_WIDTH - 1 + 3 * VADDR_HASH_WIDTH, BLK_ADDR_RAW_WIDTH)
    val high_hash = vaddr_hash(high)
    Cat(high_hash, low)
  }

  def region_to_block_addr(region_tag: UInt, region_bits: UInt): UInt = {
    Cat(region_tag, region_bits)
  }

  def get_candidate_oh(x: UInt): UInt = {
    require(x.getWidth == PAddrBits)
    UIntToOH(x(REGION_BITS + BLOCK_OFFSET - 1, BLOCK_OFFSET))
  }

  def toBinary(n: Int): String = n match {
    case 0|1 => s"$n"
    case _ => s"${toBinary(n/2)}${n%2}"
  }
}
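
// Worked example for the region constants above (a sketch only, assuming the common 64-byte
// dcache block, i.e. dcacheParameters.blockBytes == 64; other configurations scale accordingly):
//   BLOCK_OFFSET      = log2Up(64)  = 6
//   BIT_VEC_WITDH     = 1024 / 64   = 16
//   REGION_BITS       = log2Up(16)  = 4
//   REGION_TAG_OFFSET = 6 + 4       = 10
// A vaddr then splits as | vaddr(VAddrBits-1, 10) | vaddr(9, 6) | vaddr(5, 0) |, and each filter
// entry covers one 1 KiB region with a 16-bit per-cache-line bit vector.
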
trait HasTrainFilterHelper extends HasCircularQueuePtrHelper {
  def reorder[T <: LsPrefetchTrainBundle](source: Vec[ValidIO[T]]): Vec[ValidIO[T]] = {
    if(source.length == 1) {
      source
    }else if(source.length == 2) {
      val source_v = source.map(_.valid)
      val res = Wire(source.cloneType)
      // source 1 is older than source 0 (only when source0/1 are both valid)
      val source_1_older = Mux(Cat(source_v).andR,
        isBefore(source(1).bits.uop.robIdx, source(0).bits.uop.robIdx),
        false.B
      )
      when(source_1_older) {
        res(0) := source(1)
        res(1) := source(0)
      }.otherwise {
        res := source
      }

      res
    }else if(source.length == 3) {
      // TODO: generalize
      val res_0_1 = Reg(source.cloneType)
      val res_1_2 = Reg(source.cloneType)
      val res = Reg(source.cloneType)

      val tmp = reorder(VecInit(source.slice(0, 2)))
      res_0_1(0) := tmp(0)
      res_0_1(1) := tmp(1)
      res_0_1(2) := source(2)
      val tmp_1 = reorder(VecInit(res_0_1.slice(1, 3)))
      res_1_2(0) := res_0_1(0)
      res_1_2(1) := tmp_1(0)
      res_1_2(2) := tmp_1(1)
      val tmp_2 = reorder(VecInit(res_1_2.slice(0, 2)))
      res(0) := tmp_2(0)
      res(1) := tmp_2(1)
      res(2) := res_1_2(2)

      res
    }else {
      require(false, "for now, 4 or more sources are invalid")
      source
    }
  }
}

// get prefetch train reqs from `backendParams.LduCnt` load pipelines (up to `backendParams.LduCnt`/cycle)
// filter by cache line address, send out train req to the prefetch engine (up to 1 req/cycle)
class TrainFilter(size: Int, name: String)(implicit p: Parameters) extends XSModule with HasL1PrefetchHelper with HasTrainFilterHelper {
  val io = IO(new Bundle() {
    val enable = Input(Bool())
    val flush = Input(Bool())
    // train input, only from load for now
    val ld_in = Flipped(Vec(backendParams.LduCnt, ValidIO(new LsPrefetchTrainBundle())))
    // filter out
    val train_req = DecoupledIO(new PrefetchReqBundle())
  })

  class Ptr(implicit p: Parameters) extends CircularQueuePtr[Ptr]( p => size ){}
  object Ptr {
    def apply(f: Bool, v: UInt)(implicit p: Parameters): Ptr = {
      val ptr = Wire(new Ptr)
      ptr.flag := f
      ptr.value := v
      ptr
    }
  }

  val entries = Reg(Vec(size, new PrefetchReqBundle))
  val valids = RegInit(VecInit(Seq.fill(size){ (false.B) }))

  // enq
  val enqLen = backendParams.LduCnt
  val enqPtrExt = RegInit(VecInit((0 until enqLen).map(_.U.asTypeOf(new Ptr))))
  val deqPtrExt = RegInit(0.U.asTypeOf(new Ptr))

  val deqPtr = WireInit(deqPtrExt.value)

  require(size >= enqLen)

  val ld_in_reordered = reorder(io.ld_in)
  val reqs_l = ld_in_reordered.map(_.bits.toPrefetchReqBundle())
  val reqs_vl = ld_in_reordered.map(_.valid)
  val needAlloc = Wire(Vec(enqLen, Bool()))
  val canAlloc = Wire(Vec(enqLen, Bool()))

  for(i <- (0 until enqLen)) {
    val req = reqs_l(i)
    val req_v = reqs_vl(i)
    val index = PopCount(needAlloc.take(i))
    val allocPtr = enqPtrExt(index)
    val entry_match = Cat(entries.zip(valids).map {
      case(e, v) => v && block_hash_tag(e.vaddr) === block_hash_tag(req.vaddr)
    }).orR
    val prev_enq_match = if(i == 0) false.B else Cat(reqs_l.zip(reqs_vl).take(i).map {
      case(pre, pre_v) => pre_v && block_hash_tag(pre.vaddr) === block_hash_tag(req.vaddr)
    }).orR

    needAlloc(i) := req_v && !entry_match && !prev_enq_match
    canAlloc(i) := needAlloc(i) && allocPtr >= deqPtrExt && io.enable

    when(canAlloc(i)) {
      valids(allocPtr.value) := true.B
      entries(allocPtr.value) := req
    }
  }
  val allocNum = PopCount(canAlloc)

  enqPtrExt.foreach{case x => when(canAlloc.asUInt.orR) {x := x + allocNum} }
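
  // Note on the enqueue logic above: a request is dropped when its cache-line hash (block_hash_tag)
  // already matches a valid entry or an earlier request of the same cycle, so the queue holds at
  // most one pending train request per cache line. The dequeue side below then hands at most one
  // request per cycle to the prefetch engine.
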
  // deq
  io.train_req.valid := false.B
  io.train_req.bits := DontCare
  valids.zip(entries).zipWithIndex.foreach {
    case((valid, entry), i) => {
      when(deqPtr === i.U) {
        io.train_req.valid := valid && io.enable
        io.train_req.bits := entry
      }
    }
  }

  when(io.train_req.fire) {
    valids(deqPtr) := false.B
    deqPtrExt := deqPtrExt + 1.U
  }

  when(RegNext(io.flush)) {
    valids.foreach {case valid => valid := false.B}
    (0 until enqLen).map {case i => enqPtrExt(i) := i.U.asTypeOf(new Ptr)}
    deqPtrExt := 0.U.asTypeOf(new Ptr)
  }

  XSPerfAccumulate(s"${name}_train_filter_full", PopCount(valids) === size.U)
  XSPerfAccumulate(s"${name}_train_filter_half", PopCount(valids) >= (size / 2).U)
  XSPerfAccumulate(s"${name}_train_filter_empty", PopCount(valids) === 0.U)

  val raw_enq_pattern = Cat(reqs_vl)
  val filtered_enq_pattern = Cat(needAlloc)
  val actual_enq_pattern = Cat(canAlloc)
  XSPerfAccumulate(s"${name}_train_filter_enq", allocNum > 0.U)
  XSPerfAccumulate(s"${name}_train_filter_deq", io.train_req.fire)
  for(i <- 0 until (1 << enqLen)) {
    XSPerfAccumulate(s"${name}_train_filter_raw_enq_pattern_${toBinary(i)}", raw_enq_pattern === i.U)
    XSPerfAccumulate(s"${name}_train_filter_filtered_enq_pattern_${toBinary(i)}", filtered_enq_pattern === i.U)
    XSPerfAccumulate(s"${name}_train_filter_actual_enq_pattern_${toBinary(i)}", actual_enq_pattern === i.U)
  }
}

class MLPReqFilterBundle(implicit p: Parameters) extends XSBundle with HasL1PrefetchHelper {
  val tag = UInt(HASH_TAG_WIDTH.W)
  val region = UInt(REGION_TAG_BITS.W)
  val bit_vec = UInt(BIT_VEC_WITDH.W)
  // NOTE: l1 will not use sent_vec, for making more prefetch reqs to l1 dcache
  val sent_vec = UInt(BIT_VEC_WITDH.W)
  val sink = UInt(SINK_BITS.W)
  val alias = UInt(2.W)
  val is_vaddr = Bool()
  val source = new L1PrefetchSource()
  val debug_va_region = UInt(REGION_TAG_BITS.W)

  def reset(index: Int) = {
    tag := region_hash_tag(index.U)
    region := index.U
    bit_vec := 0.U
    sent_vec := 0.U
    sink := SINK_L1
    alias := 0.U
    is_vaddr := false.B
    source.value := L1_HW_PREFETCH_NULL
    debug_va_region := 0.U
  }

  def tag_match(valid1: Bool, valid2: Bool, new_tag: UInt): Bool = {
    require(new_tag.getWidth == HASH_TAG_WIDTH)
    (tag === new_tag) && valid1 && valid2
  }

  def update(update_bit_vec: UInt, update_sink: UInt) = {
    bit_vec := bit_vec | update_bit_vec
    when(update_sink < sink) {
      bit_vec := (bit_vec & ~sent_vec) | update_bit_vec
      sink := update_sink
    }

    assert(PopCount(update_bit_vec) >= 1.U, "update vector should have at least one valid bit")
  }

  def can_send_pf(valid: Bool): Bool = {
    Mux(
      sink === SINK_L1,
      !is_vaddr && bit_vec.orR,
      !is_vaddr && (bit_vec & ~sent_vec).orR
    ) && valid
  }

  def may_be_replace(valid: Bool): Bool = {
    // either invalid or has sent all reqs out
    !valid || RegNext(PopCount(sent_vec) === BIT_VEC_WITDH.U)
  }

  def get_pf_addr(): UInt = {
    require(PAddrBits <= VAddrBits)
    require((region.getWidth + REGION_BITS + BLOCK_OFFSET) == VAddrBits)

    val candidate = Mux(
      sink === SINK_L1,
      PriorityEncoder(bit_vec).asTypeOf(UInt(REGION_BITS.W)),
      PriorityEncoder(bit_vec & ~sent_vec).asTypeOf(UInt(REGION_BITS.W))
    )
    Cat(region, candidate, 0.U(BLOCK_OFFSET.W))
  }

  def get_pf_debug_vaddr(): UInt = {
    val candidate = Mux(
      sink === SINK_L1,
      PriorityEncoder(bit_vec).asTypeOf(UInt(REGION_BITS.W)),
      PriorityEncoder(bit_vec & ~sent_vec).asTypeOf(UInt(REGION_BITS.W))
    )
    Cat(debug_va_region, candidate, 0.U(BLOCK_OFFSET.W))
  }
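
  // The TLB is probed with the region-aligned vaddr below. A region (1 KiB here) never crosses a
  // page (see the PAGE_OFFSET >= REGION_TAG_OFFSET require in fromStreamPrefetchReqBundle), so a
  // single translation covers every cache line tracked by an entry.
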
  def get_tlb_va(): UInt = {
    require((region.getWidth + REGION_TAG_OFFSET) == VAddrBits)
    Cat(region, 0.U(REGION_TAG_OFFSET.W))
  }

  def fromStreamPrefetchReqBundle(x : StreamPrefetchReqBundle): MLPReqFilterBundle = {
    require(PAGE_OFFSET >= REGION_TAG_OFFSET, "region is greater than 4k, alias bit may be incorrect")

    val res = Wire(new MLPReqFilterBundle)
    res.tag := region_hash_tag(x.region)
    res.region := x.region
    res.bit_vec := x.bit_vec
    res.sent_vec := 0.U
    res.sink := x.sink
    res.is_vaddr := true.B
    res.source := x.source
    res.alias := x.region(PAGE_OFFSET - REGION_TAG_OFFSET + 1, PAGE_OFFSET - REGION_TAG_OFFSET)
    res.debug_va_region := x.region

    res
  }

  def invalidate() = {
    // disable sending pf req
    when(sink === SINK_L1) {
      bit_vec := 0.U(BIT_VEC_WITDH.W)
    }.otherwise {
      sent_vec := ~(0.U(BIT_VEC_WITDH.W))
    }
    // disable sending tlb req
    is_vaddr := false.B
  }
}

// there are 5 independent pipelines inside
// 1. prefetch enqueue
// 2. tlb request
// 3. actual l1 prefetch
// 4. actual l2 prefetch
// 5. actual l3 prefetch
class MutiLevelPrefetchFilter(implicit p: Parameters) extends XSModule with HasL1PrefetchHelper {
  val io = IO(new XSBundle {
    val enable = Input(Bool())
    val flush = Input(Bool())
    val l1_prefetch_req = Flipped(ValidIO(new StreamPrefetchReqBundle))
    val l2_l3_prefetch_req = Flipped(ValidIO(new StreamPrefetchReqBundle))
    val tlb_req = new TlbRequestIO(nRespDups = 2)
    val pmp_resp = Flipped(new PMPRespBundle())
    val l1_req = DecoupledIO(new L1PrefetchReq())
    val l2_pf_addr = ValidIO(new L2PrefetchReq())
    val l3_pf_addr = ValidIO(UInt(PAddrBits.W)) // TODO: l3 pf source
    val confidence = Input(UInt(1.W))
    val l2PfqBusy = Input(Bool())
  })

  val l1_array = Reg(Vec(MLP_L1_SIZE, new MLPReqFilterBundle))
  val l2_array = Reg(Vec(MLP_L2L3_SIZE, new MLPReqFilterBundle))
  val l1_valids = RegInit(VecInit(Seq.fill(MLP_L1_SIZE)(false.B)))
  val l2_valids = RegInit(VecInit(Seq.fill(MLP_L2L3_SIZE)(false.B)))

  def _invalid(e: MLPReqFilterBundle, v: Bool): Unit = {
    v := false.B
    e.invalidate()
  }

  def invalid_array(i: UInt, isL2: Boolean): Unit = {
    if (isL2) {
      _invalid(l2_array(i), l2_valids(i))
    } else {
      _invalid(l1_array(i), l1_valids(i))
    }
  }

  def _reset(e: MLPReqFilterBundle, v: Bool, idx: Int): Unit = {
    v := false.B
    // only need to reset control signals for friendly area
    // e.reset(idx)
  }

  def reset_array(i: Int, isL2: Boolean): Unit = {
    if(isL2){
      _reset(l2_array(i), l2_valids(i), i)
    }else{
      _reset(l1_array(i), l1_valids(i), i)
    }
  }

  val l1_replacement = new ValidPseudoLRU(MLP_L1_SIZE)
  val l2_replacement = new ValidPseudoLRU(MLP_L2L3_SIZE)
  val tlb_req_arb = Module(new RRArbiterInit(new TlbReq, MLP_SIZE))
  val l1_pf_req_arb = Module(new RRArbiterInit(new Bundle {
    val req = new L1PrefetchReq
    val debug_vaddr = UInt(VAddrBits.W)
  }, MLP_L1_SIZE))
  val l2_pf_req_arb = Module(new RRArbiterInit(new Bundle {
    val req = new L2PrefetchReq
    val debug_vaddr = UInt(VAddrBits.W)
  }, MLP_L2L3_SIZE))
  val l3_pf_req_arb = Module(new RRArbiterInit(UInt(PAddrBits.W), MLP_L2L3_SIZE))

  val l1_opt_replace_vec = VecInit(l1_array.zip(l1_valids).map{case (e, v) => e.may_be_replace(v)})
  val l2_opt_replace_vec = VecInit(l2_array.zip(l2_valids).map{case (e, v) => e.may_be_replace(v)})
  // if we have something to replace, then choose it, otherwise follow the plru manner
  val l1_real_replace_vec = Mux(Cat(l1_opt_replace_vec).orR, l1_opt_replace_vec, VecInit(Seq.fill(MLP_L1_SIZE)(true.B)))
  val l2_real_replace_vec = Mux(Cat(l2_opt_replace_vec).orR, l2_opt_replace_vec, VecInit(Seq.fill(MLP_L2L3_SIZE)(true.B)))
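
  // Both enqueue pipelines (l1 below, l2/l3 further down) are two-stage:
  //   s0: hash-tag match against the array to decide update (hit) vs. allocate (miss), and pick a
  //       victim way, preferring invalid or fully-sent entries and falling back to the PLRU choice.
  //   s1: perform the allocation or the bit_vec/sink update.
  // s0_*_can_accept rejects an s0 request whose hash matches an allocation still in flight in s1,
  // because the s0 tag match cannot see that entry yet.
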
  // l1 pf req enq
  // s0: hash tag match
  val s0_l1_can_accept = Wire(Bool())
  val s0_l1_valid = io.l1_prefetch_req.valid && s0_l1_can_accept
  val s0_l1_region = io.l1_prefetch_req.bits.region
  val s0_l1_region_hash = region_hash_tag(s0_l1_region)
  val s0_l1_match_vec = l1_array.zip(l1_valids).map{ case (e, v) => e.tag_match(v, s0_l1_valid, s0_l1_region_hash)}
  val s0_l1_hit = VecInit(s0_l1_match_vec).asUInt.orR
  val s0_l1_index = Wire(UInt(log2Up(MLP_L1_SIZE).W))
  val s0_l1_prefetch_req = (new MLPReqFilterBundle).fromStreamPrefetchReqBundle(io.l1_prefetch_req.bits)

  s0_l1_index := Mux(s0_l1_hit, OHToUInt(VecInit(s0_l1_match_vec).asUInt), l1_replacement.way(l1_real_replace_vec.reverse)._2)

  when(s0_l1_valid) {
    l1_replacement.access(s0_l1_index)
  }

  assert(!s0_l1_valid || PopCount(VecInit(s0_l1_match_vec)) <= 1.U, "req region should match no more than 1 entry")

  XSPerfAccumulate("s0_l1_enq_fire", s0_l1_valid)
  XSPerfAccumulate("s0_l1_enq_valid", io.l1_prefetch_req.valid)
  XSPerfAccumulate("s0_l1_cannot_enq", io.l1_prefetch_req.valid && !s0_l1_can_accept)

  // s1: alloc or update
  val s1_l1_valid = RegNext(s0_l1_valid)
  val s1_l1_region = RegEnable(s0_l1_region, s0_l1_valid)
  val s1_l1_region_hash = RegEnable(s0_l1_region_hash, s0_l1_valid)
  val s1_l1_hit = RegEnable(s0_l1_hit, s0_l1_valid)
  val s1_l1_index = RegEnable(s0_l1_index, s0_l1_valid)
  val s1_l1_prefetch_req = RegEnable(s0_l1_prefetch_req, s0_l1_valid)
  val s1_l1_alloc = s1_l1_valid && !s1_l1_hit
  val s1_l1_update = s1_l1_valid && s1_l1_hit
  s0_l1_can_accept := !(s1_l1_valid && s1_l1_alloc && (s0_l1_region_hash === s1_l1_region_hash))

  when(s1_l1_alloc) {
    l1_valids(s1_l1_index) := true.B
    l1_array(s1_l1_index) := s1_l1_prefetch_req
  }.elsewhen(s1_l1_update) {
    l1_array(s1_l1_index).update(
      update_bit_vec = s1_l1_prefetch_req.bit_vec,
      update_sink = s1_l1_prefetch_req.sink
    )
  }

  XSPerfAccumulate("s1_l1_enq_valid", s1_l1_valid)
  XSPerfAccumulate("s1_l1_enq_alloc", s1_l1_alloc)
  XSPerfAccumulate("s1_l1_enq_update", s1_l1_update)
  XSPerfAccumulate("l1_hash_conflict", s0_l1_valid && RegNext(s1_l1_valid) && (s0_l1_region =/= RegNext(s1_l1_region)) && (s0_l1_region_hash === RegNext(s1_l1_region_hash)))
  XSPerfAccumulate("s1_l1_enq_evict_useful_entry", s1_l1_alloc && l1_array(s1_l1_index).can_send_pf(l1_valids(s1_l1_index)))
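
  // The l2/l3 enqueue pipeline below mirrors the l1 one, but writes the shared l2_array: one entry
  // pool serves both SINK_L2 and SINK_L3 requests, and the per-entry sink field decides which
  // downstream arbiter (l2_pf_req_arb or l3_pf_req_arb) may later pick the entry.
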
  // l2 l3 pf req enq
  // s0: hash tag match
  val s0_l2_can_accept = Wire(Bool())
  val s0_l2_valid = io.l2_l3_prefetch_req.valid && s0_l2_can_accept
  val s0_l2_region = io.l2_l3_prefetch_req.bits.region
  val s0_l2_region_hash = region_hash_tag(s0_l2_region)
  val s0_l2_match_vec = l2_array.zip(l2_valids).map{ case (e, v) => e.tag_match(v, s0_l2_valid, s0_l2_region_hash) }
  val s0_l2_hit = VecInit(s0_l2_match_vec).asUInt.orR
  val s0_l2_index = Wire(UInt(log2Up(MLP_L2L3_SIZE).W))
  val s0_l2_prefetch_req = (new MLPReqFilterBundle).fromStreamPrefetchReqBundle(io.l2_l3_prefetch_req.bits)

  s0_l2_index := Mux(s0_l2_hit, OHToUInt(VecInit(s0_l2_match_vec).asUInt), l2_replacement.way(l2_real_replace_vec.reverse)._2)

  when(s0_l2_valid) {
    l2_replacement.access(s0_l2_index)
  }

  assert(!s0_l2_valid || PopCount(VecInit(s0_l2_match_vec)) <= 1.U, "req region should match no more than 1 entry")

  XSPerfAccumulate("s0_l2_enq_fire", s0_l2_valid)
  XSPerfAccumulate("s0_l2_enq_valid", io.l2_l3_prefetch_req.valid)
  XSPerfAccumulate("s0_l2_cannot_enq", io.l2_l3_prefetch_req.valid && !s0_l2_can_accept)

  // s1: alloc or update
  val s1_l2_valid = RegNext(s0_l2_valid)
  val s1_l2_region = RegEnable(s0_l2_region, s0_l2_valid)
  val s1_l2_region_hash = RegEnable(s0_l2_region_hash, s0_l2_valid)
  val s1_l2_hit = RegEnable(s0_l2_hit, s0_l2_valid)
  val s1_l2_index = RegEnable(s0_l2_index, s0_l2_valid)
  val s1_l2_prefetch_req = RegEnable(s0_l2_prefetch_req, s0_l2_valid)
  val s1_l2_alloc = s1_l2_valid && !s1_l2_hit
  val s1_l2_update = s1_l2_valid && s1_l2_hit
  s0_l2_can_accept := !(s1_l2_valid && s1_l2_alloc && (s0_l2_region_hash === s1_l2_region_hash))

  when(s1_l2_alloc) {
    l2_valids(s1_l2_index) := true.B
    l2_array(s1_l2_index) := s1_l2_prefetch_req
  }.elsewhen(s1_l2_update) {
    l2_array(s1_l2_index).update(
      update_bit_vec = s1_l2_prefetch_req.bit_vec,
      update_sink = s1_l2_prefetch_req.sink
    )
  }

  XSPerfAccumulate("s1_l2_enq_valid", s1_l2_valid)
  XSPerfAccumulate("s1_l2_enq_alloc", s1_l2_alloc)
  XSPerfAccumulate("s1_l2_enq_update", s1_l2_update)
  XSPerfAccumulate("l2_hash_conflict", s0_l2_valid && RegNext(s1_l2_valid) && (s0_l2_region =/= RegNext(s1_l2_region)) && (s0_l2_region_hash === RegNext(s1_l2_region_hash)))
  XSPerfAccumulate("s1_l2_enq_evict_useful_entry", s1_l2_alloc && l2_array(s1_l2_index).can_send_pf(l2_valids(s1_l2_index)))

  // stream pf debug db here
  // Hit:
  //   for now, only pending = (region_bits & ~filter_bits) are the pending requests
  //   if a PfGen comes, the newly added requests are new_req = PfGen.region_bits & ~(pending)
  // Alloc:
  //   new_req = PfGen.region_bits
  val stream_pf_trace_debug_table = ChiselDB.createTable("StreamPFTrace" + p(XSCoreParamsKey).HartId.toString, new StreamPFTraceInEntry, basicDB = false)
  for (i <- 0 until BIT_VEC_WITDH) {
    // l1 enq log
    val hit_entry = l1_array(s0_l1_index)
    val new_req = Mux(
      s0_l1_hit,
      io.l1_prefetch_req.bits.bit_vec & ~(hit_entry.bit_vec),
      io.l1_prefetch_req.bits.bit_vec
    )
    val log_enable = s0_l1_valid && new_req(i) && (io.l1_prefetch_req.bits.source.value === L1_HW_PREFETCH_STREAM)
    val log_data = Wire(new StreamPFTraceInEntry)

    log_data.TriggerPC := io.l1_prefetch_req.bits.trigger_pc
    log_data.TriggerVaddr := io.l1_prefetch_req.bits.trigger_va
    log_data.PFVaddr := Cat(s0_l1_region, i.U(REGION_BITS.W), 0.U(log2Up(dcacheParameters.blockBytes).W))
    log_data.PFSink := s0_l1_prefetch_req.sink

    stream_pf_trace_debug_table.log(
      data = log_data,
      en = log_enable,
      site = "StreamPFTrace",
      clock = clock,
      reset = reset
    )
  }
  for (i <- 0 until BIT_VEC_WITDH) {
    // l2 l3 enq log
    val hit_entry = l2_array(s0_l2_index)
    val new_req = Mux(
      s0_l2_hit,
      io.l2_l3_prefetch_req.bits.bit_vec & ~(hit_entry.bit_vec),
      io.l2_l3_prefetch_req.bits.bit_vec
    )
    val log_enable = s0_l2_valid && new_req(i) && (io.l2_l3_prefetch_req.bits.source.value === L1_HW_PREFETCH_STREAM)
    val log_data = Wire(new StreamPFTraceInEntry)

    log_data.TriggerPC := io.l2_l3_prefetch_req.bits.trigger_pc
    log_data.TriggerVaddr := io.l2_l3_prefetch_req.bits.trigger_va
    log_data.PFVaddr := Cat(s0_l2_region, i.U(REGION_BITS.W), 0.U(log2Up(dcacheParameters.blockBytes).W))
    log_data.PFSink := s0_l2_prefetch_req.sink

    stream_pf_trace_debug_table.log(
      data = log_data,
      en = log_enable,
      site = "StreamPFTrace",
      clock = clock,
      reset = reset
    )
  }
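
  // The TLB pipeline below translates the region-aligned vaddr of entries that still hold a vaddr
  // (is_vaddr): s0 round-robin arbitrates across all MLP_SIZE entries, s1 issues the request
  // (cancelled if the entry is being re-allocated), s2 takes the TLB response, and s3 folds in the
  // PMP result, either rewriting the entry's region with the physical region or dropping the entry.
  // not_tlbing_vec keeps an entry from re-requesting while its translation is still in flight.
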
  // tlb req
  // s0: arb all tlb reqs
  val s0_tlb_fire_vec = VecInit((0 until MLP_SIZE).map{case i => tlb_req_arb.io.in(i).fire})
  val s1_tlb_fire_vec = GatedValidRegNext(s0_tlb_fire_vec)
  val s2_tlb_fire_vec = GatedValidRegNext(s1_tlb_fire_vec)
  val s3_tlb_fire_vec = GatedValidRegNext(s2_tlb_fire_vec)
  val not_tlbing_vec = VecInit((0 until MLP_SIZE).map{case i =>
    !s1_tlb_fire_vec(i) && !s2_tlb_fire_vec(i) && !s3_tlb_fire_vec(i)
  })

  for(i <- 0 until MLP_SIZE) {
    val l1_evict = s1_l1_alloc && (s1_l1_index === i.U)
    val l2_evict = s1_l2_alloc && ((s1_l2_index + MLP_L1_SIZE.U) === i.U)
    if(i < MLP_L1_SIZE) {
      tlb_req_arb.io.in(i).valid := l1_valids(i) && l1_array(i).is_vaddr && not_tlbing_vec(i) && !l1_evict
      tlb_req_arb.io.in(i).bits.vaddr := l1_array(i).get_tlb_va()
    }else {
      tlb_req_arb.io.in(i).valid := l2_valids(i - MLP_L1_SIZE) && l2_array(i - MLP_L1_SIZE).is_vaddr && not_tlbing_vec(i) && !l2_evict
      tlb_req_arb.io.in(i).bits.vaddr := l2_array(i - MLP_L1_SIZE).get_tlb_va()
    }
    tlb_req_arb.io.in(i).bits.cmd := TlbCmd.read
    tlb_req_arb.io.in(i).bits.isPrefetch := true.B
    tlb_req_arb.io.in(i).bits.size := 3.U
    tlb_req_arb.io.in(i).bits.kill := false.B
    tlb_req_arb.io.in(i).bits.no_translate := false.B
    tlb_req_arb.io.in(i).bits.fullva := 0.U
    tlb_req_arb.io.in(i).bits.checkfullva := false.B
    tlb_req_arb.io.in(i).bits.memidx := DontCare
    tlb_req_arb.io.in(i).bits.debug := DontCare
    tlb_req_arb.io.in(i).bits.hlvx := DontCare
    tlb_req_arb.io.in(i).bits.hyperinst := DontCare
    tlb_req_arb.io.in(i).bits.pmp_addr := DontCare
  }

  assert(PopCount(s0_tlb_fire_vec) <= 1.U, "s0_tlb_fire_vec should be one-hot or empty")

  // s1: send out the req
  val s1_tlb_req_valid = GatedValidRegNext(tlb_req_arb.io.out.valid)
  val s1_tlb_req_bits = RegEnable(tlb_req_arb.io.out.bits, tlb_req_arb.io.out.valid)
  val s1_tlb_req_index = RegEnable(OHToUInt(s0_tlb_fire_vec.asUInt), tlb_req_arb.io.out.valid)
  val s1_l1_tlb_evict = s1_l1_alloc && (s1_l1_index === s1_tlb_req_index)
  val s1_l2_tlb_evict = s1_l2_alloc && ((s1_l2_index + MLP_L1_SIZE.U) === s1_tlb_req_index)
  val s1_tlb_evict = s1_l1_tlb_evict || s1_l2_tlb_evict
  io.tlb_req.req.valid := s1_tlb_req_valid && !s1_tlb_evict
  io.tlb_req.req.bits := s1_tlb_req_bits
  io.tlb_req.req_kill := false.B
  tlb_req_arb.io.out.ready := true.B

  XSPerfAccumulate("s1_tlb_req_sent", io.tlb_req.req.valid)
  XSPerfAccumulate("s1_tlb_req_evict", s1_tlb_req_valid && s1_tlb_evict)

  // s2: get response from tlb
  val s2_tlb_resp_valid = io.tlb_req.resp.valid
  val s2_tlb_resp = io.tlb_req.resp.bits
  val s2_tlb_update_index = RegEnable(s1_tlb_req_index, s1_tlb_req_valid)
  val s2_l1_tlb_evict = s1_l1_alloc && (s1_l1_index === s2_tlb_update_index)
  val s2_l2_tlb_evict = s1_l2_alloc && ((s1_l2_index + MLP_L1_SIZE.U) === s2_tlb_update_index)
  val s2_tlb_evict = s2_l1_tlb_evict || s2_l2_tlb_evict
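
  // s3 below samples io.pmp_resp one cycle after the TLB response (the PMP check runs on the
  // translated paddr), then either updates the entry with the translated region or invalidates it
  // on a page/guest-page/access fault, an uncache/MMIO attribute, or a PMP load violation.
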
  // s3: get pmp response from PMPChecker
  val s3_tlb_resp_valid = RegNext(s2_tlb_resp_valid)
  val s3_tlb_resp = RegEnable(s2_tlb_resp, s2_tlb_resp_valid)
  val s3_tlb_update_index = RegEnable(s2_tlb_update_index, s2_tlb_resp_valid)
  val s3_tlb_evict = RegNext(s2_tlb_evict)
  val s3_pmp_resp = io.pmp_resp
  val s3_update_valid = s3_tlb_resp_valid && !s3_tlb_evict && !s3_tlb_resp.miss
  val s3_drop = s3_update_valid && (
    // page/access fault
    s3_tlb_resp.excp.head.pf.ld || s3_tlb_resp.excp.head.gpf.ld || s3_tlb_resp.excp.head.af.ld ||
    // uncache
    s3_pmp_resp.mmio || Pbmt.isUncache(s3_tlb_resp.pbmt.head) ||
    // pmp access fault
    s3_pmp_resp.ld
  )
  when(s3_tlb_resp_valid && !s3_tlb_evict) {
    when(s3_tlb_update_index < MLP_L1_SIZE.U) {
      l1_array(s3_tlb_update_index).is_vaddr := s3_tlb_resp.miss

      when(!s3_tlb_resp.miss) {
        l1_array(s3_tlb_update_index).region := Cat(0.U((VAddrBits - PAddrBits).W), s3_tlb_resp.paddr.head(s3_tlb_resp.paddr.head.getWidth - 1, REGION_TAG_OFFSET))
        when(s3_drop) {
          invalid_array(s3_tlb_update_index, false)
        }
      }
    }.otherwise {
      val inner_index = s3_tlb_update_index - MLP_L1_SIZE.U
      l2_array(inner_index).is_vaddr := s3_tlb_resp.miss

      when(!s3_tlb_resp.miss) {
        l2_array(inner_index).region := Cat(0.U((VAddrBits - PAddrBits).W), s3_tlb_resp.paddr.head(s3_tlb_resp.paddr.head.getWidth - 1, REGION_TAG_OFFSET))
        when(s3_drop) {
          invalid_array(inner_index, true)
        }
      }
    }
  }
  io.tlb_req.resp.ready := true.B

  XSPerfAccumulate("s3_tlb_resp_valid", s3_tlb_resp_valid)
  XSPerfAccumulate("s3_tlb_resp_evict", s3_tlb_resp_valid && s3_tlb_evict)
  XSPerfAccumulate("s3_tlb_resp_miss", s3_tlb_resp_valid && !s3_tlb_evict && s3_tlb_resp.miss)
  XSPerfAccumulate("s3_tlb_resp_updated", s3_update_valid)
  XSPerfAccumulate("s3_tlb_resp_page_fault", s3_update_valid && s3_tlb_resp.excp.head.pf.ld)
  XSPerfAccumulate("s3_tlb_resp_guestpage_fault", s3_update_valid && s3_tlb_resp.excp.head.gpf.ld)
  XSPerfAccumulate("s3_tlb_resp_access_fault", s3_update_valid && s3_tlb_resp.excp.head.af.ld)
  XSPerfAccumulate("s3_tlb_resp_pmp_access_fault", s3_update_valid && s3_pmp_resp.ld)
  XSPerfAccumulate("s3_tlb_resp_uncache", s3_update_valid && (Pbmt.isUncache(s3_tlb_resp.pbmt.head) || s3_pmp_resp.mmio))

  // l1 pf
  // s0: generate prefetch req paddr per entry, arb them
  val s0_pf_fire_vec = VecInit((0 until MLP_L1_SIZE).map{case i => l1_pf_req_arb.io.in(i).fire})
  val s1_pf_fire_vec = GatedValidRegNext(s0_pf_fire_vec)

  val s0_pf_fire = l1_pf_req_arb.io.out.fire
  val s0_pf_index = l1_pf_req_arb.io.chosen
  val s0_pf_candidate_oh = get_candidate_oh(l1_pf_req_arb.io.out.bits.req.paddr)

  for(i <- 0 until MLP_L1_SIZE) {
    val evict = s1_l1_alloc && (s1_l1_index === i.U)
    l1_pf_req_arb.io.in(i).valid := l1_array(i).can_send_pf(l1_valids(i)) && !evict
    l1_pf_req_arb.io.in(i).bits.req.paddr := l1_array(i).get_pf_addr()
    l1_pf_req_arb.io.in(i).bits.req.alias := l1_array(i).alias
    l1_pf_req_arb.io.in(i).bits.req.confidence := io.confidence
    l1_pf_req_arb.io.in(i).bits.req.is_store := false.B
    l1_pf_req_arb.io.in(i).bits.req.pf_source := l1_array(i).source
    l1_pf_req_arb.io.in(i).bits.debug_vaddr := l1_array(i).get_pf_debug_vaddr()
  }

  when(s0_pf_fire) {
    l1_array(s0_pf_index).sent_vec := l1_array(s0_pf_index).sent_vec | s0_pf_candidate_oh
  }

  assert(PopCount(s0_pf_fire_vec) <= 1.U, "s0_pf_fire_vec should be one-hot or empty")
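
  // s1 below is a one-entry holding stage towards the dcache: the arbitrated request waits in
  // s1_pf_* until io.l1_req is ready, and is cancelled if its entry is re-allocated (evict) or
  // updated in the same cycle, so the bit_vec clear in s1 never races the enqueue pipeline's write.
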
  // s1: send out to dcache
  val s1_pf_valid = Reg(Bool())
  val s1_pf_bits = RegEnable(l1_pf_req_arb.io.out.bits, l1_pf_req_arb.io.out.fire)
  val s1_pf_index = RegEnable(s0_pf_index, l1_pf_req_arb.io.out.fire)
  val s1_pf_candidate_oh = RegEnable(s0_pf_candidate_oh, l1_pf_req_arb.io.out.fire)
  val s1_pf_evict = s1_l1_alloc && (s1_l1_index === s1_pf_index)
  val s1_pf_update = s1_l1_update && (s1_l1_index === s1_pf_index)
  val s1_pf_can_go = io.l1_req.ready && !s1_pf_evict && !s1_pf_update
  val s1_pf_fire = s1_pf_valid && s1_pf_can_go

  when(s1_pf_can_go) {
    s1_pf_valid := false.B
  }

  when(l1_pf_req_arb.io.out.fire) {
    s1_pf_valid := true.B
  }

  when(s1_pf_fire) {
    l1_array(s1_pf_index).bit_vec := l1_array(s1_pf_index).bit_vec & ~s1_pf_candidate_oh
  }

  val in_pmem = PmemRanges.map(_.cover(s1_pf_bits.req.paddr)).reduce(_ || _)
  io.l1_req.valid := s1_pf_valid && !s1_pf_evict && !s1_pf_update && in_pmem && io.enable
  io.l1_req.bits := s1_pf_bits.req

  l1_pf_req_arb.io.out.ready := s1_pf_can_go || !s1_pf_valid

  assert(!((s1_l1_alloc || s1_l1_update) && s1_pf_fire && (s1_l1_index === s1_pf_index)), "pf pipeline & enq pipeline bit_vec hazard!")

  XSPerfAccumulate("s1_pf_valid", s1_pf_valid)
  XSPerfAccumulate("s1_pf_block_by_pipe_unready", s1_pf_valid && !io.l1_req.ready)
  XSPerfAccumulate("s1_pf_block_by_enq_alloc_harzard", s1_pf_valid && s1_pf_evict)
  XSPerfAccumulate("s1_pf_block_by_enq_update_harzard", s1_pf_valid && s1_pf_update)
  XSPerfAccumulate("s1_pf_fire", s1_pf_fire)

  // l2 pf
  // s0: generate prefetch req paddr per entry, arb them, sent out
  io.l2_pf_addr.valid := l2_pf_req_arb.io.out.valid
  io.l2_pf_addr.bits := l2_pf_req_arb.io.out.bits.req

  l2_pf_req_arb.io.out.ready := true.B

  for(i <- 0 until MLP_L2L3_SIZE) {
    val evict = s1_l2_alloc && (s1_l2_index === i.U)
    l2_pf_req_arb.io.in(i).valid := l2_array(i).can_send_pf(l2_valids(i)) && (l2_array(i).sink === SINK_L2) && !evict
    l2_pf_req_arb.io.in(i).bits.req.addr := l2_array(i).get_pf_addr()
    l2_pf_req_arb.io.in(i).bits.req.source := MuxLookup(l2_array(i).source.value, MemReqSource.Prefetch2L2Unknown.id.U)(Seq(
      L1_HW_PREFETCH_STRIDE -> MemReqSource.Prefetch2L2Stride.id.U,
      L1_HW_PREFETCH_STREAM -> MemReqSource.Prefetch2L2Stream.id.U
    ))
    l2_pf_req_arb.io.in(i).bits.debug_vaddr := l2_array(i).get_pf_debug_vaddr()
  }

  when(l2_pf_req_arb.io.out.valid) {
    l2_array(l2_pf_req_arb.io.chosen).sent_vec := l2_array(l2_pf_req_arb.io.chosen).sent_vec | get_candidate_oh(l2_pf_req_arb.io.out.bits.req.addr)
  }

  val stream_out_debug_table = ChiselDB.createTable("StreamPFTraceOut" + p(XSCoreParamsKey).HartId.toString, new StreamPFTraceOutEntry, basicDB = false)
  val l1_debug_data = Wire(new StreamPFTraceOutEntry)
  val l2_debug_data = Wire(new StreamPFTraceOutEntry)
  l1_debug_data.PFVaddr := l1_pf_req_arb.io.out.bits.debug_vaddr
  l1_debug_data.PFSink := SINK_L1
  l2_debug_data.PFVaddr := l2_pf_req_arb.io.out.bits.debug_vaddr
  l2_debug_data.PFSink := SINK_L2

  stream_out_debug_table.log(
    data = l1_debug_data,
    en = l1_pf_req_arb.io.out.fire && (l1_pf_req_arb.io.out.bits.req.pf_source.value === L1_HW_PREFETCH_STREAM),
    site = "StreamPFTraceOut",
    clock = clock,
    reset = reset
  )
  stream_out_debug_table.log(
    data = l2_debug_data,
    en = l2_pf_req_arb.io.out.fire && (l2_pf_req_arb.io.out.bits.req.source === MemReqSource.Prefetch2L2Stream.id.U),
    site = "StreamPFTraceOut",
    clock = clock,
    reset = reset
  )
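
  // The l3 path below reuses the same l2_array entries, selected by sink === SINK_L3; unlike the
  // l2 path it carries only a paddr (no source/alias/confidence), matching the "TODO: l3 pf source"
  // note on io.l3_pf_addr.
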
  // last level cache pf
  // s0: generate prefetch req paddr per entry, arb them, sent out
  io.l3_pf_addr.valid := l3_pf_req_arb.io.out.valid
  io.l3_pf_addr.bits := l3_pf_req_arb.io.out.bits

  l3_pf_req_arb.io.out.ready := true.B

  for(i <- 0 until MLP_L2L3_SIZE) {
    val evict = s1_l2_alloc && (s1_l2_index === i.U)
    l3_pf_req_arb.io.in(i).valid := l2_array(i).can_send_pf(l2_valids(i)) && (l2_array(i).sink === SINK_L3) && !evict
    l3_pf_req_arb.io.in(i).bits := l2_array(i).get_pf_addr()
  }

  when(l3_pf_req_arb.io.out.valid) {
    l2_array(l3_pf_req_arb.io.chosen).sent_vec := l2_array(l3_pf_req_arb.io.chosen).sent_vec | get_candidate_oh(l3_pf_req_arb.io.out.bits)
  }

  // reset meta to avoid multi-hit problem
  for(i <- 0 until MLP_SIZE) {
    if(i < MLP_L1_SIZE) {
      when(RegNext(io.flush)) {
        reset_array(i, false)
      }
    }else {
      when(RegNext(io.flush)) {
        reset_array(i - MLP_L1_SIZE, true)
      }
    }
  }

  XSPerfAccumulate("l2_prefetche_queue_busby", io.l2PfqBusy)
  XSPerfHistogram("filter_active", PopCount(VecInit(
    l1_array.zip(l1_valids).map{ case (e, v) => e.can_send_pf(v) } ++
    l2_array.zip(l2_valids).map{ case (e, v) => e.can_send_pf(v) }
  ).asUInt), true.B, 0, MLP_SIZE, 1)
  XSPerfHistogram("l1_filter_active", PopCount(VecInit(l1_array.zip(l1_valids).map{ case (e, v) => e.can_send_pf(v)}).asUInt), true.B, 0, MLP_L1_SIZE, 1)
  XSPerfHistogram("l2_filter_active", PopCount(VecInit(l2_array.zip(l2_valids).map{ case (e, v) => e.can_send_pf(v) && (e.sink === SINK_L2)}).asUInt), true.B, 0, MLP_L2L3_SIZE, 1)
  XSPerfHistogram("l3_filter_active", PopCount(VecInit(l2_array.zip(l2_valids).map{ case (e, v) => e.can_send_pf(v) && (e.sink === SINK_L3)}).asUInt), true.B, 0, MLP_L2L3_SIZE, 1)
}

class L1Prefetcher(implicit p: Parameters) extends BasePrefecher with HasStreamPrefetchHelper with HasStridePrefetchHelper {
  val pf_ctrl = IO(Input(new PrefetchControlBundle))
  val stride_train = IO(Flipped(Vec(backendParams.LduCnt + backendParams.HyuCnt, ValidIO(new LsPrefetchTrainBundle()))))
  val l2PfqBusy = IO(Input(Bool()))

  val stride_train_filter = Module(new TrainFilter(STRIDE_FILTER_SIZE, "stride"))
  val stride_meta_array = Module(new StrideMetaArray)
  val stream_train_filter = Module(new TrainFilter(STREAM_FILTER_SIZE, "stream"))
  val stream_bit_vec_array = Module(new StreamBitVectorArray)
  val pf_queue_filter = Module(new MutiLevelPrefetchFilter)

  // for now, if the stream is disabled, train and prefetch process will continue, without sending out any reqs
  val enable = io.enable
  val flush = pf_ctrl.flush

  stream_train_filter.io.ld_in.zipWithIndex.foreach {
    case (ld_in, i) => {
      ld_in.valid := io.ld_in(i).valid && enable
      ld_in.bits := io.ld_in(i).bits
    }
  }
  stream_train_filter.io.enable := enable
  stream_train_filter.io.flush := flush

  stride_train_filter.io.ld_in.zipWithIndex.foreach {
    case (ld_in, i) => {
      ld_in.valid := stride_train(i).valid && enable
      ld_in.bits := stride_train(i).bits
    }
  }
  stride_train_filter.io.enable := enable
  stride_train_filter.io.flush := flush

  stream_bit_vec_array.io.enable := enable
  stream_bit_vec_array.io.flush := flush
  stream_bit_vec_array.io.dynamic_depth := pf_ctrl.dynamic_depth
  stream_bit_vec_array.io.train_req <> stream_train_filter.io.train_req

  stride_meta_array.io.enable := enable
  stride_meta_array.io.flush := flush
  stride_meta_array.io.dynamic_depth := 0.U
  stride_meta_array.io.train_req <> stride_train_filter.io.train_req
  stride_meta_array.io.stream_lookup_req <> stream_bit_vec_array.io.stream_lookup_req
  stride_meta_array.io.stream_lookup_resp <> stream_bit_vec_array.io.stream_lookup_resp
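
  // Note: the selection below is a plain priority mux, not a queue. If the stream and stride
  // engines both produce a prefetch request in the same cycle, the stride request is dropped for
  // that cycle.
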
  // stream has higher priority than stride
  pf_queue_filter.io.l1_prefetch_req.valid := stream_bit_vec_array.io.l1_prefetch_req.valid || stride_meta_array.io.l1_prefetch_req.valid
  pf_queue_filter.io.l1_prefetch_req.bits := Mux(
    stream_bit_vec_array.io.l1_prefetch_req.valid,
    stream_bit_vec_array.io.l1_prefetch_req.bits,
    stride_meta_array.io.l1_prefetch_req.bits
  )

  pf_queue_filter.io.l2_l3_prefetch_req.valid := stream_bit_vec_array.io.l2_l3_prefetch_req.valid || stride_meta_array.io.l2_l3_prefetch_req.valid
  pf_queue_filter.io.l2_l3_prefetch_req.bits := Mux(
    stream_bit_vec_array.io.l2_l3_prefetch_req.valid,
    stream_bit_vec_array.io.l2_l3_prefetch_req.bits,
    stride_meta_array.io.l2_l3_prefetch_req.bits
  )

  io.l1_req.valid := pf_queue_filter.io.l1_req.valid && enable && pf_ctrl.enable
  io.l1_req.bits := pf_queue_filter.io.l1_req.bits

  pf_queue_filter.io.l1_req.ready := Mux(pf_ctrl.enable, io.l1_req.ready, true.B)
  pf_queue_filter.io.tlb_req <> io.tlb_req
  pf_queue_filter.io.pmp_resp := io.pmp_resp
  pf_queue_filter.io.enable := enable
  pf_queue_filter.io.flush := flush
  pf_queue_filter.io.confidence := pf_ctrl.confidence
  pf_queue_filter.io.l2PfqBusy := l2PfqBusy

  val l2_in_pmem = PmemRanges.map(_.cover(pf_queue_filter.io.l2_pf_addr.bits.addr)).reduce(_ || _)
  io.l2_req.valid := pf_queue_filter.io.l2_pf_addr.valid && l2_in_pmem && enable && pf_ctrl.enable
  io.l2_req.bits := pf_queue_filter.io.l2_pf_addr.bits

  val l3_in_pmem = PmemRanges.map(_.cover(pf_queue_filter.io.l3_pf_addr.bits)).reduce(_ || _)
  io.l3_req.valid := pf_queue_filter.io.l3_pf_addr.valid && l3_in_pmem && enable && pf_ctrl.enable
  io.l3_req.bits := pf_queue_filter.io.l3_pf_addr.bits
}