xref: /XiangShan/src/main/scala/xiangshan/mem/prefetch/L1PrefetchComponent.scala (revision 99ce5576f0ecce1b5045b7bc0dbbb2debd934fbb)
1package xiangshan.mem.prefetch
2
3import org.chipsalliance.cde.config.Parameters
4import chisel3._
5import chisel3.util._
6import freechips.rocketchip.util._
7import utils._
8import utility._
9import xiangshan._
10import xiangshan.backend.fu.PMPRespBundle
11import xiangshan.mem.L1PrefetchReq
12import xiangshan.mem.Bundles.LsPrefetchTrainBundle
13import xiangshan.mem.trace._
14import xiangshan.mem.L1PrefetchSource
15import xiangshan.cache.HasDCacheParameters
16import xiangshan.cache.mmu._
17
18trait HasL1PrefetchHelper extends HasCircularQueuePtrHelper with HasDCacheParameters {
19  // region related
20  val REGION_SIZE = 1024
21  val PAGE_OFFSET = 12
22  val BLOCK_OFFSET = log2Up(dcacheParameters.blockBytes)
23  val BIT_VEC_WITDH = REGION_SIZE / dcacheParameters.blockBytes
24  val REGION_BITS = log2Up(BIT_VEC_WITDH)
25  val REGION_TAG_OFFSET = BLOCK_OFFSET + REGION_BITS
26  val REGION_TAG_BITS = VAddrBits - BLOCK_OFFSET - REGION_BITS
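  // Worked example, assuming 64-byte DCache blocks (the usual XiangShan config):
  //   BLOCK_OFFSET = 6, BIT_VEC_WITDH = 1024 / 64 = 16, REGION_BITS = 4,
  //   REGION_TAG_OFFSET = 10, REGION_TAG_BITS = VAddrBits - 10,
  //   i.e. one region covers 16 consecutive cache blocks tracked by a 16-bit vector.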
27
28  // hash related
29  val VADDR_HASH_WIDTH = 5
30  val BLK_ADDR_RAW_WIDTH = 10
31  val HASH_TAG_WIDTH = VADDR_HASH_WIDTH + BLK_ADDR_RAW_WIDTH
32
33  // capacity related
34  val MLP_SIZE = 32
35  val MLP_L1_SIZE = 16
36  val MLP_L2L3_SIZE = MLP_SIZE - MLP_L1_SIZE
37
38  // prefetch sink related
39  val SINK_BITS = 2
40  def SINK_L1 = "b00".U
41  def SINK_L2 = "b01".U
42  def SINK_L3 = "b10".U
43
44  // vaddr: |       region tag        |  region bits  | block offset |
45  def get_region_tag(vaddr: UInt) = {
46    require(vaddr.getWidth == VAddrBits)
47    vaddr(vaddr.getWidth - 1, REGION_TAG_OFFSET)
48  }
49
50  def get_region_bits(vaddr: UInt) = {
51    require(vaddr.getWidth == VAddrBits)
52    vaddr(REGION_TAG_OFFSET - 1, BLOCK_OFFSET)
53  }
54
55  def block_addr(x: UInt): UInt = {
56    x(x.getWidth - 1, BLOCK_OFFSET)
57  }
58
59  def vaddr_hash(x: UInt): UInt = {
60    val width = VADDR_HASH_WIDTH
61    val low = x(width - 1, 0)
62    val mid = x(2 * width - 1, width)
63    val high = x(3 * width - 1, 2 * width)
64    low ^ mid ^ high
65  }
66
67  def pc_hash_tag(x: UInt): UInt = {
68    val low = x(BLK_ADDR_RAW_WIDTH - 1, 0)
69    val high = x(BLK_ADDR_RAW_WIDTH - 1 + 3 * VADDR_HASH_WIDTH, BLK_ADDR_RAW_WIDTH)
70    val high_hash = vaddr_hash(high)
71    Cat(high_hash, low)
72  }
73
74  def block_hash_tag(x: UInt): UInt = {
75    val blk_addr = block_addr(x)
76    val low = blk_addr(BLK_ADDR_RAW_WIDTH - 1, 0)
77    val high = blk_addr(BLK_ADDR_RAW_WIDTH - 1 + 3 * VADDR_HASH_WIDTH, BLK_ADDR_RAW_WIDTH)
78    val high_hash = vaddr_hash(high)
79    Cat(high_hash, low)
80  }
81
82  def region_hash_tag(region_tag: UInt): UInt = {
83    val low = region_tag(BLK_ADDR_RAW_WIDTH - 1, 0)
84    val high = region_tag(BLK_ADDR_RAW_WIDTH - 1 + 3 * VADDR_HASH_WIDTH, BLK_ADDR_RAW_WIDTH)
85    val high_hash = vaddr_hash(high)
86    Cat(high_hash, low)
87  }
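  // The three *_hash_tag helpers above share one scheme: keep the low BLK_ADDR_RAW_WIDTH (10)
  // bits as-is and XOR-fold the next 3 * VADDR_HASH_WIDTH (15) bits down to 5 bits, giving a
  // HASH_TAG_WIDTH (15-bit) tag. Different addresses can alias to the same tag; the filters
  // below use tags only for match/dedup, so aliasing costs prefetch accuracy (tracked by the
  // l1/l2_hash_conflict perf counters), not correctness.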
88
89  def region_to_block_addr(region_tag: UInt, region_bits: UInt): UInt = {
90    Cat(region_tag, region_bits)
91  }
92
93  def get_candidate_oh(x: UInt): UInt = {
94    require(x.getWidth == PAddrBits)
95    UIntToOH(x(REGION_BITS + BLOCK_OFFSET - 1, BLOCK_OFFSET))
96  }
97
98  def toBinary(n: Int): String = n match {
99    case 0|1 => s"$n"
100    case _   => s"${toBinary(n/2)}${n%2}"
101  }
102}
103
104trait HasTrainFilterHelper extends HasCircularQueuePtrHelper {
105  def reorder[T <: LsPrefetchTrainBundle](source: Vec[ValidIO[T]]): Vec[ValidIO[T]] = {
106    if(source.length == 1) {
107      source
108    }else if(source.length == 2) {
109      val source_v = source.map(_.valid)
110      val res = Wire(source.cloneType)
111      // source 1 is older than source 0 (only when source0/1 are both valid)
112      val source_1_older = Mux(Cat(source_v).andR,
113        isBefore(source(1).bits.uop.robIdx, source(0).bits.uop.robIdx),
114        false.B
115      )
116      when(source_1_older) {
117        res(0) := source(1)
118        res(1) := source(0)
119      }.otherwise {
120        res := source
121      }
122
123      res
124    }else if(source.length == 3) {
125      // TODO: generalize
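      // The 3-wide case is a three-step compare-and-exchange (bubble-sort) network built from
      // the 2-wide reorder: sort (0,1), then (1,2), then (0,1) again. Note that the intermediate
      // results are held in Regs, so the fully sorted vector trails the inputs by register stages.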
126      val res_0_1 = Reg(source.cloneType)
127      val res_1_2 = Reg(source.cloneType)
128      val res = Reg(source.cloneType)
129
130      val tmp = reorder(VecInit(source.slice(0, 2)))
131      res_0_1(0) := tmp(0)
132      res_0_1(1) := tmp(1)
133      res_0_1(2) := source(2)
134      val tmp_1 = reorder(VecInit(res_0_1.slice(1, 3)))
135      res_1_2(0) := res_0_1(0)
136      res_1_2(1) := tmp_1(0)
137      res_1_2(2) := tmp_1(1)
138      val tmp_2 = reorder(VecInit(res_1_2.slice(0, 2)))
139      res(0) := tmp_2(0)
140      res(1) := tmp_2(1)
141      res(2) := res_1_2(2)
142
143      res
144    }else {
145      require(false, "for now, reordering 4 or more sources is not supported")
146      source
147    }
148  }
149}
150
151// get prefetch train reqs from `backendParams.LduCnt` load pipelines (up to `backendParams.LduCnt`/cycle)
152// filter by cache line address, send out train req to stride (up to 1 req/cycle)
153class TrainFilter(size: Int, name: String)(implicit p: Parameters) extends XSModule with HasL1PrefetchHelper with HasTrainFilterHelper {
154  val io = IO(new Bundle() {
155    val enable = Input(Bool())
156    val flush = Input(Bool())
157    // train input, only from load for now
158    val ld_in = Flipped(Vec(backendParams.LduCnt, ValidIO(new LsPrefetchTrainBundle())))
159    // filter out
160    val train_req = DecoupledIO(new PrefetchReqBundle())
161  })
162
163  class Ptr(implicit p: Parameters) extends CircularQueuePtr[Ptr]( p => size ){}
164  object Ptr {
165    def apply(f: Bool, v: UInt)(implicit p: Parameters): Ptr = {
166      val ptr = Wire(new Ptr)
167      ptr.flag := f
168      ptr.value := v
169      ptr
170    }
171  }
172
173  val entries = Reg(Vec(size, new PrefetchReqBundle))
174  val valids = RegInit(VecInit(Seq.fill(size){ (false.B) }))
175
176  // enq
177  val enqLen = backendParams.LduCnt
178  val enqPtrExt = RegInit(VecInit((0 until enqLen).map(_.U.asTypeOf(new Ptr))))
179  val deqPtrExt = RegInit(0.U.asTypeOf(new Ptr))
180
181  val deqPtr = WireInit(deqPtrExt.value)
182
183  require(size >= enqLen)
184
185  val ld_in_reordered = reorder(io.ld_in)
186  val reqs_l = ld_in_reordered.map(_.bits.toPrefetchReqBundle())
187  val reqs_vl = ld_in_reordered.map(_.valid)
188  val needAlloc = Wire(Vec(enqLen, Bool()))
189  val canAlloc = Wire(Vec(enqLen, Bool()))
190
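  // Enqueue: an incoming train req is dropped if its block hash already matches a live entry
  // (entry_match) or an earlier same-cycle req (prev_enq_match); the survivors are packed onto
  // consecutive enq pointers, so at most enqLen entries are allocated per cycle.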
191  for(i <- (0 until enqLen)) {
192    val req = reqs_l(i)
193    val req_v = reqs_vl(i)
194    val index = PopCount(needAlloc.take(i))
195    val allocPtr = enqPtrExt(index)
196    val entry_match = Cat(entries.zip(valids).map {
197      case(e, v) => v && block_hash_tag(e.vaddr) === block_hash_tag(req.vaddr)
198    }).orR
199    val prev_enq_match = if(i == 0) false.B else Cat(reqs_l.zip(reqs_vl).take(i).map {
200      case(pre, pre_v) => pre_v && block_hash_tag(pre.vaddr) === block_hash_tag(req.vaddr)
201    }).orR
202
203    needAlloc(i) := req_v && !entry_match && !prev_enq_match
204    canAlloc(i) := needAlloc(i) && allocPtr >= deqPtrExt && io.enable
205
206    when(canAlloc(i)) {
207      valids(allocPtr.value) := true.B
208      entries(allocPtr.value) := req
209    }
210  }
211  val allocNum = PopCount(canAlloc)
212
213  enqPtrExt.foreach{case x => when(canAlloc.asUInt.orR) {x := x + allocNum} }
214
215  // deq
216  io.train_req.valid := false.B
217  io.train_req.bits := DontCare
218  valids.zip(entries).zipWithIndex.foreach {
219    case((valid, entry), i) => {
220      when(deqPtr === i.U) {
221        io.train_req.valid := valid && io.enable
222        io.train_req.bits := entry
223      }
224    }
225  }
226
227  when(io.train_req.fire) {
228    valids(deqPtr) := false.B
229    deqPtrExt := deqPtrExt + 1.U
230  }
231
232  when(RegNext(io.flush)) {
233    valids.foreach {case valid => valid := false.B}
234    (0 until enqLen).map {case i => enqPtrExt(i) := i.U.asTypeOf(new Ptr)}
235    deqPtrExt := 0.U.asTypeOf(new Ptr)
236  }
237
238  XSPerfAccumulate(s"${name}_train_filter_full", PopCount(valids) === size.U)
239  XSPerfAccumulate(s"${name}_train_filter_half", PopCount(valids) >= (size / 2).U)
240  XSPerfAccumulate(s"${name}_train_filter_empty", PopCount(valids) === 0.U)
241
242  val raw_enq_pattern = Cat(reqs_vl)
243  val filtered_enq_pattern = Cat(needAlloc)
244  val actual_enq_pattern = Cat(canAlloc)
245  XSPerfAccumulate(s"${name}_train_filter_enq", allocNum > 0.U)
246  XSPerfAccumulate(s"${name}_train_filter_deq", io.train_req.fire)
247  for(i <- 0 until (1 << enqLen)) {
248    XSPerfAccumulate(s"${name}_train_filter_raw_enq_pattern_${toBinary(i)}", raw_enq_pattern === i.U)
249    XSPerfAccumulate(s"${name}_train_filter_filtered_enq_pattern_${toBinary(i)}", filtered_enq_pattern === i.U)
250    XSPerfAccumulate(s"${name}_train_filter_actual_enq_pattern_${toBinary(i)}", actual_enq_pattern === i.U)
251  }
252}
253
254class MLPReqFilterBundle(implicit p: Parameters) extends XSBundle with HasL1PrefetchHelper {
255  val tag = UInt(HASH_TAG_WIDTH.W)
256  val region = UInt(REGION_TAG_BITS.W)
257  val bit_vec = UInt(BIT_VEC_WITDH.W)
258  // NOTE: L1 does not check sent_vec, so that more prefetch reqs can be issued to the L1 DCache
259  val sent_vec = UInt(BIT_VEC_WITDH.W)
260  val sink = UInt(SINK_BITS.W)
261  val alias = UInt(2.W)
262  val is_vaddr = Bool()
263  val source = new L1PrefetchSource()
264  val debug_va_region = UInt(REGION_TAG_BITS.W)
265
266  def reset(index: Int) = {
267    tag := region_hash_tag(index.U)
268    region := index.U
269    bit_vec := 0.U
270    sent_vec := 0.U
271    sink := SINK_L1
272    alias := 0.U
273    is_vaddr := false.B
274    source.value := L1_HW_PREFETCH_NULL
275    debug_va_region := 0.U
276  }
277
278  def tag_match(valid1: Bool, valid2: Bool, new_tag: UInt): Bool = {
279    require(new_tag.getWidth == HASH_TAG_WIDTH)
280    (tag === new_tag) && valid1 && valid2
281  }
282
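  // Merge a new request into this entry. A numerically smaller sink means a closer cache level
  // (SINK_L1 = 0 < SINK_L2 = 1 < SINK_L3 = 2), so an update targeting a closer level promotes the
  // entry: the old bit vector is trimmed to its not-yet-sent bits, the new bits are ORed in, and
  // the sink is upgraded, allowing blocks already sent to a farther level to be requested again.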
283  def update(update_bit_vec: UInt, update_sink: UInt) = {
284    bit_vec := bit_vec | update_bit_vec
285    when(update_sink < sink) {
286      bit_vec := (bit_vec & ~sent_vec) | update_bit_vec
287      sink := update_sink
288    }
289
290    assert(PopCount(update_bit_vec) >= 1.U, "the update vector should contain at least one valid bit")
291  }
292
293  def can_send_pf(valid: Bool): Bool = {
294    Mux(
295      sink === SINK_L1,
296      !is_vaddr && bit_vec.orR,
297      !is_vaddr && (bit_vec & ~sent_vec).orR
298    ) && valid
299  }
300
301  def may_be_replace(valid: Bool): Bool = {
302    // either invalid or has already sent out all reqs
303    !valid || RegNext(PopCount(sent_vec) === BIT_VEC_WITDH.U)
304  }
305
306  def get_pf_addr(): UInt = {
307    require(PAddrBits <= VAddrBits)
308    require((region.getWidth + REGION_BITS + BLOCK_OFFSET) == VAddrBits)
309
310    val candidate = Mux(
311      sink === SINK_L1,
312      PriorityEncoder(bit_vec).asTypeOf(UInt(REGION_BITS.W)),
313      PriorityEncoder(bit_vec & ~sent_vec).asTypeOf(UInt(REGION_BITS.W))
314    )
315    Cat(region, candidate, 0.U(BLOCK_OFFSET.W))
316  }
317
318  def get_pf_debug_vaddr(): UInt = {
319    val candidate = Mux(
320      sink === SINK_L1,
321      PriorityEncoder(bit_vec).asTypeOf(UInt(REGION_BITS.W)),
322      PriorityEncoder(bit_vec & ~sent_vec).asTypeOf(UInt(REGION_BITS.W))
323    )
324    Cat(debug_va_region, candidate, 0.U(BLOCK_OFFSET.W))
325  }
326
327  def get_tlb_va(): UInt = {
328    require((region.getWidth + REGION_TAG_OFFSET) == VAddrBits)
329    Cat(region, 0.U(REGION_TAG_OFFSET.W))
330  }
331
332  def fromStreamPrefetchReqBundle(x : StreamPrefetchReqBundle): MLPReqFilterBundle = {
333    require(PAGE_OFFSET >= REGION_TAG_OFFSET, "if the region is larger than 4KB, the alias bits may be incorrect")
334
335    val res = Wire(new MLPReqFilterBundle)
336    res.tag := region_hash_tag(x.region)
337    res.region := x.region
338    res.bit_vec := x.bit_vec
339    res.sent_vec := 0.U
340    res.sink := x.sink
341    res.is_vaddr := true.B
342    res.source := x.source
343    res.alias := x.region(PAGE_OFFSET - REGION_TAG_OFFSET + 1, PAGE_OFFSET - REGION_TAG_OFFSET)
344    res.debug_va_region := x.region
345
346    res
347  }
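  // Note on the alias bits above: with the default 64B-block / 1KB-region layout
  // (REGION_TAG_OFFSET = 10, PAGE_OFFSET = 12), region(3, 2) are vaddr bits 13:12, the bits the
  // VIPT DCache uses to resolve virtual-index aliasing once the region has been translated.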
348
349  def invalidate() = {
350    // disable sending pf req
351    when(sink === SINK_L1) {
352      bit_vec := 0.U(BIT_VEC_WITDH.W)
353    }.otherwise {
354      sent_vec := ~(0.U(BIT_VEC_WITDH.W))
355    }
356    // disable sending tlb req
357    is_vaddr := false.B
358  }
359}
360
361// there are 5 independent pipelines inside
362// 1. prefetch enqueue
363// 2. tlb request
364// 3. actual l1 prefetch
365// 4. actual l2 prefetch
366// 5. actual l3 prefetch
367class MutiLevelPrefetchFilter(implicit p: Parameters) extends XSModule with HasL1PrefetchHelper {
368  val io = IO(new XSBundle {
369    val enable = Input(Bool())
370    val flush = Input(Bool())
371    val l1_prefetch_req = Flipped(ValidIO(new StreamPrefetchReqBundle))
372    val l2_l3_prefetch_req = Flipped(ValidIO(new StreamPrefetchReqBundle))
373    val tlb_req = new TlbRequestIO(nRespDups = 2)
374    val pmp_resp = Flipped(new PMPRespBundle())
375    val l1_req = DecoupledIO(new L1PrefetchReq())
376    val l2_pf_addr = ValidIO(new L2PrefetchReq())
377    val l3_pf_addr = ValidIO(UInt(PAddrBits.W)) // TODO: l3 pf source
378    val confidence = Input(UInt(1.W))
379    val l2PfqBusy = Input(Bool())
380  })
381
382  val l1_array = Reg(Vec(MLP_L1_SIZE, new MLPReqFilterBundle))
383  val l2_array = Reg(Vec(MLP_L2L3_SIZE, new MLPReqFilterBundle))
384  val l1_valids = RegInit(VecInit(Seq.fill(MLP_L1_SIZE)(false.B)))
385  val l2_valids = RegInit(VecInit(Seq.fill(MLP_L2L3_SIZE)(false.B)))
386
387  def _invalid(e: MLPReqFilterBundle, v: Bool): Unit = {
388    v := false.B
389    e.invalidate()
390  }
391
392  def invalid_array(i: UInt, isL2: Boolean): Unit = {
393    if (isL2) {
394      _invalid(l2_array(i), l2_valids(i))
395    } else {
396      _invalid(l1_array(i), l1_valids(i))
397    }
398  }
399
400  def _reset(e: MLPReqFilterBundle, v: Bool, idx: Int): Unit = {
401    v := false.B
402    // only reset the control (valid) signals; skipping the data reset is friendlier to area
403    // e.reset(idx)
404  }
405
406
407  def reset_array(i: Int, isL2: Boolean): Unit = {
408    if(isL2){
409      _reset(l2_array(i), l2_valids(i), i)
410    }else{
411      _reset(l1_array(i), l1_valids(i), i)
412    }
413  }
414
415  val l1_replacement = new ValidPseudoLRU(MLP_L1_SIZE)
416  val l2_replacement = new ValidPseudoLRU(MLP_L2L3_SIZE)
417  val tlb_req_arb = Module(new RRArbiterInit(new TlbReq, MLP_SIZE))
418  val l1_pf_req_arb = Module(new RRArbiterInit(new Bundle {
419    val req = new L1PrefetchReq
420    val debug_vaddr = UInt(VAddrBits.W)
421  }, MLP_L1_SIZE))
422  val l2_pf_req_arb = Module(new RRArbiterInit(new Bundle {
423    val req = new L2PrefetchReq
424    val debug_vaddr = UInt(VAddrBits.W)
425  }, MLP_L2L3_SIZE))
426  val l3_pf_req_arb = Module(new RRArbiterInit(UInt(PAddrBits.W), MLP_L2L3_SIZE))
427
428  val l1_opt_replace_vec = VecInit(l1_array.zip(l1_valids).map{case (e, v) => e.may_be_replace(v)})
429  val l2_opt_replace_vec = VecInit(l2_array.zip(l2_valids).map{case (e, v) => e.may_be_replace(v)})
430  // if any entry can be replaced (invalid or fully sent), choose among those; otherwise fall back to the PLRU choice
431  val l1_real_replace_vec = Mux(Cat(l1_opt_replace_vec).orR, l1_opt_replace_vec, VecInit(Seq.fill(MLP_L1_SIZE)(true.B)))
432  val l2_real_replace_vec = Mux(Cat(l2_opt_replace_vec).orR, l2_opt_replace_vec, VecInit(Seq.fill(MLP_L2L3_SIZE)(true.B)))
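  // Victim selection: prefer entries that are free to replace (invalid, or with every req already
  // sent out); only when no such entry exists does the PLRU choice decide among all ways.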
433
434  // l1 pf req enq
435  // s0: hash tag match
436  val s0_l1_can_accept = Wire(Bool())
437  val s0_l1_valid = io.l1_prefetch_req.valid && s0_l1_can_accept
438  val s0_l1_region = io.l1_prefetch_req.bits.region
439  val s0_l1_region_hash = region_hash_tag(s0_l1_region)
440  val s0_l1_match_vec = l1_array.zip(l1_valids).map{ case (e, v) => e.tag_match(v, s0_l1_valid, s0_l1_region_hash)}
441  val s0_l1_hit = VecInit(s0_l1_match_vec).asUInt.orR
442  val s0_l1_index = Wire(UInt(log2Up(MLP_L1_SIZE).W))
443  val s0_l1_prefetch_req = (new MLPReqFilterBundle).fromStreamPrefetchReqBundle(io.l1_prefetch_req.bits)
444
445  s0_l1_index := Mux(s0_l1_hit, OHToUInt(VecInit(s0_l1_match_vec).asUInt), l1_replacement.way(l1_real_replace_vec.reverse)._2)
446
447  when(s0_l1_valid) {
448    l1_replacement.access(s0_l1_index)
449  }
450
451  assert(!s0_l1_valid || PopCount(VecInit(s0_l1_match_vec)) <= 1.U, "req region should match no more than 1 entry")
452
453  XSPerfAccumulate("s0_l1_enq_fire", s0_l1_valid)
454  XSPerfAccumulate("s0_l1_enq_valid", io.l1_prefetch_req.valid)
455  XSPerfAccumulate("s0_l1_cannot_enq", io.l1_prefetch_req.valid && !s0_l1_can_accept)
456
457  // s1: alloc or update
458  val s1_l1_valid = RegNext(s0_l1_valid)
459  val s1_l1_region = RegEnable(s0_l1_region, s0_l1_valid)
460  val s1_l1_region_hash = RegEnable(s0_l1_region_hash, s0_l1_valid)
461  val s1_l1_hit = RegEnable(s0_l1_hit, s0_l1_valid)
462  val s1_l1_index = RegEnable(s0_l1_index, s0_l1_valid)
463  val s1_l1_prefetch_req = RegEnable(s0_l1_prefetch_req, s0_l1_valid)
464  val s1_l1_alloc = s1_l1_valid && !s1_l1_hit
465  val s1_l1_update = s1_l1_valid && s1_l1_hit
466  s0_l1_can_accept := !(s1_l1_valid && s1_l1_alloc && (s0_l1_region_hash === s1_l1_region_hash))
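  // Back-to-back reqs hashing to the same region must be serialized: the s1 allocation has not
  // been written into l1_array yet, so an s0 tag match would miss it and allocate a duplicate
  // entry. s0 therefore refuses to accept exactly that case.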
467
468  when(s1_l1_alloc) {
469    l1_valids(s1_l1_index) := true.B
470    l1_array(s1_l1_index) := s1_l1_prefetch_req
471  }.elsewhen(s1_l1_update) {
472    l1_array(s1_l1_index).update(
473      update_bit_vec = s1_l1_prefetch_req.bit_vec,
474      update_sink = s1_l1_prefetch_req.sink
475    )
476  }
477
478  XSPerfAccumulate("s1_l1_enq_valid", s1_l1_valid)
479  XSPerfAccumulate("s1_l1_enq_alloc", s1_l1_alloc)
480  XSPerfAccumulate("s1_l1_enq_update", s1_l1_update)
481  XSPerfAccumulate("l1_hash_conflict", s0_l1_valid && RegNext(s1_l1_valid) && (s0_l1_region =/= RegNext(s1_l1_region)) && (s0_l1_region_hash === RegNext(s1_l1_region_hash)))
482  XSPerfAccumulate("s1_l1_enq_evict_useful_entry", s1_l1_alloc && l1_array(s1_l1_index).can_send_pf(l1_valids(s1_l1_index)))
483
484  // l2 l3 pf req enq
485  // s0: hash tag match
486  val s0_l2_can_accept = Wire(Bool())
487  val s0_l2_valid = io.l2_l3_prefetch_req.valid && s0_l2_can_accept
488  val s0_l2_region = io.l2_l3_prefetch_req.bits.region
489  val s0_l2_region_hash = region_hash_tag(s0_l2_region)
490  val s0_l2_match_vec = l2_array.zip(l2_valids).map{ case (e, v) => e.tag_match(v, s0_l2_valid, s0_l2_region_hash) }
491  val s0_l2_hit = VecInit(s0_l2_match_vec).asUInt.orR
492  val s0_l2_index = Wire(UInt(log2Up(MLP_L2L3_SIZE).W))
493  val s0_l2_prefetch_req = (new MLPReqFilterBundle).fromStreamPrefetchReqBundle(io.l2_l3_prefetch_req.bits)
494
495  s0_l2_index := Mux(s0_l2_hit, OHToUInt(VecInit(s0_l2_match_vec).asUInt), l2_replacement.way(l2_real_replace_vec.reverse)._2)
496
497  when(s0_l2_valid) {
498    l2_replacement.access(s0_l2_index)
499  }
500
501  assert(!s0_l2_valid || PopCount(VecInit(s0_l2_match_vec)) <= 1.U, "req region should match no more than 1 entry")
502
503  XSPerfAccumulate("s0_l2_enq_fire", s0_l2_valid)
504  XSPerfAccumulate("s0_l2_enq_valid", io.l2_l3_prefetch_req.valid)
505  XSPerfAccumulate("s0_l2_cannot_enq", io.l2_l3_prefetch_req.valid && !s0_l2_can_accept)
506
507  // s1: alloc or update
508  val s1_l2_valid = RegNext(s0_l2_valid)
509  val s1_l2_region = RegEnable(s0_l2_region, s0_l2_valid)
510  val s1_l2_region_hash = RegEnable(s0_l2_region_hash, s0_l2_valid)
511  val s1_l2_hit = RegEnable(s0_l2_hit, s0_l2_valid)
512  val s1_l2_index = RegEnable(s0_l2_index, s0_l2_valid)
513  val s1_l2_prefetch_req = RegEnable(s0_l2_prefetch_req, s0_l2_valid)
514  val s1_l2_alloc = s1_l2_valid && !s1_l2_hit
515  val s1_l2_update = s1_l2_valid && s1_l2_hit
516  s0_l2_can_accept := !(s1_l2_valid && s1_l2_alloc && (s0_l2_region_hash === s1_l2_region_hash))
517
518  when(s1_l2_alloc) {
519    l2_valids(s1_l2_index) := true.B
520    l2_array(s1_l2_index) := s1_l2_prefetch_req
521  }.elsewhen(s1_l2_update) {
522    l2_array(s1_l2_index).update(
523      update_bit_vec = s1_l2_prefetch_req.bit_vec,
524      update_sink = s1_l2_prefetch_req.sink
525    )
526  }
527
528  XSPerfAccumulate("s1_l2_enq_valid", s1_l2_valid)
529  XSPerfAccumulate("s1_l2_enq_alloc", s1_l2_alloc)
530  XSPerfAccumulate("s1_l2_enq_update", s1_l2_update)
531  XSPerfAccumulate("l2_hash_conflict", s0_l2_valid && RegNext(s1_l2_valid) && (s0_l2_region =/= RegNext(s1_l2_region)) && (s0_l2_region_hash === RegNext(s1_l2_region_hash)))
532  XSPerfAccumulate("s1_l2_enq_evict_useful_entry", s1_l2_alloc && l2_array(s1_l2_index).can_send_pf(l2_valids(s1_l2_index)))
533
534  // stream pf debug db here
535  // Hit:
536  // only pending = (region_bits & ~filter_bits) are the pending requests
537  // when a PfGen arrives, the newly added requests are new_req = PfGen.region_bits & ~pending
538  // Alloc:
539  // new_req = PfGen.region_bits
540  val stream_pf_trace_debug_table = ChiselDB.createTable("StreamPFTrace" + p(XSCoreParamsKey).HartId.toString, new StreamPFTraceInEntry, basicDB = false)
541  for (i <- 0 until BIT_VEC_WITDH) {
542    // l1 enq log
543    val hit_entry = l1_array(s0_l1_index)
544    val new_req = Mux(
545      s0_l1_hit,
546      io.l1_prefetch_req.bits.bit_vec & ~(hit_entry.bit_vec),
547      io.l1_prefetch_req.bits.bit_vec
548    )
549    val log_enable = s0_l1_valid && new_req(i) && (io.l1_prefetch_req.bits.source.value === L1_HW_PREFETCH_STREAM)
550    val log_data = Wire(new StreamPFTraceInEntry)
551
552    log_data.TriggerPC := io.l1_prefetch_req.bits.trigger_pc
553    log_data.TriggerVaddr := io.l1_prefetch_req.bits.trigger_va
554    log_data.PFVaddr := Cat(s0_l1_region, i.U(REGION_BITS.W), 0.U(log2Up(dcacheParameters.blockBytes).W))
555    log_data.PFSink := s0_l1_prefetch_req.sink
556
557    stream_pf_trace_debug_table.log(
558      data = log_data,
559      en = log_enable,
560      site = "StreamPFTrace",
561      clock = clock,
562      reset = reset
563    )
564  }
565  for (i <- 0 until BIT_VEC_WITDH) {
566    // l2 l3 enq log
567    val hit_entry = l2_array(s0_l2_index)
568    val new_req = Mux(
569      s0_l2_hit,
570      io.l2_l3_prefetch_req.bits.bit_vec & ~(hit_entry.bit_vec),
571      io.l2_l3_prefetch_req.bits.bit_vec
572    )
573    val log_enable = s0_l2_valid && new_req(i) && (io.l2_l3_prefetch_req.bits.source.value === L1_HW_PREFETCH_STREAM)
574    val log_data = Wire(new StreamPFTraceInEntry)
575
576    log_data.TriggerPC := io.l2_l3_prefetch_req.bits.trigger_pc
577    log_data.TriggerVaddr := io.l2_l3_prefetch_req.bits.trigger_va
578    log_data.PFVaddr := Cat(s0_l2_region, i.U(REGION_BITS.W), 0.U(log2Up(dcacheParameters.blockBytes).W))
579    log_data.PFSink := s0_l2_prefetch_req.sink
580
581    stream_pf_trace_debug_table.log(
582      data = log_data,
583      en = log_enable,
584      site = "StreamPFTrace",
585      clock = clock,
586      reset = reset
587    )
588  }
589
590  // tlb req
591  // s0: arb all tlb reqs
592  val s0_tlb_fire_vec = VecInit((0 until MLP_SIZE).map{case i => tlb_req_arb.io.in(i).fire})
593  val s1_tlb_fire_vec = GatedValidRegNext(s0_tlb_fire_vec)
594  val s2_tlb_fire_vec = GatedValidRegNext(s1_tlb_fire_vec)
595  val s3_tlb_fire_vec = GatedValidRegNext(s2_tlb_fire_vec)
596  val not_tlbing_vec = VecInit((0 until MLP_SIZE).map{case i =>
597    !s1_tlb_fire_vec(i) && !s2_tlb_fire_vec(i) && !s3_tlb_fire_vec(i)
598  })
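  // An entry must not issue a second TLB req while one is still in flight. The staged fire
  // vectors (s1/s2/s3) cover the req -> tlb resp -> pmp resp window, and not_tlbing_vec masks
  // the arbiter inputs accordingly.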
599
600  for(i <- 0 until MLP_SIZE) {
601    val l1_evict = s1_l1_alloc && (s1_l1_index === i.U)
602    val l2_evict = s1_l2_alloc && ((s1_l2_index + MLP_L1_SIZE.U) === i.U)
603    if(i < MLP_L1_SIZE) {
604      tlb_req_arb.io.in(i).valid := l1_valids(i) && l1_array(i).is_vaddr && not_tlbing_vec(i) && !l1_evict
605      tlb_req_arb.io.in(i).bits.vaddr := l1_array(i).get_tlb_va()
606    }else {
607      tlb_req_arb.io.in(i).valid := l2_valids(i - MLP_L1_SIZE) && l2_array(i - MLP_L1_SIZE).is_vaddr && not_tlbing_vec(i) && !l2_evict
608      tlb_req_arb.io.in(i).bits.vaddr := l2_array(i - MLP_L1_SIZE).get_tlb_va()
609    }
610    tlb_req_arb.io.in(i).bits.cmd := TlbCmd.read
611    tlb_req_arb.io.in(i).bits.isPrefetch := true.B
612    tlb_req_arb.io.in(i).bits.size := 3.U
613    tlb_req_arb.io.in(i).bits.kill := false.B
614    tlb_req_arb.io.in(i).bits.no_translate := false.B
615    tlb_req_arb.io.in(i).bits.fullva := 0.U
616    tlb_req_arb.io.in(i).bits.checkfullva := false.B
617    tlb_req_arb.io.in(i).bits.memidx := DontCare
618    tlb_req_arb.io.in(i).bits.debug := DontCare
619    tlb_req_arb.io.in(i).bits.hlvx := DontCare
620    tlb_req_arb.io.in(i).bits.hyperinst := DontCare
621    tlb_req_arb.io.in(i).bits.pmp_addr  := DontCare
622  }
623
624  assert(PopCount(s0_tlb_fire_vec) <= 1.U, "s0_tlb_fire_vec should be one-hot or empty")
625
626  // s1: send out the req
627  val s1_tlb_req_valid = GatedValidRegNext(tlb_req_arb.io.out.valid)
628  val s1_tlb_req_bits = RegEnable(tlb_req_arb.io.out.bits, tlb_req_arb.io.out.valid)
629  val s1_tlb_req_index = RegEnable(OHToUInt(s0_tlb_fire_vec.asUInt), tlb_req_arb.io.out.valid)
630  val s1_l1_tlb_evict = s1_l1_alloc && (s1_l1_index === s1_tlb_req_index)
631  val s1_l2_tlb_evict = s1_l2_alloc && ((s1_l2_index + MLP_L1_SIZE.U) === s1_tlb_req_index)
632  val s1_tlb_evict = s1_l1_tlb_evict || s1_l2_tlb_evict
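  // If the entry owning the in-flight TLB req is re-allocated (evicted) by the enq pipeline, the
  // translation would belong to a different region, so the req/resp is dropped instead of applied.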
633  io.tlb_req.req.valid := s1_tlb_req_valid && !s1_tlb_evict
634  io.tlb_req.req.bits := s1_tlb_req_bits
635  io.tlb_req.req_kill := false.B
636  tlb_req_arb.io.out.ready := true.B
637
638  XSPerfAccumulate("s1_tlb_req_sent", io.tlb_req.req.valid)
639  XSPerfAccumulate("s1_tlb_req_evict", s1_tlb_req_valid && s1_tlb_evict)
640
641  // s2: get response from tlb
642  val s2_tlb_resp_valid = io.tlb_req.resp.valid
643  val s2_tlb_resp = io.tlb_req.resp.bits
644  val s2_tlb_update_index = RegEnable(s1_tlb_req_index, s1_tlb_req_valid)
645  val s2_l1_tlb_evict = s1_l1_alloc && (s1_l1_index === s2_tlb_update_index)
646  val s2_l2_tlb_evict = s1_l2_alloc && ((s1_l2_index + MLP_L1_SIZE.U) === s2_tlb_update_index)
647  val s2_tlb_evict = s2_l1_tlb_evict || s2_l2_tlb_evict
648
649  // s3: get pmp response from PMPChecker
650  val s3_tlb_resp_valid = RegNext(s2_tlb_resp_valid)
651  val s3_tlb_resp = RegEnable(s2_tlb_resp, s2_tlb_resp_valid)
652  val s3_tlb_update_index = RegEnable(s2_tlb_update_index, s2_tlb_resp_valid)
653  val s3_tlb_evict = RegNext(s2_tlb_evict)
654  val s3_pmp_resp = io.pmp_resp
655  val s3_update_valid = s3_tlb_resp_valid && !s3_tlb_evict && !s3_tlb_resp.miss
656  val s3_drop = s3_update_valid && (
657    // page/access fault
658    s3_tlb_resp.excp.head.pf.ld || s3_tlb_resp.excp.head.gpf.ld || s3_tlb_resp.excp.head.af.ld ||
659    // uncache
660    s3_pmp_resp.mmio || Pbmt.isUncache(s3_tlb_resp.pbmt.head) ||
661    // pmp access fault
662    s3_pmp_resp.ld
663  )
664  when(s3_tlb_resp_valid && !s3_tlb_evict) {
665    when(s3_tlb_update_index < MLP_L1_SIZE.U) {
666      l1_array(s3_tlb_update_index).is_vaddr := s3_tlb_resp.miss
667
668      when(!s3_tlb_resp.miss) {
669        l1_array(s3_tlb_update_index).region := Cat(0.U((VAddrBits - PAddrBits).W), s3_tlb_resp.paddr.head(s3_tlb_resp.paddr.head.getWidth - 1, REGION_TAG_OFFSET))
670        when(s3_drop) {
671          invalid_array(s3_tlb_update_index, false)
672        }
673      }
674    }.otherwise {
675      val inner_index = s3_tlb_update_index - MLP_L1_SIZE.U
676      l2_array(inner_index).is_vaddr := s3_tlb_resp.miss
677
678      when(!s3_tlb_resp.miss) {
679        l2_array(inner_index).region := Cat(0.U((VAddrBits - PAddrBits).W), s3_tlb_resp.paddr.head(s3_tlb_resp.paddr.head.getWidth - 1, REGION_TAG_OFFSET))
680        when(s3_drop) {
681          invalid_array(inner_index, true)
682        }
683      }
684    }
685  }
686  io.tlb_req.resp.ready := true.B
687
688  XSPerfAccumulate("s3_tlb_resp_valid", s3_tlb_resp_valid)
689  XSPerfAccumulate("s3_tlb_resp_evict", s3_tlb_resp_valid && s3_tlb_evict)
690  XSPerfAccumulate("s3_tlb_resp_miss", s3_tlb_resp_valid && !s3_tlb_evict && s3_tlb_resp.miss)
691  XSPerfAccumulate("s3_tlb_resp_updated", s3_update_valid)
692  XSPerfAccumulate("s3_tlb_resp_page_fault", s3_update_valid && s3_tlb_resp.excp.head.pf.ld)
693  XSPerfAccumulate("s3_tlb_resp_guestpage_fault", s3_update_valid && s3_tlb_resp.excp.head.gpf.ld)
694  XSPerfAccumulate("s3_tlb_resp_access_fault", s3_update_valid && s3_tlb_resp.excp.head.af.ld)
695  XSPerfAccumulate("s3_tlb_resp_pmp_access_fault", s3_update_valid && s3_pmp_resp.ld)
696  XSPerfAccumulate("s3_tlb_resp_uncache", s3_update_valid && (Pbmt.isUncache(s3_tlb_resp.pbmt.head) || s3_pmp_resp.mmio))
697
698  // l1 pf
699  // s0: generate prefetch req paddr per entry, arb them
700  val s0_pf_fire_vec = VecInit((0 until MLP_L1_SIZE).map{case i => l1_pf_req_arb.io.in(i).fire})
701  val s1_pf_fire_vec = GatedValidRegNext(s0_pf_fire_vec)
702
703  val s0_pf_fire = l1_pf_req_arb.io.out.fire
704  val s0_pf_index = l1_pf_req_arb.io.chosen
705  val s0_pf_candidate_oh = get_candidate_oh(l1_pf_req_arb.io.out.bits.req.paddr)
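  // get_candidate_oh turns the chosen paddr back into a one-hot block index within the region;
  // it marks the block as issued in sent_vec here (s0) and clears it from bit_vec at s1 below.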
706
707  for(i <- 0 until MLP_L1_SIZE) {
708    val evict = s1_l1_alloc && (s1_l1_index === i.U)
709    l1_pf_req_arb.io.in(i).valid := l1_array(i).can_send_pf(l1_valids(i)) && !evict
710    l1_pf_req_arb.io.in(i).bits.req.paddr := l1_array(i).get_pf_addr()
711    l1_pf_req_arb.io.in(i).bits.req.alias := l1_array(i).alias
712    l1_pf_req_arb.io.in(i).bits.req.confidence := io.confidence
713    l1_pf_req_arb.io.in(i).bits.req.is_store := false.B
714    l1_pf_req_arb.io.in(i).bits.req.pf_source := l1_array(i).source
715    l1_pf_req_arb.io.in(i).bits.debug_vaddr := l1_array(i).get_pf_debug_vaddr()
716  }
717
718  when(s0_pf_fire) {
719    l1_array(s0_pf_index).sent_vec := l1_array(s0_pf_index).sent_vec | s0_pf_candidate_oh
720  }
721
722  assert(PopCount(s0_pf_fire_vec) <= 1.U, "s0_pf_fire_vec should be one-hot or empty")
723
724  // s1: send out to dcache
725  val s1_pf_valid = Reg(Bool())
726  val s1_pf_bits = RegEnable(l1_pf_req_arb.io.out.bits, l1_pf_req_arb.io.out.fire)
727  val s1_pf_index = RegEnable(s0_pf_index, l1_pf_req_arb.io.out.fire)
728  val s1_pf_candidate_oh = RegEnable(s0_pf_candidate_oh, l1_pf_req_arb.io.out.fire)
729  val s1_pf_evict = s1_l1_alloc && (s1_l1_index === s1_pf_index)
730  val s1_pf_update = s1_l1_update && (s1_l1_index === s1_pf_index)
731  val s1_pf_can_go = io.l1_req.ready && !s1_pf_evict && !s1_pf_update
732  val s1_pf_fire = s1_pf_valid && s1_pf_can_go
733
734  when(s1_pf_can_go) {
735    s1_pf_valid := false.B
736  }
737
738  when(l1_pf_req_arb.io.out.fire) {
739    s1_pf_valid := true.B
740  }
741
742  when(s1_pf_fire) {
743    l1_array(s1_pf_index).bit_vec := l1_array(s1_pf_index).bit_vec & ~s1_pf_candidate_oh
744  }
745
746  val in_pmem = PmemRanges.map(_.cover(s1_pf_bits.req.paddr)).reduce(_ || _)
747  io.l1_req.valid := s1_pf_valid && !s1_pf_evict && !s1_pf_update && in_pmem && io.enable
748  io.l1_req.bits := s1_pf_bits.req
749
750  l1_pf_req_arb.io.out.ready := s1_pf_can_go || !s1_pf_valid
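  // s1 acts as a one-entry output buffer: the arbiter hands over a new req whenever s1 is empty
  // or its req leaves this cycle; otherwise the req is held and replayed. While the owning entry
  // is being allocated or updated by the enq pipeline, sending is stalled for that cycle so the
  // two pipelines never write bit_vec in the same cycle (see the assertion below).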
751
752  assert(!((s1_l1_alloc || s1_l1_update) && s1_pf_fire && (s1_l1_index === s1_pf_index)), "pf pipeline & enq pipeline bit_vec hazard!")
753
754  XSPerfAccumulate("s1_pf_valid", s1_pf_valid)
755  XSPerfAccumulate("s1_pf_block_by_pipe_unready", s1_pf_valid && !io.l1_req.ready)
756  XSPerfAccumulate("s1_pf_block_by_enq_alloc_hazard", s1_pf_valid && s1_pf_evict)
757  XSPerfAccumulate("s1_pf_block_by_enq_update_hazard", s1_pf_valid && s1_pf_update)
758  XSPerfAccumulate("s1_pf_fire", s1_pf_fire)
759
760  // l2 pf
761  // s0: generate prefetch req paddr per entry, arb them, send them out
762  io.l2_pf_addr.valid := l2_pf_req_arb.io.out.valid
763  io.l2_pf_addr.bits := l2_pf_req_arb.io.out.bits.req
764
765  l2_pf_req_arb.io.out.ready := true.B
766
767  for(i <- 0 until MLP_L2L3_SIZE) {
768    val evict = s1_l2_alloc && (s1_l2_index === i.U)
769    l2_pf_req_arb.io.in(i).valid := l2_array(i).can_send_pf(l2_valids(i)) && (l2_array(i).sink === SINK_L2) && !evict
770    l2_pf_req_arb.io.in(i).bits.req.addr := l2_array(i).get_pf_addr()
771    l2_pf_req_arb.io.in(i).bits.req.source := MuxLookup(l2_array(i).source.value, MemReqSource.Prefetch2L2Unknown.id.U)(Seq(
772      L1_HW_PREFETCH_STRIDE -> MemReqSource.Prefetch2L2Stride.id.U,
773      L1_HW_PREFETCH_STREAM -> MemReqSource.Prefetch2L2Stream.id.U
774    ))
775    l2_pf_req_arb.io.in(i).bits.debug_vaddr := l2_array(i).get_pf_debug_vaddr()
776  }
777
778  when(l2_pf_req_arb.io.out.valid) {
779    l2_array(l2_pf_req_arb.io.chosen).sent_vec := l2_array(l2_pf_req_arb.io.chosen).sent_vec | get_candidate_oh(l2_pf_req_arb.io.out.bits.req.addr)
780  }
781
782  val stream_out_debug_table = ChiselDB.createTable("StreamPFTraceOut" + p(XSCoreParamsKey).HartId.toString, new StreamPFTraceOutEntry, basicDB = false)
783  val l1_debug_data = Wire(new StreamPFTraceOutEntry)
784  val l2_debug_data = Wire(new StreamPFTraceOutEntry)
785  l1_debug_data.PFVaddr := l1_pf_req_arb.io.out.bits.debug_vaddr
786  l1_debug_data.PFSink := SINK_L1
787  l2_debug_data.PFVaddr := l2_pf_req_arb.io.out.bits.debug_vaddr
788  l2_debug_data.PFSink := SINK_L2
789
790  stream_out_debug_table.log(
791    data = l1_debug_data,
792    en = l1_pf_req_arb.io.out.fire && (l1_pf_req_arb.io.out.bits.req.pf_source.value === L1_HW_PREFETCH_STREAM),
793    site = "StreamPFTraceOut",
794    clock = clock,
795    reset = reset
796  )
797  stream_out_debug_table.log(
798    data = l2_debug_data,
799    en = l2_pf_req_arb.io.out.fire && (l2_pf_req_arb.io.out.bits.req.source === MemReqSource.Prefetch2L2Stream.id.U),
800    site = "StreamPFTraceOut",
801    clock = clock,
802    reset = reset
803  )
804
805  // last level cache pf
806  // s0: generate prefetch req paddr per entry, arb them, send them out
807  io.l3_pf_addr.valid := l3_pf_req_arb.io.out.valid
808  io.l3_pf_addr.bits := l3_pf_req_arb.io.out.bits
809
810  l3_pf_req_arb.io.out.ready := true.B
811
812  for(i <- 0 until MLP_L2L3_SIZE) {
813    val evict = s1_l2_alloc && (s1_l2_index === i.U)
814    l3_pf_req_arb.io.in(i).valid := l2_array(i).can_send_pf(l2_valids(i)) && (l2_array(i).sink === SINK_L3) && !evict
815    l3_pf_req_arb.io.in(i).bits := l2_array(i).get_pf_addr()
816  }
817
818  when(l3_pf_req_arb.io.out.valid) {
819    l2_array(l3_pf_req_arb.io.chosen).sent_vec := l2_array(l3_pf_req_arb.io.chosen).sent_vec | get_candidate_oh(l3_pf_req_arb.io.out.bits)
820  }
821
822  // reset meta to avoid the multi-hit problem
823  for(i <- 0 until MLP_SIZE) {
824    if(i < MLP_L1_SIZE) {
825      when(RegNext(io.flush)) {
826        reset_array(i, false)
827      }
828    }else {
829      when(RegNext(io.flush)) {
830        reset_array(i - MLP_L1_SIZE, true)
831      }
832    }
833  }
834
835  XSPerfAccumulate("l2_prefetch_queue_busy", io.l2PfqBusy)
836  XSPerfHistogram("filter_active", PopCount(VecInit(
837    l1_array.zip(l1_valids).map{ case (e, v) => e.can_send_pf(v) } ++
838    l2_array.zip(l2_valids).map{ case (e, v) => e.can_send_pf(v) }
839    ).asUInt), true.B, 0, MLP_SIZE, 1)
840  XSPerfHistogram("l1_filter_active", PopCount(VecInit(l1_array.zip(l1_valids).map{ case (e, v) => e.can_send_pf(v)}).asUInt), true.B, 0, MLP_L1_SIZE, 1)
841  XSPerfHistogram("l2_filter_active", PopCount(VecInit(l2_array.zip(l2_valids).map{ case (e, v) => e.can_send_pf(v) && (e.sink === SINK_L2)}).asUInt), true.B, 0, MLP_L2L3_SIZE, 1)
842  XSPerfHistogram("l3_filter_active", PopCount(VecInit(l2_array.zip(l2_valids).map{ case (e, v) => e.can_send_pf(v) && (e.sink === SINK_L3)}).asUInt), true.B, 0, MLP_L2L3_SIZE, 1)
843}
844
845class L1Prefetcher(implicit p: Parameters) extends BasePrefecher with HasStreamPrefetchHelper with HasStridePrefetchHelper {
846  val pf_ctrl = IO(Input(new PrefetchControlBundle))
847  val stride_train = IO(Flipped(Vec(backendParams.LduCnt + backendParams.HyuCnt, ValidIO(new LsPrefetchTrainBundle()))))
848  val l2PfqBusy = IO(Input(Bool()))
849
850  val stride_train_filter = Module(new TrainFilter(STRIDE_FILTER_SIZE, "stride"))
851  val stride_meta_array = Module(new StrideMetaArray)
852  val stream_train_filter = Module(new TrainFilter(STREAM_FILTER_SIZE, "stream"))
853  val stream_bit_vec_array = Module(new StreamBitVectorArray)
854  val pf_queue_filter = Module(new MutiLevelPrefetchFilter)
855
856  // for now, if the prefetcher is disabled, the train and prefetch processes still run, but no reqs are sent out
857  val enable = io.enable
858  val flush = pf_ctrl.flush
859
860  stream_train_filter.io.ld_in.zipWithIndex.foreach {
861    case (ld_in, i) => {
862      ld_in.valid := io.ld_in(i).valid && enable
863      ld_in.bits := io.ld_in(i).bits
864    }
865  }
866  stream_train_filter.io.enable := enable
867  stream_train_filter.io.flush := flush
868
869  stride_train_filter.io.ld_in.zipWithIndex.foreach {
870    case (ld_in, i) => {
871      ld_in.valid := stride_train(i).valid && enable
872      ld_in.bits := stride_train(i).bits
873    }
874  }
875  stride_train_filter.io.enable := enable
876  stride_train_filter.io.flush := flush
877
878  stream_bit_vec_array.io.enable := enable
879  stream_bit_vec_array.io.flush := flush
880  stream_bit_vec_array.io.dynamic_depth := pf_ctrl.dynamic_depth
881  stream_bit_vec_array.io.train_req <> stream_train_filter.io.train_req
882
883  stride_meta_array.io.enable := enable
884  stride_meta_array.io.flush := flush
885  stride_meta_array.io.dynamic_depth := 0.U
886  stride_meta_array.io.train_req <> stride_train_filter.io.train_req
887  stride_meta_array.io.stream_lookup_req <> stream_bit_vec_array.io.stream_lookup_req
888  stride_meta_array.io.stream_lookup_resp <> stream_bit_vec_array.io.stream_lookup_resp
889
890  // stream has higher priority than stride
891  pf_queue_filter.io.l1_prefetch_req.valid := stream_bit_vec_array.io.l1_prefetch_req.valid || stride_meta_array.io.l1_prefetch_req.valid
892  pf_queue_filter.io.l1_prefetch_req.bits := Mux(
893    stream_bit_vec_array.io.l1_prefetch_req.valid,
894    stream_bit_vec_array.io.l1_prefetch_req.bits,
895    stride_meta_array.io.l1_prefetch_req.bits
896  )
897
898  pf_queue_filter.io.l2_l3_prefetch_req.valid := stream_bit_vec_array.io.l2_l3_prefetch_req.valid || stride_meta_array.io.l2_l3_prefetch_req.valid
899  pf_queue_filter.io.l2_l3_prefetch_req.bits := Mux(
900    stream_bit_vec_array.io.l2_l3_prefetch_req.valid,
901    stream_bit_vec_array.io.l2_l3_prefetch_req.bits,
902    stride_meta_array.io.l2_l3_prefetch_req.bits
903  )
904
905  io.l1_req.valid := pf_queue_filter.io.l1_req.valid && enable && pf_ctrl.enable
906  io.l1_req.bits := pf_queue_filter.io.l1_req.bits
907
908  pf_queue_filter.io.l1_req.ready := Mux(pf_ctrl.enable, io.l1_req.ready, true.B)
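  // When L1 prefetching is switched off by pf_ctrl, keep ready asserted so the filter keeps
  // draining its requests instead of backing up while disabled.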
909  pf_queue_filter.io.tlb_req <> io.tlb_req
910  pf_queue_filter.io.pmp_resp := io.pmp_resp
911  pf_queue_filter.io.enable := enable
912  pf_queue_filter.io.flush := flush
913  pf_queue_filter.io.confidence := pf_ctrl.confidence
914  pf_queue_filter.io.l2PfqBusy := l2PfqBusy
915
916  val l2_in_pmem = PmemRanges.map(_.cover(pf_queue_filter.io.l2_pf_addr.bits.addr)).reduce(_ || _)
917  io.l2_req.valid := pf_queue_filter.io.l2_pf_addr.valid && l2_in_pmem && enable && pf_ctrl.enable
918  io.l2_req.bits := pf_queue_filter.io.l2_pf_addr.bits
919
920  val l3_in_pmem = PmemRanges.map(_.cover(pf_queue_filter.io.l3_pf_addr.bits)).reduce(_ || _)
921  io.l3_req.valid := pf_queue_filter.io.l3_pf_addr.valid && l3_in_pmem && enable && pf_ctrl.enable
922  io.l3_req.bits := pf_queue_filter.io.l3_pf_addr.bits
923}
924