/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.cache.mmu

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import xiangshan._
import xiangshan.cache.{HasDCacheParameters, MemoryOpConstants}
import utils._
import utility._
import freechips.rocketchip.diplomacy.{LazyModule, LazyModuleImp}
import freechips.rocketchip.tilelink._

class PTWRepeaterIO(Width: Int)(implicit p: Parameters) extends MMUIOBaseBundle {
  val tlb = Flipped(new TlbPtwIO(Width))
  val ptw = new TlbPtwIO

  def apply(tlb: TlbPtwIO, ptw: TlbPtwIO, sfence: SfenceBundle, csr: TlbCsrBundle): Unit = {
    this.tlb <> tlb
    this.ptw <> ptw
    this.sfence <> sfence
    this.csr <> csr
  }

  def apply(tlb: TlbPtwIO, sfence: SfenceBundle, csr: TlbCsrBundle): Unit = {
    this.tlb <> tlb
    this.sfence <> sfence
    this.csr <> csr
  }

}

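// PTWRepeater: a blocking repeater between a TLB and the next PTW level.
// It buffers at most one in-flight request (req_in.ready := !haveOne) and
// holds the response until the TLB takes it, so there is never more than
// one outstanding walk through this stage.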
class PTWRepeater(Width: Int = 1, FenceDelay: Int)(implicit p: Parameters) extends XSModule with HasPtwConst {
  val io = IO(new PTWRepeaterIO(Width))

  val req_in = if (Width == 1) {
    io.tlb.req(0)
  } else {
    val arb = Module(new RRArbiterInit(io.tlb.req(0).bits.cloneType, Width))
    arb.io.in <> io.tlb.req
    arb.io.out
  }
  val (tlb, ptw, flush) = (io.tlb, io.ptw, DelayN(io.sfence.valid || io.csr.satp.changed || io.csr.vsatp.changed || io.csr.hgatp.changed, FenceDelay))
  val req = RegEnable(req_in.bits, req_in.fire)
  val resp = RegEnable(ptw.resp.bits, ptw.resp.fire)
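  // Handshake state, tracked with set/clear flags (BoolStopWatch sets when
  // its first argument fires and clears on the second):
  //   haveOne - a request is buffered and not yet answered to the TLB
  //   sent    - the buffered request has been issued to the PTW
  //   recv    - the PTW response for it has been captured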
  val haveOne = BoolStopWatch(req_in.fire, tlb.resp.fire || flush)
  val sent = BoolStopWatch(ptw.req(0).fire, req_in.fire || flush)
  val recv = BoolStopWatch(ptw.resp.fire && haveOne, req_in.fire || flush)

  req_in.ready := !haveOne
  ptw.req(0).valid := haveOne && !sent
  ptw.req(0).bits := req

  tlb.resp.bits := resp
  tlb.resp.valid := haveOne && recv
  ptw.resp.ready := !recv

  XSPerfAccumulate("req_count", ptw.req(0).fire)
  XSPerfAccumulate("tlb_req_cycle", BoolStopWatch(req_in.fire, tlb.resp.fire || flush))
  XSPerfAccumulate("ptw_req_cycle", BoolStopWatch(ptw.req(0).fire, ptw.resp.fire || flush))

  XSDebug(haveOne, p"haveOne:${haveOne} sent:${sent} recv:${recv} sfence:${flush} req:${req} resp:${resp}")
  XSDebug(req_in.valid || io.tlb.resp.valid, p"tlb: ${tlb}\n")
  XSDebug(io.ptw.req(0).valid || io.ptw.resp.valid, p"ptw: ${ptw}\n")
  assert(!RegNext(recv && io.ptw.resp.valid, init = false.B), "re-received ptw.resp")
  XSError(io.ptw.req(0).valid && io.ptw.resp.valid && !flush, "ptw repeater received a resp while still sending a req")
  XSError(io.ptw.resp.valid && (req.vpn =/= io.ptw.resp.bits.s1.entry.tag), "ptw repeater received a resp with the wrong tag")
  XSError(io.ptw.resp.valid && !io.ptw.resp.ready, "ptw resp arrived but the repeater is not ready")
}

/* PTWRepeaterNB: a non-blocking repeater used on the dtlb path.
 * Unlike PTWRepeater, the request (tlb -> ptw) and response (ptw -> tlb)
 * directions are buffered and tracked independently. With passReady = true,
 * a buffered beat can be accepted in the same cycle the previous one is
 * passed downstream.
 */
class PTWRepeaterNB(Width: Int = 1, passReady: Boolean = false, FenceDelay: Int)(implicit p: Parameters) extends XSModule with HasPtwConst {
  val io = IO(new PTWRepeaterIO(Width))

  val req_in = if (Width == 1) {
    io.tlb.req(0)
  } else {
    val arb = Module(new RRArbiterInit(io.tlb.req(0).bits.cloneType, Width))
    arb.io.in <> io.tlb.req
    arb.io.out
  }
  val (tlb, ptw, flush) = (io.tlb, io.ptw, DelayN(io.sfence.valid || io.csr.satp.changed || io.csr.vsatp.changed || io.csr.hgatp.changed, FenceDelay))
  /* Two independent one-entry buffers:
   *   sent: tlb -> repeater -> ptw (request direction)
   *   recv: ptw -> repeater -> tlb (response direction)
   * Unlike PTWRepeater, the two directions do not wait on each other.
   */

  // tlb -> repeater -> ptw
  val req = RegEnable(req_in.bits, req_in.fire)
  val sent = BoolStopWatch(req_in.fire, ptw.req(0).fire || flush)
  req_in.ready := !sent || { if (passReady) ptw.req(0).ready else false.B }
  ptw.req(0).valid := sent
  ptw.req(0).bits := req

  // ptw -> repeater -> tlb
  val resp = RegEnable(ptw.resp.bits, ptw.resp.fire)
  val recv = BoolStopWatch(ptw.resp.fire, tlb.resp.fire || flush)
  ptw.resp.ready := !recv || { if (passReady) tlb.resp.ready else false.B }
  tlb.resp.valid := recv
  tlb.resp.bits := resp

  XSPerfAccumulate("req", req_in.fire)
  XSPerfAccumulate("resp", tlb.resp.fire)
  if (!passReady) {
    XSPerfAccumulate("req_blank", req_in.valid && sent && ptw.req(0).ready)
    XSPerfAccumulate("resp_blank", ptw.resp.valid && recv && tlb.resp.ready)
    XSPerfAccumulate("req_blank_ignore_ready", req_in.valid && sent)
    XSPerfAccumulate("resp_blank_ignore_ready", ptw.resp.valid && recv)
  }
  XSDebug(req_in.valid || io.tlb.resp.valid, p"tlb: ${tlb}\n")
  XSDebug(io.ptw.req(0).valid || io.ptw.resp.valid, p"ptw: ${ptw}\n")
}

class PTWFilterIO(Width: Int, hasHint: Boolean = false)(implicit p: Parameters) extends MMUIOBaseBundle {
  val tlb = Flipped(new VectorTlbPtwIO(Width))
  val ptw = new TlbPtwIO()
  val hint = if (hasHint) Some(new TlbHintIO) else None
  val rob_head_miss_in_tlb = Output(Bool())
  val debugTopDown = new Bundle {
    val robHeadVaddr = Flipped(Valid(UInt(VAddrBits.W)))
  }

  def apply(tlb: VectorTlbPtwIO, ptw: TlbPtwIO, sfence: SfenceBundle, csr: TlbCsrBundle): Unit = {
    this.tlb <> tlb
    this.ptw <> ptw
    this.sfence <> sfence
    this.csr <> csr
  }

  def apply(tlb: VectorTlbPtwIO, sfence: SfenceBundle, csr: TlbCsrBundle): Unit = {
    this.tlb <> tlb
    this.sfence <> sfence
    this.csr <> csr
  }

}

class PTWFilterEntryIO(Width: Int, hasHint: Boolean = false)(implicit p: Parameters) extends PTWFilterIO(Width, hasHint) {
  val flush = Input(Bool())
  val refill = Output(Bool())
  val getGpa = Output(Bool())
  val memidx = Output(new MemBlockidxBundle)
}

class PTWFilterEntry(Width: Int, Size: Int, hasHint: Boolean = false)(implicit p: Parameters) extends XSModule with HasPtwConst {
  private val LdExuCnt = backendParams.LdExuCnt

  val io = IO(new PTWFilterEntryIO(Width, hasHint))
  require(isPow2(Size), s"Filter Size ($Size) must be a power of 2")

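  // NOTE: because of Chisel's last-connect semantics, the final matching
  // `when` wins, so despite its name this returns the *highest* matching
  // index rather than the first one.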
  def firstValidIndex(v: Seq[Bool], valid: Bool): UInt = {
    val index = WireInit(0.U(log2Up(Size).W))
    for (i <- 0 until v.size) {
      when (v(i) === valid) {
        index := i.U
      }
    }
    index
  }

  val v = RegInit(VecInit(Seq.fill(Size)(false.B)))
  val sent = RegInit(VecInit(Seq.fill(Size)(false.B)))
  val vpn = Reg(Vec(Size, UInt(vpnLen.W)))
  val s2xlate = Reg(Vec(Size, UInt(2.W)))
  val getGpa = Reg(Vec(Size, Bool()))
  val memidx = Reg(Vec(Size, new MemBlockidxBundle))

  val enqvalid = WireInit(VecInit(Seq.fill(Width)(false.B)))
  val canenq = WireInit(VecInit(Seq.fill(Width)(false.B)))
  val enqidx = WireInit(VecInit(Seq.fill(Width)(0.U(log2Up(Size).W))))


  val entryIsMatchVec = WireInit(VecInit(Seq.fill(Width)(false.B)))
  val entryMatchIndexVec = WireInit(VecInit(Seq.fill(Width)(0.U(log2Up(Size).W))))
  val ptwResp_EntryMatchVec = vpn.zip(v).zip(s2xlate).map{ case ((pi, vi), s2xlatei) => vi && s2xlatei === io.ptw.resp.bits.s2xlate && io.ptw.resp.bits.hit(pi, io.csr.satp.asid, io.csr.vsatp.asid, io.csr.hgatp.vmid, true)}
  val ptwResp_EntryMatchFirst = firstValidIndex(ptwResp_EntryMatchVec, true.B)
  val ptwResp_ReqMatchVec = io.tlb.req.map(a => io.ptw.resp.valid && a.bits.s2xlate === io.ptw.resp.bits.s2xlate && io.ptw.resp.bits.hit(a.bits.vpn, io.csr.satp.asid, io.csr.vsatp.asid, io.csr.hgatp.vmid, true))

  io.refill := Cat(ptwResp_EntryMatchVec).orR && io.ptw.resp.fire
  io.ptw.resp.ready := true.B
  // defaults: the real tlb resp is driven by the enclosing filter,
  // so these outputs are tied off here
  io.tlb.req.map(_.ready := true.B)
  io.tlb.resp.valid := false.B
  io.tlb.resp.bits.data := 0.U.asTypeOf(new PtwRespS2withMemIdx)
  io.tlb.resp.bits.vector := 0.U.asTypeOf(Vec(Width, Bool()))
  io.tlb.resp.bits.getGpa := 0.U.asTypeOf(Vec(Width, Bool()))
  io.memidx := 0.U.asTypeOf(new MemBlockidxBundle)
  io.getGpa := 0.U

  // entry allocation is hard-coded per Width; should be generalized later
  require(Width <= 4, s"DTLB Filter Width ($Width) must be less than or equal to 4")
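  // The Size entries are statically partitioned among the enqueue ports, so
  // each port allocates only from its own slice and ports never collide.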
  if (Width == 1) {
    require(Size == 8, s"prefetch filter Size ($Size) should be 8")
    canenq(0) := !(Cat(v).andR)
    enqidx(0) := firstValidIndex(v, false.B)
  } else if (Width == 2) {
    require(Size == 8, s"store filter Size ($Size) should be 8")
    canenq(0) := !(Cat(v.take(Size/2)).andR)
    enqidx(0) := firstValidIndex(v.take(Size/2), false.B)
    canenq(1) := !(Cat(v.drop(Size/2)).andR)
    enqidx(1) := firstValidIndex(v.drop(Size/2), false.B) + (Size/2).U
  } else if (Width == 3) {
    require(Size == 16, s"load filter Size ($Size) should be 16")
    canenq(0) := !(Cat(v.take(8)).andR)
    enqidx(0) := firstValidIndex(v.take(8), false.B)
    canenq(1) := !(Cat(v.drop(8).take(4)).andR)
    enqidx(1) := firstValidIndex(v.drop(8).take(4), false.B) + 8.U
    // four entries for prefetch
    canenq(2) := !(Cat(v.drop(12)).andR)
    enqidx(2) := firstValidIndex(v.drop(12), false.B) + 12.U
  } else if (Width == 4) {
    require(Size == 16, s"load filter Size ($Size) should be 16")
    for (i <- 0 until Width) {
      canenq(i) := !(Cat(VecInit(v.slice(i * 4, (i + 1) * 4))).andR)
      enqidx(i) := firstValidIndex(v.slice(i * 4, (i + 1) * 4), false.B) + (i * 4).U
    }
  }

  for (i <- 0 until Width) {
    enqvalid(i) := io.tlb.req(i).valid && !ptwResp_ReqMatchVec(i) && !entryIsMatchVec(i) && canenq(i)
    when (!enqvalid(i)) {
      enqidx(i) := entryMatchIndexVec(i)
    }

    val entryIsMatch = vpn.zip(v).zip(s2xlate).map{ case ((pi, vi), s2xlatei) => vi && s2xlatei === io.tlb.req(i).bits.s2xlate && pi === io.tlb.req(i).bits.vpn}
    entryIsMatchVec(i) := Cat(entryIsMatch).orR
    entryMatchIndexVec(i) := firstValidIndex(entryIsMatch, true.B)

    if (i > 0) {
      for (j <- 0 until i) {
        val newIsMatch = io.tlb.req(i).bits.vpn === io.tlb.req(j).bits.vpn && io.tlb.req(i).bits.s2xlate === io.tlb.req(j).bits.s2xlate
        when (newIsMatch && io.tlb.req(j).valid) {
          enqidx(i) := enqidx(j)
          canenq(i) := canenq(j)
          enqvalid(i) := false.B
        }
      }
    }

    when (enqvalid(i)) {
      v(enqidx(i)) := true.B
      sent(enqidx(i)) := false.B
      vpn(enqidx(i)) := io.tlb.req(i).bits.vpn
      s2xlate(enqidx(i)) := io.tlb.req(i).bits.s2xlate
      getGpa(enqidx(i)) := io.tlb.req(i).bits.getGpa
      memidx(enqidx(i)) := io.tlb.req(i).bits.memidx
    }
  }

  val issuevec = v.zip(sent).map{ case (v, s) => v && !s}
  val issueindex = firstValidIndex(issuevec, true.B)
  val canissue = Cat(issuevec).orR
  io.ptw.req(0).valid := canissue
  io.ptw.req(0).bits.vpn := vpn(issueindex)
  io.ptw.req(0).bits.s2xlate := s2xlate(issueindex)
  when (io.ptw.req(0).fire) {
    sent(issueindex) := true.B
  }

  when (io.ptw.resp.fire) {
    v.zip(ptwResp_EntryMatchVec).map{ case (vi, mi) => when (mi) { vi := false.B }}
    io.memidx := memidx(ptwResp_EntryMatchFirst)
    io.getGpa := getGpa(ptwResp_EntryMatchFirst)
  }

  when (io.flush) {
    v.map(_ := false.B)
  }

  if (hasHint) {
    val hintIO = io.hint.getOrElse(new TlbHintIO)
    for (i <- 0 until LdExuCnt) {
      hintIO.req(i).id := enqidx(i)
      hintIO.req(i).full := !canenq(i) || ptwResp_ReqMatchVec(i)
    }
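    // a single ptw resp can wake several pending entries; when more than one
    // matched, ask the load units to replay everything rather than pick one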
    hintIO.resp.valid := io.refill
    hintIO.resp.bits.id := ptwResp_EntryMatchFirst
    hintIO.resp.bits.replay_all := PopCount(ptwResp_EntryMatchVec) > 1.U
  }

  io.rob_head_miss_in_tlb := VecInit(v.zip(vpn).map{case (vi, vpni) => {
    vi && io.debugTopDown.robHeadVaddr.valid && vpni === get_pn(io.debugTopDown.robHeadVaddr.bits)
  }}).asUInt.orR

  // Perf Counter
  val counter = PopCount(v)
  // log2Up(Size + 1) bits so the counter can represent Size itself
  val inflight_counter = RegInit(0.U(log2Up(Size + 1).W))
  val inflight_full = inflight_counter === Size.U
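  // req and resp firing in the same cycle cancel out, so the counter is only
  // updated when exactly one of them fires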
  when (io.ptw.req(0).fire =/= io.ptw.resp.fire) {
    inflight_counter := Mux(io.ptw.req(0).fire, inflight_counter + 1.U, inflight_counter - 1.U)
  }

  assert(inflight_counter <= Size.U, "inflight should be no more than Size")
  when (counter === 0.U) {
    assert(!io.ptw.req(0).fire, "when counter is 0, should not req")
  }

  when (io.flush) {
    inflight_counter := 0.U
  }

  XSPerfAccumulate("tlb_req_count", PopCount(Cat(io.tlb.req.map(_.valid))))
  XSPerfAccumulate("tlb_req_count_filtered", PopCount(enqvalid))
  XSPerfAccumulate("ptw_req_count", io.ptw.req(0).fire)
  XSPerfAccumulate("ptw_req_cycle", inflight_counter)
  XSPerfAccumulate("tlb_resp_count", io.tlb.resp.fire)
  XSPerfAccumulate("ptw_resp_count", io.ptw.resp.fire)
  XSPerfAccumulate("inflight_cycle", Cat(sent).orR)

  for (i <- 0 until Size + 1) {
    XSPerfAccumulate(s"counter${i}", counter === i.U)
  }

}

class PTWNewFilter(Width: Int, Size: Int, FenceDelay: Int)(implicit p: Parameters) extends XSModule with HasPtwConst {
  require(Size >= Width)

  private val LduCnt = backendParams.LduCnt
  private val HyuCnt = backendParams.HyuCnt
  private val StaCnt = backendParams.StaCnt
  // all load execute units, including ldu and hyu
  private val LdExuCnt = backendParams.LdExuCnt
  // all store address execute units, including sta and hyu
  private val StaExuCnt = backendParams.StaExuCnt

  val io = IO(new PTWFilterIO(Width, hasHint = true))

  val load_filter = VecInit(Seq.fill(1) {
    val load_entry = Module(new PTWFilterEntry(Width = LdExuCnt + 1, Size = loadfiltersize, hasHint = true))
    load_entry.io
  })

  val store_filter = VecInit(Seq.fill(1) {
    val store_entry = Module(new PTWFilterEntry(Width = StaCnt, Size = storefiltersize))
    store_entry.io
  })

  val prefetch_filter = VecInit(Seq.fill(1) {
    val prefetch_entry = Module(new PTWFilterEntry(Width = 2, Size = prefetchfiltersize))
    prefetch_entry.io
  })

  val filter = load_filter ++ store_filter ++ prefetch_filter

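  // requestor port layout: ports [0, LdExuCnt] go to the load filter, the
  // next StaCnt ports to the store filter, and the rest to the prefetch filter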
  load_filter.map(_.tlb.req := io.tlb.req.take(LdExuCnt + 1))
  store_filter.map(_.tlb.req := io.tlb.req.drop(LdExuCnt + 1).take(StaCnt))
  prefetch_filter.map(_.tlb.req := io.tlb.req.drop(LdExuCnt + 1 + StaCnt))

  val flush = DelayN(io.sfence.valid || io.csr.satp.changed || (io.csr.priv.virt && io.csr.vsatp.changed), FenceDelay)
  val ptwResp = RegEnable(io.ptw.resp.bits, io.ptw.resp.fire)
  val ptwResp_valid = Cat(filter.map(_.refill)).orR
  filter.map(_.tlb.resp.ready := true.B)
  filter.map(_.ptw.resp.valid := GatedValidRegNext(io.ptw.resp.fire, init = false.B))
  filter.map(_.ptw.resp.bits := ptwResp)
  filter.map(_.flush := flush)
  filter.map(_.sfence := io.sfence)
  filter.map(_.csr := io.csr)
  filter.map(_.debugTopDown.robHeadVaddr := io.debugTopDown.robHeadVaddr)

  io.tlb.req.map(_.ready := true.B)
  io.tlb.resp.valid := ptwResp_valid
  io.tlb.resp.bits.data.s2xlate := ptwResp.s2xlate
  io.tlb.resp.bits.data.getGpa := DontCare // not used
  io.tlb.resp.bits.data.s1 := ptwResp.s1
  io.tlb.resp.bits.data.s2 := ptwResp.s2
  io.tlb.resp.bits.data.memidx := 0.U.asTypeOf(new MemBlockidxBundle)
  // the vector marks which requestor a resp belongs to (e.g. the store DTLB
  // has StaCnt requestors), but currently it only needs to distinguish
  // between the different DTLBs
  for (i <- 0 until Width) {
    io.tlb.resp.bits.vector(i) := false.B
    io.tlb.resp.bits.getGpa(i) := false.B
  }
  io.tlb.resp.bits.vector(0) := load_filter(0).refill
  io.tlb.resp.bits.vector(LdExuCnt + 1) := store_filter(0).refill
  io.tlb.resp.bits.vector(LdExuCnt + 1 + StaCnt) := prefetch_filter(0).refill
  io.tlb.resp.bits.getGpa(0) := load_filter(0).getGpa
  io.tlb.resp.bits.getGpa(LdExuCnt + 1) := store_filter(0).getGpa
  io.tlb.resp.bits.getGpa(LdExuCnt + 1 + StaCnt) := prefetch_filter(0).getGpa

  val hintIO = io.hint.getOrElse(new TlbHintIO)
  val load_hintIO = load_filter(0).hint.getOrElse(new TlbHintIO)
  for (i <- 0 until LdExuCnt) {
    hintIO.req(i) := RegNext(load_hintIO.req(i))
  }
  hintIO.resp.valid := RegNext(load_hintIO.resp.valid)
  hintIO.resp.bits := RegEnable(load_hintIO.resp.bits, load_hintIO.resp.valid)

  when (load_filter(0).refill) {
    io.tlb.resp.bits.vector(0) := true.B
    io.tlb.resp.bits.data.memidx := load_filter(0).memidx
  }
  when (store_filter(0).refill) {
    io.tlb.resp.bits.vector(LdExuCnt + 1) := true.B
    io.tlb.resp.bits.data.memidx := store_filter(0).memidx
  }
  when (prefetch_filter(0).refill) {
    io.tlb.resp.bits.vector(LdExuCnt + 1 + StaCnt) := true.B
    io.tlb.resp.bits.data.memidx := 0.U.asTypeOf(new MemBlockidxBundle)
  }

  val ptw_arb = Module(new RRArbiterInit(new PtwReq, 3))
  for (i <- 0 until 3) {
    ptw_arb.io.in(i).valid := filter(i).ptw.req(0).valid
    ptw_arb.io.in(i).bits.vpn := filter(i).ptw.req(0).bits.vpn
    ptw_arb.io.in(i).bits.s2xlate := filter(i).ptw.req(0).bits.s2xlate
    filter(i).ptw.req(0).ready := ptw_arb.io.in(i).ready
  }
  ptw_arb.io.out.ready := io.ptw.req(0).ready
  io.ptw.req(0).valid := ptw_arb.io.out.valid
  io.ptw.req(0).bits.vpn := ptw_arb.io.out.bits.vpn
  io.ptw.req(0).bits.s2xlate := ptw_arb.io.out.bits.s2xlate
  io.ptw.resp.ready := true.B

  io.rob_head_miss_in_tlb := Cat(filter.map(_.rob_head_miss_in_tlb)).orR
}

class PTWFilter(Width: Int, Size: Int, FenceDelay: Int)(implicit p: Parameters) extends XSModule with HasPtwConst {
  require(Size >= Width)

  val io = IO(new PTWFilterIO(Width))

  val v = RegInit(VecInit(Seq.fill(Size)(false.B)))
  val ports = Reg(Vec(Size, Vec(Width, Bool()))) // records which port(s) an entry came from; may not cover all the ports
  val vpn = Reg(Vec(Size, UInt(vpnLen.W)))
  val s2xlate = Reg(Vec(Size, UInt(2.W)))
  val getGpa = Reg(Vec(Size, Bool()))
  val memidx = Reg(Vec(Size, new MemBlockidxBundle))
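  // circular queue with three pointers: enqPtr allocates, issPtr walks the
  // entries to issue to the PTW, deqPtr reclaims. The mayFull* flags
  // disambiguate full from empty when two pointers are equal.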
  val enqPtr = RegInit(0.U(log2Up(Size).W)) // enqueue
  val issPtr = RegInit(0.U(log2Up(Size).W)) // issue to PTW
  val deqPtr = RegInit(0.U(log2Up(Size).W)) // dequeue
  val mayFullDeq = RegInit(false.B)
  val mayFullIss = RegInit(false.B)
  val counter = RegInit(0.U(log2Up(Size+1).W))
  val flush = DelayN(io.sfence.valid || io.csr.satp.changed || io.csr.vsatp.changed || io.csr.hgatp.changed, FenceDelay)
  val tlb_req = WireInit(io.tlb.req) // NOTE: tlb_req only borrows the type of io.tlb.req; it is re-driven below
  tlb_req.suggestName("tlb_req")

  val inflight_counter = RegInit(0.U(log2Up(Size + 1).W))
  val inflight_full = inflight_counter === Size.U

  def ptwResp_hit(vpn: UInt, s2xlate: UInt, resp: PtwRespS2): Bool = {
    s2xlate === resp.s2xlate && resp.hit(vpn, io.csr.satp.asid, io.csr.vsatp.asid, io.csr.hgatp.vmid, true)
  }

  when (io.ptw.req(0).fire =/= io.ptw.resp.fire) {
    inflight_counter := Mux(io.ptw.req(0).fire, inflight_counter + 1.U, inflight_counter - 1.U)
  }

  val canEnqueue = Wire(Bool()) // NOTE: true only when requests actually enqueue
  val ptwResp = RegEnable(io.ptw.resp.bits, io.ptw.resp.fire)
  val ptwResp_OldMatchVec = vpn.zip(v).zip(s2xlate).map { case ((vpn, v), s2xlate) =>
    v && ptwResp_hit(vpn, s2xlate, io.ptw.resp.bits)
  }
  val ptwResp_valid = GatedValidRegNext(io.ptw.resp.fire && Cat(ptwResp_OldMatchVec).orR, init = false.B)
  // with a sector TLB, repeated requests sharing the same vpn(26, 3) may be sent to the L2 TLB
  val oldMatchVec_early = io.tlb.req.map(a => vpn.zip(v).zip(s2xlate).map{ case ((pi, vi), s2xlate) => vi && pi === a.bits.vpn && s2xlate === a.bits.s2xlate })
  val lastReqMatchVec_early = io.tlb.req.map(a => tlb_req.map{ b => b.valid && b.bits.vpn === a.bits.vpn && canEnqueue && b.bits.s2xlate === a.bits.s2xlate})
  val newMatchVec_early = io.tlb.req.map(a => io.tlb.req.map(b => a.bits.vpn === b.bits.vpn && a.bits.s2xlate === b.bits.s2xlate))

  (0 until Width) foreach { i =>
    tlb_req(i).valid := GatedValidRegNext(io.tlb.req(i).valid &&
      !(ptwResp_valid && ptwResp_hit(io.tlb.req(i).bits.vpn, io.tlb.req(i).bits.s2xlate, ptwResp)) &&
      !Cat(lastReqMatchVec_early(i)).orR,
      init = false.B)
    tlb_req(i).bits := RegEnable(io.tlb.req(i).bits, io.tlb.req(i).valid)
  }

  val oldMatchVec = oldMatchVec_early.map(a => GatedValidRegNext(Cat(a).orR))
  val newMatchVec = (0 until Width).map(i => (0 until Width).map(j =>
    GatedValidRegNext(newMatchVec_early(i)(j)) && tlb_req(j).valid
  ))
  val ptwResp_newMatchVec = tlb_req.map(a =>
    ptwResp_valid && ptwResp_hit(a.bits.vpn, a.bits.s2xlate, ptwResp))

  val oldMatchVec2 = (0 until Width).map(i => oldMatchVec_early(i).map(GatedValidRegNext(_)).map(_ & tlb_req(i).valid))
  val update_ports = v.indices.map(i => oldMatchVec2.map(j => j(i)))
  val ports_init = (0 until Width).map(i => (1 << i).U(Width.W))
  val filter_ports = (0 until Width).map(i => ParallelMux(newMatchVec(i).zip(ports_init).drop(i)))
  val resp_vector = RegEnable(ParallelMux(ptwResp_OldMatchVec zip ports), io.ptw.resp.fire)
  val resp_getGpa = RegEnable(ParallelMux(ptwResp_OldMatchVec zip getGpa), io.ptw.resp.fire)

  def canMerge(index: Int) : Bool = {
    ptwResp_newMatchVec(index) || oldMatchVec(index) ||
    Cat(newMatchVec(index).take(index)).orR
  }

  def filter_req() = {
    val reqs = tlb_req.indices.map{ i =>
      val req = Wire(ValidIO(new PtwReqwithMemIdx()))
      val merge = canMerge(i)
      req.bits := tlb_req(i).bits
      req.valid := !merge && tlb_req(i).valid
      req
    }
    reqs
  }

  val reqs = filter_req()
  val req_ports = filter_ports
  val isFull = enqPtr === deqPtr && mayFullDeq
  val isEmptyDeq = enqPtr === deqPtr && !mayFullDeq
  val isEmptyIss = enqPtr === issPtr && !mayFullIss
  val accumEnqNum = (0 until Width).map(i => PopCount(reqs.take(i).map(_.valid)))
  val enqPtrVecInit = VecInit((0 until Width).map(i => enqPtr + i.U))
  val enqPtrVec = VecInit((0 until Width).map(i => enqPtrVecInit(accumEnqNum(i))))
  val enqNum = PopCount(reqs.map(_.valid))
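  // +& is Chisel's expanding add: the sum keeps the carry bit, so the
  // comparison against Size cannot be corrupted by overflow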
  canEnqueue := counter +& enqNum <= Size.U

  // a requestor may see ready deasserted even though its req is actually
  // accepted after merging/filtering; the filter and the TLB tolerate this
  val enqNum_fake = PopCount(io.tlb.req.map(_.valid))
  val canEnqueue_fake = counter +& enqNum_fake <= Size.U
  io.tlb.req.map(_.ready := canEnqueue_fake) // NOTE: un-fired reqs are simply dropped

  // tlb reqs can be flushed by a ptw resp: the previous resp is checked when
  // latching tlb_req, the current resp is checked here; a flushed req still
  // takes an enqueue slot, but with valid forced to false
  val tlb_req_flushed = reqs.map(a => io.ptw.resp.valid && ptwResp_hit(a.bits.vpn, a.bits.s2xlate, io.ptw.resp.bits))

  io.tlb.resp.valid := ptwResp_valid
  io.tlb.resp.bits.data.s2xlate := ptwResp.s2xlate
  io.tlb.resp.bits.data.s1 := ptwResp.s1
  io.tlb.resp.bits.data.s2 := ptwResp.s2
  io.tlb.resp.bits.data.memidx := RegNext(PriorityMux(ptwResp_OldMatchVec, memidx))
  io.tlb.resp.bits.vector := resp_vector
  io.tlb.resp.bits.data.getGpa := RegNext(PriorityMux(ptwResp_OldMatchVec, getGpa))
  io.tlb.resp.bits.getGpa := DontCare

  val issue_valid = v(issPtr) && !isEmptyIss && !inflight_full
  val issue_filtered = ptwResp_valid && ptwResp_hit(io.ptw.req(0).bits.vpn, io.ptw.req(0).bits.s2xlate, ptwResp)
  val issue_fire_fake = issue_valid && io.ptw.req(0).ready
  io.ptw.req(0).valid := issue_valid && !issue_filtered
  io.ptw.req(0).bits.vpn := vpn(issPtr)
  io.ptw.req(0).bits.s2xlate := s2xlate(issPtr)
  io.ptw.resp.ready := true.B

  reqs.zipWithIndex.map{
    case (req, i) =>
      when (req.valid && canEnqueue) {
        v(enqPtrVec(i)) := !tlb_req_flushed(i)
        vpn(enqPtrVec(i)) := req.bits.vpn
        s2xlate(enqPtrVec(i)) := req.bits.s2xlate
        getGpa(enqPtrVec(i)) := req.bits.getGpa
        memidx(enqPtrVec(i)) := req.bits.memidx
        ports(enqPtrVec(i)) := req_ports(i).asBools
      }
  }
  for (i <- ports.indices) {
    when (v(i)) {
      ports(i) := ports(i).zip(update_ports(i)).map(a => a._1 || a._2)
    }
  }

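  // pointer updates: enq advances by the number of accepted reqs, deq lazily
  // skips entries that have already been invalidated, and iss advances on a
  // (possibly filtered) issue or past an invalidated slot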
  val do_enq = canEnqueue && Cat(reqs.map(_.valid)).orR
  val do_deq = (!v(deqPtr) && !isEmptyDeq)
  val do_iss = issue_fire_fake || (!v(issPtr) && !isEmptyIss)
  when (do_enq) {
    enqPtr := enqPtr + enqNum
  }
  when (do_deq) {
    deqPtr := deqPtr + 1.U
  }
  when (do_iss) {
    issPtr := issPtr + 1.U
  }
  when (issue_fire_fake && issue_filtered) { // issued but filtered out
    v(issPtr) := false.B
  }
  when (do_enq =/= do_deq) {
    mayFullDeq := do_enq
  }
  when (do_enq =/= do_iss) {
    mayFullIss := do_enq
  }

  when (io.ptw.resp.fire) {
    v.zip(ptwResp_OldMatchVec).map{ case (vi, mi) => when (mi) { vi := false.B }}
  }

  counter := counter - do_deq + Mux(do_enq, enqNum, 0.U)
  assert(counter <= Size.U, "counter should be no more than Size")
  assert(inflight_counter <= Size.U, "inflight should be no more than Size")
  when (counter === 0.U) {
    assert(!io.ptw.req(0).fire, "when counter is 0, should not req")
    assert(isEmptyDeq && isEmptyIss, "when counter is 0, should be empty")
  }
  when (counter === Size.U) {
    assert(mayFullDeq, "when counter is Size, should be full")
  }

  when (flush) {
    v.map(_ := false.B)
    deqPtr := 0.U
    enqPtr := 0.U
    issPtr := 0.U
    ptwResp_valid := false.B
    mayFullDeq := false.B
    mayFullIss := false.B
    counter := 0.U
    inflight_counter := 0.U
  }

  val robHeadVaddr = io.debugTopDown.robHeadVaddr
  io.rob_head_miss_in_tlb := VecInit(v.zip(vpn).map{case (vi, vpni) => {
    vi && robHeadVaddr.valid && vpni === get_pn(robHeadVaddr.bits)
  }}).asUInt.orR

  // perf
  XSPerfAccumulate("tlb_req_count", PopCount(Cat(io.tlb.req.map(_.valid))))
  XSPerfAccumulate("tlb_req_count_filtered", Mux(do_enq, accumEnqNum(Width - 1), 0.U))
  XSPerfAccumulate("ptw_req_count", io.ptw.req(0).fire)
  XSPerfAccumulate("ptw_req_cycle", inflight_counter)
  XSPerfAccumulate("tlb_resp_count", io.tlb.resp.fire)
  XSPerfAccumulate("ptw_resp_count", io.ptw.resp.fire)
  XSPerfAccumulate("inflight_cycle", !isEmptyDeq)
  for (i <- 0 until Size + 1) {
    XSPerfAccumulate(s"counter${i}", counter === i.U)
  }
}

object PTWRepeater {
  def apply(fenceDelay: Int,
    tlb: TlbPtwIO,
    sfence: SfenceBundle,
    csr: TlbCsrBundle
  )(implicit p: Parameters) = {
    val width = tlb.req.size
    val repeater = Module(new PTWRepeater(width, fenceDelay))
    repeater.io.apply(tlb, sfence, csr)
    repeater
  }

  def apply(fenceDelay: Int,
    tlb: TlbPtwIO,
    ptw: TlbPtwIO,
    sfence: SfenceBundle,
    csr: TlbCsrBundle
  )(implicit p: Parameters) = {
    val width = tlb.req.size
    val repeater = Module(new PTWRepeater(width, fenceDelay))
    repeater.io.apply(tlb, ptw, sfence, csr)
    repeater
  }
}
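
// A minimal instantiation sketch (hypothetical names: `itlb_ptw`, `sfenceIO`
// and `csrIO` stand for the enclosing module's TlbPtwIO, SfenceBundle and
// TlbCsrBundle wires):
//   val repeater = PTWRepeater(fenceDelay = 1, tlb = itlb_ptw, sfence = sfenceIO, csr = csrIO)
//   io.ptw <> repeater.io.ptw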

object PTWRepeaterNB {
  def apply(passReady: Boolean, fenceDelay: Int,
    tlb: TlbPtwIO,
    sfence: SfenceBundle,
    csr: TlbCsrBundle
  )(implicit p: Parameters) = {
    val width = tlb.req.size
    val repeater = Module(new PTWRepeaterNB(width, passReady, fenceDelay))
    repeater.io.apply(tlb, sfence, csr)
    repeater
  }

  def apply(passReady: Boolean, fenceDelay: Int,
    tlb: TlbPtwIO,
    ptw: TlbPtwIO,
    sfence: SfenceBundle,
    csr: TlbCsrBundle
  )(implicit p: Parameters) = {
    val width = tlb.req.size
    val repeater = Module(new PTWRepeaterNB(width, passReady, fenceDelay))
    repeater.io.apply(tlb, ptw, sfence, csr)
    repeater
  }
}

object PTWFilter {
  def apply(fenceDelay: Int,
    tlb: VectorTlbPtwIO,
    ptw: TlbPtwIO,
    sfence: SfenceBundle,
    csr: TlbCsrBundle,
    size: Int
  )(implicit p: Parameters) = {
    val width = tlb.req.size
    val filter = Module(new PTWFilter(width, size, fenceDelay))
    filter.io.apply(tlb, ptw, sfence, csr)
    filter
  }

  def apply(fenceDelay: Int,
    tlb: VectorTlbPtwIO,
    sfence: SfenceBundle,
    csr: TlbCsrBundle,
    size: Int
  )(implicit p: Parameters) = {
    val width = tlb.req.size
    val filter = Module(new PTWFilter(width, size, fenceDelay))
    filter.io.apply(tlb, sfence, csr)
    filter
  }
}
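
// A minimal instantiation sketch (hypothetical names: `dtlb_ptw` stands for a
// VectorTlbPtwIO port of the enclosing module, `sfenceIO`/`csrIO` as above):
//   val filter = PTWFilter(fenceDelay = 2, tlb = dtlb_ptw, sfence = sfenceIO, csr = csrIO, size = 8)
//   ptwIO <> filter.io.ptw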

object PTWNewFilter {
  def apply(fenceDelay: Int,
            tlb: VectorTlbPtwIO,
            ptw: TlbPtwIO,
            sfence: SfenceBundle,
            csr: TlbCsrBundle,
            size: Int
           )(implicit p: Parameters) = {
    val width = tlb.req.size
    val filter = Module(new PTWNewFilter(width, size, fenceDelay))
    filter.io.apply(tlb, ptw, sfence, csr)
    filter
  }

  def apply(fenceDelay: Int,
            tlb: VectorTlbPtwIO,
            sfence: SfenceBundle,
            csr: TlbCsrBundle,
            size: Int
           )(implicit p: Parameters) = {
    val width = tlb.req.size
    val filter = Module(new PTWNewFilter(width, size, fenceDelay))
    filter.io.apply(tlb, sfence, csr)
    filter
  }
}