xref: /XiangShan/src/main/scala/xiangshan/cache/dcache/mainpipe/Probe.scala (revision 8b33cd30e0034914b58520e0dc3c0c4b1aad6a03)
1/***************************************************************************************
2* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
3* Copyright (c) 2020-2021 Peng Cheng Laboratory
4*
5* XiangShan is licensed under Mulan PSL v2.
6* You can use this software according to the terms and conditions of the Mulan PSL v2.
7* You may obtain a copy of Mulan PSL v2 at:
8*          http://license.coscl.org.cn/MulanPSL2
9*
10* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13*
14* See the Mulan PSL v2 for more details.
15***************************************************************************************/
16
17package xiangshan.cache
18
19import org.chipsalliance.cde.config.Parameters
20import chisel3._
21import chisel3.util._
22import freechips.rocketchip.tilelink.{TLBundleB, TLEdgeOut, TLMessages, TLPermissions}
23import utils.HasTLDump
24import utility.{XSDebug, XSPerfAccumulate, HasPerfEvents}
25
// A probe request from L2, translated into the DCache's internal format.
class ProbeReq(implicit p: Parameters) extends DCacheBundle
{
  val source = UInt()
  val opcode = UInt()
  val addr   = UInt(PAddrBits.W)
  val vaddr  = UInt(VAddrBits.W) // l2 uses vaddr index to probe l1
  val param  = UInt(TLPermissions.bdWidth.W)
  val needData = Bool()

  // probe queue entry ID
  val id = UInt(log2Up(cfg.nProbeEntries).W)

  // Debug dump of this request when `cond` holds.
  // Fix: vaddr and needData were added to the bundle but never printed,
  // so the dump was incomplete; include them for a faithful trace.
  def dump(cond: Bool) = {
    XSDebug(cond, "ProbeReq source: %d opcode: %d addr: %x vaddr: %x param: %d needData: %b\n",
      source, opcode, addr, vaddr, param, needData)
  }
}
43
// Acknowledgement that a probe has been handed to the main pipeline;
// carries the id of the ProbeEntry that issued it so the entry can be freed.
class ProbeResp(implicit p: Parameters) extends DCacheBundle {
  // probe queue entry ID
  val id = UInt(log2Up(cfg.nProbeEntries).W)
}
48
// One probe queue entry: a 3-state FSM tracking a single in-flight probe.
//   s_invalid --(req fire)--> s_pipe_req --(pipe_req fire)--> s_wait_resp
//   s_wait_resp --(pipe_resp with matching id)--> s_invalid
class ProbeEntry(implicit p: Parameters) extends DCacheModule {
  val io = IO(new Bundle {
    // incoming probe request allocated to this entry by ProbeQueue
    val req = Flipped(Decoupled(new ProbeReq))
    // request forwarded to the main pipeline
    val pipe_req  = DecoupledIO(new MainPipeReq)
    // completion notification, matched against this entry's id
    val pipe_resp = Input(Valid(new ProbeResp))
    // block currently locked by an LR/SC reservation; probes to it must stall
    val lrsc_locked_block = Input(Valid(UInt()))
    // this entry's index, assigned statically by ProbeQueue
    val id = Input(UInt(log2Up(cfg.nProbeEntries).W))

    // the block we are probing
    val block_addr  = Output(Valid(UInt()))
  })

  val s_invalid :: s_pipe_req :: s_wait_resp :: Nil = Enum(3)

  val state = RegInit(s_invalid)

  // latched copy of the request currently being serviced
  val req = Reg(new ProbeReq)

  // assign default values to signals
  io.req.ready      := false.B
  io.pipe_req.valid := false.B
  io.pipe_req.bits  := DontCare

  // expose the probed block address while busy, so ProbeQueue can detect
  // (and assert against) duplicate probes to the same block
  io.block_addr.valid := state =/= s_invalid
  io.block_addr.bits  := req.addr

  XSDebug(state =/= s_invalid, "state: %d\n", state)

  XSDebug(state =/= s_invalid, "ProbeEntry: state: %d block_addr: %x\n", state, io.block_addr.bits)

  when (state === s_invalid) {
    io.req.ready := true.B
    when (io.req.fire) {
      req := io.req.bits
      state := s_pipe_req
    }
  }

  // Does the probed block hit the LR/SC reservation? On the allocation cycle
  // the req register has not been written yet, so compare against the
  // incoming request bits instead of the latched copy.
  val lrsc_blocked = Mux(
    io.req.fire,
    io.lrsc_locked_block.valid && get_block(io.lrsc_locked_block.bits) === get_block(io.req.bits.addr),
    io.lrsc_locked_block.valid && get_block(io.lrsc_locked_block.bits) === get_block(req.addr)
  )

  when (state === s_pipe_req) {
    // Note that probe req will be blocked in the next cycle if a lr updates lrsc_locked_block addr
    // in this way, we can RegNext(lrsc_blocked) for better timing
    io.pipe_req.valid := !RegNext(lrsc_blocked)

    val pipe_req = io.pipe_req.bits
    pipe_req := DontCare
    pipe_req.miss := false.B
    pipe_req.probe := true.B
    pipe_req.probe_param := req.param
    pipe_req.addr   := req.addr
    pipe_req.vaddr  := req.vaddr
    pipe_req.probe_need_data := req.needData
    pipe_req.error := false.B
    pipe_req.id := io.id

    when (io.pipe_req.fire) {
      state := s_wait_resp
    }
  }

  when (state === s_wait_resp) {
    // free the entry once the response carrying our id arrives
    when (io.pipe_resp.valid && io.id === io.pipe_resp.bits.id) {
      state := s_invalid
    }
  }

  // performance counters
  XSPerfAccumulate("probe_req", state === s_invalid && io.req.fire)
  XSPerfAccumulate("probe_penalty", state =/= s_invalid)
  XSPerfAccumulate("probe_penalty_blocked_by_lrsc", state === s_pipe_req && io.lrsc_locked_block.valid && get_block(io.lrsc_locked_block.bits) === get_block(req.addr))
  XSPerfAccumulate("probe_penalty_blocked_by_pipeline", state === s_pipe_req && io.pipe_req.valid && !io.pipe_req.ready)
}
126
// Receives TileLink B-channel probes from L2, allocates them to ProbeEntry
// slots, and arbitrates the entries' requests into the DCache main pipeline.
class ProbeQueue(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheModule with HasTLDump with HasPerfEvents
{
  val io = IO(new Bundle {
    // TileLink B channel from L2
    val mem_probe = Flipped(Decoupled(new TLBundleB(edge.bundle)))
    // arbitrated probe request into the main pipeline
    val pipe_req  = DecoupledIO(new MainPipeReq)
    // block currently held by an LR/SC reservation
    val lrsc_locked_block = Input(Valid(UInt()))
    // pulses when the reservation set is being updated; probes are blocked
    // for one cycle so the address compare gets its own cycle (timing)
    val update_resv_set = Input(Bool())
  })

  val pipe_req_arb = Module(new Arbiter(new MainPipeReq, cfg.nProbeEntries))

  // allocate a free entry for incoming request
  val primary_ready  = Wire(Vec(cfg.nProbeEntries, Bool()))
  val allocate = primary_ready.asUInt.orR
  val alloc_idx = PriorityEncoder(primary_ready)

  // translate to inner req
  val req = Wire(new ProbeReq)
  val alias_addr_frag = io.mem_probe.bits.data(2, 1) // add extra 2 bits from vaddr to get vindex
  req.source := io.mem_probe.bits.source
  req.opcode := io.mem_probe.bits.opcode
  req.addr := io.mem_probe.bits.address
  if(DCacheAboveIndexOffset > DCacheTagOffset) {
    // have alias problem, extra alias bits needed for index
    req.vaddr := Cat(
      io.mem_probe.bits.address(PAddrBits - 1, DCacheAboveIndexOffset), // dontcare
      alias_addr_frag(DCacheAboveIndexOffset - DCacheTagOffset - 1, 0), // index
      io.mem_probe.bits.address(DCacheTagOffset - 1, 0)                 // index & others
    )
  } else { // no alias problem
    req.vaddr := io.mem_probe.bits.address
  }
  req.param := io.mem_probe.bits.param
  req.needData := io.mem_probe.bits.data(0)
  req.id := DontCare

  io.mem_probe.ready := allocate

  val entries = (0 until cfg.nProbeEntries) map { i =>
    val entry = Module(new ProbeEntry)
    entry.io.id := i.U

    // entry req: route the incoming probe to the selected free entry
    entry.io.req.valid := (i.U === alloc_idx) && allocate && io.mem_probe.valid
    primary_ready(i)   := entry.io.req.ready
    entry.io.req.bits  := req

    // pipe_req
    pipe_req_arb.io.in(i) <> entry.io.pipe_req

    // pipe_resp: an entry is released as soon as its request fires into the
    // main pipeline (no separate response channel from the pipe is used here)
    entry.io.pipe_resp.valid := io.pipe_req.fire
    entry.io.pipe_resp.bits.id := io.pipe_req.bits.id

    entry.io.lrsc_locked_block := io.lrsc_locked_block

    entry
  }

  // delay probe req for 1 cycle
  val selected_req_valid = RegInit(false.B)
  val selected_req_bits = RegEnable(pipe_req_arb.io.out.bits, pipe_req_arb.io.out.fire)
  // LR/SC compare on either the request being captured this cycle or the one
  // already held in the delay register
  val selected_lrsc_blocked = Mux(
    pipe_req_arb.io.out.fire,
    io.lrsc_locked_block.valid && get_block(io.lrsc_locked_block.bits) === get_block(pipe_req_arb.io.out.bits.addr),
    io.lrsc_locked_block.valid && get_block(io.lrsc_locked_block.bits) === get_block(selected_req_bits.addr) && selected_req_valid
  )
  val resvsetProbeBlock = RegNext(io.update_resv_set || selected_lrsc_blocked)
  // When we update update_resv_set, block all probe req in the next cycle
  // It should give Probe reservation set addr compare an independent cycle,
  // which will lead to better timing
  pipe_req_arb.io.out.ready := !selected_req_valid || io.pipe_req.fire
  io.pipe_req.valid := selected_req_valid && !resvsetProbeBlock
  io.pipe_req.bits := selected_req_bits
  // NOTE: when the delay register is both drained and refilled in the same
  // cycle, the later assignment wins (last-connect), keeping it valid
  when(io.pipe_req.fire){
    selected_req_valid := false.B
  }
  when(pipe_req_arb.io.out.fire){
    selected_req_valid := true.B
  }

  // print all input/output requests for debug purpose
  when (io.mem_probe.valid) {
    // before a probe finishes, L2 should not further issue probes on this block
    val probe_conflict = VecInit(entries.map(e => e.io.block_addr.valid && get_block(e.io.block_addr.bits) === get_block(io.mem_probe.bits.address))).asUInt.orR
    assert (!probe_conflict)
    // for now, we can only deal with ProbeBlock
    assert (io.mem_probe.bits.opcode === TLMessages.Probe)
  }

  // debug output
  XSDebug(io.mem_probe.fire, "mem_probe: ")
  io.mem_probe.bits.dump(io.mem_probe.fire)

// io.pipe_req.bits.dump(io.pipe_req.fire)

  XSDebug(io.lrsc_locked_block.valid, "lrsc_locked_block: %x\n", io.lrsc_locked_block.bits)
  XSPerfAccumulate("ProbeL1DCache", io.mem_probe.fire)

  // occupancy histogram buckets for the performance-event interface
  val perfValidCount = RegNext(PopCount(entries.map(e => e.io.block_addr.valid)))
  val perfEvents = Seq(
    ("dcache_probq_req      ", io.pipe_req.fire),
    ("dcache_probq_1_4_valid", (perfValidCount < (cfg.nProbeEntries.U/4.U))),
    ("dcache_probq_2_4_valid", (perfValidCount > (cfg.nProbeEntries.U/4.U)) & (perfValidCount <= (cfg.nProbeEntries.U/2.U))),
    ("dcache_probq_3_4_valid", (perfValidCount > (cfg.nProbeEntries.U/2.U)) & (perfValidCount <= (cfg.nProbeEntries.U*3.U/4.U))),
    ("dcache_probq_4_4_valid", (perfValidCount > (cfg.nProbeEntries.U*3.U/4.U))),
  )
  generatePerfEvent()
}
236