/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.cache

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import freechips.rocketchip.tilelink.{TLBundleB, TLEdgeOut, TLMessages, TLPermissions}
import utils.HasTLDump
import utility.{XSDebug, XSPerfAccumulate, HasPerfEvents}

/** Internal form of a TileLink B-channel Probe request, as forwarded to the
  * DCache main pipe.
  *
  * `source` and `opcode` are declared width-less on purpose: Chisel infers
  * their widths from the TileLink bundle fields that drive them in ProbeQueue.
  */
class ProbeReq(implicit p: Parameters) extends DCacheBundle
{
  val source = UInt()                        // TL source id (width inferred)
  val opcode = UInt()                        // TL B-channel opcode (width inferred)
  val addr = UInt(PAddrBits.W)               // physical address of the probed block
  val vaddr = UInt(VAddrBits.W) // l2 uses vaddr index to probe l1
  val param = UInt(TLPermissions.bdWidth.W)  // TL probe permission parameter
  val needData = Bool()                      // whether the probe wants the block's data back

  // probe queue entry ID
  val id = UInt(log2Up(cfg.nProbeEntries).W)

  /** Debug-print this request when `cond` holds (elides vaddr/needData/id). */
  def dump(cond: Bool) = {
    XSDebug(cond, "ProbeReq source: %d opcode: %d addr: %x param: %d\n",
      source, opcode, addr, param)
  }
}

/** Response used to retire a probe queue entry; carries only the entry ID. */
class ProbeResp(implicit p: Parameters) extends DCacheBundle {
  // probe queue entry ID
  val id = UInt(log2Up(cfg.nProbeEntries).W)
}

/** One in-flight probe tracker.
  *
  * Three-state FSM:
  *   s_invalid   -> free, ready to accept a new ProbeReq
  *   s_pipe_req  -> holds a request and tries to issue it to the main pipe,
  *                  unless the block is held by an LR/SC reservation
  *   s_wait_resp -> waits for a pipe_resp with a matching id, then frees itself
  */
class ProbeEntry(implicit p: Parameters) extends DCacheModule {
  val io = IO(new Bundle {
    val req = Flipped(Decoupled(new ProbeReq))
    val pipe_req = DecoupledIO(new MainPipeReq)
    val pipe_resp = Input(Valid(new ProbeResp))
    // address currently locked by an LR (load-reserved); probes to it must stall
    val lrsc_locked_block = Input(Valid(UInt()))
    val id = Input(UInt(log2Up(cfg.nProbeEntries).W))

    // the block we are probing
    val block_addr = Output(Valid(UInt()))
  })

  val s_invalid :: s_pipe_req :: s_wait_resp :: Nil = Enum(3)

  val state = RegInit(s_invalid)

  val req = Reg(new ProbeReq)

  // assign default values to signals
  io.req.ready := false.B
  io.pipe_req.valid := false.B
  io.pipe_req.bits := DontCare

  // expose the probed block while busy so ProbeQueue can detect duplicate probes
  io.block_addr.valid := state =/= s_invalid
  io.block_addr.bits := req.addr

  XSDebug(state =/= s_invalid, "state: %d\n", state)

  XSDebug(state =/= s_invalid, "ProbeEntry: state: %d block_addr: %x\n", state, io.block_addr.bits)

  when (state === s_invalid) {
    io.req.ready := true.B
    when (io.req.fire) {
      req := io.req.bits
      state := s_pipe_req
    }
  }

  // On the allocation cycle `req` has not been latched yet, so compare the
  // incoming bits; afterwards compare the latched copy.
  val lrsc_blocked = Mux(
    io.req.fire,
    io.lrsc_locked_block.valid && get_block(io.lrsc_locked_block.bits) === get_block(io.req.bits.addr),
    io.lrsc_locked_block.valid && get_block(io.lrsc_locked_block.bits) === get_block(req.addr)
  )

  when (state === s_pipe_req) {
    // Note that probe req will be blocked in the next cycle if a lr updates lrsc_locked_block addr
    // in this way, we can RegNext(lrsc_blocked) for better timing
    io.pipe_req.valid := !RegNext(lrsc_blocked)

    val pipe_req = io.pipe_req.bits
    pipe_req := DontCare
    pipe_req.miss := false.B
    pipe_req.probe := true.B
    pipe_req.probe_param := req.param
    pipe_req.addr := req.addr
    pipe_req.vaddr := req.vaddr
    pipe_req.probe_need_data := req.needData
    pipe_req.error := false.B
    pipe_req.id := io.id

    when (io.pipe_req.fire) {
      state := s_wait_resp
    }
  }

  when (state === s_wait_resp) {
    // retire only on a response carrying this entry's id
    when (io.pipe_resp.valid && io.id === io.pipe_resp.bits.id) {
      state := s_invalid
    }
  }

  // performance counters
  XSPerfAccumulate("probe_req", state === s_invalid && io.req.fire)
  XSPerfAccumulate("probe_penalty", state =/= s_invalid)
  XSPerfAccumulate("probe_penalty_blocked_by_lrsc", state === s_pipe_req && io.lrsc_locked_block.valid && get_block(io.lrsc_locked_block.bits) === get_block(req.addr))
  XSPerfAccumulate("probe_penalty_blocked_by_pipeline", state === s_pipe_req && io.pipe_req.valid && !io.pipe_req.ready)
}

/** DCache probe queue.
  *
  * Accepts TileLink B-channel probes from L2 (`mem_probe`), translates them to
  * ProbeReq, parks each one in a free ProbeEntry, arbitrates entry requests,
  * and issues them to the main pipe. The arbitrated request is registered for
  * one cycle before issue so the LR/SC reservation-set comparison gets its own
  * cycle (better timing).
  */
class ProbeQueue(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheModule with HasTLDump with HasPerfEvents
{
  val io = IO(new Bundle {
    val mem_probe = Flipped(Decoupled(new TLBundleB(edge.bundle)))
    val pipe_req = DecoupledIO(new MainPipeReq)
    val lrsc_locked_block = Input(Valid(UInt()))
    // pulses when the reservation set is updated; probes are blocked next cycle
    val update_resv_set = Input(Bool())
  })

  val pipe_req_arb = Module(new Arbiter(new MainPipeReq, cfg.nProbeEntries))

  // allocate a free entry for incoming request
  val primary_ready = Wire(Vec(cfg.nProbeEntries, Bool()))
  val allocate = primary_ready.asUInt.orR
  val alloc_idx = PriorityEncoder(primary_ready)

  // translate to inner req
  val req = Wire(new ProbeReq)
  val alias_addr_frag = io.mem_probe.bits.data(2, 1) // add extra 2 bits from vaddr to get vindex
  req.source := io.mem_probe.bits.source
  req.opcode := io.mem_probe.bits.opcode
  req.addr := io.mem_probe.bits.address
  if(DCacheAboveIndexOffset > DCacheTagOffset) {
    // have alias problem, extra alias bits needed for index
    // NOTE(review): upper bits are marked dontcare because only the index
    // portion of vaddr is consumed downstream — confirm against main pipe use
    req.vaddr := Cat(
      io.mem_probe.bits.address(PAddrBits - 1, DCacheAboveIndexOffset), // dontcare
      alias_addr_frag(DCacheAboveIndexOffset - DCacheTagOffset - 1, 0), // index
      io.mem_probe.bits.address(DCacheTagOffset - 1, 0)                 // index & others
    )
  } else { // no alias problem
    req.vaddr := io.mem_probe.bits.address
  }
  req.param := io.mem_probe.bits.param
  req.needData := io.mem_probe.bits.data(0) // L2 encodes need-data in data bit 0
  req.id := DontCare

  // accept a probe only when some entry is free
  io.mem_probe.ready := allocate

  val entries = (0 until cfg.nProbeEntries) map { i =>
    val entry = Module(new ProbeEntry)
    entry.io.id := i.U

    // entry req: steer the incoming probe to the single selected free entry
    entry.io.req.valid := (i.U === alloc_idx) && allocate && io.mem_probe.valid
    primary_ready(i) := entry.io.req.ready
    entry.io.req.bits := req

    // pipe_req
    pipe_req_arb.io.in(i) <> entry.io.pipe_req

    // pipe_resp: an entry is considered done once its request is accepted
    // into the main pipe (fire), matched by id
    entry.io.pipe_resp.valid := io.pipe_req.fire
    entry.io.pipe_resp.bits.id := io.pipe_req.bits.id

    entry.io.lrsc_locked_block := io.lrsc_locked_block

    entry
  }

  // delay probe req for 1 cycle
  val selected_req_valid = RegInit(false.B)
  val selected_req_bits = RegEnable(pipe_req_arb.io.out.bits, pipe_req_arb.io.out.fire)
  // compare against the request being latched this cycle, or the held one
  val selected_lrsc_blocked = Mux(
    pipe_req_arb.io.out.fire,
    io.lrsc_locked_block.valid && get_block(io.lrsc_locked_block.bits) === get_block(pipe_req_arb.io.out.bits.addr),
    io.lrsc_locked_block.valid && get_block(io.lrsc_locked_block.bits) === get_block(selected_req_bits.addr) && selected_req_valid
  )
  val resvsetProbeBlock = RegNext(io.update_resv_set || selected_lrsc_blocked)
  // When we update update_resv_set, block all probe req in the next cycle
  // It should give Probe reservation set addr compare an independent cycle,
  // which will lead to better timing
  pipe_req_arb.io.out.ready := !selected_req_valid || io.pipe_req.fire
  io.pipe_req.valid := selected_req_valid && !resvsetProbeBlock
  io.pipe_req.bits := selected_req_bits
  // last-connect: if both fire in the same cycle, the arbiter's new grant wins
  // and the register stays valid for the freshly latched request
  when(io.pipe_req.fire){
    selected_req_valid := false.B
  }
  when(pipe_req_arb.io.out.fire){
    selected_req_valid := true.B
  }

  // print all input/output requests for debug purpose
  when (io.mem_probe.valid) {
    // before a probe finishes, L2 should not further issue probes on this block
    val probe_conflict = VecInit(entries.map(e => e.io.block_addr.valid && get_block(e.io.block_addr.bits) === get_block(io.mem_probe.bits.address))).asUInt.orR
    assert (!probe_conflict)
    // for now, we can only deal with ProbeBlock
    assert (io.mem_probe.bits.opcode === TLMessages.Probe)
  }

  // debug output
  XSDebug(io.mem_probe.fire, "mem_probe: ")
  io.mem_probe.bits.dump(io.mem_probe.fire)

//  io.pipe_req.bits.dump(io.pipe_req.fire)

  XSDebug(io.lrsc_locked_block.valid, "lrsc_locked_block: %x\n", io.lrsc_locked_block.bits)
  XSPerfAccumulate("ProbeL1DCache", io.mem_probe.fire)

  // occupancy histogram buckets for the HPM perf-event interface
  val perfValidCount = RegNext(PopCount(entries.map(e => e.io.block_addr.valid)))
  val perfEvents = Seq(
    ("dcache_probq_req      ", io.pipe_req.fire),
    ("dcache_probq_1_4_valid", (perfValidCount < (cfg.nProbeEntries.U/4.U))),
    ("dcache_probq_2_4_valid", (perfValidCount > (cfg.nProbeEntries.U/4.U)) & (perfValidCount <= (cfg.nProbeEntries.U/2.U))),
    ("dcache_probq_3_4_valid", (perfValidCount > (cfg.nProbeEntries.U/2.U)) & (perfValidCount <= (cfg.nProbeEntries.U*3.U/4.U))),
    ("dcache_probq_4_4_valid", (perfValidCount > (cfg.nProbeEntries.U*3.U/4.U))),
  )
  generatePerfEvent()
}