/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package top

import chipsalliance.rocketchip.config.Parameters
import freechips.rocketchip.diplomacy.{AdapterNode, LazyModule, LazyModuleImp}
import freechips.rocketchip.tilelink._
import chisel3._
import chisel3.util._
import utils.{XSPerfAccumulate, XSPerfPrint}
import freechips.rocketchip.tilelink.TLMessages._
import freechips.rocketchip.tilelink.TLPermissions._
import utility.{ReqSourceField, ReqSourceKey, GTimer}
import xiangshan.MemReqSource

/** TileLink adapter that forwards traffic unchanged while attaching performance counters.
  *
  * @param name         prefix used for every performance counter created by this monitor
  * @param stat_latency when true, additionally measure request-to-response latency
  * @param add_reqkey   when true, the manager port parameters are rebuilt so that
  *                     `ReqSourceKey` is listed as a request key
  */
class BusPerfMonitor(name: String, stat_latency: Boolean, add_reqkey: Boolean)(implicit p: Parameters) extends LazyModule {
  val node = if (add_reqkey) TLAdapterNode(managerFn = { m =>
    TLSlavePortParameters.v1(
      m.managers.map { m =>
        m.v2copy()
      },
      requestKeys = Seq(ReqSourceKey),
      // NOTE(review): beatBytes is hard-coded rather than taken from `m`;
      // presumably it matches the downstream port width -- confirm.
      beatBytes = 32,
      endSinkId = m.endSinkId
    )
  }) else {
    TLAdapterNode()
  }
  lazy val module = new BusPerfMonitorImp(this, name, stat_latency)
}

/** Implementation of [[BusPerfMonitor]]: wires each inward edge straight through to the
  * matching outward edge and registers per-channel (and optionally latency) counters.
  *
  * @param outer        the enclosing lazy module providing the diplomatic node
  * @param name         prefix used for every performance counter name
  * @param stat_latency when true, elaborate the latency-tracking logic
  */
class BusPerfMonitorImp(outer: BusPerfMonitor, name: String, stat_latency: Boolean)
  extends LazyModuleImp(outer)
{

  // Pure pass-through: the monitor never modifies traffic.
  outer.node.in.zip(outer.node.out).foreach {
    case ((in, edgeIn), (out, edgeOut)) =>
      out <> in
  }

  /** Registers fire/stall counters for one TileLink channel, both in aggregate and
    * broken down per opcode (channel E has no opcode, so it only gets the aggregate pair).
    *
    * @param clientName prefix identifying the client/bank this channel belongs to
    * @param chn        the decoupled channel to observe
    */
  def PERF_CHN[T <: TLChannel](clientName: String, chn: DecoupledIO[T]) = {

    val channelName = chn.bits.channelName.replaceAll(" ", "_").replaceAll("'", "")
    val stalled = chn.valid && !chn.ready
    XSPerfAccumulate(s"${clientName}_${channelName}_fire", chn.fire)
    XSPerfAccumulate(s"${clientName}_${channelName}_stall", stalled)

    // Pair each channel's opcode-name table with its opcode signal; the position of a
    // name in the table is compared against the opcode value below.
    val opcodeInfo = chn.bits match {
      case a: TLBundleA => Some((TLMessages.a.map(_._1), a.opcode))
      case b: TLBundleB => Some((TLMessages.b.map(_._1), b.opcode))
      case c: TLBundleC => Some((TLMessages.c.map(_._1), c.opcode))
      case d: TLBundleD => Some((TLMessages.d.map(_._1), d.opcode))
      case _: TLBundleE => None
    }

    opcodeInfo.foreach { case (ops, opcode) =>
      for ((opRaw, i) <- ops.zipWithIndex) {
        val op = s"${opRaw}".replaceAll(" ", "_")
        XSPerfAccumulate(s"${clientName}_${channelName}_${op}_fire",
          i.U === opcode && chn.fire
        )
        XSPerfAccumulate(s"${clientName}_${channelName}_${op}_stall",
          i.U === opcode && chn.valid && !chn.ready
        )
      }
    }
  }

  for (((in, edgeIn), i) <- outer.node.in.zipWithIndex) {
    val clientName = s"${name}_${edgeIn.master.masters.head.name}_bank_$i"
    PERF_CHN(clientName, in.a)
    PERF_CHN(clientName, in.d)
    if (in.params.hasBCE) {
      PERF_CHN(clientName, in.b)
      PERF_CHN(clientName, in.c)
      PERF_CHN(clientName, in.e)
    }
  }

  if (stat_latency) {
    val nrEdge = outer.node.in.length

    // For simplicity, latency statistic works between nodes with SINGLE edge.
    // Checked before touching `.head` so a misconfiguration reports this requirement
    // instead of a NoSuchElementException.
    require(nrEdge == 1)
    val edgeIn = outer.node.in.head._2

    /** One in-flight request slot, indexed by TileLink source id. */
    class RecordEntry()(implicit p: Parameters) extends Bundle {
      val valid = Bool()
      val timeStamp = UInt(64.W)
      val reqType = UInt(8.W)
    }

    val timer = GTimer()
    // One record slot per possible source id.
    val nrSource = 1 << edgeIn.bundle.sourceBits
    val latencyRecord = RegInit(VecInit(Seq.fill(nrSource)(0.U.asTypeOf(new RecordEntry()))))
    val latencySum = RegInit(0.U(128.W))
    val nrRecord = RegInit(0.U(128.W))

    outer.node.in.foreach {
      case (in, edgeIn) =>
        val channelA = in.a
        // Hint and Put* requests are excluded from latency tracking.
        val recordRequest = channelA.fire &&
          channelA.bits.opcode =/= Hint &&
          channelA.bits.opcode =/= PutFullData &&
          channelA.bits.opcode =/= PutPartialData
        when(recordRequest) {
          // Valid channel A fire, record it
          val entry = latencyRecord(channelA.bits.source)
          assert(entry.valid === false.B)
          entry.valid := true.B
          entry.timeStamp := timer
          entry.reqType := channelA.bits.user.lift(ReqSourceKey).getOrElse(MemReqSource.NoWhere.id.U)
        }

        val channelD = in.d
        val (first, _, _, _) = edgeIn.count(channelD)
        // Valid channel D fire, resolve it (only on the first beat; ReleaseAck and
        // AccessAck responses are not matched against a record).
        val resolveRecord = channelD.fire && first &&
          channelD.bits.opcode =/= ReleaseAck &&
          channelD.bits.opcode =/= AccessAck
        val resolved = latencyRecord(channelD.bits.source)
        val latency = WireInit(0.U(64.W))
        when(resolveRecord) {
          assert(resolved.valid === true.B)
          resolved.valid := false.B
          latency := timer - resolved.timeStamp
          // Accumulate the measured latency (not the absolute timestamp).
          latencySum := latencySum + latency
          nrRecord := nrRecord + 1.U
          // printf("timer: %x\n", latency)
        }
        XSPerfAccumulate(name + "_nrRecord_all", resolveRecord)
        XSPerfAccumulate(name + "_latencySum_all", Mux(resolveRecord, latency, 0.U))

        // Same pair of counters, split by the request type captured on channel A.
        for (j <- 0 until MemReqSource.ReqSourceCount.id) {
          val typeMatch = resolved.reqType === j.U
          XSPerfAccumulate(name + s"_nrRecord_type${j}", resolveRecord && typeMatch)
          XSPerfAccumulate(name + s"_latencySum_type${j}", Mux(resolveRecord && typeMatch, latency, 0.U))
        }
    }
  }

}

object BusPerfMonitor {
  /** Builds the diplomatic node to splice into a TileLink path.
    *
    * @param name         prefix for the monitor's performance counters
    * @param enable       when false, no monitor is instantiated and a `TLTempNode()` is
    *                     returned instead
    * @param stat_latency forwarded to [[BusPerfMonitor]]
    * @param add_reqkey   forwarded to [[BusPerfMonitor]]
    * @return the monitor's node when enabled, otherwise a `TLTempNode()`
    */
  def apply(
     name: String,
     enable: Boolean = false,
     stat_latency: Boolean = false,
     add_reqkey: Boolean = false)(implicit p: Parameters) =
  {
    if(enable){
      val busPMU = LazyModule(new BusPerfMonitor(name, stat_latency, add_reqkey))
      busPMU.node
    } else {
      TLTempNode()
    }
  }
}