xref: /XiangShan/src/main/scala/top/BusPerfMonitor.scala (revision 1b46b9591920008655d659ac88cd0250db769664)
1/***************************************************************************************
2* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
3* Copyright (c) 2020-2021 Peng Cheng Laboratory
4*
5* XiangShan is licensed under Mulan PSL v2.
6* You can use this software according to the terms and conditions of the Mulan PSL v2.
7* You may obtain a copy of Mulan PSL v2 at:
8*          http://license.coscl.org.cn/MulanPSL2
9*
10* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13*
14* See the Mulan PSL v2 for more details.
15***************************************************************************************/
16
17package top
18
19import chipsalliance.rocketchip.config.Parameters
20import freechips.rocketchip.diplomacy.{AdapterNode, LazyModule, LazyModuleImp}
21import freechips.rocketchip.tilelink._
22import chisel3._
23import chisel3.util._
24import utils.{XSPerfAccumulate, XSPerfPrint}
25import freechips.rocketchip.tilelink.TLMessages._
26import freechips.rocketchip.tilelink.TLPermissions._
27import utility.{ReqSourceField, ReqSourceKey, GTimer}
28import xiangshan.MemReqSource
29
/** Diplomatic wrapper of the bus performance monitor.
  *
  * Owns a single TileLink adapter node. When `add_reqkey` is set, the manager-side
  * port parameters presented by the node are rebuilt so that `ReqSourceKey` is
  * listed among the request keys; otherwise a default adapter node is used.
  *
  * @param name         prefix forwarded to the implementation for counter names
  * @param stat_latency forwarded to the implementation; enables latency statistics
  * @param add_reqkey   when true, rebuild the manager port parameters with `ReqSourceKey`
  */
class BusPerfMonitor(name: String, stat_latency: Boolean, add_reqkey: Boolean)(implicit p: Parameters) extends LazyModule {
  val node = if (add_reqkey) {
    TLAdapterNode(managerFn = { downstream =>
      // Every manager is copied as-is; only the port-level parameters are rebuilt.
      // NOTE(review): beatBytes is fixed at 32 instead of being taken from the
      // downstream port parameters -- confirm this holds for every bus this
      // monitor is attached to.
      TLSlavePortParameters.v1(
        downstream.managers.map(_.v2copy()),
        beatBytes = 32,
        endSinkId = downstream.endSinkId,
        requestKeys = Seq(ReqSourceKey)
      )
    })
  } else {
    TLAdapterNode()
  }

  lazy val module = new BusPerfMonitorImp(this, name, stat_latency)
}
45
/** Module implementation of [[BusPerfMonitor]].
  *
  * Wires every inward TileLink bundle straight to its outward peer, registers
  * per-channel and per-opcode fire/stall counters through `XSPerfAccumulate`, and,
  * when `stat_latency` is set, tracks the time between a request firing on channel A
  * and the first beat of its response firing on channel D.
  *
  * @param outer        enclosing lazy module whose adapter node is monitored
  * @param name         prefix used in every counter name
  * @param stat_latency when true, elaborate the latency bookkeeping (single edge only)
  */
class BusPerfMonitorImp(outer: BusPerfMonitor, name: String, stat_latency: Boolean)
  extends LazyModuleImp(outer)
{

  // The monitor is transparent: each input bundle drives the matching output bundle.
  outer.node.in.zip(outer.node.out).foreach {
    case ((in, _), (out, _)) =>
      out <> in
  }

  /** Registers fire/stall counters for one TileLink channel.
    *
    * Two counters cover the channel as a whole; for channels A-D an additional
    * fire/stall pair is registered per message name. Channel E gets no per-opcode
    * counters.
    *
    * @param clientName prefix of the generated counter names
    * @param chn        the decoupled channel to observe
    */
  def PERF_CHN[T <: TLChannel](clientName: String, chn: DecoupledIO[T]) = {

    val channelName = chn.bits.channelName.replaceAll(" ", "_").replaceAll("'", "")
    // "Stall" means the sender offers a beat that the receiver does not accept.
    val stalled = chn.valid && !chn.ready
    XSPerfAccumulate(s"${clientName}_${channelName}_fire", chn.fire)
    XSPerfAccumulate(s"${clientName}_${channelName}_stall", stalled)

    // Opcode field together with the channel's message names. The position of a
    // name in the list is compared against the opcode value below, so the lists
    // are expected to be ordered by opcode encoding.
    val opcodeAndNames = chn.bits match {
      case a: TLBundleA => Some((a.opcode, TLMessages.a.map(_._1)))
      case b: TLBundleB => Some((b.opcode, TLMessages.b.map(_._1)))
      case c: TLBundleC => Some((c.opcode, TLMessages.c.map(_._1)))
      case d: TLBundleD => Some((d.opcode, TLMessages.d.map(_._1)))
      case _: TLBundleE => None
    }

    opcodeAndNames.foreach { case (opcode, opNames) =>
      for ((opRaw, i) <- opNames.zipWithIndex) {
        val op = s"${opRaw}".replaceAll(" ", "_")
        val isOp = i.U === opcode
        XSPerfAccumulate(s"${clientName}_${channelName}_${op}_fire", isOp && chn.fire)
        XSPerfAccumulate(s"${clientName}_${channelName}_${op}_stall", isOp && stalled)
      }
    }
  }

  for (((in, edgeIn), i) <- outer.node.in.zipWithIndex) {
    val clientName = s"${name}_${edgeIn.master.masters.head.name}_bank_$i"
    PERF_CHN(clientName, in.a)
    PERF_CHN(clientName, in.d)
    if (in.params.hasBCE) {
      PERF_CHN(clientName, in.b)
      PERF_CHN(clientName, in.c)
      PERF_CHN(clientName, in.e)
    }
  }

  if (stat_latency) {
    // For simplicity, latency statistic works between nodes with SINGLE edge.
    // Checked before `head` is taken so a misconfiguration reports this message
    // instead of a bare NoSuchElementException.
    val nrEdge = outer.node.in.length
    require(nrEdge == 1,
      s"BusPerfMonitor '$name': latency statistics need exactly one edge, got $nrEdge")

    val (in, edgeIn) = outer.node.in.head

    // One outstanding-request slot; the table below holds one per source id.
    class RecordEntry()(implicit p: Parameters) extends Bundle {
      val valid = Bool()
      val timeStamp = UInt(64.W)
      val reqType = UInt(8.W)
    }

    val timer = GTimer()
    val nrSource = 1 << edgeIn.bundle.sourceBits
    val latencyRecord = RegInit(VecInit(Seq.fill(nrSource)(0.U.asTypeOf(new RecordEntry()))))
    val latencySum = RegInit(0.U(128.W))
    val nrRecord = RegInit(0.U(128.W))

    // Channel A: start timing every request except Hint and the two Put flavours.
    val channelA = in.a
    val recordRequest = channelA.fire &&
      channelA.bits.opcode =/= Hint &&
      channelA.bits.opcode =/= PutFullData &&
      channelA.bits.opcode =/= PutPartialData
    val reqEntry = latencyRecord(channelA.bits.source)
    when(recordRequest) {
      // Valid channel A fire, record it
      assert(reqEntry.valid === false.B, "channel A reuses a source id that is still being timed")
      reqEntry.valid := true.B
      reqEntry.timeStamp := timer
      reqEntry.reqType := channelA.bits.user.lift(ReqSourceKey).getOrElse(MemReqSource.NoWhere.id.U)
    }

    // Channel D: stop timing on the first beat of a response. ReleaseAck answers a
    // channel C release, AccessAck answers a Put and HintAck answers a Hint; none of
    // those requests is recorded above, so their responses must not resolve a record.
    val channelD = in.d
    val (first, _, _, _) = edgeIn.count(channelD)
    val resolveRecord = channelD.fire && first &&
      channelD.bits.opcode =/= ReleaseAck &&
      channelD.bits.opcode =/= AccessAck &&
      channelD.bits.opcode =/= HintAck
    val respEntry = latencyRecord(channelD.bits.source)
    val latency = WireInit(0.U(64.W))
    when(resolveRecord) {
      // Valid channel D fire, resolve it
      assert(respEntry.valid === true.B, "channel D response has no recorded channel A request")
      respEntry.valid := false.B
      latency := timer - respEntry.timeStamp
      // Accumulate the measured latency, not the absolute timestamp.
      latencySum := latencySum + latency
      nrRecord := nrRecord + 1.U
    }
    XSPerfAccumulate(name + "_nrRecord_all", resolveRecord)
    XSPerfAccumulate(name + "_latencySum_all", Mux(resolveRecord, latency, 0.U))

    // The same two counters again, split by the request type stored with the record.
    for (j <- 0 until MemReqSource.ReqSourceCount.id) {
      val typeMatch = respEntry.reqType === j.U
      XSPerfAccumulate(name + s"_nrRecord_type${j}", resolveRecord && typeMatch)
      XSPerfAccumulate(name + s"_latencySum_type${j}", Mux(resolveRecord && typeMatch, latency, 0.U))
    }
  }

}
174
/** Factory for the TileLink node to splice into a bus path. */
object BusPerfMonitor {
  /** Returns the node of a freshly created [[BusPerfMonitor]] when `enable` is set,
    * and a `TLTempNode()` otherwise, so no monitor module is created when disabled.
    *
    * @param name         prefix for the monitor's counter names
    * @param enable       whether to create the monitor at all
    * @param stat_latency passed to the monitor's constructor
    * @param add_reqkey   passed to the monitor's constructor
    */
  def apply(
     name: String,
     enable: Boolean = false,
     stat_latency: Boolean = false,
     add_reqkey: Boolean = false)(implicit p: Parameters) =
  {
    if (enable)
      LazyModule(new BusPerfMonitor(name, stat_latency, add_reqkey)).node
    else
      TLTempNode()
  }
}
190