xref: /XiangShan/src/main/scala/top/BusPerfMonitor.scala (revision d2b20d1a96e238e36a849bd253f65ec7b6a5db38)
/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/
16*d2b20d1aSTang Haojin
171a2cf152SYinan Xupackage top
181a2cf152SYinan Xu
191a2cf152SYinan Xuimport chipsalliance.rocketchip.config.Parameters
2073be64b3SJiawei Linimport freechips.rocketchip.diplomacy.{AdapterNode, LazyModule, LazyModuleImp}
211a2cf152SYinan Xuimport freechips.rocketchip.tilelink._
221a2cf152SYinan Xuimport chisel3._
231a2cf152SYinan Xuimport chisel3.util._
241a2cf152SYinan Xuimport utils.{XSPerfAccumulate, XSPerfPrint}
25*d2b20d1aSTang Haojinimport freechips.rocketchip.tilelink.TLMessages._
26*d2b20d1aSTang Haojinimport freechips.rocketchip.tilelink.TLPermissions._
27*d2b20d1aSTang Haojinimport utility.{ReqSourceField, ReqSourceKey, GTimer}
28*d2b20d1aSTang Haojinimport xiangshan.MemReqSource
291a2cf152SYinan Xu
/** Diplomatic TileLink adapter that hosts the bus performance counters.
  *
  * @param name         prefix handed to the implementation for naming its performance counters
  * @param stat_latency forwarded to the implementation; enables the request-latency statistics
  * @param add_reqkey   when true, the manager-side port parameters shown to upstream nodes are
  *                     rebuilt with `ReqSourceKey` as the request key; when false a default
  *                     `TLAdapterNode()` is used
  */
class BusPerfMonitor(name: String, stat_latency: Boolean, add_reqkey: Boolean)(implicit p: Parameters) extends LazyModule {
  val node = if (add_reqkey) {
    TLAdapterNode(managerFn = { downstream =>
      TLSlavePortParameters.v1(
        downstream.managers.map(_.v2copy()),
        requestKeys = Seq(ReqSourceKey),
        // The implementation connects `out <> in` without any width conversion, so the beat
        // width advertised upstream has to be the downstream one rather than a fixed 32.
        beatBytes = downstream.beatBytes,
        endSinkId = downstream.endSinkId
      )
    })
  } else {
    TLAdapterNode()
  }
  lazy val module = new BusPerfMonitorImp(this, name, stat_latency)
}
451a2cf152SYinan Xu
/** Module implementation of [[BusPerfMonitor]].
  *
  * Wires every inward TileLink port straight to the matching outward port and attaches
  * `XSPerfAccumulate` counters to the inward channels. When `stat_latency` is set it also
  * measures the time between a request firing on channel A and the first beat of its
  * response firing on channel D, keyed by the TileLink source id.
  *
  * @param outer        the lazy module whose node is being monitored
  * @param name         prefix of every performance-counter name
  * @param stat_latency elaborate the latency statistics (requires exactly one inward edge)
  */
class BusPerfMonitorImp(outer: BusPerfMonitor, name: String, stat_latency: Boolean)
  extends LazyModuleImp(outer)
{

  // The monitor is transparent: each inward bundle is connected unchanged to its outward bundle.
  outer.node.in.zip(outer.node.out).foreach {
    case ((in, _), (out, _)) =>
      out <> in
  }

  /** Attaches fire/stall counters to one TileLink channel.
    *
    * Two counters cover the channel as a whole; for channels A-D another fire/stall pair is
    * emitted per opcode name listed in `TLMessages`. Channel E only gets the channel-wide pair.
    *
    * @param clientName prefix of the counter names
    * @param chn        the decoupled channel to observe
    */
  def PERF_CHN[T <: TLChannel](clientName: String, chn: DecoupledIO[T]) = {

    val channelName = chn.bits.channelName.replaceAll(" ", "_").replaceAll("'", "")
    XSPerfAccumulate(s"${clientName}_${channelName}_fire", chn.fire)
    XSPerfAccumulate(s"${clientName}_${channelName}_stall", chn.valid && !chn.ready)

    // Opcode signal together with the opcode names of this channel (None for channel E).
    val opcodeWithNames = chn.bits match {
      case a: TLBundleA => Some((a.opcode, TLMessages.a.map(_._1)))
      case b: TLBundleB => Some((b.opcode, TLMessages.b.map(_._1)))
      case c: TLBundleC => Some((c.opcode, TLMessages.c.map(_._1)))
      case d: TLBundleD => Some((d.opcode, TLMessages.d.map(_._1)))
      case _: TLBundleE => None
    }

    opcodeWithNames.foreach { case (opcode, opNames) =>
      // The position of a name in the list is used as the opcode value it is compared against.
      for ((opRaw, i) <- opNames.zipWithIndex) {
        val op = s"${opRaw}".replaceAll(" ", "_")
        XSPerfAccumulate(s"${clientName}_${channelName}_${op}_fire",
          i.U === opcode && chn.fire
        )
        XSPerfAccumulate(s"${clientName}_${channelName}_${op}_stall",
          i.U === opcode && chn.valid && !chn.ready
        )
      }
    }
  }

  for (((in, edgeIn), i) <- outer.node.in.zipWithIndex) {
    val clientName = s"${name}_${edgeIn.master.masters.head.name}_bank_$i"
    PERF_CHN(clientName, in.a)
    PERF_CHN(clientName, in.d)
    // Channels B, C and E are only monitored when the bundle parameters report them.
    if (in.params.hasBCE) {
      PERF_CHN(clientName, in.b)
      PERF_CHN(clientName, in.c)
      PERF_CHN(clientName, in.e)
    }
  }

  if (stat_latency) {
    // For simplicity, latency statistic works between nodes with SINGLE edge.
    // Checked before touching `head` so a wrong edge count fails on this requirement.
    require(outer.node.in.length == 1)
    val (in, edgeIn) = outer.node.in.head

    /** One outstanding request: whether it is in flight, when it fired, and its request type. */
    class RecordEntry()(implicit p: Parameters) extends Bundle {
      val valid = Bool()
      val timeStamp = UInt(64.W)
      val reqType = UInt(8.W)
    }

    // NOTE(review): GTimer is assumed to be a free-running cycle counter - confirm in `utility`.
    val timer = GTimer()
    // One record per possible source id.
    val nrSource = 1 << edgeIn.bundle.sourceBits
    val latencyRecord = RegInit(VecInit(Seq.fill(nrSource)(0.U.asTypeOf(new RecordEntry()))))
    // Running totals kept in registers; they are not exported through XSPerfAccumulate.
    val latencySum = RegInit(0.U(128.W))
    val nrRecord = RegInit(0.U(128.W))

    val channelA = in.a
    val requestEntry = latencyRecord(channelA.bits.source)
    when(channelA.fire &&
      channelA.bits.opcode =/= Hint &&
      channelA.bits.opcode =/= PutFullData &&
      channelA.bits.opcode =/= PutPartialData
    ) {
      // Valid channel A fire, record it
      assert(requestEntry.valid === false.B)
      requestEntry.valid := true.B
      requestEntry.timeStamp := timer
      // Fall back to NoWhere when the request carries no ReqSourceKey user field.
      requestEntry.reqType := channelA.bits.user.lift(ReqSourceKey).getOrElse(MemReqSource.NoWhere.id.U)
    }

    val channelD = in.d
    val responseEntry = latencyRecord(channelD.bits.source)
    val (first, _, _, _) = edgeIn.count(channelD)
    // Valid channel D fire, resolve it. Only the first beat counts. ReleaseAck, AccessAck and
    // HintAck are skipped: Hint and Put requests are never recorded on channel A above, and
    // no record is made for releases, so those responses have no entry to resolve.
    val resolveRecord = channelD.fire && first &&
      channelD.bits.opcode =/= ReleaseAck &&
      channelD.bits.opcode =/= AccessAck &&
      channelD.bits.opcode =/= HintAck
    val latency = WireInit(0.U(64.W))
    when(resolveRecord) {
      assert(responseEntry.valid === true.B)
      responseEntry.valid := false.B
      latency := timer - responseEntry.timeStamp
      // Accumulate the measured latency, not the absolute timestamp.
      latencySum := latencySum + latency
      nrRecord := nrRecord + 1.U
    }
    XSPerfAccumulate(name + "_nrRecord_all", resolveRecord)
    XSPerfAccumulate(name + "_latencySum_all", Mux(resolveRecord, latency, 0.U))

    // Same two counters, split by the request type stored when the request fired.
    for (j <- 0 until MemReqSource.ReqSourceCount.id) {
      val typeMatch = responseEntry.reqType === j.U
      XSPerfAccumulate(name + s"_nrRecord_type${j}", resolveRecord && typeMatch)
      XSPerfAccumulate(name + s"_latencySum_type${j}", Mux(resolveRecord && typeMatch, latency, 0.U))
    }
  }

}
1741a2cf152SYinan Xu
/** Factory for the diplomatic node of a [[BusPerfMonitor]]. */
object BusPerfMonitor {
  /** Returns the node to splice into a TileLink connection.
    *
    * @param name         prefix passed to the monitor for its performance-counter names
    * @param enable       when false no monitor is created and a `TLTempNode()` is returned instead
    * @param stat_latency passed through to the monitor
    * @param add_reqkey   passed through to the monitor
    */
  def apply(
     name: String,
     enable: Boolean = false,
     stat_latency: Boolean = false,
     add_reqkey: Boolean = false)(implicit p: Parameters) =
  {
    if (!enable) {
      TLTempNode()
    } else {
      val monitor = LazyModule(new BusPerfMonitor(name, stat_latency, add_reqkey))
      monitor.node
    }
  }
}
190