/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package top

import chipsalliance.rocketchip.config.Parameters
import freechips.rocketchip.diplomacy.{AdapterNode, LazyModule, LazyModuleImp}
import freechips.rocketchip.tilelink._
import chisel3._
import chisel3.util._
import utils.{XSPerfAccumulate, XSPerfPrint}
import freechips.rocketchip.tilelink.TLMessages._
import freechips.rocketchip.tilelink.TLPermissions._
import utility.{ReqSourceField, ReqSourceKey, GTimer}
import xiangshan.MemReqSource

/** Pass-through TileLink adapter that attaches performance counters to the bus it sits on.
  *
  * @param name         prefix used for every performance-counter name emitted by this monitor
  * @param stat_latency when true, the implementation additionally measures request latency
  * @param add_reqkey   when true, the manager-side parameters are rebuilt so that they
  *                     advertise `ReqSourceKey` as a request key
  */
class BusPerfMonitor(name: String, stat_latency: Boolean, add_reqkey: Boolean)(implicit p: Parameters) extends LazyModule {
  val node = if (add_reqkey) TLAdapterNode(managerFn = { m =>
    TLSlavePortParameters.v1(
      m.managers.map { mgr =>
        mgr.v2copy()
      },
      requestKeys = Seq(ReqSourceKey),
      // NOTE(review): beatBytes is hard-coded to 32 instead of being taken from the downstream
      // port parameters -- confirm this matches every bus this monitor is placed on.
      beatBytes = 32,
      endSinkId = m.endSinkId
    )
  }) else {
    TLAdapterNode()
  }
  lazy val module = new BusPerfMonitorImp(this, name, stat_latency)
}

/** Hardware implementation of [[BusPerfMonitor]].
  *
  * Connects every inward edge to its outward peer unchanged, registers fire/stall counters for
  * each TileLink channel, and (when `stat_latency` is set) measures the number of timer ticks
  * between a request on channel A and the first beat of its response on channel D.
  */
class BusPerfMonitorImp(outer: BusPerfMonitor, name: String, stat_latency: Boolean)
  extends LazyModuleImp(outer)
{

  // The monitor is transparent: each inward bundle is wired straight to its outward peer.
  outer.node.in.zip(outer.node.out).foreach {
    case ((in, _), (out, _)) =>
      out <> in
  }

  /** Registers fire/stall counters for one channel, both overall and per opcode.
    *
    * Counter names have the form `<clientName>_<channelName>[_<opcode>]_{fire,stall}`, where
    * spaces in the channel/opcode names are replaced by underscores and apostrophes are dropped
    * from the channel name.
    *
    * @param clientName prefix identifying the monitored client/edge
    * @param chn        the decoupled TileLink channel to observe
    */
  def PERF_CHN[T <: TLChannel](clientName: String, chn: DecoupledIO[T]) = {

    val channelName = chn.bits.channelName.replaceAll(" ", "_").replaceAll("'", "")
    val prefix = s"${clientName}_${channelName}"
    XSPerfAccumulate(s"${prefix}_fire", chn.fire)
    XSPerfAccumulate(s"${prefix}_stall", chn.valid && !chn.ready)

    // Channels A-D carry an opcode; channel E does not, so it only gets the overall counters.
    val opcodeInfo: Option[(Seq[String], UInt)] = chn.bits match {
      case bits: TLBundleA => Some((TLMessages.a.map(_._1), bits.opcode))
      case bits: TLBundleB => Some((TLMessages.b.map(_._1), bits.opcode))
      case bits: TLBundleC => Some((TLMessages.c.map(_._1), bits.opcode))
      case bits: TLBundleD => Some((TLMessages.d.map(_._1), bits.opcode))
      case _: TLBundleE => None
    }

    opcodeInfo.foreach { case (opNames, opcode) =>
      // The position of a name in the message table is used as that message's opcode value.
      opNames.zipWithIndex.foreach { case (opRaw, i) =>
        val op = s"${opRaw}".replaceAll(" ", "_")
        XSPerfAccumulate(s"${prefix}_${op}_fire",
          i.U === opcode && chn.fire
        )
        XSPerfAccumulate(s"${prefix}_${op}_stall",
          i.U === opcode && chn.valid && !chn.ready
        )
      }
    }
  }

  for (((in, edgeIn), i) <- outer.node.in.zipWithIndex) {
    val clientName = s"${name}_${edgeIn.master.masters.head.name}_bank_$i"
    PERF_CHN(clientName, in.a)
    PERF_CHN(clientName, in.d)
    // B/C/E only exist on edges that support them.
    if (in.params.hasBCE) {
      PERF_CHN(clientName, in.b)
      PERF_CHN(clientName, in.c)
      PERF_CHN(clientName, in.e)
    }
  }

  if (stat_latency) {
    val nrEdge = outer.node.in.length.toInt

    /** Per-source-id bookkeeping for one outstanding request. */
    class RecordEntry()(implicit p: Parameters) extends Bundle {
      val valid = Bool()
      val timeStamp = UInt(64.W)
      val reqType = UInt(8.W)
    }

    // For simplicity, latency statistic works between nodes with SINGLE edge.
    // Checked before touching `in.head` so that a mis-wired node fails with this message.
    require(nrEdge == 1, s"BusPerfMonitor($name): latency statistics need exactly one edge, got $nrEdge")
    val edgeIn = outer.node.in.head._2

    val timer = GTimer() // presumably a free-running global cycle counter -- see utility.GTimer
    // One record per possible source id.
    val nrSource = 1 << edgeIn.bundle.sourceBits
    val latencyRecord = RegInit(VecInit(Seq.fill(nrSource)(0.U.asTypeOf(new RecordEntry()))))
    val latencySum = RegInit(0.U(128.W))
    val nrRecord = RegInit(0.U(128.W))

    outer.node.in.foreach {
      case (in, edge) =>
        val channelA = in.a
        // Hint and Put requests are not tracked.
        when(channelA.fire &&
          channelA.bits.opcode =/= Hint &&
          channelA.bits.opcode =/= PutFullData &&
          channelA.bits.opcode =/= PutPartialData
        ) {
          // Valid channel A fire, record it
          assert(latencyRecord(channelA.bits.source).valid === false.B)
          latencyRecord(channelA.bits.source).valid := true.B
          latencyRecord(channelA.bits.source).timeStamp := timer
          // Fall back to NoWhere when the edge carries no ReqSourceKey user field.
          latencyRecord(channelA.bits.source).reqType := channelA.bits.user.lift(ReqSourceKey).getOrElse(MemReqSource.NoWhere.id.U)
        }
        val channelD = in.d
        val (first, _, _, _) = edge.count(channelD)
        // Valid channel D fire, resolve it.
        // Only the first beat counts. Responses whose requests were never recorded
        // (HintAck for Hint, AccessAck for Put*) and ReleaseAck are skipped; otherwise they
        // would hit the `valid` assertion below and add bogus latency samples.
        val resolveRecord = channelD.fire && first &&
          channelD.bits.opcode =/= ReleaseAck &&
          channelD.bits.opcode =/= AccessAck &&
          channelD.bits.opcode =/= HintAck
        val latency = WireInit(0.U(64.W))
        when(resolveRecord) {
          assert(latencyRecord(channelD.bits.source).valid === true.B)
          latencyRecord(channelD.bits.source).valid := false.B
          latency := timer - latencyRecord(channelD.bits.source).timeStamp
          // Accumulate the measured latency (not the absolute timestamp).
          latencySum := latencySum + latency
          nrRecord := nrRecord + 1.U
          // printf("timer: %x\n", latency)
        }
        XSPerfAccumulate(name + "_nrRecord_all", resolveRecord)
        XSPerfAccumulate(name + "_latencySum_all", Mux(resolveRecord, latency, 0.U))

        // Same two counters, split by the request type stored when the request was recorded.
        for (j <- 0 until MemReqSource.ReqSourceCount.id) {
          val typeMatch = latencyRecord(channelD.bits.source).reqType === j.U
          XSPerfAccumulate(name + s"_nrRecord_type${j}", resolveRecord && typeMatch)
          XSPerfAccumulate(name + s"_latencySum_type${j}", Mux(resolveRecord && typeMatch, latency, 0.U))
        }
    }
  }

}

object BusPerfMonitor {
  /** Builds the diplomacy node to splice into a TileLink path.
    *
    * @param name         counter-name prefix
    * @param enable       when false, no monitor is instantiated and a `TLTempNode()` is
    *                     returned in its place
    * @param stat_latency forwarded to [[BusPerfMonitor]]
    * @param add_reqkey   forwarded to [[BusPerfMonitor]]
    * @return the monitor's adapter node, or a `TLTempNode()` when disabled
    */
  def apply(
    name: String,
    enable: Boolean = false,
    stat_latency: Boolean = false,
    add_reqkey: Boolean = false)(implicit p: Parameters) =
  {
    if (enable) {
      val busPMU = LazyModule(new BusPerfMonitor(name, stat_latency, add_reqkey))
      busPMU.node
    } else {
      TLTempNode()
    }
  }
}