/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package top

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import freechips.rocketchip.diplomacy.{LazyModule, LazyModuleImp}
import freechips.rocketchip.tilelink.TLMessages._
import freechips.rocketchip.tilelink._
import utility.{GTimer, MemReqSource, ReqSourceKey, XSPerfAccumulate}

/** Pass-through TileLink adapter that attaches performance counters to a bus.
  *
  * @param name         prefix used for every counter name created by this monitor
  * @param stat_latency when true, request-to-response latency is tracked as well
  *                     (requires the node to have exactly one edge)
  */
class BusPerfMonitor(name: String, stat_latency: Boolean)(implicit p: Parameters) extends LazyModule {
  val node = TLAdapterNode()
  lazy val module = new BusPerfMonitorImp(this, name, stat_latency)
}

/** Implementation of [[BusPerfMonitor]]: connects every input to its output
  * unchanged and registers counters through `XSPerfAccumulate`.
  *
  * @param outer        the enclosing lazy module, whose `node` is observed
  * @param name         prefix for counter names
  * @param stat_latency enables the latency bookkeeping section
  */
class BusPerfMonitorImp(outer: BusPerfMonitor, name: String, stat_latency: Boolean)
  extends LazyModuleImp(outer)
{

  // The monitor is transparent: each output is driven directly from its input.
  outer.node.in.zip(outer.node.out).foreach {
    case ((in, _), (out, _)) =>
      out <> in
  }

  /** Registers fire/stall counters for one TileLink channel, both in total and
    * broken down per opcode.
    *
    * Counter names are `<clientName>_<channelName>[_<op>]_fire|_stall`, where
    * spaces in the channel/op names become underscores and apostrophes are
    * removed from the channel name.
    *
    * @param clientName prefix identifying the client/bank being observed
    * @param chn        the decoupled channel to observe
    */
  def PERF_CHN[T <: TLChannel](clientName: String, chn: DecoupledIO[T]): Unit = {

    val channelName = chn.bits.channelName.replaceAll(" ", "_").replaceAll("'", "")
    XSPerfAccumulate(s"${clientName}_${channelName}_fire", chn.fire)
    XSPerfAccumulate(s"${clientName}_${channelName}_stall", chn.valid && !chn.ready)

    // Opcode signal and the opcode-name table of the channel. Channel E carries
    // no opcode, so it gets no per-opcode counters.
    val opInfo = chn.bits match {
      case bundle: TLBundleA => Some((bundle.opcode, TLMessages.a.map(_._1)))
      case bundle: TLBundleB => Some((bundle.opcode, TLMessages.b.map(_._1)))
      case bundle: TLBundleC => Some((bundle.opcode, TLMessages.c.map(_._1)))
      case bundle: TLBundleD => Some((bundle.opcode, TLMessages.d.map(_._1)))
      case _: TLBundleE => None
    }

    opInfo.foreach { case (opcode, opNames) =>
      // The position of a name in the table is used as its opcode encoding.
      for ((opRaw, i) <- opNames.zipWithIndex) {
        val op = s"${opRaw}".replaceAll(" ", "_")
        val opMatch = i.U === opcode
        XSPerfAccumulate(s"${clientName}_${channelName}_${op}_fire",
          opMatch && chn.fire
        )
        XSPerfAccumulate(s"${clientName}_${channelName}_${op}_stall",
          opMatch && chn.valid && !chn.ready
        )
      }
    }
  }

  // Per-edge channel counters; B/C/E are only observed when the link has them.
  for (((in, edgeIn), i) <- outer.node.in.zipWithIndex) {
    val clientName = s"${name}_${edgeIn.master.masters.head.name}_bank_$i"
    PERF_CHN(clientName, in.a)
    PERF_CHN(clientName, in.d)
    if (in.params.hasBCE) {
      PERF_CHN(clientName, in.b)
      PERF_CHN(clientName, in.c)
      PERF_CHN(clientName, in.e)
    }
  }

  if (stat_latency) {
    // For simplicity, latency statistic works between nodes with SINGLE edge.
    // Checked before touching `head` so a misconfigured monitor is reported by name.
    val nrEdge = outer.node.in.length
    require(nrEdge == 1,
      s"BusPerfMonitor '$name': latency statistics require exactly one edge, found $nrEdge")
    val edgeIn = outer.node.in.head._2

    /** One in-flight request, indexed by its TileLink source id. */
    class RecordEntry()(implicit p: Parameters) extends Bundle {
      val valid = Bool()
      val timeStamp = UInt(64.W)
      val reqType = UInt(8.W)
    }

    val timer = GTimer() // NOTE(review): presumably a free-running cycle counter — confirm in `utility`
    val nrSource = 1 << edgeIn.bundle.sourceBits
    val latencyRecord = RegInit(VecInit(Seq.fill(nrSource)(0.U.asTypeOf(new RecordEntry()))))
    val latencySum = RegInit(0.U(128.W))
    val nrRecord = RegInit(0.U(128.W))

    outer.node.in.foreach {
      case (in, edge) =>
        val channelA = in.a
        when(channelA.fire &&
          channelA.bits.opcode =/= Hint &&
          channelA.bits.opcode =/= PutFullData &&
          channelA.bits.opcode =/= PutPartialData
        ) {
          // Valid channel A fire, record it
          assert(latencyRecord(channelA.bits.source).valid === false.B)
          latencyRecord(channelA.bits.source).valid := true.B
          latencyRecord(channelA.bits.source).timeStamp := timer
          latencyRecord(channelA.bits.source).reqType := channelA.bits.user.lift(ReqSourceKey).getOrElse(MemReqSource.NoWhere.id.U)
        }
        val channelD = in.d
        val (first, _, _, _) = edge.count(channelD)
        // Valid channel D fire, resolve it.
        // AccessAck and HintAck answer the Put*/Hint requests that were never
        // recorded above; ReleaseAck does not answer a channel A request at all.
        val resolveRecord = channelD.fire && first &&
          channelD.bits.opcode =/= ReleaseAck &&
          channelD.bits.opcode =/= AccessAck &&
          channelD.bits.opcode =/= HintAck
        val latency = WireInit(0.U(64.W))
        when(resolveRecord) {
          assert(latencyRecord(channelD.bits.source).valid === true.B)
          latencyRecord(channelD.bits.source).valid := false.B
          latency := timer - latencyRecord(channelD.bits.source).timeStamp
          // Accumulate the measured latency, not the absolute timestamp.
          latencySum := latencySum + latency
          nrRecord := nrRecord + 1.U
          // printf("timer: %x\n", latency)
        }
        XSPerfAccumulate(name + "_nrRecord_all", resolveRecord)
        XSPerfAccumulate(name + "_latencySum_all", Mux(resolveRecord, latency, 0.U))

        // Same two counters, split by the request type stored when the request was issued.
        for (j <- 0 until MemReqSource.ReqSourceCount.id) {
          val typeMatch = latencyRecord(channelD.bits.source).reqType === j.U
          XSPerfAccumulate(name + s"_nrRecord_type${j}", resolveRecord && typeMatch)
          XSPerfAccumulate(name + s"_latencySum_type${j}", Mux(resolveRecord && typeMatch, latency, 0.U))
        }
    }
  }

}

object BusPerfMonitor {
  /** Builds a diplomacy node to splice into a TileLink path.
    *
    * @param name         prefix for counter names
    * @param enable       when false no monitor is created and a `TLTempNode()` is returned
    * @param stat_latency forwarded to [[BusPerfMonitor]]
    * @param add_reqkey   currently unused by this method; kept for caller compatibility
    * @return the monitor's adapter node when enabled, otherwise `TLTempNode()`
    */
  def apply(
    name: String,
    enable: Boolean = false,
    stat_latency: Boolean = false,
    add_reqkey: Boolean = false)(implicit p: Parameters) =
  {
    if (enable) {
      val busPMU = LazyModule(new BusPerfMonitor(name, stat_latency))
      busPMU.node
    } else {
      TLTempNode()
    }
  }
}