xref: /XiangShan/src/main/scala/xiangshan/frontend/BPU.scala (revision 2a39fd19d0d5c182ae445a846592045ed076e659)
1package xiangshan.frontend
2
3import chisel3._
4import chisel3.util._
5import xiangshan._
6import xiangshan.utils._
7import xiangshan.backend.ALUOpType
8import utils._
9import chisel3.util.experimental.BoringUtils
10import xiangshan.backend.decode.XSTrap
11
class TableAddr(val idxBits: Int, val banks: Int) extends XSBundle {
  // Splits a virtual address into {tag, idx, offset} for a banked table
  // lookup. offset is the low 2 bits (4-byte instruction granularity),
  // idx selects the table set, and the remaining high bits form the tag.
  def tagBits = VAddrBits - idxBits - 2

  // Field order defines the bit layout used by asTypeOf: tag | idx | offset.
  val tag = UInt(tagBits.W)
  val idx = UInt(idxBits.W)
  val offset = UInt(2.W)

  // Reinterpret a raw address as this {tag, idx, offset} layout.
  def fromUInt(x: UInt) = x.asTypeOf(UInt(VAddrBits.W)).asTypeOf(this)
  def getTag(x: UInt) = fromUInt(x).tag
  def getIdx(x: UInt) = fromUInt(x).idx
  // Low log2Up(banks) bits of idx select the bank ...
  def getBank(x: UInt) = getIdx(x)(log2Up(banks) - 1, 0)
  // ... and the remaining idx bits index within the selected bank.
  def getBankIdx(x: UInt) = getIdx(x)(idxBits - 1, log2Up(banks))
}
25
// Payload forwarded from BPU Stage1 to Stage2: the fetch-packet PC, the raw
// per-predictor lookup results (BTB / JBTAC / TAGE), the speculative global
// history of each slot, and Stage1's quick prediction for later checking.
class Stage1To2IO extends XSBundle {
  val pc = Output(UInt(VAddrBits.W))
  // BTB: per-slot hit vector and predicted direct-branch/jump targets
  val btb = new Bundle {
    val hits = Output(UInt(FetchWidth.W))
    val targets = Output(Vec(FetchWidth, UInt(VAddrBits.W)))
  }
  // JBTAC (indirect-jump target cache): one-hot hit index and its target
  val jbtac = new Bundle {
    val hitIdx = Output(UInt(FetchWidth.W))
    val target = Output(UInt(VAddrBits.W))
  }
  // TAGE: per-slot hit vector and predicted taken directions
  val tage = new Bundle {
    val hits = Output(UInt(FetchWidth.W))
    val takens = Output(Vec(FetchWidth, Bool()))
  }
  // speculative global history as seen by each instruction in the packet
  val hist = Output(Vec(FetchWidth, UInt(HistoryLength.W)))
  // Stage1's quick (BTB-based) prediction, compared against Stage3's result
  val btbPred = ValidIO(new BranchPrediction)
}
43
/**
 * BPU Stage1: one-cycle quick prediction.
 *
 * Looks up the BTB (direct branches/jumps) and the JBTAC (indirect jumps),
 * issues the TAGE request, and emits a fast prediction (`s1OutPred`) to the
 * IFU one cycle after the PC arrives, forwarding the raw lookup results to
 * Stage2. Also owns the speculative global history register and repairs it
 * on a backend or Stage3 redirect.
 */
class BPUStage1 extends XSModule {
  val io = IO(new Bundle() {
    val in = new Bundle { val pc = Flipped(Decoupled(UInt(VAddrBits.W))) }
    // from backend: redirect / predictor-update info from the branch queue
    val redirectInfo = Input(new RedirectInfo)
    // from Stage3
    val flush = Input(Bool())
    val s3RollBackHist = Input(UInt(HistoryLength.W))
    val s3Taken = Input(Bool())
    // to ifu, quick prediction result
    val s1OutPred = ValidIO(new BranchPrediction)
    // to Stage2
    val out = Decoupled(new Stage1To2IO)
  })

  // Stage1 never back-pressures the PC generator.
  io.in.pc.ready := true.B

  // flush Stage1 when io.flush (sticky until the next accepted PC)
  val flushS1 = BoolStopWatch(io.flush, io.in.pc.fire(), startHighPriority = true)

  // global history register
  val ghr = RegInit(0.U(HistoryLength.W))
  // modify updateGhr and newGhr when updating ghr
  val updateGhr = WireInit(false.B)
  val newGhr = WireInit(0.U(HistoryLength.W))
  when (updateGhr) { ghr := newGhr }
  // `hist` bypasses an in-flight update so this cycle's lookups always see
  // the newest history value.
  val hist = Mux(updateGhr, newGhr, ghr)

  // TAGE predictor (a never-hitting stub when EnableBPD is off)
  val tage = if(EnableBPD) Module(new Tage) else Module(new FakeTAGE)
  tage.io.req.valid := io.in.pc.fire()
  tage.io.req.bits.pc := io.in.pc.bits
  tage.io.req.bits.hist := hist
  tage.io.redirectInfo <> io.redirectInfo
  io.out.bits.tage <> tage.io.out
  io.s1OutPred.bits.tageMeta := tage.io.meta

  // latch pc for 1 cycle latency when reading SRAM
  val pcLatch = RegEnable(io.in.pc.bits, io.in.pc.fire())

  val r = io.redirectInfo.redirect
  // Start PC of the fetch packet containing the redirecting instruction:
  // pc - fetchIdx * 4. The parentheses are essential: Scala gives `<<`
  // (first character `<`) *lower* precedence than `-`, so the previous
  // unparenthesized form parsed as `(r.pc - r.fetchIdx) << 2.U`, producing
  // a wrong update address for the BTB and JBTAC.
  val updateFetchpc = r.pc - (r.fetchIdx << 2.U)

  // BTB: lookup wiring ...
  val btb = Module(new BTB)
  btb.io.in.pc <> io.in.pc
  btb.io.in.pcLatch := pcLatch
  btb.io.redirectValid := io.redirectInfo.valid
  btb.io.flush := io.flush

  // ... and update wiring, driven by the backend redirect info.
  btb.io.update.fetchPC := updateFetchpc
  btb.io.update.fetchIdx := r.fetchIdx
  btb.io.update.hit := r.btbHitWay
  btb.io.update.misPred := io.redirectInfo.misPred
  btb.io.update.writeWay := r.btbVictimWay
  btb.io.update.oldCtr := r.btbPredCtr
  btb.io.update.taken := r.taken
  btb.io.update.target := r.brTarget
  btb.io.update._type := r._type

  val btbHit = btb.io.out.hit
  val btbTaken = btb.io.out.taken
  val btbTakenIdx = btb.io.out.takenIdx
  val btbTakenTarget = btb.io.out.target
  val btbWriteWay = btb.io.out.writeWay
  val btbNotTakens = btb.io.out.notTakens
  val btbCtrs = VecInit(btb.io.out.dEntries.map(_.pred))
  val btbValids = VecInit(btb.io.out.dEntries.map(_.valid))
  val btbTargets = VecInit(btb.io.out.dEntries.map(_.target))
  val btbTypes = VecInit(btb.io.out.dEntries.map(_._type))

  // JBTAC: indirect-jump target lookup and update
  val jbtac = Module(new JBTAC)
  jbtac.io.in.pc <> io.in.pc
  jbtac.io.in.pcLatch := pcLatch
  jbtac.io.in.hist := hist
  jbtac.io.redirectValid := io.redirectInfo.valid
  jbtac.io.flush := io.flush

  jbtac.io.update.fetchPC := updateFetchpc
  jbtac.io.update.fetchIdx := r.fetchIdx
  jbtac.io.update.misPred := io.redirectInfo.misPred
  jbtac.io.update._type := r._type
  jbtac.io.update.target := r.target
  jbtac.io.update.hist := r.hist

  val jbtacHit = jbtac.io.out.hit
  val jbtacTarget = jbtac.io.out.target
  val jbtacHitIdx = jbtac.io.out.hitIdx

  // calculate global history of each instr:
  // each not-taken branch before slot j contributes one extra left shift to
  // j's history — shift(i)(j) is 1 exactly when i is a not-taken branch with i < j.
  val firstHist = RegNext(hist)
  val histShift = Wire(Vec(FetchWidth, UInt(log2Up(FetchWidth).W)))
  val shift = Wire(Vec(FetchWidth, Vec(FetchWidth, UInt(1.W))))
  (0 until FetchWidth).map(i => shift(i) := Mux(!btbNotTakens(i), 0.U, ~LowerMask(UIntToOH(i.U), FetchWidth)).asTypeOf(Vec(FetchWidth, UInt(1.W))))
  for (j <- 0 until FetchWidth) {
    var tmp = 0.U
    for (i <- 0 until FetchWidth) {
      tmp = tmp + shift(i)(j)
    }
    histShift(j) := tmp
  }
  (0 until FetchWidth).map(i => io.s1OutPred.bits.hist(i) := firstHist << histShift(i))

  // update ghr
  updateGhr := io.s1OutPred.bits.redirect || io.flush
  val brJumpIdx = Mux(!(btbHit && btbTaken), 0.U, UIntToOH(btbTakenIdx))
  val indirectIdx = Mux(!jbtacHit, 0.U, UIntToOH(jbtacHitIdx))
  // history repair priority: backend flush > Stage3 flush > Stage1's own redirect
  newGhr := Mux(io.redirectInfo.flush(),    (r.hist << 1.U) | !(r._type === BTBtype.B && !r.taken),
            Mux(io.flush,                   Mux(io.s3Taken, io.s3RollBackHist << 1.U | 1.U, io.s3RollBackHist),
            Mux(io.s1OutPred.bits.redirect, PriorityMux(brJumpIdx | indirectIdx, io.s1OutPred.bits.hist) << 1.U | 1.U,
                                            io.s1OutPred.bits.hist(0) << PopCount(btbNotTakens))))

  // redirect based on BTB and JBTAC
  io.out.valid := RegNext(io.in.pc.fire()) && !io.flush

  io.s1OutPred.valid := io.out.valid
  io.s1OutPred.bits.redirect := btbHit && btbTaken || jbtacHit
  // instructions up to (and including) the first taken jump are valid
  io.s1OutPred.bits.instrValid := Mux(io.s1OutPred.bits.redirect, LowerMask(LowestBit(brJumpIdx | indirectIdx, FetchWidth), FetchWidth), Fill(FetchWidth, 1.U(1.W))).asTypeOf(Vec(FetchWidth, Bool()))
  io.s1OutPred.bits.target := Mux(brJumpIdx === LowestBit(brJumpIdx | indirectIdx, FetchWidth), btbTakenTarget, jbtacTarget)
  io.s1OutPred.bits.btbVictimWay := btbWriteWay
  io.s1OutPred.bits.predCtr := btbCtrs
  io.s1OutPred.bits.btbHitWay := btbHit
  // RAS checkpoint fields are only meaningful from Stage3
  io.s1OutPred.bits.rasSp := DontCare
  io.s1OutPred.bits.rasTopCtr := DontCare

  io.out.bits.pc := pcLatch
  io.out.bits.btb.hits := btbValids.asUInt
  (0 until FetchWidth).map(i => io.out.bits.btb.targets(i) := btbTargets(i))
  io.out.bits.jbtac.hitIdx := UIntToOH(jbtacHitIdx)
  io.out.bits.jbtac.target := jbtacTarget
  // TODO: we don't need this repeatedly!
  io.out.bits.hist := io.s1OutPred.bits.hist
  io.out.bits.btbPred := io.s1OutPred

  // debug info
  XSDebug(true.B, "[BPUS1]in:(%d %d)   pc=%x ghr=%b\n", io.in.pc.valid, io.in.pc.ready, io.in.pc.bits, hist)
  XSDebug(true.B, "[BPUS1]outPred:(%d) redirect=%d instrValid=%b tgt=%x\n",
    io.s1OutPred.valid, io.s1OutPred.bits.redirect, io.s1OutPred.bits.instrValid.asUInt, io.s1OutPred.bits.target)
  XSDebug(io.flush && io.redirectInfo.flush(),
    "[BPUS1]flush from backend: pc=%x tgt=%x brTgt=%x _type=%b taken=%d oldHist=%b fetchIdx=%d isExcpt=%d\n",
    r.pc, r.target, r.brTarget, r._type, r.taken, r.hist, r.fetchIdx, r.isException)
  XSDebug(io.flush && !io.redirectInfo.flush(),
    "[BPUS1]flush from Stage3:  s3Taken=%d s3RollBackHist=%b\n", io.s3Taken, io.s3RollBackHist)

}
196
// Stage2 only delays Stage1's payload by one cycle, so its output bundle is
// identical to Stage1To2IO; the subclass exists purely for interface naming.
class Stage2To3IO extends Stage1To2IO {
}
199
/** BPU Stage2: a pure one-cycle delay slot between Stage1 and Stage3. */
class BPUStage2 extends XSModule {
  val io = IO(new Bundle() {
    // flush request raised by Stage3 or the backend
    val flush = Input(Bool())
    val in = Flipped(Decoupled(new Stage1To2IO))
    val out = Decoupled(new Stage2To3IO)
  })

  // Register the whole Stage1 payload; no transformation happens here.
  io.out.bits := RegEnable(io.in.bits, io.in.fire())

  // Remember a flush until the next accepted input so that an in-flight
  // packet that was flushed never leaves this stage.
  val stickyFlush = BoolStopWatch(io.flush, io.in.fire(), startHighPriority = true)
  io.out.valid := RegNext(io.in.fire()) && !(io.flush || stickyFlush)
  io.in.ready := io.out.fire() || !io.out.valid

  // debug info
  XSDebug(true.B, "[BPUS2]in:(%d %d) pc=%x out:(%d %d) pc=%x\n",
    io.in.valid, io.in.ready, io.in.bits.pc, io.out.valid, io.out.ready, io.out.bits.pc)
  XSDebug(io.flush, "[BPUS2]flush!!!\n")
}
221
/**
 * BPU Stage3: precise prediction using icache predecode information.
 *
 * Combines the latched Stage2 payload with the predecode result to locate the
 * first taken control-flow instruction (branch/jal/call/jalr/ret) in the
 * fetch packet, computes the final target (using the RAS for returns),
 * speculatively maintains the RAS, and flushes the earlier stages when its
 * conclusion differs from Stage1's quick prediction.
 */
class BPUStage3 extends XSModule {
  val io = IO(new Bundle() {
    val flush = Input(Bool())
    val in = Flipped(Decoupled(new Stage2To3IO))
    val out = ValidIO(new BranchPrediction)
    // from icache
    val predecode = Flipped(ValidIO(new Predecode))
    // from backend
    val redirectInfo = Input(new RedirectInfo)
    // to Stage1 and Stage2
    val flushBPU = Output(Bool())
    // to Stage1, restore ghr in stage1 when flushBPU is valid
    val s1RollBackHist = Output(UInt(HistoryLength.W))
    val s3Taken = Output(Bool())
  })

  val flushS3 = BoolStopWatch(io.flush, io.in.fire(), startHighPriority = true)
  // Hold the incoming packet until the matching predecode arrives.
  val inLatch = RegInit(0.U.asTypeOf(io.in.bits))
  val validLatch = RegInit(false.B)
  when (io.in.fire()) { inLatch := io.in.bits }
  when (io.in.fire()) {
    validLatch := !io.flush
  }.elsewhen (io.out.valid) {
    validLatch := false.B
  }
  io.out.valid := validLatch && io.predecode.valid && !flushS3
  io.in.ready := !validLatch || io.out.valid

  // RAS
  // TODO: split retAddr and ctr
  def rasEntry() = new Bundle {
    val retAddr = UInt(VAddrBits.W)
    val ctr = UInt(8.W) // layer of nested call functions
  }
  val ras = RegInit(VecInit(Seq.fill(RasSize)(0.U.asTypeOf(rasEntry()))))
  val sp = Counter(RasSize)
  val rasTop = ras(sp.value)
  val rasTopAddr = rasTop.retAddr

  // get the first taken branch/jal/call/jalr/ret in a fetch line
  // brTakenIdx/jalIdx/callIdx/jalrIdx/retIdx/jmpIdx is one-hot encoded.
  // brNotTakenIdx indicates all the not-taken branches before the first jump instruction.
  val brIdx = inLatch.btb.hits & Reverse(Cat(io.predecode.bits.fuOpTypes.map { t => ALUOpType.isBranch(t) }).asUInt) & io.predecode.bits.mask
  val brTakenIdx = LowestBit(brIdx & inLatch.tage.takens.asUInt, FetchWidth)
  val jalIdx = LowestBit(inLatch.btb.hits & Reverse(Cat(io.predecode.bits.fuOpTypes.map { t => t === ALUOpType.jal }).asUInt) & io.predecode.bits.mask, FetchWidth)
  val callIdx = LowestBit(inLatch.btb.hits & io.predecode.bits.mask & Reverse(Cat(io.predecode.bits.fuOpTypes.map { t => t === ALUOpType.call }).asUInt), FetchWidth)
  val jalrIdx = LowestBit(inLatch.jbtac.hitIdx & io.predecode.bits.mask & Reverse(Cat(io.predecode.bits.fuOpTypes.map { t => t === ALUOpType.jalr }).asUInt), FetchWidth)
  val retIdx = LowestBit(io.predecode.bits.mask & Reverse(Cat(io.predecode.bits.fuOpTypes.map { t => t === ALUOpType.ret }).asUInt), FetchWidth)

  val jmpIdx = LowestBit(brTakenIdx | jalIdx | callIdx | jalrIdx | retIdx, FetchWidth)
  val brNotTakenIdx = brIdx & ~inLatch.tage.takens.asUInt & LowerMask(jmpIdx, FetchWidth) & io.predecode.bits.mask

  // Final target: ret -> RAS top, jalr -> JBTAC target, no jump -> next
  // sequential fetch line, otherwise the BTB target of the taken slot.
  io.out.bits.target := Mux(jmpIdx === retIdx, rasTopAddr,
    Mux(jmpIdx === jalrIdx, inLatch.jbtac.target,
    Mux(jmpIdx === 0.U, inLatch.pc + 32.U, // TODO: RVC
    PriorityMux(jmpIdx, inLatch.btb.targets))))
  // instructions up to (and including) the first taken jump are valid
  io.out.bits.instrValid := Mux(jmpIdx.orR, LowerMask(jmpIdx, FetchWidth), Fill(FetchWidth, 1.U(1.W))).asTypeOf(Vec(FetchWidth, Bool()))
  io.out.bits.btbVictimWay := inLatch.btbPred.bits.btbVictimWay
  io.out.bits.predCtr := inLatch.btbPred.bits.predCtr
  io.out.bits.btbHitWay := inLatch.btbPred.bits.btbHitWay
  io.out.bits.tageMeta := inLatch.btbPred.bits.tageMeta
  //io.out.bits._type := Mux(jmpIdx === retIdx, BTBtype.R,
  //  Mux(jmpIdx === jalrIdx, BTBtype.I,
  //  Mux(jmpIdx === brTakenIdx, BTBtype.B, BTBtype.J)))
  val firstHist = inLatch.btbPred.bits.hist(0)
  // there may be several notTaken branches before the first jump instruction,
  // so we need to calculate how many zeroes should each instruction shift in its global history.
  // each history is exclusive of instruction's own jump direction.
  val histShift = Wire(Vec(FetchWidth, UInt(log2Up(FetchWidth).W)))
  val shift = Wire(Vec(FetchWidth, Vec(FetchWidth, UInt(1.W))))
  (0 until FetchWidth).map(i => shift(i) := Mux(!brNotTakenIdx(i), 0.U, ~LowerMask(UIntToOH(i.U), FetchWidth)).asTypeOf(Vec(FetchWidth, UInt(1.W))))
  for (j <- 0 until FetchWidth) {
    var tmp = 0.U
    for (i <- 0 until FetchWidth) {
      tmp = tmp + shift(i)(j)
    }
    histShift(j) := tmp
  }
  (0 until FetchWidth).map(i => io.out.bits.hist(i) := firstHist << histShift(i))
  // save ras checkpoint info
  io.out.bits.rasSp := sp.value
  io.out.bits.rasTopCtr := rasTop.ctr

  // Redirect when Stage1's taken decision disagrees with ours, or when both
  // predict taken but the targets differ. (This is now the single driver of
  // io.out.bits.redirect; an earlier `:= jmpIdx.orR.asBool` assignment was
  // dead under Chisel's last-connect-wins semantics and has been removed.)
  io.out.bits.redirect := inLatch.btbPred.bits.redirect ^ jmpIdx.orR.asBool ||
    inLatch.btbPred.bits.redirect && jmpIdx.orR.asBool && io.out.bits.target =/= inLatch.btbPred.bits.target
  io.flushBPU := io.out.bits.redirect && io.out.valid

  // speculative update RAS
  val rasWrite = WireInit(0.U.asTypeOf(rasEntry()))
  // Return address = call instruction PC + 4, i.e. pc + callIdx * 4 + 4.
  // The parentheses are essential: Scala gives `<<` lower precedence than
  // `+`, so the previous unparenthesized form parsed as
  // `(inLatch.pc + OHToUInt(callIdx)) << (2.U + 4.U)` — a shift by 6 —
  // corrupting every pushed return address.
  rasWrite.retAddr := inLatch.pc + (OHToUInt(callIdx) << 2.U) + 4.U
  val allocNewEntry = rasWrite.retAddr =/= rasTopAddr
  rasWrite.ctr := Mux(allocNewEntry, 1.U, rasTop.ctr + 1.U)
  when (io.out.valid) {
    when (jmpIdx === callIdx) {
      // push a new entry, or bump the nesting counter of the matching top
      ras(Mux(allocNewEntry, sp.value + 1.U, sp.value)) := rasWrite
      when (allocNewEntry) { sp.value := sp.value + 1.U }
    }.elsewhen (jmpIdx === retIdx) {
      // pop, or just decrement the top entry's nesting counter
      when (rasTop.ctr === 1.U) {
        sp.value := Mux(sp.value === 0.U, 0.U, sp.value - 1.U)
      }.otherwise {
        ras(sp.value) := Cat(rasTop.ctr - 1.U, rasTopAddr).asTypeOf(rasEntry())
      }
    }
  }
  // use checkpoint to recover RAS on a backend misprediction
  val recoverSp = io.redirectInfo.redirect.rasSp
  val recoverCtr = io.redirectInfo.redirect.rasTopCtr
  when (io.redirectInfo.valid && io.redirectInfo.misPred) {
    sp.value := recoverSp
    ras(recoverSp) := Cat(recoverCtr, ras(recoverSp).retAddr).asTypeOf(rasEntry())
  }

  // roll back global history in S1 if S3 redirects
  io.s1RollBackHist := Mux(io.s3Taken, PriorityMux(jmpIdx, io.out.bits.hist), io.out.bits.hist(0) << PopCount(brIdx & ~inLatch.tage.takens.asUInt))
  // whether Stage3 has a taken jump
  io.s3Taken := jmpIdx.orR.asBool

  // debug info
  XSDebug(io.in.fire(), "[BPUS3]in:(%d %d) pc=%x\n", io.in.valid, io.in.ready, io.in.bits.pc)
  XSDebug(io.out.valid, "[BPUS3]out:%d pc=%x redirect=%d predcdMask=%b instrValid=%b tgt=%x\n",
    io.out.valid, inLatch.pc, io.out.bits.redirect, io.predecode.bits.mask, io.out.bits.instrValid.asUInt, io.out.bits.target)
  XSDebug(true.B, "[BPUS3]flushS3=%d\n", flushS3)
  XSDebug(true.B, "[BPUS3]validLatch=%d predecode.valid=%d\n", validLatch, io.predecode.valid)
  XSDebug(true.B, "[BPUS3]brIdx=%b brTakenIdx=%b brNTakenIdx=%b jalIdx=%d jalrIdx=%d callIdx=%d retIdx=%b\n",
    brIdx, brTakenIdx, brNotTakenIdx, jalIdx, jalrIdx, callIdx, retIdx)

  // BPU's TEMP Perf Cnt
  BoringUtils.addSource(io.out.valid, "MbpS3Cnt")
  BoringUtils.addSource(io.out.valid && io.out.bits.redirect, "MbpS3TageRed")
  BoringUtils.addSource(io.out.valid && (inLatch.btbPred.bits.redirect ^ jmpIdx.orR.asBool), "MbpS3TageRedDir")
  BoringUtils.addSource(io.out.valid && (inLatch.btbPred.bits.redirect
              && jmpIdx.orR.asBool && (io.out.bits.target =/= inLatch.btbPred.bits.target)), "MbpS3TageRedTar")
}
357
// Top-level branch prediction unit: chains the three BPU stages, fans out
// flush/redirect signals, and hosts temporary BoringUtils-based perf counters.
class BPU extends XSModule {
  val io = IO(new Bundle() {
    // from backend
    // flush pipeline if misPred and update bpu based on redirect signals from brq
    val redirectInfo = Input(new RedirectInfo)

    val in = new Bundle { val pc = Flipped(Valid(UInt(VAddrBits.W))) }

    // quick Stage1 prediction to the IFU
    val btbOut = ValidIO(new BranchPrediction)
    // precise Stage3 prediction to the IFU
    val tageOut = ValidIO(new BranchPrediction)

    // predecode info from icache
    // TODO: simplify this after implement predecode unit
    val predecode = Flipped(ValidIO(new Predecode))
  })

  val s1 = Module(new BPUStage1)
  val s2 = Module(new BPUStage2)
  val s3 = Module(new BPUStage3)

  // Stage1 is flushed by either a Stage3 disagreement or a backend redirect.
  s1.io.redirectInfo <> io.redirectInfo
  s1.io.flush := s3.io.flushBPU || io.redirectInfo.flush()
  s1.io.in.pc.valid := io.in.pc.valid
  s1.io.in.pc.bits <> io.in.pc.bits
  io.btbOut <> s1.io.s1OutPred
  // Stage3 feeds the history rollback value back into Stage1.
  s1.io.s3RollBackHist := s3.io.s1RollBackHist
  s1.io.s3Taken := s3.io.s3Taken

  s1.io.out <> s2.io.in
  s2.io.flush := s3.io.flushBPU || io.redirectInfo.flush()

  s2.io.out <> s3.io.in
  // Stage3 itself is only flushed by the backend, never by its own flushBPU.
  s3.io.flush := io.redirectInfo.flush()
  s3.io.predecode <> io.predecode
  io.tageOut <> s3.io.out
  s3.io.redirectInfo <> io.redirectInfo

  // TODO: temp and ugly code, when perf counters is added( may after adding CSR), please mv the below counter
  // (name, padding-for-aligned-printout) pairs; names double as BoringUtils tags.
  val bpuPerfCntList = List(
    ("MbpInstr","         "),
    ("MbpRight","         "),
    ("MbpWrong","         "),
    ("MbpBRight","        "),
    ("MbpBWrong","        "),
    ("MbpJRight","        "),
    ("MbpJWrong","        "),
    ("MbpIRight","        "),
    ("MbpIWrong","        "),
    ("MbpRRight","        "),
    ("MbpRWrong","        "),
    ("MbpS3Cnt","         "),
    ("MbpS3TageRed","     "),
    ("MbpS3TageRedDir","  "),
    ("MbpS3TageRedTar","  ")
  )

  // One counter per tag; each increments whenever its remote source pulses.
  val bpuPerfCnts = List.fill(bpuPerfCntList.length)(RegInit(0.U(XLEN.W)))
  val bpuPerfCntConds = List.fill(bpuPerfCntList.length)(WireInit(false.B))
  (bpuPerfCnts zip bpuPerfCntConds) map { case (cnt, cond) => { when (cond) { cnt := cnt + 1.U }}}

  // Wire each condition to the matching BoringUtils source elsewhere in the design.
  for(i <- bpuPerfCntList.indices) {
    BoringUtils.addSink(bpuPerfCntConds(i), bpuPerfCntList(i)._1)
  }

  val xsTrap = WireInit(false.B)
  BoringUtils.addSink(xsTrap, "XSTRAP_BPU")

  // Dump all counters when the simulation trap fires.
  // if (!p.FPGAPlatform) {
    when (xsTrap) {
      printf("=================BPU's PerfCnt================\n")
      for(i <- bpuPerfCntList.indices) {
        printf(bpuPerfCntList(i)._1 + bpuPerfCntList(i)._2 + " <- " + "%d\n", bpuPerfCnts(i))
      }
    }
  // }
}
433}