xref: /XiangShan/src/main/scala/xiangshan/frontend/BPU.scala (revision e299e123d07755c9dbb6bf8b860c3fdb91060f81)
1package xiangshan.frontend
2
3import chisel3._
4import chisel3.util._
5import xiangshan._
6import xiangshan.utils._
7import xiangshan.backend.ALUOpType
8import utils._
9import chisel3.util.experimental.BoringUtils
10import xiangshan.backend.decode.XSTrap
11
// Virtual-address view used to index banked predictor tables.
// Bit layout (MSB -> LSB): tag | idx | offset. Field declaration order
// determines the bit layout under asTypeOf, so do NOT reorder the fields.
// NOTE(review): offset is 1 bit, presumably the half-word bit for RVC
// alignment — confirm against the fetch unit.
class TableAddr(val idxBits: Int, val banks: Int) extends XSBundle {
  // The tag covers whatever of the virtual address remains above idx+offset.
  def tagBits = VAddrBits - idxBits - 1

  val tag = UInt(tagBits.W)
  val idx = UInt(idxBits.W)
  val offset = UInt(1.W)

  // Reinterpret a raw address as this layout (pad/truncate to VAddrBits first).
  def fromUInt(x: UInt) = x.asTypeOf(UInt(VAddrBits.W)).asTypeOf(this)
  def getTag(x: UInt) = fromUInt(x).tag
  def getIdx(x: UInt) = fromUInt(x).idx
  // Low idx bits select the bank; the remaining high idx bits address a set
  // within that bank.
  def getBank(x: UInt) = getIdx(x)(log2Up(banks) - 1, 0)
  def getBankIdx(x: UInt) = getIdx(x)(idxBits - 1, log2Up(banks))
}
25
// Payload handed from BPU Stage1 to Stage2: the latched fetch pc, the raw
// BTB/JBTAC/TAGE lookup results, the per-instruction speculative global
// history, and the quick prediction (btbPred) Stage1 already sent to the IFU.
class Stage1To2IO extends XSBundle {
  val pc = Output(UInt(VAddrBits.W))
  // BTB result: per-slot hit bits and predicted targets
  val btb = new Bundle {
    val hits = Output(UInt(FetchWidth.W))
    val targets = Output(Vec(FetchWidth, UInt(VAddrBits.W)))
  }
  // JBTAC (indirect-jump target cache) result: one-hot hit index and target
  val jbtac = new Bundle {
    val hitIdx = Output(UInt(FetchWidth.W))
    val target = Output(UInt(VAddrBits.W))
  }
  // TAGE result: per-slot hit and taken bits
  val tage = new Bundle {
    val hits = Output(UInt(FetchWidth.W))
    val takens = Output(Vec(FetchWidth, Bool()))
  }
  // global history as seen by each instruction slot in the fetch line
  val hist = Output(Vec(FetchWidth, UInt(HistoryLength.W)))
  // Stage1's quick prediction, forwarded so Stage3 can cross-check it
  val btbPred = ValidIO(new BranchPrediction)
}
43
// BPU pipeline stage 1: looks up the BTB (conditional branches/jumps), the
// JBTAC (indirect jumps) and TAGE in parallel, maintains the speculative
// global history register, emits a quick single-cycle prediction to the IFU
// (s1OutPred), and forwards the raw table outputs to Stage2.
class BPUStage1 extends XSModule {
  val io = IO(new Bundle() {
    val in = new Bundle { val pc = Flipped(Decoupled(UInt(VAddrBits.W))) }
    // from backend
    val redirectInfo = Input(new RedirectInfo)
    // from Stage3
    val flush = Input(Bool())
    val s3RollBackHist = Input(UInt(HistoryLength.W))
    val s3Taken = Input(Bool())
    // to ifu, quick prediction result
    val s1OutPred = ValidIO(new BranchPrediction)
    // to Stage2
    val out = Decoupled(new Stage1To2IO)
  })

  // Stage1 never back-pressures the fetch pc.
  io.in.pc.ready := true.B

  // flush Stage1 when io.flush
  // (holds the flush indication until the next pc handshake)
  // NOTE(review): flushS1 is computed but io.out.valid below uses io.flush
  // directly (see the commented-out alternative) — confirm it is
  // intentionally unused.
  val flushS1 = BoolStopWatch(io.flush, io.in.pc.fire(), startHighPriority = true)

  // global history register
  val ghr = RegInit(0.U(HistoryLength.W))
  // modify updateGhr and newGhr when updating ghr
  val updateGhr = WireInit(false.B)
  val newGhr = WireInit(0.U(HistoryLength.W))
  when (updateGhr) { ghr := newGhr }
  // use hist as global history!!!
  // (same-cycle bypass: an update is visible to this cycle's lookups)
  val hist = Mux(updateGhr, newGhr, ghr)

  // Tage predictor
  val tage = Module(new FakeTAGE)
  // val tage = if(EnableBPD) Module(new Tage) else Module(new FakeTAGE)
  tage.io.req.valid := io.in.pc.fire()
  tage.io.req.bits.pc := io.in.pc.bits
  tage.io.req.bits.hist := hist
  tage.io.redirectInfo <> io.redirectInfo
  io.out.bits.tage <> tage.io.out
  io.s1OutPred.bits.tageMeta := tage.io.meta

  // latch pc for 1 cycle latency when reading SRAM
  val pcLatch = RegEnable(io.in.pc.bits, io.in.pc.fire())

  // r carries the backend's redirect/update information for training.
  val r = io.redirectInfo.redirect
  // Reconstruct the fetch-line base pc from the mispredicted instruction's
  // pc and its index within the line (4-byte slots).
  val updateFetchpc = r.pc - (r.fetchIdx << 2.U)
  // BTB
  val btb = Module(new BTB)
  btb.io.in.pc <> io.in.pc
  btb.io.in.pcLatch := pcLatch
  // TODO: pass real mask in
  // NOTE(review): hard-coded 16-bit all-ones mask — presumably 2*FetchWidth
  // half-word slots; confirm once the real mask is plumbed through.
  btb.io.in.mask := "b1111111111111111".asUInt
  btb.io.redirectValid := io.redirectInfo.valid
  btb.io.flush := io.flush

  // btb.io.update.fetchPC := updateFetchpc
  // btb.io.update.fetchIdx := r.fetchIdx
  btb.io.update.pc := r.pc
  btb.io.update.hit := r.btbHitWay
  btb.io.update.misPred := io.redirectInfo.misPred
  // btb.io.update.writeWay := r.btbVictimWay
  btb.io.update.oldCtr := r.btbPredCtr
  btb.io.update.taken := r.taken
  btb.io.update.target := r.brTarget
  btb.io.update._type := r._type
  // TODO: add RVC logic
  btb.io.update.isRVC := DontCare

  val btbHit = btb.io.out.hit
  val btbTaken = btb.io.out.taken
  val btbTakenIdx = btb.io.out.takenIdx
  val btbTakenTarget = btb.io.out.target
  // val btbWriteWay = btb.io.out.writeWay
  val btbNotTakens = btb.io.out.notTakens
  val btbCtrs = VecInit(btb.io.out.dEntries.map(_.pred))
  val btbValids = btb.io.out.hits
  val btbTargets = VecInit(btb.io.out.dEntries.map(_.target))
  val btbTypes = VecInit(btb.io.out.dEntries.map(_._type))


  val jbtac = Module(new JBTAC)
  jbtac.io.in.pc <> io.in.pc
  jbtac.io.in.pcLatch := pcLatch
  jbtac.io.in.hist := hist
  jbtac.io.redirectValid := io.redirectInfo.valid
  jbtac.io.flush := io.flush

  jbtac.io.update.fetchPC := updateFetchpc
  // NOTE(review): JBTAC indexes in half-word granularity here (<< 1) while
  // updateFetchpc above assumed 4-byte slots — confirm the intended unit.
  jbtac.io.update.fetchIdx := r.fetchIdx << 1
  jbtac.io.update.misPred := io.redirectInfo.misPred
  jbtac.io.update._type := r._type
  jbtac.io.update.target := r.target
  jbtac.io.update.hist := r.hist

  val jbtacHit = jbtac.io.out.hit
  val jbtacTarget = jbtac.io.out.target
  val jbtacHitIdx = jbtac.io.out.hitIdx

  // calculate global history of each instr
  // Each slot's history is firstHist shifted left by the number of
  // not-taken branches strictly before that slot (its own outcome excluded).
  val firstHist = RegNext(hist)
  val histShift = Wire(Vec(FetchWidth, UInt(log2Up(FetchWidth).W)))
  val shift = Wire(Vec(FetchWidth, Vec(FetchWidth, UInt(1.W))))
  // shift(i)(j) is 1 iff slot i is a not-taken branch located before slot j.
  (0 until FetchWidth).map(i => shift(i) := Mux(!btbNotTakens(i), 0.U, ~LowerMask(UIntToOH(i.U), FetchWidth)).asTypeOf(Vec(FetchWidth, UInt(1.W))))
  // Column-sum: histShift(j) counts the not-taken branches before slot j.
  for (j <- 0 until FetchWidth) {
    var tmp = 0.U
    for (i <- 0 until FetchWidth) {
      tmp = tmp + shift(i)(j)
    }
    histShift(j) := tmp
  }
  // NOTE(review): only zeroes are shifted in here (shift amount, no taken
  // bit inserted) — confirm taken bits are folded in elsewhere (newGhr below).
  (0 until FetchWidth).map(i => io.s1OutPred.bits.hist(i) := firstHist << histShift(i))

  // update ghr
  // Speculative GHR update priority: backend redirect (rebuild from the
  // checkpointed r.hist) > Stage3 flush (roll back to s3RollBackHist) >
  // Stage1's own redirect (shift in a taken bit) > fall-through (shift in
  // one 0 per not-taken branch).
  updateGhr := io.s1OutPred.bits.redirect || io.flush
  val brJumpIdx = Mux(!(btbHit && btbTaken), 0.U, UIntToOH(btbTakenIdx))
  val indirectIdx = Mux(!jbtacHit, 0.U, UIntToOH(jbtacHitIdx))
  //val newTaken = Mux(io.redirectInfo.flush(), !(r._type === BTBtype.B && !r.taken), )
  newGhr := Mux(io.redirectInfo.flush(),    (r.hist << 1.U) | !(r._type === BTBtype.B && !r.taken),
            Mux(io.flush,                   Mux(io.s3Taken, io.s3RollBackHist << 1.U | 1.U, io.s3RollBackHist),
            Mux(io.s1OutPred.bits.redirect, PriorityMux(brJumpIdx | indirectIdx, io.s1OutPred.bits.hist) << 1.U | 1.U,
                                            io.s1OutPred.bits.hist(0) << PopCount(btbNotTakens))))

  // redirect based on BTB and JBTAC
  // io.out.valid := RegNext(io.in.pc.fire()) && !flushS1
  io.out.valid := RegNext(io.in.pc.fire()) && !io.flush

  io.s1OutPred.valid := io.out.valid
  // Quick redirect: a taken BTB branch or any JBTAC (indirect) hit.
  io.s1OutPred.bits.redirect := btbHit && btbTaken || jbtacHit
  // io.s1OutPred.bits.instrValid := LowerMask(UIntToOH(btbTakenIdx), FetchWidth) & LowerMask(UIntToOH(jbtacHitIdx), FetchWidth)
  // Instructions at and before the first taken jump remain valid; on no
  // redirect the whole line is valid.
  io.s1OutPred.bits.instrValid := Mux(io.s1OutPred.bits.redirect, LowerMask(LowestBit(brJumpIdx | indirectIdx, FetchWidth), FetchWidth), Fill(FetchWidth, 1.U(1.W))).asTypeOf(Vec(FetchWidth, Bool()))
  // Take the target of whichever (branch vs indirect) comes first in the line.
  io.s1OutPred.bits.target := Mux(brJumpIdx === LowestBit(brJumpIdx | indirectIdx, FetchWidth), btbTakenTarget, jbtacTarget)
  // io.s1OutPred.bits.btbVictimWay := btbWriteWay
  io.s1OutPred.bits.predCtr := btbCtrs
  io.s1OutPred.bits.btbHitWay := btbHit
  // RAS checkpoint fields are only meaningful from Stage3.
  io.s1OutPred.bits.rasSp := DontCare
  io.s1OutPred.bits.rasTopCtr := DontCare

  io.out.bits.pc := pcLatch
  io.out.bits.btb.hits := btbValids.asUInt
  (0 until FetchWidth).map(i => io.out.bits.btb.targets(i) := btbTargets(i))
  io.out.bits.jbtac.hitIdx := UIntToOH(jbtacHitIdx)
  io.out.bits.jbtac.target := jbtacTarget
  // TODO: we don't need this repeatedly!
  io.out.bits.hist := io.s1OutPred.bits.hist
  io.out.bits.btbPred := io.s1OutPred



  // debug info
  XSDebug(true.B, "[BPUS1]in:(%d %d)   pc=%x ghr=%b\n", io.in.pc.valid, io.in.pc.ready, io.in.pc.bits, hist)
  XSDebug(true.B, "[BPUS1]outPred:(%d) redirect=%d instrValid=%b tgt=%x\n",
    io.s1OutPred.valid, io.s1OutPred.bits.redirect, io.s1OutPred.bits.instrValid.asUInt, io.s1OutPred.bits.target)
  XSDebug(io.flush && io.redirectInfo.flush(),
    "[BPUS1]flush from backend: pc=%x tgt=%x brTgt=%x _type=%b taken=%d oldHist=%b fetchIdx=%d isExcpt=%d\n",
    r.pc, r.target, r.brTarget, r._type, r.taken, r.hist, r.fetchIdx, r.isException)
  XSDebug(io.flush && !io.redirectInfo.flush(),
    "[BPUS1]flush from Stage3:  s3Taken=%d s3RollBackHist=%b\n", io.s3Taken, io.s3RollBackHist)

}
201
// Stage2 currently adds nothing to the Stage1 payload; this alias exists so
// the Stage2->Stage3 interface can diverge later without rewiring callers.
class Stage2To3IO extends Stage1To2IO
204
// BPU pipeline stage 2: a one-cycle skid buffer between Stage1 and Stage3.
// It latches the Stage1 payload on a handshake and forwards it unchanged a
// cycle later, dropping it if a flush arrives in between.
class BPUStage2 extends XSModule {
  val io = IO(new Bundle() {
    // flush request from Stage3 or the backend
    val flush = Input(Bool())
    val in = Flipped(Decoupled(new Stage1To2IO))
    val out = Decoupled(new Stage2To3IO)
  })

  // Pass the payload through with a one-cycle delay; no transformation.
  io.out.bits := RegEnable(io.in.bits, io.in.fire())

  // Remember a flush until the next input handshake so that an
  // already-latched bundle is invalidated rather than forwarded.
  val s2Flushed = BoolStopWatch(io.flush, io.in.fire(), startHighPriority = true)
  io.out.valid := RegNext(io.in.fire()) && !s2Flushed && !io.flush
  io.in.ready := io.out.fire() || !io.out.valid

  // debug info
  XSDebug(true.B, "[BPUS2]in:(%d %d) pc=%x out:(%d %d) pc=%x\n",
    io.in.valid, io.in.ready, io.in.bits.pc, io.out.valid, io.out.ready, io.out.bits.pc)
  XSDebug(io.flush, "[BPUS2]flush!!!\n")
}
226
// BPU pipeline stage 3: combines the table results forwarded from
// Stage1/Stage2 with the icache predecode information to locate the first
// taken branch/jal/call/jalr/ret in the fetch line, speculatively maintains
// the return address stack (RAS), recomputes the per-instruction global
// history, and flushes the earlier stages (flushBPU) whenever its decision
// disagrees with Stage1's quick prediction.
class BPUStage3 extends XSModule {
  val io = IO(new Bundle() {
    val flush = Input(Bool())
    val in = Flipped(Decoupled(new Stage2To3IO))
    val out = ValidIO(new BranchPrediction)
    // from icache
    val predecode = Flipped(ValidIO(new Predecode))
    // from backend
    val redirectInfo = Input(new RedirectInfo)
    // to Stage1 and Stage2
    val flushBPU = Output(Bool())
    // to Stage1, restore ghr in stage1 when flushBPU is valid
    val s1RollBackHist = Output(UInt(HistoryLength.W))
    val s3Taken = Output(Bool())
  })

  // Hold the flush indication until the next input handshake so an
  // already-latched bundle is invalidated.
  val flushS3 = BoolStopWatch(io.flush, io.in.fire(), startHighPriority = true)
  // Latch the Stage2 payload; the stage produces its output once the
  // matching predecode information arrives from the icache.
  val inLatch = RegInit(0.U.asTypeOf(io.in.bits))
  val validLatch = RegInit(false.B)
  when (io.in.fire()) { inLatch := io.in.bits }
  when (io.in.fire()) {
    validLatch := !io.flush
  }.elsewhen (io.out.valid) {
    validLatch := false.B
  }
  io.out.valid := validLatch && io.predecode.valid && !flushS3
  io.in.ready := !validLatch || io.out.valid

  // RAS
  // TODO: split retAddr and ctr
  def rasEntry() = new Bundle {
    val retAddr = UInt(VAddrBits.W)
    val ctr = UInt(8.W) // layer of nested call functions
  }
  val ras = RegInit(VecInit(Seq.fill(RasSize)(0.U.asTypeOf(rasEntry()))))
  val sp = Counter(RasSize)
  val rasTop = ras(sp.value)
  val rasTopAddr = rasTop.retAddr

  // get the first taken branch/jal/call/jalr/ret in a fetch line
  // brTakenIdx/jalIdx/callIdx/jalrIdx/retIdx/jmpIdx is one-hot encoded
  // (all-zero when the corresponding kind is absent).
  // brNotTakenIdx indicates all the not-taken branches before the first jump instruction.
  val brIdx = inLatch.btb.hits & Reverse(Cat(io.predecode.bits.fuOpTypes.map { t => ALUOpType.isBranch(t) }).asUInt) & io.predecode.bits.mask
  val brTakenIdx = LowestBit(brIdx & inLatch.tage.takens.asUInt, FetchWidth)
  val jalIdx = LowestBit(inLatch.btb.hits & Reverse(Cat(io.predecode.bits.fuOpTypes.map { t => t === ALUOpType.jal }).asUInt) & io.predecode.bits.mask, FetchWidth)
  val callIdx = LowestBit(inLatch.btb.hits & io.predecode.bits.mask & Reverse(Cat(io.predecode.bits.fuOpTypes.map { t => t === ALUOpType.call }).asUInt), FetchWidth)
  val jalrIdx = LowestBit(inLatch.jbtac.hitIdx & io.predecode.bits.mask & Reverse(Cat(io.predecode.bits.fuOpTypes.map { t => t === ALUOpType.jalr }).asUInt), FetchWidth)
  val retIdx = LowestBit(io.predecode.bits.mask & Reverse(Cat(io.predecode.bits.fuOpTypes.map { t => t === ALUOpType.ret }).asUInt), FetchWidth)

  val jmpIdx = LowestBit(brTakenIdx | jalIdx | callIdx | jalrIdx | retIdx, FetchWidth)
  val brNotTakenIdx = brIdx & ~inLatch.tage.takens.asUInt & LowerMask(jmpIdx, FetchWidth) & io.predecode.bits.mask

  // Redirect target selection.
  // BUGFIX: test jmpIdx === 0.U first. All one-hot indices are zero when
  // their kind is absent, so the old ordering made a line with no jump at
  // all match `jmpIdx === retIdx` (0 === 0) and wrongly pick the RAS top
  // instead of the fall-through address.
  io.out.bits.target := Mux(jmpIdx === 0.U, inLatch.pc + 32.U, // TODO: RVC
    Mux(jmpIdx === retIdx, rasTopAddr,
    Mux(jmpIdx === jalrIdx, inLatch.jbtac.target,
    PriorityMux(jmpIdx, inLatch.btb.targets))))
  // Instructions after the first taken jump are squashed; with no jump the
  // whole line stays valid.
  io.out.bits.instrValid := Mux(jmpIdx.orR, LowerMask(jmpIdx, FetchWidth), Fill(FetchWidth, 1.U(1.W))).asTypeOf(Vec(FetchWidth, Bool()))
  // io.out.bits.btbVictimWay := inLatch.btbPred.bits.btbVictimWay
  io.out.bits.predCtr := inLatch.btbPred.bits.predCtr
  io.out.bits.btbHitWay := inLatch.btbPred.bits.btbHitWay
  io.out.bits.tageMeta := inLatch.btbPred.bits.tageMeta
  //io.out.bits._type := Mux(jmpIdx === retIdx, BTBtype.R,
  //  Mux(jmpIdx === jalrIdx, BTBtype.I,
  //  Mux(jmpIdx === brTakenIdx, BTBtype.B, BTBtype.J)))
  val firstHist = inLatch.btbPred.bits.hist(0)
  // there may be several notTaken branches before the first jump instruction,
  // so we need to calculate how many zeroes should each instruction shift in its global history.
  // each history is exclusive of instruction's own jump direction.
  val histShift = Wire(Vec(FetchWidth, UInt(log2Up(FetchWidth).W)))
  val shift = Wire(Vec(FetchWidth, Vec(FetchWidth, UInt(1.W))))
  // shift(i)(j) is 1 iff slot i is a not-taken branch located before slot j.
  (0 until FetchWidth).map(i => shift(i) := Mux(!brNotTakenIdx(i), 0.U, ~LowerMask(UIntToOH(i.U), FetchWidth)).asTypeOf(Vec(FetchWidth, UInt(1.W))))
  for (j <- 0 until FetchWidth) {
    var tmp = 0.U
    for (i <- 0 until FetchWidth) {
      tmp = tmp + shift(i)(j)
    }
    histShift(j) := tmp
  }
  (0 until FetchWidth).map(i => io.out.bits.hist(i) := firstHist << histShift(i))
  // save ras checkpoint info
  io.out.bits.rasSp := sp.value
  io.out.bits.rasTopCtr := rasTop.ctr

  // flush BPU and redirect when the taken/not-taken decision or the target
  // differs from what Stage1 predicted.
  // (Single driver of io.out.bits.redirect; an earlier duplicate assignment
  // was dead under Chisel's last-connect semantics and has been removed.)
  io.out.bits.redirect := inLatch.btbPred.bits.redirect ^ jmpIdx.orR.asBool ||
    inLatch.btbPred.bits.redirect && jmpIdx.orR.asBool && io.out.bits.target =/= inLatch.btbPred.bits.target
  io.flushBPU := io.out.bits.redirect && io.out.valid

  // speculative update RAS
  val rasWrite = WireInit(0.U.asTypeOf(rasEntry()))
  // Return address = address of the call instruction + 4.
  // BUGFIX: parenthesize the shift. Scala gives `<<` lower precedence than
  // `+`, so the original `inLatch.pc + OHToUInt(callIdx) << 2.U + 4.U`
  // elaborated as `(pc + idx) << 6`.
  rasWrite.retAddr := inLatch.pc + (OHToUInt(callIdx) << 2.U) + 4.U
  // Fold recursive calls to the same site into one entry with a counter.
  val allocNewEntry = rasWrite.retAddr =/= rasTopAddr
  rasWrite.ctr := Mux(allocNewEntry, 1.U, rasTop.ctr + 1.U)
  // BUGFIX: require jmpIdx.orR — with no jump in the line every one-hot is
  // zero, so `jmpIdx === callIdx` (0 === 0) used to push a bogus RAS entry
  // on every jump-free fetch line.
  when (io.out.valid && jmpIdx.orR) {
    when (jmpIdx === callIdx) {
      ras(Mux(allocNewEntry, sp.value + 1.U, sp.value)) := rasWrite
      when (allocNewEntry) { sp.value := sp.value + 1.U }
    }.elsewhen (jmpIdx === retIdx) {
      // Pop only when the nesting counter drains to one; otherwise just
      // decrement the counter of the top entry.
      when (rasTop.ctr === 1.U) {
        sp.value := Mux(sp.value === 0.U, 0.U, sp.value - 1.U)
      }.otherwise {
        ras(sp.value) := Cat(rasTop.ctr - 1.U, rasTopAddr).asTypeOf(rasEntry())
      }
    }
  }
  // use checkpoint to recover RAS on a backend misprediction
  val recoverSp = io.redirectInfo.redirect.rasSp
  val recoverCtr = io.redirectInfo.redirect.rasTopCtr
  when (io.redirectInfo.valid && io.redirectInfo.misPred) {
    sp.value := recoverSp
    ras(recoverSp) := Cat(recoverCtr, ras(recoverSp).retAddr).asTypeOf(rasEntry())
  }

  // roll back global history in S1 if S3 redirects
  io.s1RollBackHist := Mux(io.s3Taken, PriorityMux(jmpIdx, io.out.bits.hist), io.out.bits.hist(0) << PopCount(brIdx & ~inLatch.tage.takens.asUInt))
  // whether Stage3 has a taken jump
  io.s3Taken := jmpIdx.orR.asBool

  // debug info
  XSDebug(io.in.fire(), "[BPUS3]in:(%d %d) pc=%x\n", io.in.valid, io.in.ready, io.in.bits.pc)
  XSDebug(io.out.valid, "[BPUS3]out:%d pc=%x redirect=%d predcdMask=%b instrValid=%b tgt=%x\n",
    io.out.valid, inLatch.pc, io.out.bits.redirect, io.predecode.bits.mask, io.out.bits.instrValid.asUInt, io.out.bits.target)
  XSDebug(true.B, "[BPUS3]flushS3=%d\n", flushS3)
  XSDebug(true.B, "[BPUS3]validLatch=%d predecode.valid=%d\n", validLatch, io.predecode.valid)
  // one-hot indices are all printed in binary (%b) for consistency
  XSDebug(true.B, "[BPUS3]brIdx=%b brTakenIdx=%b brNTakenIdx=%b jalIdx=%b jalrIdx=%b callIdx=%b retIdx=%b\n",
    brIdx, brTakenIdx, brNotTakenIdx, jalIdx, jalrIdx, callIdx, retIdx)

  // BPU's TEMP Perf Cnt
  BoringUtils.addSource(io.out.valid, "MbpS3Cnt")
  BoringUtils.addSource(io.out.valid && io.out.bits.redirect, "MbpS3TageRed")
  BoringUtils.addSource(io.out.valid && (inLatch.btbPred.bits.redirect ^ jmpIdx.orR.asBool), "MbpS3TageRedDir")
  BoringUtils.addSource(io.out.valid && (inLatch.btbPred.bits.redirect
              && jmpIdx.orR.asBool && (io.out.bits.target =/= inLatch.btbPred.bits.target)), "MbpS3TageRedTar")
}
362
// Top-level branch prediction unit: chains the three pipeline stages, fans
// out flush/redirect signals, and hosts temporary BoringUtils-based
// performance counters.
class BPU extends XSModule {
  val io = IO(new Bundle() {
    // from backend
    // flush pipeline if misPred and update bpu based on redirect signals from brq
    val redirectInfo = Input(new RedirectInfo)

    val in = new Bundle { val pc = Flipped(Valid(UInt(VAddrBits.W))) }

    // quick Stage1 prediction (BTB/JBTAC-based)
    val btbOut = ValidIO(new BranchPrediction)
    // refined Stage3 prediction (predecode/TAGE-checked)
    val tageOut = ValidIO(new BranchPrediction)

    // predecode info from icache
    // TODO: simplify this after implement predecode unit
    val predecode = Flipped(ValidIO(new Predecode))
  })

  val s1 = Module(new BPUStage1)
  val s2 = Module(new BPUStage2)
  val s3 = Module(new BPUStage3)

  // Stage1/Stage2 are flushed by either a Stage3 disagreement (flushBPU) or
  // a backend redirect; Stage3 itself is flushed only by the backend.
  s1.io.redirectInfo <> io.redirectInfo
  s1.io.flush := s3.io.flushBPU || io.redirectInfo.flush()
  // io.in.pc is a Valid; Stage1's Decoupled ready is ignored here.
  s1.io.in.pc.valid := io.in.pc.valid
  s1.io.in.pc.bits <> io.in.pc.bits
  io.btbOut <> s1.io.s1OutPred
  s1.io.s3RollBackHist := s3.io.s1RollBackHist
  s1.io.s3Taken := s3.io.s3Taken

  s1.io.out <> s2.io.in
  s2.io.flush := s3.io.flushBPU || io.redirectInfo.flush()

  s2.io.out <> s3.io.in
  s3.io.flush := io.redirectInfo.flush()
  s3.io.predecode <> io.predecode
  io.tageOut <> s3.io.out
  s3.io.redirectInfo <> io.redirectInfo

  // TODO: temp and ugly code, when perf counters is added( may after adding CSR), please mv the below counter
  // (counter name, padding so the printout below lines up)
  val bpuPerfCntList = List(
    ("MbpInstr","         "),
    ("MbpRight","         "),
    ("MbpWrong","         "),
    ("MbpBRight","        "),
    ("MbpBWrong","        "),
    ("MbpJRight","        "),
    ("MbpJWrong","        "),
    ("MbpIRight","        "),
    ("MbpIWrong","        "),
    ("MbpRRight","        "),
    ("MbpRWrong","        "),
    ("MbpS3Cnt","         "),
    ("MbpS3TageRed","     "),
    ("MbpS3TageRedDir","  "),
    ("MbpS3TageRedTar","  ")
  )

  // One XLEN-wide counter per entry; each increments while its remotely
  // driven condition wire is high.
  val bpuPerfCnts = List.fill(bpuPerfCntList.length)(RegInit(0.U(XLEN.W)))
  val bpuPerfCntConds = List.fill(bpuPerfCntList.length)(WireInit(false.B))
  (bpuPerfCnts zip bpuPerfCntConds) map { case (cnt, cond) => { when (cond) { cnt := cnt + 1.U }}}

  // Condition wires are driven elsewhere via BoringUtils.addSource with the
  // matching name.
  for(i <- bpuPerfCntList.indices) {
    BoringUtils.addSink(bpuPerfCntConds(i), bpuPerfCntList(i)._1)
  }

  val xsTrap = WireInit(false.B)
  BoringUtils.addSink(xsTrap, "XSTRAP_BPU")

  // Dump all counters when the simulation traps.
  // if (!p.FPGAPlatform) {
    when (xsTrap) {
      printf("=================BPU's PerfCnt================\n")
      for(i <- bpuPerfCntList.indices) {
        printf(bpuPerfCntList(i)._1 + bpuPerfCntList(i)._2 + " <- " + "%d\n", bpuPerfCnts(i))
      }
    }
  // }
}
438}