xref: /XiangShan/src/main/scala/xiangshan/frontend/BPU.scala (revision d082eb2488866360d17d3cb1f0636c7a9b841d83)
1package xiangshan.frontend
2
3import chisel3._
4import chisel3.util._
5import xiangshan._
6import xiangshan.utils._
7import xiangshan.backend.ALUOpType
8import utils._
9import chisel3.util.experimental.BoringUtils
10import xiangshan.backend.decode.XSTrap
11
// Decomposes a virtual fetch address into { tag | idx | offset } for a
// banked prediction table with 2^idxBits entries.
class TableAddr(val idxBits: Int, val banks: Int) extends XSBundle {
  // Tag takes whatever is left above the index; the low 2 bits are the
  // byte offset within a 4-byte instruction.
  def tagBits = VAddrBits - idxBits - 2

  // NOTE: field declaration order defines the bit layout that asTypeOf
  // relies on below: [ tag | idx | offset ] from MSB to LSB.
  val tag = UInt(tagBits.W)
  val idx = UInt(idxBits.W)
  val offset = UInt(2.W)

  // Reinterpret a raw PC (zero-extended/truncated to VAddrBits) as this layout.
  def fromUInt(x: UInt) = x.asTypeOf(UInt(VAddrBits.W)).asTypeOf(this)
  def getTag(x: UInt) = fromUInt(x).tag
  def getIdx(x: UInt) = fromUInt(x).idx
  // Low log2(banks) bits of idx select the bank ...
  def getBank(x: UInt) = getIdx(x)(log2Up(banks) - 1, 0)
  // ... and the remaining idx bits address the row within that bank.
  def getBankIdx(x: UInt) = getIdx(x)(idxBits - 1, log2Up(banks))
}
25
// Payload passed from BPU Stage1 to Stage2: the latched fetch PC, the raw
// lookup results of each Stage1 predictor, per-instruction speculative
// global history, and the quick Stage1 prediction for later verification.
class Stage1To2IO extends XSBundle {
  val pc = Output(UInt(VAddrBits.W))
  // BTB lookup result: per-slot hit bits and predicted targets
  val btb = new Bundle {
    val hits = Output(UInt(FetchWidth.W))
    val targets = Output(Vec(FetchWidth, UInt(VAddrBits.W)))
  }
  // JBTAC (indirect-jump target cache): one-hot hit position and its target
  val jbtac = new Bundle {
    val hitIdx = Output(UInt(FetchWidth.W))
    val target = Output(UInt(VAddrBits.W))
  }
  // TAGE direction prediction per fetch slot
  val tage = new Bundle {
    val hits = Output(UInt(FetchWidth.W))
    val takens = Output(Vec(FetchWidth, Bool()))
  }
  // speculative global history for each instruction in the fetch packet
  val hist = Output(Vec(FetchWidth, UInt(HistoryLength.W)))
  // Stage1's quick prediction (re-checked in Stage3)
  val btbPred = ValidIO(new BranchPrediction)
}
43
// BPU pipeline stage 1: looks up TAGE, BTB and JBTAC with the incoming fetch
// PC, maintains the speculative global history register (GHR), and produces
// a quick prediction (s1OutPred) for the IFU while forwarding the raw
// predictor outputs to Stage2 for verification in Stage3.
class BPUStage1 extends XSModule {
  val io = IO(new Bundle() {
    val in = new Bundle { val pc = Flipped(Decoupled(UInt(VAddrBits.W))) }
    // from backend
    val redirectInfo = Input(new RedirectInfo)
    // from Stage3
    val flush = Input(Bool())
    val s3RollBackHist = Input(UInt(HistoryLength.W))
    val s3Taken = Input(Bool())
    // to ifu, quick prediction result
    val s1OutPred = ValidIO(new BranchPrediction)
    // to Stage2
    val out = Decoupled(new Stage1To2IO)
  })

  // Stage1 can always accept a new fetch PC.
  io.in.pc.ready := true.B

  // flush Stage1 when io.flush
  // NOTE(review): flushS1 is currently unused -- io.out.valid below uses
  // io.flush directly (see the commented-out alternative); confirm intended.
  val flushS1 = BoolStopWatch(io.flush, io.in.pc.fire(), startHighPriority = true)

  // global history register
  val ghr = RegInit(0.U(HistoryLength.W))
  // modify updateGhr and newGhr when updating ghr
  val updateGhr = WireInit(false.B)
  val newGhr = WireInit(0.U(HistoryLength.W))
  when (updateGhr) { ghr := newGhr }
  // use hist as global history!!!
  // (bypass mux: this cycle's update is visible immediately, not one cycle late)
  val hist = Mux(updateGhr, newGhr, ghr)

  // Tage predictor
  // val tage = Module(new FakeTAGE)
  val tage = if(EnableBPD) Module(new Tage) else Module(new FakeTAGE)
  tage.io.req.valid := io.in.pc.fire()
  tage.io.req.bits.pc := io.in.pc.bits
  tage.io.req.bits.hist := hist
  tage.io.redirectInfo <> io.redirectInfo
  io.out.bits.tage <> tage.io.out
  io.s1OutPred.bits.tageMeta := tage.io.meta

  // latch pc for 1 cycle latency when reading SRAM
  val pcLatch = RegEnable(io.in.pc.bits, io.in.pc.fire())

  val r = io.redirectInfo.redirect
  // recover the start PC of the fetch packet that held the redirecting instr
  // (fetchIdx is its slot index, 4 bytes per slot)
  val updateFetchpc = r.pc - (r.fetchIdx << 2.U)
  // BTB
  val btb = Module(new BTB)
  btb.io.in.pc <> io.in.pc
  btb.io.in.pcLatch := pcLatch
  btb.io.redirectValid := io.redirectInfo.valid
  btb.io.flush := io.flush

  // train the BTB with the backend's redirect/commit information
  btb.io.update.fetchPC := updateFetchpc
  btb.io.update.fetchIdx := r.fetchIdx
  btb.io.update.hit := r.btbHitWay
  btb.io.update.misPred := io.redirectInfo.misPred
  btb.io.update.writeWay := r.btbVictimWay
  btb.io.update.oldCtr := r.btbPredCtr
  btb.io.update.taken := r.taken
  btb.io.update.target := r.brTarget
  btb.io.update._type := r._type

  val btbHit = btb.io.out.hit
  val btbTaken = btb.io.out.taken
  val btbTakenIdx = btb.io.out.takenIdx
  val btbTakenTarget = btb.io.out.target
  val btbWriteWay = btb.io.out.writeWay
  val btbNotTakens = btb.io.out.notTakens
  val btbCtrs = VecInit(btb.io.out.dEntries.map(_.pred))
  val btbValids = VecInit(btb.io.out.dEntries.map(_.valid))
  val btbTargets = VecInit(btb.io.out.dEntries.map(_.target))
  val btbTypes = VecInit(btb.io.out.dEntries.map(_._type))


  // JBTAC predicts indirect-jump targets, indexed with pc and history
  val jbtac = Module(new JBTAC)
  jbtac.io.in.pc <> io.in.pc
  jbtac.io.in.pcLatch := pcLatch
  jbtac.io.in.hist := hist
  jbtac.io.redirectValid := io.redirectInfo.valid
  jbtac.io.flush := io.flush

  jbtac.io.update.fetchPC := updateFetchpc
  jbtac.io.update.fetchIdx := r.fetchIdx
  jbtac.io.update.misPred := io.redirectInfo.misPred
  jbtac.io.update._type := r._type
  jbtac.io.update.target := r.target
  jbtac.io.update.hist := r.hist

  val jbtacHit = jbtac.io.out.hit
  val jbtacTarget = jbtac.io.out.target
  val jbtacHitIdx = jbtac.io.out.hitIdx

  // calculate global history of each instr
  // firstHist: the history at the start of this fetch packet (last cycle's hist)
  val firstHist = RegNext(hist)
  // histShift(j) counts the BTB not-taken branches in slots before j;
  // each of them shifts one 0 into slot j's history
  val histShift = Wire(Vec(FetchWidth, UInt(log2Up(FetchWidth).W)))
  val shift = Wire(Vec(FetchWidth, Vec(FetchWidth, UInt(1.W))))
  // shift(i)(j) == 1 iff slot i is a not-taken branch and j > i
  (0 until FetchWidth).map(i => shift(i) := Mux(!btbNotTakens(i), 0.U, ~LowerMask(UIntToOH(i.U), FetchWidth)).asTypeOf(Vec(FetchWidth, UInt(1.W))))
  for (j <- 0 until FetchWidth) {
    var tmp = 0.U
    for (i <- 0 until FetchWidth) {
      tmp = tmp + shift(i)(j)
    }
    histShift(j) := tmp
  }
  (0 until FetchWidth).map(i => io.s1OutPred.bits.hist(i) := firstHist << histShift(i))

  // update ghr
  updateGhr := io.s1OutPred.bits.redirect || io.flush
  // one-hot position of the first predicted-taken conditional branch / indirect jump
  val brJumpIdx = Mux(!(btbHit && btbTaken), 0.U, UIntToOH(btbTakenIdx))
  val indirectIdx = Mux(!jbtacHit, 0.U, UIntToOH(jbtacHitIdx))
  //val newTaken = Mux(io.redirectInfo.flush(), !(r._type === BTBtype.B && !r.taken), )
  // New GHR, by priority:
  //   1. backend flush: rebuild from the redirecting instr's checkpointed
  //      history, shifting in 1 unless it was a not-taken conditional branch;
  //   2. Stage3 flush: roll back to the S3-provided history (append 1 if taken);
  //   3. S1 redirect: take the first jumping slot's history and append a 1;
  //   4. otherwise: append a 0 per not-taken branch seen by the BTB.
  newGhr := Mux(io.redirectInfo.flush(),    (r.hist << 1.U) | !(r._type === BTBtype.B && !r.taken),
            Mux(io.flush,                   Mux(io.s3Taken, io.s3RollBackHist << 1.U | 1.U, io.s3RollBackHist),
            Mux(io.s1OutPred.bits.redirect, PriorityMux(brJumpIdx | indirectIdx, io.s1OutPred.bits.hist) << 1.U | 1.U,
                                            io.s1OutPred.bits.hist(0) << PopCount(btbNotTakens))))

  // redirect based on BTB and JBTAC
  // io.out.valid := RegNext(io.in.pc.fire()) && !flushS1
  io.out.valid := RegNext(io.in.pc.fire()) && !io.flush

  io.s1OutPred.valid := io.out.valid
  // quick redirect when either a taken branch hit in BTB or a hit in JBTAC
  io.s1OutPred.bits.redirect := btbHit && btbTaken || jbtacHit
  // io.s1OutPred.bits.instrValid := LowerMask(UIntToOH(btbTakenIdx), FetchWidth) & LowerMask(UIntToOH(jbtacHitIdx), FetchWidth)
  // instructions up to (and including) the first jump are valid; all valid otherwise
  io.s1OutPred.bits.instrValid := Mux(io.s1OutPred.bits.redirect, LowerMask(LowestBit(brJumpIdx | indirectIdx, FetchWidth), FetchWidth), Fill(FetchWidth, 1.U(1.W))).asTypeOf(Vec(FetchWidth, Bool()))
  // pick BTB target if the branch comes first, else the JBTAC target
  io.s1OutPred.bits.target := Mux(brJumpIdx === LowestBit(brJumpIdx | indirectIdx, FetchWidth), btbTakenTarget, jbtacTarget)
  io.s1OutPred.bits.btbVictimWay := btbWriteWay
  io.s1OutPred.bits.predCtr := btbCtrs
  io.s1OutPred.bits.btbHitWay := btbHit
  // RAS checkpoint fields are filled in by Stage3
  io.s1OutPred.bits.rasSp := DontCare
  io.s1OutPred.bits.rasTopCtr := DontCare

  io.out.bits.pc := pcLatch
  io.out.bits.btb.hits := btbValids.asUInt
  (0 until FetchWidth).map(i => io.out.bits.btb.targets(i) := btbTargets(i))
  io.out.bits.jbtac.hitIdx := UIntToOH(jbtacHitIdx)
  io.out.bits.jbtac.target := jbtacTarget
  // TODO: we don't need this repeatedly!
  io.out.bits.hist := io.s1OutPred.bits.hist
  io.out.bits.btbPred := io.s1OutPred



  // debug info
  XSDebug(true.B, "[BPUS1]in:(%d %d)   pc=%x ghr=%b\n", io.in.pc.valid, io.in.pc.ready, io.in.pc.bits, hist)
  XSDebug(true.B, "[BPUS1]outPred:(%d) redirect=%d instrValid=%b tgt=%x\n",
    io.s1OutPred.valid, io.s1OutPred.bits.redirect, io.s1OutPred.bits.instrValid.asUInt, io.s1OutPred.bits.target)
  XSDebug(io.flush && io.redirectInfo.flush(),
    "[BPUS1]flush from backend: pc=%x tgt=%x brTgt=%x _type=%b taken=%d oldHist=%b fetchIdx=%d isExcpt=%d\n",
    r.pc, r.target, r.brTarget, r._type, r.taken, r.hist, r.fetchIdx, r.isException)
  XSDebug(io.flush && !io.redirectInfo.flush(),
    "[BPUS1]flush from Stage3:  s3Taken=%d s3RollBackHist=%b\n", io.s3Taken, io.s3RollBackHist)

}
196
// Stage2 currently adds nothing to the Stage1 payload; a distinct type is
// kept so the Stage2->Stage3 interface can evolve independently.
class Stage2To3IO extends Stage1To2IO {
}
199
// BPU pipeline stage 2: a single-entry pipeline buffer between Stage1 and
// Stage3. It performs no prediction work of its own -- it simply holds the
// Stage1 payload for one handshake and forwards it unchanged.
class BPUStage2 extends XSModule {
  val io = IO(new Bundle() {
    // flush from Stage3
    val flush = Input(Bool())
    val in = Flipped(Decoupled(new Stage1To2IO))
    val out = Decoupled(new Stage2To3IO)
  })

  // Remember a flush that hits while an entry is buffered, so the stale
  // entry stays suppressed even after io.flush deasserts.
  val s2Flushed = BoolStopWatch(io.flush, io.in.fire(), startHighPriority = true)

  // One-entry buffer: payload register plus its valid bit.
  val bufferedIn = RegInit(0.U.asTypeOf(io.in.bits))
  val bufferedValid = RegInit(false.B)

  when (io.in.fire()) { bufferedIn := io.in.bits }

  // Valid-bit update, highest priority first:
  // flush clears, an incoming beat sets, an outgoing beat clears.
  bufferedValid := Mux(io.flush, false.B,
                   Mux(io.in.fire(), true.B,
                   Mux(io.out.fire(), false.B, bufferedValid)))

  io.out.valid := bufferedValid && !io.flush && !s2Flushed
  // ready to accept when empty, or when the current entry leaves this cycle
  io.in.ready := io.out.fire() || !bufferedValid

  // pass the payload through untouched
  io.out.bits := bufferedIn

  // debug info
  XSDebug(true.B, "[BPUS2]in:(%d %d) pc=%x out:(%d %d) pc=%x\n",
    io.in.valid, io.in.ready, io.in.bits.pc, io.out.valid, io.out.ready, io.out.bits.pc)
  XSDebug(true.B, "[BPUS2]validLatch=%d pc=%x\n", bufferedValid, bufferedIn.pc)
  XSDebug(io.flush, "[BPUS2]flush!!!\n")
}
233
// BPU pipeline stage 3: combines the Stage1/Stage2 predictor lookups with
// predecode information from the icache to produce the final (slower but
// more accurate) prediction, maintains the return address stack (RAS), and
// flushes the earlier stages when its result disagrees with Stage1.
class BPUStage3 extends XSModule {
  val io = IO(new Bundle() {
    val flush = Input(Bool())
    val in = Flipped(Decoupled(new Stage2To3IO))
    val out = ValidIO(new BranchPrediction)
    // from icache
    val predecode = Flipped(ValidIO(new Predecode))
    // from backend
    val redirectInfo = Input(new RedirectInfo)
    // to Stage1 and Stage2
    val flushBPU = Output(Bool())
    // to Stage1, restore ghr in stage1 when flushBPU is valid
    val s1RollBackHist = Output(UInt(HistoryLength.W))
    val s3Taken = Output(Bool())
  })

  // Input latch and valid bookkeeping (one-entry buffer, same scheme as Stage2).
  val flushS3 = BoolStopWatch(io.flush, io.in.fire(), startHighPriority = true)
  val inLatch = RegInit(0.U.asTypeOf(io.in.bits))
  val validLatch = RegInit(false.B)
  when (io.in.fire()) { inLatch := io.in.bits }
  when (io.flush) {
    validLatch := false.B
  }.elsewhen (io.in.fire()) {
    validLatch := true.B
  }.elsewhen (io.out.valid) {
    validLatch := false.B
  }
  // S3's result is only valid once the predecode info has arrived.
  io.out.valid := validLatch && io.predecode.valid && !flushS3 && !io.flush
  io.in.ready := !validLatch || io.out.valid

  // RAS (return address stack)
  // TODO: split retAddr and ctr
  def rasEntry() = new Bundle {
    val retAddr = UInt(VAddrBits.W)
    val ctr = UInt(8.W) // layer of nested call functions
  }
  val ras = RegInit(VecInit(Seq.fill(RasSize)(0.U.asTypeOf(rasEntry()))))
  val sp = Counter(RasSize)
  val rasTop = ras(sp.value)
  val rasTopAddr = rasTop.retAddr

  // get the first taken branch/jal/call/jalr/ret in a fetch line
  // brTakenIdx/jalIdx/callIdx/jalrIdx/retIdx/jmpIdx is one-hot encoded.
  // brNotTakenIdx indicates all the not-taken branches before the first jump instruction.
  val brIdx = inLatch.btb.hits & Reverse(Cat(io.predecode.bits.fuOpTypes.map { t => ALUOpType.isBranch(t) }).asUInt) & io.predecode.bits.mask
  val brTakenIdx = LowestBit(brIdx & inLatch.tage.takens.asUInt, FetchWidth)
  val jalIdx = LowestBit(inLatch.btb.hits & Reverse(Cat(io.predecode.bits.fuOpTypes.map { t => t === ALUOpType.jal }).asUInt) & io.predecode.bits.mask, FetchWidth)
  val callIdx = LowestBit(inLatch.btb.hits & io.predecode.bits.mask & Reverse(Cat(io.predecode.bits.fuOpTypes.map { t => t === ALUOpType.call }).asUInt), FetchWidth)
  val jalrIdx = LowestBit(inLatch.jbtac.hitIdx & io.predecode.bits.mask & Reverse(Cat(io.predecode.bits.fuOpTypes.map { t => t === ALUOpType.jalr }).asUInt), FetchWidth)
  val retIdx = LowestBit(io.predecode.bits.mask & Reverse(Cat(io.predecode.bits.fuOpTypes.map { t => t === ALUOpType.ret }).asUInt), FetchWidth)

  // first control-flow-changing instruction in the fetch packet (one-hot, 0 if none)
  val jmpIdx = LowestBit(brTakenIdx | jalIdx | callIdx | jalrIdx | retIdx, FetchWidth)
  val brNotTakenIdx = brIdx & ~inLatch.tage.takens.asUInt & LowerMask(jmpIdx, FetchWidth) & io.predecode.bits.mask

  // Final target selection: RAS top for rets, JBTAC for indirect jumps,
  // sequential next fetch line when nothing is taken, BTB target otherwise.
  io.out.bits.target := Mux(jmpIdx === retIdx, rasTopAddr,
    Mux(jmpIdx === jalrIdx, inLatch.jbtac.target,
    Mux(jmpIdx === 0.U, inLatch.pc + 32.U, // TODO: RVC
    PriorityMux(jmpIdx, inLatch.btb.targets))))
  // instructions up to (and including) the first jump are valid
  io.out.bits.instrValid := Mux(jmpIdx.orR, LowerMask(jmpIdx, FetchWidth), Fill(FetchWidth, 1.U(1.W))).asTypeOf(Vec(FetchWidth, Bool()))
  // pass through the checkpoint info captured in Stage1
  io.out.bits.btbVictimWay := inLatch.btbPred.bits.btbVictimWay
  io.out.bits.predCtr := inLatch.btbPred.bits.predCtr
  io.out.bits.btbHitWay := inLatch.btbPred.bits.btbHitWay
  io.out.bits.tageMeta := inLatch.btbPred.bits.tageMeta
  //io.out.bits._type := Mux(jmpIdx === retIdx, BTBtype.R,
  //  Mux(jmpIdx === jalrIdx, BTBtype.I,
  //  Mux(jmpIdx === brTakenIdx, BTBtype.B, BTBtype.J)))
  val firstHist = inLatch.btbPred.bits.hist(0)
  // there may be several notTaken branches before the first jump instruction,
  // so we need to calculate how many zeroes should each instruction shift in its global history.
  // each history is exclusive of instruction's own jump direction.
  val histShift = Wire(Vec(FetchWidth, UInt(log2Up(FetchWidth).W)))
  val shift = Wire(Vec(FetchWidth, Vec(FetchWidth, UInt(1.W))))
  // shift(i)(j) == 1 iff slot i is a not-taken branch and j > i
  (0 until FetchWidth).map(i => shift(i) := Mux(!brNotTakenIdx(i), 0.U, ~LowerMask(UIntToOH(i.U), FetchWidth)).asTypeOf(Vec(FetchWidth, UInt(1.W))))
  for (j <- 0 until FetchWidth) {
    var tmp = 0.U
    for (i <- 0 until FetchWidth) {
      tmp = tmp + shift(i)(j)
    }
    histShift(j) := tmp
  }
  (0 until FetchWidth).map(i => io.out.bits.hist(i) := firstHist << histShift(i))
  // save ras checkpoint info
  io.out.bits.rasSp := sp.value
  io.out.bits.rasTopCtr := rasTop.ctr

  // flush BPU and redirect when target differs from the target predicted in Stage1
  // (sole driver of io.out.bits.redirect; a redundant earlier assignment of
  // jmpIdx.orR.asBool, dead under Chisel last-connect semantics, was removed)
  io.out.bits.redirect := inLatch.btbPred.bits.redirect ^ jmpIdx.orR.asBool ||
    inLatch.btbPred.bits.redirect && jmpIdx.orR.asBool && io.out.bits.target =/= inLatch.btbPred.bits.target
  io.flushBPU := io.out.bits.redirect && io.out.valid

  // speculative update RAS
  val rasWrite = WireInit(0.U.asTypeOf(rasEntry()))
  // Return address = fetch PC + (call slot index)*4 + 4, i.e. the instruction
  // after the call.
  // BUGFIX: the original `inLatch.pc + OHToUInt(callIdx) << 2.U + 4.U` parsed
  // as `(pc + OHToUInt(callIdx)) << (2 + 4)` because `+` binds tighter than
  // `<<` in Scala, producing a bogus return address; the shift must be
  // parenthesized.
  rasWrite.retAddr := inLatch.pc + (OHToUInt(callIdx) << 2.U) + 4.U
  val allocNewEntry = rasWrite.retAddr =/= rasTopAddr
  // repeated call to the same return address bumps the counter instead of pushing
  rasWrite.ctr := Mux(allocNewEntry, 1.U, rasTop.ctr + 1.U)
  when (io.out.valid) {
    when (jmpIdx === callIdx) {
      ras(Mux(allocNewEntry, sp.value + 1.U, sp.value)) := rasWrite
      when (allocNewEntry) { sp.value := sp.value + 1.U }
    }.elsewhen (jmpIdx === retIdx) {
      // pop only when the nesting counter reaches zero
      when (rasTop.ctr === 1.U) {
        sp.value := Mux(sp.value === 0.U, 0.U, sp.value - 1.U)
      }.otherwise {
        ras(sp.value) := Cat(rasTop.ctr - 1.U, rasTopAddr).asTypeOf(rasEntry())
      }
    }
  }
  // use checkpoint to recover RAS on a backend misprediction
  val recoverSp = io.redirectInfo.redirect.rasSp
  val recoverCtr = io.redirectInfo.redirect.rasTopCtr
  when (io.redirectInfo.valid && io.redirectInfo.misPred) {
    sp.value := recoverSp
    ras(recoverSp) := Cat(recoverCtr, ras(recoverSp).retAddr).asTypeOf(rasEntry())
  }

  // roll back global history in S1 if S3 redirects
  io.s1RollBackHist := Mux(io.s3Taken, PriorityMux(jmpIdx, io.out.bits.hist), io.out.bits.hist(0) << PopCount(brIdx & ~inLatch.tage.takens.asUInt))
  // whether Stage3 has a taken jump
  io.s3Taken := jmpIdx.orR.asBool

  // debug info
  XSDebug(io.in.fire(), "[BPUS3]in:(%d %d) pc=%x\n", io.in.valid, io.in.ready, io.in.bits.pc)
  XSDebug(io.out.valid, "[BPUS3]out:%d pc=%x redirect=%d predcdMask=%b instrValid=%b tgt=%x\n",
    io.out.valid, inLatch.pc, io.out.bits.redirect, io.predecode.bits.mask, io.out.bits.instrValid.asUInt, io.out.bits.target)
  XSDebug(true.B, "[BPUS3]flushS3=%d\n", flushS3)
  XSDebug(true.B, "[BPUS3]validLatch=%d predecode.valid=%d\n", validLatch, io.predecode.valid)
  XSDebug(true.B, "[BPUS3]brIdx=%b brTakenIdx=%b brNTakenIdx=%b jalIdx=%b jalrIdx=%b callIdx=%b retIdx=%b\n",
    brIdx, brTakenIdx, brNotTakenIdx, jalIdx, jalrIdx, callIdx, retIdx)

  // BPU's TEMP Perf Cnt
  BoringUtils.addSource(io.out.valid, "MbpS3Cnt")
  BoringUtils.addSource(io.out.valid && io.out.bits.redirect, "MbpS3TageRed")
  BoringUtils.addSource(io.out.valid && (inLatch.btbPred.bits.redirect ^ jmpIdx.orR.asBool), "MbpS3TageRedDir")
  BoringUtils.addSource(io.out.valid && (inLatch.btbPred.bits.redirect
              && jmpIdx.orR.asBool && (io.out.bits.target =/= inLatch.btbPred.bits.target)), "MbpS3TageRedTar")
}
371
// Top-level branch prediction unit: chains the three BPU pipeline stages,
// feeds backend redirect info into each stage, and exposes the quick Stage1
// prediction (btbOut) and the refined Stage3 prediction (tageOut).
class BPU extends XSModule {
  val io = IO(new Bundle() {
    // from backend
    // flush pipeline if misPred and update bpu based on redirect signals from brq
    val redirectInfo = Input(new RedirectInfo)

    val in = new Bundle { val pc = Flipped(Valid(UInt(VAddrBits.W))) }

    // quick prediction from Stage1
    val btbOut = ValidIO(new BranchPrediction)
    // refined prediction from Stage3
    val tageOut = ValidIO(new BranchPrediction)

    // predecode info from icache
    // TODO: simplify this after implement predecode unit
    val predecode = Flipped(ValidIO(new Predecode))
  })

  val s1 = Module(new BPUStage1)
  val s2 = Module(new BPUStage2)
  val s3 = Module(new BPUStage3)

  // Stage1 is flushed either by a Stage3 disagreement or a backend redirect.
  s1.io.redirectInfo <> io.redirectInfo
  s1.io.flush := s3.io.flushBPU || io.redirectInfo.flush()
  s1.io.in.pc.valid := io.in.pc.valid
  s1.io.in.pc.bits <> io.in.pc.bits
  io.btbOut <> s1.io.s1OutPred
  // Stage3 supplies the rollback history/taken info Stage1 needs on a flush.
  s1.io.s3RollBackHist := s3.io.s1RollBackHist
  s1.io.s3Taken := s3.io.s3Taken

  s1.io.out <> s2.io.in
  s2.io.flush := s3.io.flushBPU || io.redirectInfo.flush()

  s2.io.out <> s3.io.in
  // Stage3 itself is only flushed by the backend, never by its own redirect.
  s3.io.flush := io.redirectInfo.flush()
  s3.io.predecode <> io.predecode
  io.tageOut <> s3.io.out
  s3.io.redirectInfo <> io.redirectInfo

  // TODO: temp and ugly code, when perf counters is added( may after adding CSR), please mv the below counter
  // (name, padding) pairs; padding aligns the printed table below
  val bpuPerfCntList = List(
    ("MbpInstr","         "),
    ("MbpRight","         "),
    ("MbpWrong","         "),
    ("MbpBRight","        "),
    ("MbpBWrong","        "),
    ("MbpJRight","        "),
    ("MbpJWrong","        "),
    ("MbpIRight","        "),
    ("MbpIWrong","        "),
    ("MbpRRight","        "),
    ("MbpRWrong","        "),
    ("MbpS3Cnt","         "),
    ("MbpS3TageRed","     "),
    ("MbpS3TageRedDir","  "),
    ("MbpS3TageRedTar","  ")
  )

  // one counter per event; increment condition arrives via BoringUtils sinks
  val bpuPerfCnts = List.fill(bpuPerfCntList.length)(RegInit(0.U(XLEN.W)))
  val bpuPerfCntConds = List.fill(bpuPerfCntList.length)(WireInit(false.B))
  (bpuPerfCnts zip bpuPerfCntConds) map { case (cnt, cond) => { when (cond) { cnt := cnt + 1.U }}}

  for(i <- bpuPerfCntList.indices) {
    BoringUtils.addSink(bpuPerfCntConds(i), bpuPerfCntList(i)._1)
  }

  // dump the counter table when the simulation trap fires
  val xsTrap = WireInit(false.B)
  BoringUtils.addSink(xsTrap, "XSTRAP_BPU")

  // if (!p.FPGAPlatform) {
    when (xsTrap) {
      printf("=================BPU's PerfCnt================\n")
      for(i <- bpuPerfCntList.indices) {
        printf(bpuPerfCntList(i)._1 + bpuPerfCntList(i)._2 + " <- " + "%d\n", bpuPerfCnts(i))
      }
    }
  // }
}
447}