xref: /XiangShan/src/main/scala/xiangshan/frontend/BPU.scala (revision 395d09f328a4f9fcee58b59af01131f31513bb92)
1package xiangshan.frontend
2
3import chisel3._
4import chisel3.util._
5import xiangshan._
6import xiangshan.utils._
7import xiangshan.backend.ALUOpType
8import utils._
9import chisel3.util.experimental.BoringUtils
10import xiangshan.backend.decode.XSTrap
11
// Virtual-address view used to index the SRAM-backed predictor tables (BTB/JBTAC).
// Splits a VAddrBits-wide PC into { tag | idx | 2-bit offset }; the idx field is
// further divided into a bank number (low bits) and a per-bank index (high bits).
class TableAddr(val idxBits: Int, val banks: Int) extends XSBundle {
  // Tag is whatever remains of the PC above the index and the 2-bit (4-byte) offset.
  def tagBits = VAddrBits - idxBits - 2

  // NOTE: field declaration order defines the bit layout reconstructed by
  // asTypeOf in fromUInt below — do not reorder these fields.
  val tag = UInt(tagBits.W)
  val idx = UInt(idxBits.W)
  val offset = UInt(2.W)

  // Reinterpret a raw PC as this { tag, idx, offset } layout.
  def fromUInt(x: UInt) = x.asTypeOf(UInt(VAddrBits.W)).asTypeOf(this)
  def getTag(x: UInt) = fromUInt(x).tag
  def getIdx(x: UInt) = fromUInt(x).idx
  // Bank number: the low log2Up(banks) bits of the index.
  def getBank(x: UInt) = getIdx(x)(log2Up(banks) - 1, 0)
  // Row within the selected bank: the remaining high bits of the index.
  def getBankIdx(x: UInt) = getIdx(x)(idxBits - 1, log2Up(banks))
}
25
// Payload passed from BPU Stage1 to Stage2: the latched fetch PC plus the raw
// lookup results of each Stage1 predictor, and Stage1's own quick prediction
// (btbPred) so Stage3 can compare it against the slower TAGE-refined result.
class Stage1To2IO extends XSBundle {
  val pc = Output(UInt(VAddrBits.W))
  // Per-slot BTB hit bits and predicted targets for the fetch packet.
  val btb = new Bundle {
    val hits = Output(UInt(FetchWidth.W))
    val targets = Output(Vec(FetchWidth, UInt(VAddrBits.W)))
  }
  // JBTAC (indirect-jump target cache): one-hot hit position and its target.
  val jbtac = new Bundle {
    val hitIdx = Output(UInt(FetchWidth.W))
    val target = Output(UInt(VAddrBits.W))
  }
  // TAGE directional prediction for each slot.
  val tage = new Bundle {
    val hits = Output(UInt(FetchWidth.W))
    val takens = Output(Vec(FetchWidth, Bool()))
  }
  // Per-instruction speculative global history (computed by Stage1's shift logic).
  val hist = Output(Vec(FetchWidth, UInt(HistoryLength.W)))
  // Stage1's quick prediction, carried along for the Stage3 cross-check.
  val btbPred = ValidIO(new BranchPrediction)
}
43
// BPU pipeline Stage1: performs the single-cycle "quick" prediction from the
// BTB and JBTAC (and kicks off the TAGE lookup consumed in Stage3), maintains
// the speculative global history register (GHR), and forwards all raw lookup
// results to Stage2.  It also applies GHR roll-back on Stage3/backend flushes.
class BPUStage1 extends XSModule {
  val io = IO(new Bundle() {
    val in = new Bundle { val pc = Flipped(Decoupled(UInt(VAddrBits.W))) }
    // from backend
    val redirectInfo = Input(new RedirectInfo)
    // from Stage3
    val flush = Input(Bool())
    val s3RollBackHist = Input(UInt(HistoryLength.W))
    val s3Taken = Input(Bool())
    // to ifu, quick prediction result
    val s1OutPred = ValidIO(new BranchPrediction)
    // to Stage2
    val out = Decoupled(new Stage1To2IO)
  })

  // Stage1 never back-pressures the fetch PC.
  io.in.pc.ready := true.B

  // flush Stage1 when io.flush
  // NOTE(review): flushS1 is currently unused — the io.out.valid line that
  // consumed it is commented out below; kept for the intended behavior of
  // suppressing output until the next PC arrives after a flush.
  val flushS1 = BoolStopWatch(io.flush, io.in.pc.fire(), startHighPriority = true)

  // global history register
  val ghr = RegInit(0.U(HistoryLength.W))
  // modify updateGhr and newGhr when updating ghr
  val updateGhr = WireInit(false.B)
  val newGhr = WireInit(0.U(HistoryLength.W))
  when (updateGhr) { ghr := newGhr }
  // use hist as global history!!!
  // Same-cycle bypass: predictors see the freshly written history immediately.
  val hist = Mux(updateGhr, newGhr, ghr)

  // Tage predictor
  // val tage = Module(new FakeTAGE)
  val tage = if(EnableBPD) Module(new Tage) else Module(new FakeTAGE)
  tage.io.req.valid := io.in.pc.fire()
  tage.io.req.bits.pc := io.in.pc.bits
  tage.io.req.bits.hist := hist
  tage.io.redirectInfo <> io.redirectInfo
  io.out.bits.tage <> tage.io.out
  io.s1OutPred.bits.tageMeta := tage.io.meta

  // latch pc for 1 cycle latency when reading SRAM
  val pcLatch = RegEnable(io.in.pc.bits, io.in.pc.fire())

  val r = io.redirectInfo.redirect
  // Reconstruct the fetch-packet PC that contained the redirecting instruction
  // (fetchIdx is the instruction's 4-byte slot within its fetch packet).
  val updateFetchpc = r.pc - (r.fetchIdx << 2.U)
  // BTB
  val btb = Module(new BTB)
  btb.io.in.pc <> io.in.pc
  btb.io.in.pcLatch := pcLatch
  btb.io.redirectValid := io.redirectInfo.valid
  btb.io.flush := io.flush

  // Train the BTB from the backend redirect (way, counter, taken, target, type).
  btb.io.update.fetchPC := updateFetchpc
  btb.io.update.fetchIdx := r.fetchIdx
  btb.io.update.hit := r.btbHitWay
  btb.io.update.misPred := io.redirectInfo.misPred
  btb.io.update.writeWay := r.btbVictimWay
  btb.io.update.oldCtr := r.btbPredCtr
  btb.io.update.taken := r.taken
  btb.io.update.target := r.brTarget
  btb.io.update._type := r._type

  val btbHit = btb.io.out.hit
  val btbTaken = btb.io.out.taken
  val btbTakenIdx = btb.io.out.takenIdx
  val btbTakenTarget = btb.io.out.target
  val btbWriteWay = btb.io.out.writeWay
  val btbNotTakens = btb.io.out.notTakens
  val btbCtrs = VecInit(btb.io.out.dEntries.map(_.pred))
  val btbValids = VecInit(btb.io.out.dEntries.map(_.valid))
  val btbTargets = VecInit(btb.io.out.dEntries.map(_.target))
  val btbTypes = VecInit(btb.io.out.dEntries.map(_._type))


  // JBTAC predicts targets of indirect jumps, indexed by PC hashed with history.
  val jbtac = Module(new JBTAC)
  jbtac.io.in.pc <> io.in.pc
  jbtac.io.in.pcLatch := pcLatch
  jbtac.io.in.hist := hist
  jbtac.io.redirectValid := io.redirectInfo.valid
  jbtac.io.flush := io.flush

  // Train the JBTAC from the backend redirect.
  jbtac.io.update.fetchPC := updateFetchpc
  jbtac.io.update.fetchIdx := r.fetchIdx
  jbtac.io.update.misPred := io.redirectInfo.misPred
  jbtac.io.update._type := r._type
  jbtac.io.update.target := r.target
  jbtac.io.update.hist := r.hist

  val jbtacHit = jbtac.io.out.hit
  val jbtacTarget = jbtac.io.out.target
  val jbtacHitIdx = jbtac.io.out.hitIdx

  // calculate global history of each instr
  // Each slot must see one extra not-taken (0) bit for every predicted-not-taken
  // branch that precedes it in the packet.  shift(i)(j) marks that not-taken
  // branch i contributes one shift to slot j; summing column j gives the shift.
  val firstHist = RegNext(hist)
  val histShift = Wire(Vec(FetchWidth, UInt(log2Up(FetchWidth).W)))
  val shift = Wire(Vec(FetchWidth, Vec(FetchWidth, UInt(1.W))))
  (0 until FetchWidth).map(i => shift(i) := Mux(!btbNotTakens(i), 0.U, ~LowerMask(UIntToOH(i.U), FetchWidth)).asTypeOf(Vec(FetchWidth, UInt(1.W))))
  for (j <- 0 until FetchWidth) {
    var tmp = 0.U
    for (i <- 0 until FetchWidth) {
      tmp = tmp + shift(i)(j)
    }
    histShift(j) := tmp
  }
  (0 until FetchWidth).map(i => io.s1OutPred.bits.hist(i) := firstHist << histShift(i))

  // update ghr
  // Priority of the new history value:
  //   backend flush  -> rebuild from the redirect's checkpointed history,
  //   Stage3 flush   -> roll back to Stage3's recovered history,
  //   S1 redirect    -> append a taken (1) bit at the jumping slot's history,
  //   fall-through   -> append one 0 per predicted-not-taken branch.
  updateGhr := io.s1OutPred.bits.redirect || io.flush
  val brJumpIdx = Mux(!(btbHit && btbTaken), 0.U, UIntToOH(btbTakenIdx))
  val indirectIdx = Mux(!jbtacHit, 0.U, UIntToOH(jbtacHitIdx))
  //val newTaken = Mux(io.redirectInfo.flush(), !(r._type === BTBtype.B && !r.taken), )
  newGhr := Mux(io.redirectInfo.flush(),    (r.hist << 1.U) | !(r._type === BTBtype.B && !r.taken),
            Mux(io.flush,                   Mux(io.s3Taken, (io.s3RollBackHist << 1.U) | 1.U, io.s3RollBackHist),
            Mux(io.s1OutPred.bits.redirect, (PriorityMux(brJumpIdx | indirectIdx, io.s1OutPred.bits.hist) << 1.U) | 1.U,
                                            io.s1OutPred.bits.hist(0) << PopCount(btbNotTakens))))

  // redirect based on BTB and JBTAC
  // io.out.valid := RegNext(io.in.pc.fire()) && !flushS1
  io.out.valid := RegNext(io.in.pc.fire()) && !io.flush

  io.s1OutPred.valid := io.out.valid
  // Quick redirect: a taken BTB branch or any JBTAC indirect hit.
  io.s1OutPred.bits.redirect := btbHit && btbTaken || jbtacHit
  // io.s1OutPred.bits.instrValid := LowerMask(UIntToOH(btbTakenIdx), FetchWidth) & LowerMask(UIntToOH(jbtacHitIdx), FetchWidth)
  // Only instructions up to (and including) the first jumping slot are valid.
  io.s1OutPred.bits.instrValid := Mux(io.s1OutPred.bits.redirect, LowerMask(LowestBit(brJumpIdx | indirectIdx, FetchWidth), FetchWidth), Fill(FetchWidth, 1.U(1.W))).asTypeOf(Vec(FetchWidth, Bool()))
  // Target comes from whichever source (branch vs. indirect) jumps first.
  io.s1OutPred.bits.target := Mux(brJumpIdx === LowestBit(brJumpIdx | indirectIdx, FetchWidth), btbTakenTarget, jbtacTarget)
  io.s1OutPred.bits.btbVictimWay := btbWriteWay
  io.s1OutPred.bits.predCtr := btbCtrs
  io.s1OutPred.bits.btbHitWay := btbHit
  // RAS checkpoint fields are filled by Stage3, not here.
  io.s1OutPred.bits.rasSp := DontCare
  io.s1OutPred.bits.rasTopCtr := DontCare

  io.out.bits.pc := pcLatch
  io.out.bits.btb.hits := btbValids.asUInt
  (0 until FetchWidth).map(i => io.out.bits.btb.targets(i) := btbTargets(i))
  io.out.bits.jbtac.hitIdx := UIntToOH(jbtacHitIdx)
  io.out.bits.jbtac.target := jbtacTarget
  // TODO: we don't need this repeatedly!
  io.out.bits.hist := io.s1OutPred.bits.hist
  io.out.bits.btbPred := io.s1OutPred



  // debug info
  XSDebug(true.B, "[BPUS1]in:(%d %d)   pc=%x ghr=%b\n", io.in.pc.valid, io.in.pc.ready, io.in.pc.bits, hist)
  XSDebug(true.B, "[BPUS1]outPred:(%d) redirect=%d instrValid=%b tgt=%x\n",
    io.s1OutPred.valid, io.s1OutPred.bits.redirect, io.s1OutPred.bits.instrValid.asUInt, io.s1OutPred.bits.target)
  XSDebug(io.flush && io.redirectInfo.flush(),
    "[BPUS1]flush from backend: pc=%x tgt=%x brTgt=%x _type=%b taken=%d oldHist=%b fetchIdx=%d isExcpt=%d\n",
    r.pc, r.target, r.brTarget, r._type, r.taken, r.hist, r.fetchIdx, r.isException)
  XSDebug(io.flush && !io.redirectInfo.flush(),
    "[BPUS1]flush from Stage3:  s3Taken=%d s3RollBackHist=%b\n", io.s3Taken, io.s3RollBackHist)

}
196
// Stage2 forwards the Stage1 payload unchanged, so its output bundle is simply
// an alias of Stage1To2IO with no additional fields.
class Stage2To3IO extends Stage1To2IO
199
// BPU pipeline Stage2: a single-entry buffer stage between Stage1 and Stage3.
// It latches the Stage1 payload for one cycle and forwards it untouched; all
// this stage adds is valid/ready handshaking and flush handling.
class BPUStage2 extends XSModule {
  val io = IO(new Bundle() {
    // flush from Stage3
    val flush = Input(Bool())
    val in = Flipped(Decoupled(new Stage1To2IO))
    val out = Decoupled(new Stage2To3IO)
  })

  // Remember that a flush happened (from Stage3 or the backend) until the
  // next request enters this stage.
  val flushedS2 = BoolStopWatch(io.flush, io.in.fire(), startHighPriority = true)

  // Payload register: captures the incoming bundle on every accepted beat.
  val stageReg = RegInit(0.U.asTypeOf(io.in.bits))
  when (io.in.fire()) { stageReg := io.in.bits }

  // Occupancy flag.  Priority: flush clears, then fill, then drain.
  val occupied = RegInit(false.B)
  occupied := MuxCase(occupied, Seq(
    io.flush      -> false.B,
    io.in.fire()  -> true.B,
    io.out.fire() -> false.B
  ))

  // Output is valid only while occupied and no current or pending flush applies.
  io.out.valid := !io.flush && !flushedS2 && occupied
  // Accept a new beat when empty, or when the current one drains this cycle.
  io.in.ready := !occupied || io.out.fire()

  // Pure pass-through of the latched payload.
  io.out.bits := stageReg

  // debug info
  XSDebug(true.B, "[BPUS2]in:(%d %d) pc=%x out:(%d %d) pc=%x\n",
    io.in.valid, io.in.ready, io.in.bits.pc, io.out.valid, io.out.ready, io.out.bits.pc)
  XSDebug(true.B, "[BPUS2]validLatch=%d pc=%x\n", occupied, stageReg.pc)
  XSDebug(io.flush, "[BPUS2]flush!!!\n")
}
233
// BPU pipeline Stage3: combines the predictors' raw results with the icache's
// predecode information to find the first actually-taken control-flow
// instruction in the fetch packet, manages the return address stack (RAS),
// and — when its refined prediction disagrees with Stage1's quick one —
// flushes the earlier stages and rolls back the global history.
//
// Fix (review): the original code assigned io.out.bits.redirect twice,
// unconditionally — first `jmpIdx.orR.asBool`, then the EnableBPD expression
// further below.  Under Chisel last-connect semantics the first assignment was
// dead code and misleading; it has been removed.  Generated hardware is
// unchanged.
class BPUStage3 extends XSModule {
  val io = IO(new Bundle() {
    val flush = Input(Bool())
    val in = Flipped(Decoupled(new Stage2To3IO))
    val out = ValidIO(new BranchPrediction)
    // from icache
    val predecode = Flipped(ValidIO(new Predecode))
    // from backend
    val redirectInfo = Input(new RedirectInfo)
    // to Stage1 and Stage2
    val flushBPU = Output(Bool())
    // to Stage1, restore ghr in stage1 when flushBPU is valid
    val s1RollBackHist = Output(UInt(HistoryLength.W))
    val s3Taken = Output(Bool())
  })

  // Flush latch + input/valid registers: same skid-buffer pattern as Stage2,
  // except the output fires only once predecode info has arrived.
  val flushS3 = BoolStopWatch(io.flush, io.in.fire(), startHighPriority = true)
  val inLatch = RegInit(0.U.asTypeOf(io.in.bits))
  val validLatch = RegInit(false.B)
  when (io.in.fire()) { inLatch := io.in.bits }
  when (io.flush) {
    validLatch := false.B
  }.elsewhen (io.in.fire()) {
    validLatch := true.B
  }.elsewhen (io.out.valid) {
    validLatch := false.B
  }
  io.out.valid := validLatch && io.predecode.valid && !flushS3 && !io.flush
  io.in.ready := !validLatch || io.out.valid

  // RAS
  // TODO: split retAddr and ctr
  def rasEntry() = new Bundle {
    val retAddr = UInt(VAddrBits.W)
    val ctr = UInt(8.W) // layer of nested call functions
  }
  val ras = RegInit(VecInit(Seq.fill(RasSize)(0.U.asTypeOf(rasEntry()))))
  val sp = Counter(RasSize)
  val rasTop = ras(sp.value)
  val rasTopAddr = rasTop.retAddr

  // get the first taken branch/jal/call/jalr/ret in a fetch line
  // brTakenIdx/jalIdx/callIdx/jalrIdx/retIdx/jmpIdx is one-hot encoded.
  // brNotTakenIdx indicates all the not-taken branches before the first jump instruction.
  val brIdx = inLatch.btb.hits & Reverse(Cat(io.predecode.bits.fuOpTypes.map { t => ALUOpType.isBranch(t) }).asUInt) & io.predecode.bits.mask
  val brTakenIdx = LowestBit(brIdx & inLatch.tage.takens.asUInt, FetchWidth)
  val jalIdx = LowestBit(inLatch.btb.hits & Reverse(Cat(io.predecode.bits.fuOpTypes.map { t => t === ALUOpType.jal }).asUInt) & io.predecode.bits.mask, FetchWidth)
  val callIdx = LowestBit(inLatch.btb.hits & io.predecode.bits.mask & Reverse(Cat(io.predecode.bits.fuOpTypes.map { t => t === ALUOpType.call }).asUInt), FetchWidth)
  val jalrIdx = LowestBit(inLatch.jbtac.hitIdx & io.predecode.bits.mask & Reverse(Cat(io.predecode.bits.fuOpTypes.map { t => t === ALUOpType.jalr }).asUInt), FetchWidth)
  val retIdx = LowestBit(io.predecode.bits.mask & Reverse(Cat(io.predecode.bits.fuOpTypes.map { t => t === ALUOpType.ret }).asUInt), FetchWidth)

  val jmpIdx = LowestBit(brTakenIdx | jalIdx | callIdx | jalrIdx | retIdx, FetchWidth)
  val brNotTakenIdx = brIdx & ~inLatch.tage.takens.asUInt & LowerMask(jmpIdx, FetchWidth) & io.predecode.bits.mask

  // Final target selection: RAS top for returns, JBTAC for indirect jumps,
  // sequential next packet when nothing jumps, otherwise the BTB target of
  // the first taken slot.  (io.out.bits.redirect is driven further below.)
  io.out.bits.target := Mux(jmpIdx === retIdx, rasTopAddr,
    Mux(jmpIdx === jalrIdx, inLatch.jbtac.target,
    Mux(jmpIdx === 0.U, inLatch.pc + 32.U, // TODO: RVC
    PriorityMux(jmpIdx, inLatch.btb.targets))))
  // Instructions after the first taken jump are invalidated.
  io.out.bits.instrValid := Mux(jmpIdx.orR, LowerMask(jmpIdx, FetchWidth), Fill(FetchWidth, 1.U(1.W))).asTypeOf(Vec(FetchWidth, Bool()))
  // Pass the Stage1 BTB bookkeeping through unchanged for backend updates.
  io.out.bits.btbVictimWay := inLatch.btbPred.bits.btbVictimWay
  io.out.bits.predCtr := inLatch.btbPred.bits.predCtr
  io.out.bits.btbHitWay := inLatch.btbPred.bits.btbHitWay
  io.out.bits.tageMeta := inLatch.btbPred.bits.tageMeta
  //io.out.bits._type := Mux(jmpIdx === retIdx, BTBtype.R,
  //  Mux(jmpIdx === jalrIdx, BTBtype.I,
  //  Mux(jmpIdx === brTakenIdx, BTBtype.B, BTBtype.J)))
  val firstHist = inLatch.btbPred.bits.hist(0)
  // there may be several notTaken branches before the first jump instruction,
  // so we need to calculate how many zeroes should each instruction shift in its global history.
  // each history is exclusive of instruction's own jump direction.
  val histShift = Wire(Vec(FetchWidth, UInt(log2Up(FetchWidth).W)))
  val shift = Wire(Vec(FetchWidth, Vec(FetchWidth, UInt(1.W))))
  (0 until FetchWidth).map(i => shift(i) := Mux(!brNotTakenIdx(i), 0.U, ~LowerMask(UIntToOH(i.U), FetchWidth)).asTypeOf(Vec(FetchWidth, UInt(1.W))))
  for (j <- 0 until FetchWidth) {
    var tmp = 0.U
    for (i <- 0 until FetchWidth) {
      tmp = tmp + shift(i)(j)
    }
    histShift(j) := tmp
  }
  (0 until FetchWidth).map(i => io.out.bits.hist(i) := firstHist << histShift(i))
  // save ras checkpoint info
  io.out.bits.rasSp := sp.value
  io.out.bits.rasTopCtr := rasTop.ctr

  // flush BPU and redirect when target differs from the target predicted in Stage1
  // Redirect when Stage3's taken/not-taken decision differs from Stage1's, or
  // both predict taken but to different targets.  Without the decoupled
  // predictor (EnableBPD == false) Stage3 never overrides Stage1.
  io.out.bits.redirect := (if(EnableBPD) (inLatch.btbPred.bits.redirect ^ jmpIdx.orR.asBool ||
    inLatch.btbPred.bits.redirect && jmpIdx.orR.asBool && io.out.bits.target =/= inLatch.btbPred.bits.target)
    else false.B)
  io.flushBPU := io.out.bits.redirect && io.out.valid

  // speculative update RAS
  // On a call: push the return address (instruction after the call); if it
  // equals the current top, just bump the nesting counter instead of pushing.
  // On a return: pop, or decrement the counter for nested recursion.
  val rasWrite = WireInit(0.U.asTypeOf(rasEntry()))
  rasWrite.retAddr := inLatch.pc + (OHToUInt(callIdx) << 2.U) + 4.U
  val allocNewEntry = rasWrite.retAddr =/= rasTopAddr
  rasWrite.ctr := Mux(allocNewEntry, 1.U, rasTop.ctr + 1.U)
  when (io.out.valid) {
    when (jmpIdx === callIdx) {
      ras(Mux(allocNewEntry, sp.value + 1.U, sp.value)) := rasWrite
      when (allocNewEntry) { sp.value := sp.value + 1.U }
    }.elsewhen (jmpIdx === retIdx) {
      when (rasTop.ctr === 1.U) {
        sp.value := Mux(sp.value === 0.U, 0.U, sp.value - 1.U)
      }.otherwise {
        ras(sp.value) := Cat(rasTop.ctr - 1.U, rasTopAddr).asTypeOf(rasEntry())
      }
    }
  }
  // use checkpoint to recover RAS
  // On a backend misprediction, restore the RAS pointer/counter saved with the
  // mispredicted instruction.  (Takes priority over the speculative update
  // above by last-connect semantics.)
  val recoverSp = io.redirectInfo.redirect.rasSp
  val recoverCtr = io.redirectInfo.redirect.rasTopCtr
  when (io.redirectInfo.valid && io.redirectInfo.misPred) {
    sp.value := recoverSp
    ras(recoverSp) := Cat(recoverCtr, ras(recoverSp).retAddr).asTypeOf(rasEntry())
  }

  // roll back global history in S1 if S3 redirects
  io.s1RollBackHist := Mux(io.s3Taken, PriorityMux(jmpIdx, io.out.bits.hist), io.out.bits.hist(0) << PopCount(brIdx & ~inLatch.tage.takens.asUInt))
  // whether Stage3 has a taken jump
  io.s3Taken := jmpIdx.orR.asBool

  // debug info
  XSDebug(io.in.fire(), "[BPUS3]in:(%d %d) pc=%x\n", io.in.valid, io.in.ready, io.in.bits.pc)
  XSDebug(io.out.valid, "[BPUS3]out:%d pc=%x redirect=%d predcdMask=%b instrValid=%b tgt=%x\n",
    io.out.valid, inLatch.pc, io.out.bits.redirect, io.predecode.bits.mask, io.out.bits.instrValid.asUInt, io.out.bits.target)
  XSDebug(true.B, "[BPUS3]flushS3=%d\n", flushS3)
  XSDebug(true.B, "[BPUS3]validLatch=%d predecode.valid=%d\n", validLatch, io.predecode.valid)
  XSDebug(true.B, "[BPUS3]brIdx=%b brTakenIdx=%b brNTakenIdx=%b jalIdx=%b jalrIdx=%b callIdx=%b retIdx=%b\n",
    brIdx, brTakenIdx, brNotTakenIdx, jalIdx, jalrIdx, callIdx, retIdx)

  // BPU's TEMP Perf Cnt
  BoringUtils.addSource(io.out.valid, "MbpS3Cnt")
  BoringUtils.addSource(io.out.valid && io.out.bits.redirect, "MbpS3TageRed")
  BoringUtils.addSource(io.out.valid && (inLatch.btbPred.bits.redirect ^ jmpIdx.orR.asBool), "MbpS3TageRedDir")
  BoringUtils.addSource(io.out.valid && (inLatch.btbPred.bits.redirect
              && jmpIdx.orR.asBool && (io.out.bits.target =/= inLatch.btbPred.bits.target)), "MbpS3TageRedTar")
}
372
// Top-level branch prediction unit: chains the three BPU pipeline stages,
// exposes Stage1's quick prediction (btbOut) and Stage3's refined prediction
// (tageOut), and wires flush/redirect feedback between stages and backend.
// Also hosts temporary BoringUtils-based performance counters.
class BPU extends XSModule {
  val io = IO(new Bundle() {
    // from backend
    // flush pipeline if misPred and update bpu based on redirect signals from brq
    val redirectInfo = Input(new RedirectInfo)

    val in = new Bundle { val pc = Flipped(Valid(UInt(VAddrBits.W))) }

    val btbOut = ValidIO(new BranchPrediction)
    val tageOut = ValidIO(new BranchPrediction)

    // predecode info from icache
    // TODO: simplify this after implement predecode unit
    val predecode = Flipped(ValidIO(new Predecode))
  })

  val s1 = Module(new BPUStage1)
  val s2 = Module(new BPUStage2)
  val s3 = Module(new BPUStage3)

  // Stage1 is flushed by either a Stage3 disagreement or a backend redirect.
  s1.io.redirectInfo <> io.redirectInfo
  s1.io.flush := s3.io.flushBPU || io.redirectInfo.flush()
  s1.io.in.pc.valid := io.in.pc.valid
  s1.io.in.pc.bits <> io.in.pc.bits
  io.btbOut <> s1.io.s1OutPred
  // Stage3 supplies the rolled-back history Stage1 uses on a Stage3 flush.
  s1.io.s3RollBackHist := s3.io.s1RollBackHist
  s1.io.s3Taken := s3.io.s3Taken

  s1.io.out <> s2.io.in
  s2.io.flush := s3.io.flushBPU || io.redirectInfo.flush()

  // Stage3 itself is only flushed by the backend, never by its own redirect.
  s2.io.out <> s3.io.in
  s3.io.flush := io.redirectInfo.flush()
  s3.io.predecode <> io.predecode
  io.tageOut <> s3.io.out
  s3.io.redirectInfo <> io.redirectInfo

  // TODO: temp and ugly code, when perf counters is added( may after adding CSR), please mv the below counter
  // (name, padding-for-aligned-printout) pairs; names double as BoringUtils tags.
  val bpuPerfCntList = List(
    ("MbpInstr","         "),
    ("MbpRight","         "),
    ("MbpWrong","         "),
    ("MbpBRight","        "),
    ("MbpBWrong","        "),
    ("MbpJRight","        "),
    ("MbpJWrong","        "),
    ("MbpIRight","        "),
    ("MbpIWrong","        "),
    ("MbpRRight","        "),
    ("MbpRWrong","        "),
    ("MbpS3Cnt","         "),
    ("MbpS3TageRed","     "),
    ("MbpS3TageRedDir","  "),
    ("MbpS3TageRedTar","  ")
  )

  // One counter per event; each increments when its boring-wired condition fires.
  val bpuPerfCnts = List.fill(bpuPerfCntList.length)(RegInit(0.U(XLEN.W)))
  val bpuPerfCntConds = List.fill(bpuPerfCntList.length)(WireInit(false.B))
  (bpuPerfCnts zip bpuPerfCntConds) map { case (cnt, cond) => { when (cond) { cnt := cnt + 1.U }}}

  // Sink the event pulses emitted elsewhere (e.g. Stage3's addSource calls).
  for(i <- bpuPerfCntList.indices) {
    BoringUtils.addSink(bpuPerfCntConds(i), bpuPerfCntList(i)._1)
  }

  val xsTrap = WireInit(false.B)
  BoringUtils.addSink(xsTrap, "XSTRAP_BPU")

  // Dump all counters when the simulation trap fires.
  // if (!p.FPGAPlatform) {
    when (xsTrap) {
      printf("=================BPU's PerfCnt================\n")
      for(i <- bpuPerfCntList.indices) {
        printf(bpuPerfCntList(i)._1 + bpuPerfCntList(i)._2 + " <- " + "%d\n", bpuPerfCnts(i))
      }
    }
  // }
}
448}