xref: /XiangShan/src/main/scala/xiangshan/frontend/BPU.scala (revision e567b3a76562ee2b93f929de31a261f7c3430585)
1package xiangshan.frontend
2
3import chisel3._
4import chisel3.util._
5import xiangshan._
6import xiangshan.utils._
7import xiangshan.backend.ALUOpType
8import utils._
9import chisel3.util.experimental.BoringUtils
10import xiangshan.backend.decode.XSTrap
11
// Address view used to index banked prediction tables: splits the
// VAddrBits-wide PC into { tag | idx | 2-bit offset }. The fixed 2-bit
// offset field reflects the 4-byte instruction granularity used elsewhere
// in this file (e.g. `fetchIdx << 2`).
class TableAddr(val idxBits: Int, val banks: Int) extends XSBundle {
  def tagBits = VAddrBits - idxBits - 2

  val tag = UInt(tagBits.W)
  val idx = UInt(idxBits.W)
  val offset = UInt(2.W)

  // Reinterpret a raw PC as this { tag, idx, offset } layout.
  def fromUInt(x: UInt) = x.asTypeOf(UInt(VAddrBits.W)).asTypeOf(this)
  def getTag(x: UInt) = fromUInt(x).tag
  def getIdx(x: UInt) = fromUInt(x).idx
  // Low idx bits select the bank; the remaining bits index within a bank.
  def getBank(x: UInt) = getIdx(x)(log2Up(banks) - 1, 0)
  def getBankIdx(x: UInt) = getIdx(x)(idxBits - 1, log2Up(banks))
}
25
// Payload passed from BPU Stage1 to Stage2 (and on to Stage3): the latched
// fetch PC, the raw per-slot results of the BTB/JBTAC/TAGE lookups, the
// per-instruction global history, and Stage1's quick prediction (btbPred)
// so Stage3 can later detect disagreement with its refined result.
class Stage1To2IO extends XSBundle {
  val pc = Output(UInt(VAddrBits.W))
  val btb = new Bundle {
    val hits = Output(UInt(FetchWidth.W))                    // per-slot BTB hit bits
    val targets = Output(Vec(FetchWidth, UInt(VAddrBits.W))) // per-slot BTB targets
  }
  val jbtac = new Bundle {
    val hitIdx = Output(UInt(FetchWidth.W))  // one-hot slot of the JBTAC hit (Stage1 drives UIntToOH)
    val target = Output(UInt(VAddrBits.W))   // predicted indirect-jump target
  }
  val tage = new Bundle {
    val hits = Output(UInt(FetchWidth.W))
    val takens = Output(Vec(FetchWidth, Bool()))  // per-slot direction prediction
  }
  val hist = Output(Vec(FetchWidth, UInt(HistoryLength.W)))  // per-slot global history
  val btbPred = ValidIO(new BranchPrediction)  // Stage1's quick prediction, forwarded for comparison
}
43
// Branch Prediction Unit, stage 1.
//
// Accepts a fetch PC and looks up three predictors in parallel, each with
// one cycle of SRAM latency (hence pcLatch):
//   * BTB   — per-slot targets / counters for branches and jumps;
//   * JBTAC — indirect-jump targets, indexed with global history;
//   * TAGE  — per-slot taken/not-taken directions.
// It produces a quick prediction for the IFU (s1OutPred) and forwards the
// raw table outputs to Stage2 (out). It also owns the speculative global
// history register, rolling it back on flushes from Stage3 or the backend.
class BPUStage1 extends XSModule {
  val io = IO(new Bundle() {
    val in = new Bundle { val pc = Flipped(Decoupled(UInt(VAddrBits.W))) }
    // from backend
    val redirectInfo = Input(new RedirectInfo)
    // from Stage3
    val flush = Input(Bool())
    val s3RollBackHist = Input(UInt(HistoryLength.W))
    val s3Taken = Input(Bool())
    // to ifu, quick prediction result
    val s1OutPred = ValidIO(new BranchPrediction)
    // to Stage2
    val out = Decoupled(new Stage1To2IO)
  })

  // Stage1 never back-pressures the fetch PC.
  io.in.pc.ready := true.B

  // flush Stage1 when io.flush
  // NOTE(review): flushS1 is only referenced by the commented-out
  // io.out.valid variant below; the active logic gates on io.flush directly.
  val flushS1 = BoolStopWatch(io.flush, io.in.pc.fire(), startHighPriority = true)

  // global history register (speculatively updated)
  val ghr = RegInit(0.U(HistoryLength.W))
  // modify updateGhr and newGhr when updating ghr
  val updateGhr = WireInit(false.B)
  val newGhr = WireInit(0.U(HistoryLength.W))
  when (updateGhr) { ghr := newGhr }
  // use hist as global history!!!
  // (bypass mux: a same-cycle update is visible to the lookups immediately)
  val hist = Mux(updateGhr, newGhr, ghr)

  // Tage predictor
  // val tage = Module(new FakeTAGE)
  val tage = if(EnableBPD) Module(new Tage) else Module(new FakeTAGE)
  tage.io.req.valid := io.in.pc.fire()
  tage.io.req.bits.pc := io.in.pc.bits
  tage.io.req.bits.hist := hist
  tage.io.redirectInfo <> io.redirectInfo
  io.out.bits.tage <> tage.io.out
  io.s1OutPred.bits.tageMeta := tage.io.meta

  // latch pc for 1 cycle latency when reading SRAM
  val pcLatch = RegEnable(io.in.pc.bits, io.in.pc.fire())

  // Reconstruct the fetch-packet PC being updated: fetchIdx is the slot
  // index within the packet, each slot covering 4 bytes.
  val r = io.redirectInfo.redirect
  val updateFetchpc = r.pc - (r.fetchIdx << 2.U)
  // BTB
  val btb = Module(new BTB)
  btb.io.in.pc <> io.in.pc
  btb.io.in.pcLatch := pcLatch
  btb.io.redirectValid := io.redirectInfo.valid
  btb.io.flush := io.flush

  // BTB update channel, driven from the backend redirect info.
  btb.io.update.fetchPC := updateFetchpc
  btb.io.update.fetchIdx := r.fetchIdx
  btb.io.update.hit := r.btbHitWay
  btb.io.update.misPred := io.redirectInfo.misPred
  btb.io.update.writeWay := r.btbVictimWay
  btb.io.update.oldCtr := r.btbPredCtr
  btb.io.update.taken := r.taken
  btb.io.update.target := r.brTarget
  btb.io.update._type := r._type

  val btbHit = btb.io.out.hit
  val btbTaken = btb.io.out.taken
  val btbTakenIdx = btb.io.out.takenIdx
  val btbTakenTarget = btb.io.out.target
  val btbWriteWay = btb.io.out.writeWay
  val btbNotTakens = btb.io.out.notTakens
  val btbCtrs = VecInit(btb.io.out.dEntries.map(_.pred))
  val btbValids = btb.io.out.dEntriesValid
  val btbTargets = VecInit(btb.io.out.dEntries.map(_.target))
  val btbTypes = VecInit(btb.io.out.dEntries.map(_._type))

  // JBTAC: indirect-jump target predictor, indexed with global history.
  val jbtac = Module(new JBTAC)
  jbtac.io.in.pc <> io.in.pc
  jbtac.io.in.pcLatch := pcLatch
  jbtac.io.in.hist := hist
  jbtac.io.redirectValid := io.redirectInfo.valid
  jbtac.io.flush := io.flush

  jbtac.io.update.fetchPC := updateFetchpc
  jbtac.io.update.fetchIdx := r.fetchIdx
  jbtac.io.update.misPred := io.redirectInfo.misPred
  jbtac.io.update._type := r._type
  jbtac.io.update.target := r.target
  jbtac.io.update.hist := r.hist

  val jbtacHit = jbtac.io.out.hit
  val jbtacTarget = jbtac.io.out.target
  val jbtacHitIdx = jbtac.io.out.hitIdx

  // calculate global history of each instr
  // Each slot's history must include the not-taken branches that precede it
  // in the same packet: shift(i)(j) is 1 iff slot i is a not-taken branch
  // and i < j; histShift(j) sums those contributions for slot j.
  val firstHist = RegNext(hist)
  val histShift = Wire(Vec(FetchWidth, UInt(log2Up(FetchWidth).W)))
  val shift = Wire(Vec(FetchWidth, Vec(FetchWidth, UInt(1.W))))
  (0 until FetchWidth).map(i => shift(i) := Mux(!btbNotTakens(i), 0.U, ~LowerMask(UIntToOH(i.U), FetchWidth)).asTypeOf(Vec(FetchWidth, UInt(1.W))))
  for (j <- 0 until FetchWidth) {
    var tmp = 0.U
    for (i <- 0 until FetchWidth) {
      tmp = tmp + shift(i)(j)
    }
    histShift(j) := tmp
  }
  (0 until FetchWidth).map(i => io.s1OutPred.bits.hist(i) := firstHist << histShift(i))

  // update ghr
  // Priority: backend flush (recover from the redirecting instruction's
  // history, shifting in its real outcome) > Stage3 flush (use the rolled
  // back history from S3) > Stage1's own redirect (history of the jumping
  // slot plus a taken bit) > fall-through (shift in this packet's
  // not-taken branches).
  updateGhr := io.s1OutPred.bits.redirect || io.flush
  val brJumpIdx = Mux(!(btbHit && btbTaken), 0.U, UIntToOH(btbTakenIdx))
  val indirectIdx = Mux(!jbtacHit, 0.U, UIntToOH(jbtacHitIdx))
  //val newTaken = Mux(io.redirectInfo.flush(), !(r._type === BTBtype.B && !r.taken), )
  newGhr := Mux(io.redirectInfo.flush(),    (r.hist << 1.U) | !(r._type === BTBtype.B && !r.taken),
            Mux(io.flush,                   Mux(io.s3Taken, (io.s3RollBackHist << 1.U) | 1.U, io.s3RollBackHist),
            Mux(io.s1OutPred.bits.redirect, (PriorityMux(brJumpIdx | indirectIdx, io.s1OutPred.bits.hist) << 1.U) | 1.U,
                                            io.s1OutPred.bits.hist(0) << PopCount(btbNotTakens))))

  // redirect based on BTB and JBTAC
  // io.out.valid := RegNext(io.in.pc.fire()) && !flushS1
  io.out.valid := RegNext(io.in.pc.fire()) && !io.flush

  io.s1OutPred.valid := io.out.valid
  io.s1OutPred.bits.redirect := btbHit && btbTaken || jbtacHit
  // io.s1OutPred.bits.instrValid := LowerMask(UIntToOH(btbTakenIdx), FetchWidth) & LowerMask(UIntToOH(jbtacHitIdx), FetchWidth)
  // Instructions after the first predicted-taken slot are invalidated.
  io.s1OutPred.bits.instrValid := Mux(io.s1OutPred.bits.redirect, LowerMask(LowestBit(brJumpIdx | indirectIdx, FetchWidth), FetchWidth), Fill(FetchWidth, 1.U(1.W))).asTypeOf(Vec(FetchWidth, Bool()))
  // Take the BTB target when its taken slot is the earliest hit, else JBTAC's.
  io.s1OutPred.bits.target := Mux(brJumpIdx === LowestBit(brJumpIdx | indirectIdx, FetchWidth), btbTakenTarget, jbtacTarget)
  io.s1OutPred.bits.btbVictimWay := btbWriteWay
  io.s1OutPred.bits.predCtr := btbCtrs
  io.s1OutPred.bits.btbHitWay := btbHit
  // RAS state is tracked in Stage3; Stage1 leaves these unspecified.
  io.s1OutPred.bits.rasSp := DontCare
  io.s1OutPred.bits.rasTopCtr := DontCare

  io.out.bits.pc := pcLatch
  io.out.bits.btb.hits := btbValids.asUInt
  (0 until FetchWidth).map(i => io.out.bits.btb.targets(i) := btbTargets(i))
  io.out.bits.jbtac.hitIdx := UIntToOH(jbtacHitIdx)
  io.out.bits.jbtac.target := jbtacTarget
  // TODO: we don't need this repeatedly!
  io.out.bits.hist := io.s1OutPred.bits.hist
  io.out.bits.btbPred := io.s1OutPred



  // debug info
  XSDebug(true.B, "[BPUS1]in:(%d %d)   pc=%x ghr=%b\n", io.in.pc.valid, io.in.pc.ready, io.in.pc.bits, hist)
  XSDebug(true.B, "[BPUS1]outPred:(%d) redirect=%d instrValid=%b tgt=%x\n",
    io.s1OutPred.valid, io.s1OutPred.bits.redirect, io.s1OutPred.bits.instrValid.asUInt, io.s1OutPred.bits.target)
  XSDebug(true.B, "[BPUS1]btbHit=%d btbTaken=%d brJumpIdx=%b jbtacHit=%d indirectIdx=%b\n", btbHit, btbTaken, brJumpIdx, jbtacHit, indirectIdx)
  XSDebug(io.flush && io.redirectInfo.flush(),
    "[BPUS1]flush from backend: pc=%x tgt=%x brTgt=%x _type=%b taken=%d oldHist=%b fetchIdx=%d isExcpt=%d\n",
    r.pc, r.target, r.brTarget, r._type, r.taken, r.hist, r.fetchIdx, r.isException)
  XSDebug(io.flush && !io.redirectInfo.flush(),
    "[BPUS1]flush from Stage3:  s3Taken=%d s3RollBackHist=%b\n", io.s3Taken, io.s3RollBackHist)

}
197
// Stage2 adds no fields of its own: the Stage1 payload flows through to
// Stage3 unchanged, so this is just a named alias of Stage1To2IO.
class Stage2To3IO extends Stage1To2IO
200
// Branch Prediction Unit, stage 2.
// A one-entry pipeline buffer between Stage1 and Stage3: it latches the
// Stage1 payload on an input handshake and forwards it unmodified,
// dropping it whenever Stage3 or the backend flushes the pipeline.
class BPUStage2 extends XSModule {
  val io = IO(new Bundle() {
    // flush from Stage3
    val flush = Input(Bool())
    val in = Flipped(Decoupled(new Stage1To2IO))
    val out = Decoupled(new Stage2To3IO)
  })

  // Remember a flush until the next request enters this stage.
  val flushPending = BoolStopWatch(io.flush, io.in.fire(), startHighPriority = true)

  // One-deep buffer for the Stage1 payload, reset to all zeroes.
  val buffer = RegInit(0.U.asTypeOf(io.in.bits))
  buffer := Mux(io.in.fire(), io.in.bits, buffer)

  // Occupancy flag. Priority: flush clears, then an incoming handshake
  // sets, then an outgoing handshake clears; otherwise it holds.
  val bufferValid = RegInit(false.B)
  bufferValid := Mux(io.flush, false.B,
                 Mux(io.in.fire(), true.B,
                 Mux(io.out.fire(), false.B, bufferValid)))

  io.out.valid := !io.flush && !flushPending && bufferValid
  io.in.ready := !bufferValid || io.out.fire()

  // Stage2 performs no computation of its own.
  io.out.bits := buffer

  // debug info
  XSDebug(true.B, "[BPUS2]in:(%d %d) pc=%x out:(%d %d) pc=%x\n",
    io.in.valid, io.in.ready, io.in.bits.pc, io.out.valid, io.out.ready, io.out.bits.pc)
  XSDebug(true.B, "[BPUS2]validLatch=%d pc=%x\n", bufferValid, buffer.pc)
  XSDebug(io.flush, "[BPUS2]flush!!!\n")
}
234
// Branch Prediction Unit, stage 3.
//
// Combines the BTB/JBTAC results latched from Stage2 with the TAGE
// directions and the icache's predecode information to locate the first
// taken control-flow instruction in the fetch packet. It maintains a
// return address stack (RAS) for call/ret prediction, raises flushBPU
// whenever this refined prediction disagrees with Stage1's quick one, and
// supplies Stage1 with a rolled-back global history for recovery.
class BPUStage3 extends XSModule {
  val io = IO(new Bundle() {
    val flush = Input(Bool())
    val in = Flipped(Decoupled(new Stage2To3IO))
    val out = ValidIO(new BranchPrediction)
    // from icache
    val predecode = Flipped(ValidIO(new Predecode))
    // from backend
    val redirectInfo = Input(new RedirectInfo)
    // to Stage1 and Stage2
    val flushBPU = Output(Bool())
    // to Stage1, restore ghr in stage1 when flushBPU is valid
    val s1RollBackHist = Output(UInt(HistoryLength.W))
    val s3Taken = Output(Bool())
  })

  // Remember a flush until the next request enters the stage.
  val flushS3 = BoolStopWatch(io.flush, io.in.fire(), startHighPriority = true)
  // One-deep input buffer; a result is emitted once predecode arrives.
  val inLatch = RegInit(0.U.asTypeOf(io.in.bits))
  val validLatch = RegInit(false.B)
  when (io.in.fire()) { inLatch := io.in.bits }
  when (io.flush) {
    validLatch := false.B
  }.elsewhen (io.in.fire()) {
    validLatch := true.B
  }.elsewhen (io.out.valid) {
    validLatch := false.B
  }
  io.out.valid := validLatch && io.predecode.valid && !flushS3 && !io.flush
  io.in.ready := !validLatch || io.out.valid

  // RAS
  // TODO: split retAddr and ctr
  def rasEntry() = new Bundle {
    val retAddr = UInt(VAddrBits.W)
    val ctr = UInt(8.W) // layer of nested call functions
  }
  val ras = RegInit(VecInit(Seq.fill(RasSize)(0.U.asTypeOf(rasEntry()))))
  val sp = Counter(RasSize)
  val rasTop = ras(sp.value)
  val rasTopAddr = rasTop.retAddr

  // get the first taken branch/jal/call/jalr/ret in a fetch line
  // brTakenIdx/jalIdx/callIdx/jalrIdx/retIdx/jmpIdx is one-hot encoded.
  // brNotTakenIdx indicates all the not-taken branches before the first jump instruction.
  val brIdx = inLatch.btb.hits & Reverse(Cat(io.predecode.bits.fuOpTypes.map { t => ALUOpType.isBranch(t) }).asUInt) & io.predecode.bits.mask
  val brTakenIdx = LowestBit(brIdx & inLatch.tage.takens.asUInt, FetchWidth)
  val jalIdx = LowestBit(inLatch.btb.hits & Reverse(Cat(io.predecode.bits.fuOpTypes.map { t => t === ALUOpType.jal }).asUInt) & io.predecode.bits.mask, FetchWidth)
  val callIdx = LowestBit(inLatch.btb.hits & io.predecode.bits.mask & Reverse(Cat(io.predecode.bits.fuOpTypes.map { t => t === ALUOpType.call }).asUInt), FetchWidth)
  val jalrIdx = LowestBit(inLatch.jbtac.hitIdx & io.predecode.bits.mask & Reverse(Cat(io.predecode.bits.fuOpTypes.map { t => t === ALUOpType.jalr }).asUInt), FetchWidth)
  val retIdx = LowestBit(io.predecode.bits.mask & Reverse(Cat(io.predecode.bits.fuOpTypes.map { t => t === ALUOpType.ret }).asUInt), FetchWidth)

  val jmpIdx = LowestBit(brTakenIdx | jalIdx | callIdx | jalrIdx | retIdx, FetchWidth)
  val brNotTakenIdx = brIdx & ~inLatch.tage.takens.asUInt & LowerMask(jmpIdx, FetchWidth) & io.predecode.bits.mask

  // BUGFIX(review): the original drove io.out.bits.redirect here with
  // `jmpIdx.orR.asBool` and then unconditionally re-drove it further below;
  // with Chisel's last-connect semantics the first assignment was dead code
  // and has been removed. The single driver now lives after the history
  // computation (see "flush BPU and redirect" below).

  // Predicted target: RAS top for ret, JBTAC for jalr, sequential fall
  // through when nothing is taken, otherwise the matching BTB target.
  io.out.bits.target := Mux(jmpIdx === retIdx, rasTopAddr,
    Mux(jmpIdx === jalrIdx, inLatch.jbtac.target,
    Mux(jmpIdx === 0.U, inLatch.pc + 32.U, // TODO: RVC
    PriorityMux(jmpIdx, inLatch.btb.targets))))
  // Invalidate everything after the first taken instruction.
  io.out.bits.instrValid := Mux(jmpIdx.orR, LowerMask(jmpIdx, FetchWidth), Fill(FetchWidth, 1.U(1.W))).asTypeOf(Vec(FetchWidth, Bool()))
  io.out.bits.btbVictimWay := inLatch.btbPred.bits.btbVictimWay
  io.out.bits.predCtr := inLatch.btbPred.bits.predCtr
  io.out.bits.btbHitWay := inLatch.btbPred.bits.btbHitWay
  io.out.bits.tageMeta := inLatch.btbPred.bits.tageMeta
  //io.out.bits._type := Mux(jmpIdx === retIdx, BTBtype.R,
  //  Mux(jmpIdx === jalrIdx, BTBtype.I,
  //  Mux(jmpIdx === brTakenIdx, BTBtype.B, BTBtype.J)))
  val firstHist = inLatch.btbPred.bits.hist(0)
  // there may be several notTaken branches before the first jump instruction,
  // so we need to calculate how many zeroes should each instruction shift in its global history.
  // each history is exclusive of instruction's own jump direction.
  val histShift = Wire(Vec(FetchWidth, UInt(log2Up(FetchWidth).W)))
  val shift = Wire(Vec(FetchWidth, Vec(FetchWidth, UInt(1.W))))
  (0 until FetchWidth).map(i => shift(i) := Mux(!brNotTakenIdx(i), 0.U, ~LowerMask(UIntToOH(i.U), FetchWidth)).asTypeOf(Vec(FetchWidth, UInt(1.W))))
  for (j <- 0 until FetchWidth) {
    var tmp = 0.U
    for (i <- 0 until FetchWidth) {
      tmp = tmp + shift(i)(j)
    }
    histShift(j) := tmp
  }
  (0 until FetchWidth).map(i => io.out.bits.hist(i) := firstHist << histShift(i))
  // save ras checkpoint info
  io.out.bits.rasSp := sp.value
  io.out.bits.rasTopCtr := rasTop.ctr

  // flush BPU and redirect when target differs from the target predicted in Stage1
  // (sole driver of io.out.bits.redirect — see BUGFIX note above)
  io.out.bits.redirect := (if(EnableBPD) (inLatch.btbPred.bits.redirect ^ jmpIdx.orR.asBool ||
    inLatch.btbPred.bits.redirect && jmpIdx.orR.asBool && io.out.bits.target =/= inLatch.btbPred.bits.target)
    else false.B)
  io.flushBPU := io.out.bits.redirect && io.out.valid

  // speculative update RAS
  val rasWrite = WireInit(0.U.asTypeOf(rasEntry()))
  // Return address = packet PC + 4 * call slot + 4 (no RVC support here).
  rasWrite.retAddr := inLatch.pc + (OHToUInt(callIdx) << 2.U) + 4.U
  // A call to the same return address only bumps the top counter, so direct
  // recursion does not consume stack entries.
  val allocNewEntry = rasWrite.retAddr =/= rasTopAddr
  rasWrite.ctr := Mux(allocNewEntry, 1.U, rasTop.ctr + 1.U)
  when (io.out.valid) {
    when (jmpIdx === callIdx) {
      ras(Mux(allocNewEntry, sp.value + 1.U, sp.value)) := rasWrite
      when (allocNewEntry) { sp.value := sp.value + 1.U }
    }.elsewhen (jmpIdx === retIdx) {
      // Pop only when the recursion counter reaches one; clamp at zero.
      when (rasTop.ctr === 1.U) {
        sp.value := Mux(sp.value === 0.U, 0.U, sp.value - 1.U)
      }.otherwise {
        ras(sp.value) := Cat(rasTop.ctr - 1.U, rasTopAddr).asTypeOf(rasEntry())
      }
    }
  }
  // use checkpoint to recover RAS on a backend misprediction
  val recoverSp = io.redirectInfo.redirect.rasSp
  val recoverCtr = io.redirectInfo.redirect.rasTopCtr
  when (io.redirectInfo.valid && io.redirectInfo.misPred) {
    sp.value := recoverSp
    ras(recoverSp) := Cat(recoverCtr, ras(recoverSp).retAddr).asTypeOf(rasEntry())
  }

  // roll back global history in S1 if S3 redirects
  io.s1RollBackHist := Mux(io.s3Taken, PriorityMux(jmpIdx, io.out.bits.hist), io.out.bits.hist(0) << PopCount(brIdx & ~inLatch.tage.takens.asUInt))
  // whether Stage3 has a taken jump
  io.s3Taken := jmpIdx.orR.asBool

  // debug info
  XSDebug(io.in.fire(), "[BPUS3]in:(%d %d) pc=%x\n", io.in.valid, io.in.ready, io.in.bits.pc)
  XSDebug(io.out.valid, "[BPUS3]out:%d pc=%x redirect=%d predcdMask=%b instrValid=%b tgt=%x\n",
    io.out.valid, inLatch.pc, io.out.bits.redirect, io.predecode.bits.mask, io.out.bits.instrValid.asUInt, io.out.bits.target)
  XSDebug(true.B, "[BPUS3]flushS3=%d\n", flushS3)
  XSDebug(true.B, "[BPUS3]validLatch=%d predecode.valid=%d\n", validLatch, io.predecode.valid)
  XSDebug(true.B, "[BPUS3]brIdx=%b brTakenIdx=%b brNTakenIdx=%b jalIdx=%b jalrIdx=%b callIdx=%b retIdx=%b\n",
    brIdx, brTakenIdx, brNotTakenIdx, jalIdx, jalrIdx, callIdx, retIdx)

  // BPU's TEMP Perf Cnt
  BoringUtils.addSource(io.out.valid, "MbpS3Cnt")
  BoringUtils.addSource(io.out.valid && io.out.bits.redirect, "MbpS3TageRed")
  BoringUtils.addSource(io.out.valid && (inLatch.btbPred.bits.redirect ^ jmpIdx.orR.asBool), "MbpS3TageRedDir")
  BoringUtils.addSource(io.out.valid && (inLatch.btbPred.bits.redirect
              && jmpIdx.orR.asBool && (io.out.bits.target =/= inLatch.btbPred.bits.target)), "MbpS3TageRedTar")
}
373
// Top-level Branch Prediction Unit: a three-stage pipeline.
//   Stage1 — quick BTB/JBTAC/TAGE lookup, exported to the IFU as btbOut;
//   Stage2 — one-cycle buffer;
//   Stage3 — refined prediction using predecode + TAGE + RAS, exported as
//            tageOut; it can flush Stage1/Stage2 when it disagrees with
//            Stage1's quick prediction.
class BPU extends XSModule {
  val io = IO(new Bundle() {
    // from backend
    // flush pipeline if misPred and update bpu based on redirect signals from brq
    val redirectInfo = Input(new RedirectInfo)

    val in = new Bundle { val pc = Flipped(Valid(UInt(VAddrBits.W))) }

    val btbOut = ValidIO(new BranchPrediction)
    val tageOut = ValidIO(new BranchPrediction)

    // predecode info from icache
    // TODO: simplify this after implement predecode unit
    val predecode = Flipped(ValidIO(new Predecode))
  })

  val s1 = Module(new BPUStage1)
  val s2 = Module(new BPUStage2)
  val s3 = Module(new BPUStage3)

  // Stage1/Stage2 are flushed by both a Stage3 disagreement (flushBPU) and
  // a backend redirect; Stage3 itself is only flushed by the backend.
  s1.io.redirectInfo <> io.redirectInfo
  s1.io.flush := s3.io.flushBPU || io.redirectInfo.flush()
  s1.io.in.pc.valid := io.in.pc.valid
  s1.io.in.pc.bits <> io.in.pc.bits
  io.btbOut <> s1.io.s1OutPred
  s1.io.s3RollBackHist := s3.io.s1RollBackHist
  s1.io.s3Taken := s3.io.s3Taken

  s1.io.out <> s2.io.in
  s2.io.flush := s3.io.flushBPU || io.redirectInfo.flush()

  s2.io.out <> s3.io.in
  s3.io.flush := io.redirectInfo.flush()
  s3.io.predecode <> io.predecode
  io.tageOut <> s3.io.out
  s3.io.redirectInfo <> io.redirectInfo

  // TODO: temp and ugly code, when perf counters is added( may after adding CSR), please mv the below counter
  // (event name, padding-for-aligned-printout) pairs; the events themselves
  // are wired in from other modules via BoringUtils sinks of the same name.
  val bpuPerfCntList = List(
    ("MbpInstr","         "),
    ("MbpRight","         "),
    ("MbpWrong","         "),
    ("MbpBRight","        "),
    ("MbpBWrong","        "),
    ("MbpJRight","        "),
    ("MbpJWrong","        "),
    ("MbpIRight","        "),
    ("MbpIWrong","        "),
    ("MbpRRight","        "),
    ("MbpRWrong","        "),
    ("MbpS3Cnt","         "),
    ("MbpS3TageRed","     "),
    ("MbpS3TageRedDir","  "),
    ("MbpS3TageRedTar","  ")
  )

  // One XLEN-wide counter per event; increments whenever its condition
  // wire (driven through the BoringUtils sink below) is high.
  val bpuPerfCnts = List.fill(bpuPerfCntList.length)(RegInit(0.U(XLEN.W)))
  val bpuPerfCntConds = List.fill(bpuPerfCntList.length)(WireInit(false.B))
  (bpuPerfCnts zip bpuPerfCntConds) map { case (cnt, cond) => { when (cond) { cnt := cnt + 1.U }}}

  for(i <- bpuPerfCntList.indices) {
    BoringUtils.addSink(bpuPerfCntConds(i), bpuPerfCntList(i)._1)
  }

  // Dump all counters when the XSTRAP_BPU event (wired in via BoringUtils)
  // fires.
  val xsTrap = WireInit(false.B)
  BoringUtils.addSink(xsTrap, "XSTRAP_BPU")

  // if (!p.FPGAPlatform) {
    when (xsTrap) {
      printf("=================BPU's PerfCnt================\n")
      for(i <- bpuPerfCntList.indices) {
        printf(bpuPerfCntList(i)._1 + bpuPerfCntList(i)._2 + " <- " + "%d\n", bpuPerfCnts(i))
      }
    }
  // }
}
449}