xref: /XiangShan/src/main/scala/xiangshan/frontend/BPU.scala (revision f523fa794d6861011ca0e961ead36fe17f6cb671)
package xiangshan.frontend

import chisel3._
import chisel3.util._
import utils._
import xiangshan._
import xiangshan.backend.ALUOpType
import xiangshan.backend.JumpOpType
import chisel3.util.experimental.BoringUtils
import xiangshan.backend.decode.XSTrap

class TableAddr(val idxBits: Int, val banks: Int) extends XSBundle {
  def tagBits = VAddrBits - idxBits - 1

  val tag = UInt(tagBits.W)
  val idx = UInt(idxBits.W)
  val offset = UInt(1.W)

  def fromUInt(x: UInt) = x.asTypeOf(UInt(VAddrBits.W)).asTypeOf(this)
  def getTag(x: UInt) = fromUInt(x).tag
  def getIdx(x: UInt) = fromUInt(x).idx
  def getBank(x: UInt) = getIdx(x)(log2Up(banks) - 1, 0)
  def getBankIdx(x: UInt) = getIdx(x)(idxBits - 1, log2Up(banks))
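  // Example (a sketch, assuming VAddrBits = 39 as in Sv39, idxBits = 10, banks = 4):
  // an address x splits MSB-to-LSB into tag = x(38, 11), idx = x(10, 1), offset = x(0);
  // getBank picks idx(1, 0) to interleave consecutive lines across the 4 banks,
  // and getBankIdx picks idx(9, 2) as the row index within a bank.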
}

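// Stage1 -> Stage2 payload: the raw BTB/JBTAC/Tage responses for a fetch packet,
// the per-slot global history, and the quick prediction (btbPred) made in Stage1.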
class Stage1To2IO extends XSBundle {
  val pc = Output(UInt(VAddrBits.W))
  val btb = new Bundle {
    val hits = Output(UInt(PredictWidth.W))
    val targets = Output(Vec(PredictWidth, UInt(VAddrBits.W)))
  }
  val jbtac = new Bundle {
    val hitIdx = Output(UInt(PredictWidth.W))
    val target = Output(UInt(VAddrBits.W))
  }
  val tage = new Bundle {
    val hits = Output(UInt(FetchWidth.W))
    val takens = Output(Vec(FetchWidth, Bool()))
  }
  val hist = Output(Vec(PredictWidth, UInt(HistoryLength.W)))
  val btbPred = ValidIO(new BranchPrediction)
}

class BPUStage1 extends XSModule {
  val io = IO(new Bundle() {
    val in = new Bundle { val pc = Flipped(Decoupled(UInt(VAddrBits.W))) }
    // from backend
    val redirectInfo = Input(new RedirectInfo)
    // from Stage3
    val flush = Input(Bool())
    val s3RollBackHist = Input(UInt(HistoryLength.W))
    val s3Taken = Input(Bool())
    // to ifu, quick prediction result
    val s1OutPred = ValidIO(new BranchPrediction)
    // to Stage2
    val out = Decoupled(new Stage1To2IO)
  })

  io.in.pc.ready := true.B

  // flush Stage1 when io.flush
  val flushS1 = BoolStopWatch(io.flush, io.in.pc.fire(), startHighPriority = true)

  // global history register
  val ghr = RegInit(0.U(HistoryLength.W))
  // modify updateGhr and newGhr when updating ghr
  val updateGhr = WireInit(false.B)
  val newGhr = WireInit(0.U(HistoryLength.W))
  when (updateGhr) { ghr := newGhr }
  // use hist as global history!!!
  val hist = Mux(updateGhr, newGhr, ghr)
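  // hist bypasses a same-cycle update: when updateGhr fires, downstream readers
  // (Tage/JBTAC) see newGhr immediately instead of the stale ghr register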

  // Tage predictor
  val tage = if(EnableBPD) Module(new Tage) else Module(new FakeTAGE)
  tage.io.req.valid := io.in.pc.fire()
  tage.io.req.bits.pc := io.in.pc.bits
  tage.io.req.bits.hist := hist
  tage.io.redirectInfo <> io.redirectInfo
  io.out.bits.tage <> tage.io.out
  // io.s1OutPred.bits.tageMeta := tage.io.meta

  // latch pc for 1 cycle latency when reading SRAM
  val pcLatch = RegEnable(io.in.pc.bits, io.in.pc.fire())

  val r = io.redirectInfo.redirect
  val updateFetchpc = r.pc - (r.fetchIdx << 1.U)
  // BTB
  val btb = Module(new BTB)
  btb.io.in.pc <> io.in.pc
  btb.io.in.pcLatch := pcLatch
  // TODO: pass real mask in
  btb.io.in.mask := Fill(PredictWidth, 1.U(1.W))
  // latch the fetch mask for the same one-cycle SRAM latency as pc; this val must
  // come after btb's definition, since a forward reference to btb would still be
  // null at elaboration time
  val maskLatch = RegEnable(btb.io.in.mask, io.in.pc.fire())
  btb.io.redirectValid := io.redirectInfo.valid
  btb.io.flush := io.flush

  // btb.io.update.fetchPC := updateFetchpc
  // btb.io.update.fetchIdx := r.fetchIdx
  btb.io.update.pc := r.pc
  btb.io.update.hit := r.btbHit
  btb.io.update.misPred := io.redirectInfo.misPred
  // btb.io.update.writeWay := r.btbVictimWay
  btb.io.update.oldCtr := r.btbPredCtr
  btb.io.update.taken := r.taken
  btb.io.update.target := r.brTarget
  btb.io.update._type := r._type
  // TODO: add RVC logic
  btb.io.update.isRVC := r.isRVC

  // val btbHit = btb.io.out.hit
  val btbTaken = btb.io.out.taken
  val btbTakenIdx = btb.io.out.takenIdx
  val btbTakenTarget = btb.io.out.target
  // val btbWriteWay = btb.io.out.writeWay
  val btbNotTakens = btb.io.out.notTakens
  val btbCtrs = VecInit(btb.io.out.dEntries.map(_.pred))
  val btbValids = btb.io.out.hits
  val btbTargets = VecInit(btb.io.out.dEntries.map(_.target))
  val btbTypes = VecInit(btb.io.out.dEntries.map(_._type))


  val jbtac = Module(new JBTAC)
  jbtac.io.in.pc <> io.in.pc
  jbtac.io.in.pcLatch := pcLatch
  // TODO: pass real mask in
  jbtac.io.in.mask := Fill(PredictWidth, 1.U(1.W))
  jbtac.io.in.hist := hist
  jbtac.io.redirectValid := io.redirectInfo.valid
  jbtac.io.flush := io.flush

  jbtac.io.update.fetchPC := updateFetchpc
  jbtac.io.update.fetchIdx := r.fetchIdx
  jbtac.io.update.misPred := io.redirectInfo.misPred
  jbtac.io.update._type := r._type
  jbtac.io.update.target := r.target
  jbtac.io.update.hist := r.hist

  val jbtacHit = jbtac.io.out.hit
  val jbtacTarget = jbtac.io.out.target
  val jbtacHitIdx = jbtac.io.out.hitIdx

  // calculate global history of each instr
  val firstHist = RegNext(hist)
  val histShift = Wire(Vec(PredictWidth, UInt(log2Up(PredictWidth).W)))
  val shift = Wire(Vec(PredictWidth, Vec(PredictWidth, UInt(1.W))))
  (0 until PredictWidth).map(i => shift(i) := Mux(!btbNotTakens(i), 0.U, ~LowerMask(UIntToOH(i.U), PredictWidth)).asTypeOf(Vec(PredictWidth, UInt(1.W))))
  for (j <- 0 until PredictWidth) {
    var tmp = 0.U
    for (i <- 0 until PredictWidth) {
      tmp = tmp + shift(i)(j)
    }
    histShift(j) := tmp
  }
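  // Worked example (a sketch with PredictWidth = 8): if btbNotTakens = 0b00000110,
  // i.e. not-taken branches in slots 1 and 2, then shift(1) marks slots 2..7 and
  // shift(2) marks slots 3..7, so histShift = [0, 0, 1, 2, 2, 2, 2, 2]: each slot's
  // history is shifted once per not-taken branch strictly before it.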
  (0 until PredictWidth).map(i => io.s1OutPred.bits.hist(i) := firstHist << histShift(i))

  // update ghr
  updateGhr := io.flush || io.s1OutPred.bits.redirect || RegNext(io.in.pc.fire()) && (btbNotTakens.asUInt & maskLatch).orR.asBool
  val brJumpIdx = Mux(!btbTaken, 0.U, UIntToOH(btbTakenIdx))
  val indirectIdx = Mux(!jbtacHit, 0.U, UIntToOH(jbtacHitIdx))
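  // newGhr is chosen by priority: (1) backend flush: rebuild from the checkpointed
  // history, appending 0 for a not-taken branch and 1 otherwise; (2) Stage3 flush:
  // roll back to Stage3's history, appending 1 if it took a jump; (3) Stage1
  // redirect: take the redirecting slot's history and append 1; (4) otherwise
  // shift in one 0 per not-taken branch in the packet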
  newGhr := Mux(io.redirectInfo.flush(),    (r.hist << 1.U) | !(r._type === BTBtype.B && !r.taken),
            Mux(io.flush,                   Mux(io.s3Taken, (io.s3RollBackHist << 1.U) | 1.U, io.s3RollBackHist),
            Mux(io.s1OutPred.bits.redirect, (PriorityMux(brJumpIdx | indirectIdx, io.s1OutPred.bits.hist) << 1.U | 1.U),
                                            io.s1OutPred.bits.hist(0) << PopCount(btbNotTakens.asUInt & maskLatch))))

  // redirect based on BTB and JBTAC
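  // takenIdx is the one-hot position of the first (lowest) predicted-taken slot,
  // whether it comes from the BTB (brJumpIdx) or the JBTAC (indirectIdx)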
  val takenIdx = LowestBit(brJumpIdx | indirectIdx, PredictWidth)
  io.out.valid := RegNext(io.in.pc.fire()) && !io.flush

  io.s1OutPred.valid := io.out.valid
  io.s1OutPred.bits.redirect := btbTaken || jbtacHit
  io.s1OutPred.bits.instrValid := Mux(io.s1OutPred.bits.redirect, LowerMask(takenIdx, PredictWidth), maskLatch).asTypeOf(Vec(PredictWidth, Bool()))
  io.s1OutPred.bits.target := Mux(brJumpIdx === takenIdx, btbTakenTarget, Mux(indirectIdx === takenIdx, jbtacTarget, pcLatch + (PopCount(maskLatch) << 1.U)))
  io.s1OutPred.bits.lateJump := btb.io.out.isRVILateJump || jbtac.io.out.isRVILateJump
  // io.s1OutPred.bits.btbVictimWay := btbWriteWay
  io.s1OutPred.bits.predCtr := btbCtrs
  io.s1OutPred.bits.btbHit := btbValids
  io.s1OutPred.bits.tageMeta := DontCare
  io.s1OutPred.bits.rasSp := DontCare
  io.s1OutPred.bits.rasTopCtr := DontCare

  io.out.bits.pc := pcLatch
  io.out.bits.btb.hits := btbValids.asUInt
  (0 until PredictWidth).map(i => io.out.bits.btb.targets(i) := btbTargets(i))
  io.out.bits.jbtac.hitIdx := Mux(jbtacHit, UIntToOH(jbtacHitIdx), 0.U)
  io.out.bits.jbtac.target := jbtacTarget
  // TODO: we don't need this repeatedly!
  io.out.bits.hist := io.s1OutPred.bits.hist
  io.out.bits.btbPred := io.s1OutPred



  // debug info
  XSDebug("in:(%d %d)   pc=%x ghr=%b\n", io.in.pc.valid, io.in.pc.ready, io.in.pc.bits, hist)
  XSDebug("outPred:(%d) pc=0x%x, redirect=%d instrValid=%b tgt=%x\n",
    io.s1OutPred.valid, pcLatch, io.s1OutPred.bits.redirect, io.s1OutPred.bits.instrValid.asUInt, io.s1OutPred.bits.target)
  XSDebug(io.flush && io.redirectInfo.flush(),
    "flush from backend: pc=%x tgt=%x brTgt=%x _type=%b taken=%d oldHist=%b fetchIdx=%d isExcpt=%d\n",
    r.pc, r.target, r.brTarget, r._type, r.taken, r.hist, r.fetchIdx, r.isException)
  XSDebug(io.flush && !io.redirectInfo.flush(),
    "flush from Stage3:  s3Taken=%d s3RollBackHist=%b\n", io.s3Taken, io.s3RollBackHist)

}

class Stage2To3IO extends Stage1To2IO {
}

class BPUStage2 extends XSModule {
  val io = IO(new Bundle() {
    // flush from Stage3
    val flush = Input(Bool())
    val in = Flipped(Decoupled(new Stage1To2IO))
    val out = Decoupled(new Stage2To3IO)
  })

  // flush Stage2 when Stage3 or the backend redirects
  val flushS2 = BoolStopWatch(io.flush, io.in.fire(), startHighPriority = true)
  val inLatch = RegInit(0.U.asTypeOf(io.in.bits))
  when (io.in.fire()) { inLatch := io.in.bits }
  val validLatch = RegInit(false.B)
  when (io.flush) {
    validLatch := false.B
  }.elsewhen (io.in.fire()) {
    validLatch := true.B
  }.elsewhen (io.out.fire()) {
    validLatch := false.B
  }

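  // standard one-entry pipeline buffer handshake: the output is valid while the
  // latch holds an un-flushed entry, and a new input is accepted once it drains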
  io.out.valid := !io.flush && !flushS2 && validLatch
  io.in.ready := !validLatch || io.out.fire()

  // pass the payload through unchanged
  io.out.bits := inLatch

  // debug info
  XSDebug("in:(%d %d) pc=%x out:(%d %d) pc=%x\n",
    io.in.valid, io.in.ready, io.in.bits.pc, io.out.valid, io.out.ready, io.out.bits.pc)
  XSDebug("validLatch=%d pc=%x\n", validLatch, inLatch.pc)
  XSDebug(io.flush, "flush!!!\n")
}

class BPUStage3 extends XSModule {
  val io = IO(new Bundle() {
    val flush = Input(Bool())
    val in = Flipped(Decoupled(new Stage2To3IO))
    val out = ValidIO(new BranchPrediction)
    // from icache
    val predecode = Flipped(ValidIO(new Predecode))
    // from backend
    val redirectInfo = Input(new RedirectInfo)
    // to Stage1 and Stage2
    val flushBPU = Output(Bool())
    // to Stage1, restore ghr in stage1 when flushBPU is valid
    val s1RollBackHist = Output(UInt(HistoryLength.W))
    val s3Taken = Output(Bool())
  })

  val flushS3 = BoolStopWatch(io.flush, io.in.fire(), startHighPriority = true)
  val inLatch = RegInit(0.U.asTypeOf(io.in.bits))
  val validLatch = RegInit(false.B)
  when (io.in.fire()) { inLatch := io.in.bits }
  when (io.flush) {
    validLatch := false.B
  }.elsewhen (io.in.fire()) {
    validLatch := true.B
  }.elsewhen (io.out.valid) {
    validLatch := false.B
  }
  io.out.valid := validLatch && io.predecode.valid && !flushS3 && !io.flush
  io.in.ready := !validLatch || io.out.valid

  // RAS
  // TODO: split retAddr and ctr
  def rasEntry() = new Bundle {
    val retAddr = UInt(VAddrBits.W)
    val ctr = UInt(8.W) // layer of nested call functions
  }
  val ras = RegInit(VecInit(Seq.fill(RasSize)(0.U.asTypeOf(rasEntry()))))
  val sp = Counter(RasSize)
  val rasTop = ras(sp.value)
  val rasTopAddr = rasTop.retAddr

  // get the first taken branch/jal/call/jalr/ret in a fetch line
  // brTakenIdx/jalIdx/callIdx/jalrIdx/retIdx/jmpIdx are one-hot encoded.
  // brNotTakenIdx indicates all the not-taken branches before the first jump instruction.
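  // For example (a sketch): if slot 1 holds a not-taken branch, slot 3 a taken
  // branch, and slot 5 a jal, then brTakenIdx = 0b001000, jalIdx = 0b100000,
  // jmpIdx = 0b001000 (the lowest set bit), and brNotTakenIdx marks slot 1 only.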
  val brIdx = inLatch.btb.hits & Reverse(Cat(io.predecode.bits.fuOpTypes.map { t => ALUOpType.isBranch(t) }).asUInt) & io.predecode.bits.mask
  val brTakenIdx = if(HasBPD) {
    LowestBit(brIdx & Reverse(Cat(inLatch.tage.takens.map {t => Fill(2, t.asUInt)}).asUInt), PredictWidth)
  } else {
    LowestBit(brIdx & Reverse(Cat(inLatch.btbPred.bits.predCtr.map {c => c(1)}).asUInt), PredictWidth)
  }
  // TODO: btb doesn't need to hit, jalIdx/callIdx can be calculated based on instructions read in Cache
  val jalIdx = LowestBit(inLatch.btb.hits & Reverse(Cat(io.predecode.bits.fuOpTypes.map { t => t === JumpOpType.jal }).asUInt) & io.predecode.bits.mask, PredictWidth)
  val callIdx = LowestBit(inLatch.btb.hits & io.predecode.bits.mask & Reverse(Cat(io.predecode.bits.fuOpTypes.map { t => t === JumpOpType.call }).asUInt), PredictWidth)
  val jalrIdx = LowestBit(inLatch.jbtac.hitIdx & io.predecode.bits.mask & Reverse(Cat(io.predecode.bits.fuOpTypes.map { t => t === JumpOpType.jalr }).asUInt), PredictWidth)
  val retIdx = LowestBit(io.predecode.bits.mask & Reverse(Cat(io.predecode.bits.fuOpTypes.map { t => t === JumpOpType.ret }).asUInt), PredictWidth)

  val jmpIdx = LowestBit(brTakenIdx | jalIdx | callIdx | jalrIdx | retIdx, PredictWidth)
  val brNotTakenIdx = brIdx & LowerMask(jmpIdx, PredictWidth) & (
    if(HasBPD) ~Reverse(Cat(inLatch.tage.takens.map {t => Fill(2, t.asUInt)}).asUInt)
    else ~Reverse(Cat(inLatch.btbPred.bits.predCtr.map {c => c(1)}).asUInt))

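  // note: this assignment is unconditionally overridden by the final redirect
  // equation further below (Chisel last-connect semantics), so it has no effect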
  io.out.bits.redirect := jmpIdx.orR.asBool
  io.out.bits.target := Mux(jmpIdx === retIdx, rasTopAddr,
    Mux(jmpIdx === jalrIdx, inLatch.jbtac.target,
    Mux(jmpIdx === 0.U, inLatch.pc + 32.U, // TODO: RVC
    PriorityMux(jmpIdx, inLatch.btb.targets))))
  io.out.bits.instrValid := Mux(jmpIdx.orR, LowerMask(jmpIdx, FetchWidth), Fill(FetchWidth, 1.U(1.W))).asTypeOf(Vec(FetchWidth, Bool()))
  // io.out.bits.btbVictimWay := inLatch.btbPred.bits.btbVictimWay
  io.out.bits.predCtr := inLatch.btbPred.bits.predCtr
  io.out.bits.btbHitWay := inLatch.btbPred.bits.btbHitWay
  io.out.bits.tageMeta := inLatch.btbPred.bits.tageMeta
  //io.out.bits._type := Mux(jmpIdx === retIdx, BTBtype.R,
  //  Mux(jmpIdx === jalrIdx, BTBtype.I,
  //  Mux(jmpIdx === brTakenIdx, BTBtype.B, BTBtype.J)))
  val firstHist = inLatch.btbPred.bits.hist(0)
  // there may be several not-taken branches before the first jump instruction,
  // so we need to calculate how many zeroes each instruction should shift into its global history.
  // each history excludes the instruction's own jump direction.
  val histShift = Wire(Vec(FetchWidth, UInt(log2Up(FetchWidth).W)))
  val shift = Wire(Vec(FetchWidth, Vec(FetchWidth, UInt(1.W))))
  (0 until FetchWidth).map(i => shift(i) := Mux(!brNotTakenIdx(i), 0.U, ~LowerMask(UIntToOH(i.U), FetchWidth)).asTypeOf(Vec(FetchWidth, UInt(1.W))))
  for (j <- 0 until FetchWidth) {
    var tmp = 0.U
    for (i <- 0 until FetchWidth) {
      tmp = tmp + shift(i)(j)
    }
    histShift(j) := tmp
  }
  (0 until FetchWidth).map(i => io.out.bits.hist(i) := firstHist << histShift(i))
  // save ras checkpoint info
  io.out.bits.rasSp := sp.value
  io.out.bits.rasTopCtr := rasTop.ctr

  // flush BPU and redirect when target differs from the target predicted in Stage1
  io.out.bits.redirect := (if(EnableBPD) ((inLatch.btbPred.bits.redirect ^ jmpIdx.orR.asBool) ||
    inLatch.btbPred.bits.redirect && jmpIdx.orR.asBool && io.out.bits.target =/= inLatch.btbPred.bits.target)
    else false.B)
  io.flushBPU := io.out.bits.redirect && io.out.valid

  // speculative update RAS
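  // nested recursion is compressed into the ctr field: a call whose return address
  // already matches the stack top bumps ctr instead of pushing a new entry, and a
  // ret only pops the entry once ctr drops to 1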
  val rasWrite = WireInit(0.U.asTypeOf(rasEntry()))
  rasWrite.retAddr := inLatch.pc + (OHToUInt(callIdx) << 2.U) + 4.U
  val allocNewEntry = rasWrite.retAddr =/= rasTopAddr
  rasWrite.ctr := Mux(allocNewEntry, 1.U, rasTop.ctr + 1.U)
  when (io.out.valid) {
    when (jmpIdx === callIdx) {
      ras(Mux(allocNewEntry, sp.value + 1.U, sp.value)) := rasWrite
      when (allocNewEntry) { sp.value := sp.value + 1.U }
    }.elsewhen (jmpIdx === retIdx) {
      when (rasTop.ctr === 1.U) {
        sp.value := Mux(sp.value === 0.U, 0.U, sp.value - 1.U)
      }.otherwise {
        ras(sp.value) := Cat(rasTop.ctr - 1.U, rasTopAddr).asTypeOf(rasEntry())
      }
    }
  }
  // use checkpoint to recover RAS
  val recoverSp = io.redirectInfo.redirect.rasSp
  val recoverCtr = io.redirectInfo.redirect.rasTopCtr
  when (io.redirectInfo.valid && io.redirectInfo.misPred) {
    sp.value := recoverSp
    ras(recoverSp) := Cat(recoverCtr, ras(recoverSp).retAddr).asTypeOf(rasEntry())
  }

  // roll back global history in S1 if S3 redirects
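  // NOTE: brIdx is PredictWidth wide while inLatch.tage.takens is FetchWidth wide;
  // the mask below presumably needs the same Fill(2, _) expansion applied to takens
  // above, since the narrower operand is zero-extended before the AND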
  io.s1RollBackHist := Mux(io.s3Taken, PriorityMux(jmpIdx, io.out.bits.hist), io.out.bits.hist(0) << PopCount(brIdx & ~inLatch.tage.takens.asUInt))
  // whether Stage3 has a taken jump
  io.s3Taken := jmpIdx.orR.asBool

  // debug info
  XSDebug(io.in.fire(), "[BPUS3]in:(%d %d) pc=%x\n", io.in.valid, io.in.ready, io.in.bits.pc)
  XSDebug(io.out.valid, "[BPUS3]out:%d pc=%x redirect=%d predcdMask=%b instrValid=%b tgt=%x\n",
    io.out.valid, inLatch.pc, io.out.bits.redirect, io.predecode.bits.mask, io.out.bits.instrValid.asUInt, io.out.bits.target)
  XSDebug(true.B, "[BPUS3]flushS3=%d\n", flushS3)
  XSDebug(true.B, "[BPUS3]validLatch=%d predecode.valid=%d\n", validLatch, io.predecode.valid)
  XSDebug(true.B, "[BPUS3]brIdx=%b brTakenIdx=%b brNTakenIdx=%b jalIdx=%b jalrIdx=%b callIdx=%b retIdx=%b\n",
    brIdx, brTakenIdx, brNotTakenIdx, jalIdx, jalrIdx, callIdx, retIdx)

  // BPU's TEMP Perf Cnt
  BoringUtils.addSource(io.out.valid, "MbpS3Cnt")
  BoringUtils.addSource(io.out.valid && io.out.bits.redirect, "MbpS3TageRed")
  BoringUtils.addSource(io.out.valid && (inLatch.btbPred.bits.redirect ^ jmpIdx.orR.asBool), "MbpS3TageRedDir")
  BoringUtils.addSource(io.out.valid && (inLatch.btbPred.bits.redirect
              && jmpIdx.orR.asBool && (io.out.bits.target =/= inLatch.btbPred.bits.target)), "MbpS3TageRedTar")
}

class BPU extends XSModule {
  val io = IO(new Bundle() {
    // from backend
    // flush pipeline if misPred and update bpu based on redirect signals from brq
    val redirectInfo = Input(new RedirectInfo)

    val in = new Bundle { val pc = Flipped(Valid(UInt(VAddrBits.W))) }

    val btbOut = ValidIO(new BranchPrediction)
    val tageOut = ValidIO(new BranchPrediction)

    // predecode info from icache
    // TODO: simplify this after implementing the predecode unit
    val predecode = Flipped(ValidIO(new Predecode))
  })

  val s1 = Module(new BPUStage1)
  val s2 = Module(new BPUStage2)
  val s3 = Module(new BPUStage3)

  s1.io.redirectInfo <> io.redirectInfo
  s1.io.flush := s3.io.flushBPU || io.redirectInfo.flush()
  s1.io.in.pc.valid := io.in.pc.valid
  s1.io.in.pc.bits <> io.in.pc.bits
  io.btbOut <> s1.io.s1OutPred
  s1.io.s3RollBackHist := s3.io.s1RollBackHist
  s1.io.s3Taken := s3.io.s3Taken

  s1.io.out <> s2.io.in
  s2.io.flush := s3.io.flushBPU || io.redirectInfo.flush()

  s2.io.out <> s3.io.in
  s3.io.flush := io.redirectInfo.flush()
  s3.io.predecode <> io.predecode
  io.tageOut <> s3.io.out
  s3.io.redirectInfo <> io.redirectInfo

  // TODO: temporary and ugly code; when real perf counters are added (maybe after adding CSR), please move the counters below
  val bpuPerfCntList = List(
    ("MbpInstr","         "),
    ("MbpRight","         "),
    ("MbpWrong","         "),
    ("MbpBRight","        "),
    ("MbpBWrong","        "),
    ("MbpJRight","        "),
    ("MbpJWrong","        "),
    ("MbpIRight","        "),
    ("MbpIWrong","        "),
    ("MbpRRight","        "),
    ("MbpRWrong","        "),
    ("MbpS3Cnt","         "),
    ("MbpS3TageRed","     "),
    ("MbpS3TageRedDir","  "),
    ("MbpS3TageRedTar","  ")
  )

  val bpuPerfCnts = List.fill(bpuPerfCntList.length)(RegInit(0.U(XLEN.W)))
  val bpuPerfCntConds = List.fill(bpuPerfCntList.length)(WireInit(false.B))
  (bpuPerfCnts zip bpuPerfCntConds) map { case (cnt, cond) => { when (cond) { cnt := cnt + 1.U }}}

  for(i <- bpuPerfCntList.indices) {
    BoringUtils.addSink(bpuPerfCntConds(i), bpuPerfCntList(i)._1)
  }

  val xsTrap = WireInit(false.B)
  BoringUtils.addSink(xsTrap, "XSTRAP_BPU")

  // if (!p.FPGAPlatform) {
    when (xsTrap) {
      printf("=================BPU's PerfCnt================\n")
      for(i <- bpuPerfCntList.indices) {
        printf(bpuPerfCntList(i)._1 + bpuPerfCntList(i)._2 + " <- " + "%d\n", bpuPerfCnts(i))
      }
    }
  // }
}
463}