xref: /XiangShan/src/main/scala/xiangshan/frontend/NewFtq.scala (revision c776f0d5c16e451f85ce9f5dc32a87e9a3cf5587)
1/***************************************************************************************
2* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
3* Copyright (c) 2020-2021 Peng Cheng Laboratory
4*
5* XiangShan is licensed under Mulan PSL v2.
6* You can use this software according to the terms and conditions of the Mulan PSL v2.
7* You may obtain a copy of Mulan PSL v2 at:
8*          http://license.coscl.org.cn/MulanPSL2
9*
10* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13*
14* See the Mulan PSL v2 for more details.
15***************************************************************************************/
16
17package xiangshan.frontend
18
19import org.chipsalliance.cde.config.Parameters
20import chisel3._
21import chisel3.util._
22import utils._
23import utility._
24import xiangshan._
25import xiangshan.frontend.icache._
26import xiangshan.backend.CtrlToFtqIO
27import xiangshan.backend.decode.ImmUnion
28import utility.ChiselDB
29
30class FtqDebugBundle extends Bundle {
31  val pc = UInt(39.W)
32  val target = UInt(39.W)
33  val isBr = Bool()
34  val isJmp = Bool()
35  val isCall = Bool()
36  val isRet = Bool()
37  val misPred = Bool()
38  val isTaken = Bool()
39  val predStage = UInt(2.W)
40}
41
42class FtqPtr(entries: Int) extends CircularQueuePtr[FtqPtr](
43  entries
44){
45  def this()(implicit p: Parameters) = this(p(XSCoreParamsKey).FtqSize)
46}
47
48object FtqPtr {
49  def apply(f: Bool, v: UInt)(implicit p: Parameters): FtqPtr = {
50    val ptr = Wire(new FtqPtr)
51    ptr.flag := f
52    ptr.value := v
53    ptr
54  }
55  def inverse(ptr: FtqPtr)(implicit p: Parameters): FtqPtr = {
56    apply(!ptr.flag, ptr.value)
57  }
58}
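// FtqPtr follows the CircularQueuePtr convention used throughout this file
// (isFull, isBefore, distanceBetween): `value` indexes the FtqSize entries and
// `flag` presumably flips on every wrap-around, e.g. isFull(a, b) holds when
// a.flag =/= b.flag && a.value === b.value, i.e. the two pointers are a full queue apart.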
59
60class FtqNRSRAM[T <: Data](gen: T, numRead: Int)(implicit p: Parameters) extends XSModule {
61
62  val io = IO(new Bundle() {
63    val raddr = Input(Vec(numRead, UInt(log2Up(FtqSize).W)))
64    val ren = Input(Vec(numRead, Bool()))
65    val rdata = Output(Vec(numRead, gen))
66    val waddr = Input(UInt(log2Up(FtqSize).W))
67    val wen = Input(Bool())
68    val wdata = Input(gen)
69  })
70
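  // provide numRead read ports by replicating storage: one SRAMTemplate per
  // read port, all written in parallel with the same data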
71  for(i <- 0 until numRead){
72    val sram = Module(new SRAMTemplate(gen, FtqSize))
73    sram.io.r.req.valid := io.ren(i)
74    sram.io.r.req.bits.setIdx := io.raddr(i)
75    io.rdata(i) := sram.io.r.resp.data(0)
76    sram.io.w.req.valid := io.wen
77    sram.io.w.req.bits.setIdx := io.waddr
78    sram.io.w.req.bits.data := VecInit(io.wdata)
79  }
80
81}
82
83class Ftq_RF_Components(implicit p: Parameters) extends XSBundle with BPUUtils {
84  val startAddr = UInt(VAddrBits.W)
85  val nextLineAddr = UInt(VAddrBits.W)
86  val isNextMask = Vec(PredictWidth, Bool())
87  val fallThruError = Bool()
88  // val carry = Bool()
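  // roughly: getPc rebuilds the full PC of the instruction at `offset` in this
  // fetch block; the low bits are startAddr's in-block offset plus `offset`, and
  // when that instruction falls into the next line (isNextMask(offset)) with
  // startAddr already in the upper half of the region kept by getHigher, the
  // upper bits are taken from nextLineAddr instead of startAddr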
89  def getPc(offset: UInt) = {
90    def getHigher(pc: UInt) = pc(VAddrBits-1, log2Ceil(PredictWidth)+instOffsetBits+1)
91    def getOffset(pc: UInt) = pc(log2Ceil(PredictWidth)+instOffsetBits, instOffsetBits)
92    Cat(getHigher(Mux(isNextMask(offset) && startAddr(log2Ceil(PredictWidth)+instOffsetBits), nextLineAddr, startAddr)),
93        getOffset(startAddr)+offset, 0.U(instOffsetBits.W))
94  }
95  def fromBranchPrediction(resp: BranchPredictionBundle) = {
96    def carryPos(addr: UInt) = addr(instOffsetBits+log2Ceil(PredictWidth)+1)
97    this.startAddr := resp.pc(3)
98    this.nextLineAddr := resp.pc(3) + (FetchWidth * 4 * 2).U // may be broken on other configs
99    this.isNextMask := VecInit((0 until PredictWidth).map(i =>
100      (resp.pc(3)(log2Ceil(PredictWidth), 1) +& i.U)(log2Ceil(PredictWidth)).asBool
101    ))
102    this.fallThruError := resp.fallThruError(3)
103    this
104  }
105  override def toPrintable: Printable = {
106    p"startAddr:${Hexadecimal(startAddr)}"
107  }
108}
109
110class Ftq_pd_Entry(implicit p: Parameters) extends XSBundle {
111  val brMask = Vec(PredictWidth, Bool())
112  val jmpInfo = ValidUndirectioned(Vec(3, Bool()))
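  // jmpInfo: valid means the block contains a jal/jalr;
  // bits(0) = isJalr, bits(1) = isCall, bits(2) = isRet (see fromPdWb and has* below)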
113  val jmpOffset = UInt(log2Ceil(PredictWidth).W)
114  val jalTarget = UInt(VAddrBits.W)
115  val rvcMask = Vec(PredictWidth, Bool())
116  def hasJal  = jmpInfo.valid && !jmpInfo.bits(0)
117  def hasJalr = jmpInfo.valid && jmpInfo.bits(0)
118  def hasCall = jmpInfo.valid && jmpInfo.bits(1)
119  def hasRet  = jmpInfo.valid && jmpInfo.bits(2)
120
121  def fromPdWb(pdWb: PredecodeWritebackBundle) = {
122    val pds = pdWb.pd
123    this.brMask := VecInit(pds.map(pd => pd.isBr && pd.valid))
124    this.jmpInfo.valid := VecInit(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid)).asUInt.orR
125    this.jmpInfo.bits := ParallelPriorityMux(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid),
126                                             pds.map(pd => VecInit(pd.isJalr, pd.isCall, pd.isRet)))
127    this.jmpOffset := ParallelPriorityEncoder(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid))
128    this.rvcMask := VecInit(pds.map(pd => pd.isRVC))
129    this.jalTarget := pdWb.jalTarget
130  }
131
132  def toPd(offset: UInt) = {
133    require(offset.getWidth == log2Ceil(PredictWidth))
134    val pd = Wire(new PreDecodeInfo)
135    pd.valid := true.B
136    pd.isRVC := rvcMask(offset)
137    val isBr = brMask(offset)
138    val isJalr = offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(0)
139    pd.brType := Cat(offset === jmpOffset && jmpInfo.valid, isJalr || isBr)
140    pd.isCall := offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(1)
141    pd.isRet  := offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(2)
142    pd
143  }
144}
145
146class PrefetchPtrDB(implicit p: Parameters) extends Bundle {
147  val fromFtqPtr  = UInt(log2Up(p(XSCoreParamsKey).FtqSize).W)
148  val fromIfuPtr  = UInt(log2Up(p(XSCoreParamsKey).FtqSize).W)
149}
150
151class Ftq_Redirect_SRAMEntry(implicit p: Parameters) extends SpeculativeInfo {
152  val sc_disagree = if (!env.FPGAPlatform) Some(Vec(numBr, Bool())) else None
153}
154
155class Ftq_1R_SRAMEntry(implicit p: Parameters) extends XSBundle with HasBPUConst {
156  val meta = UInt(MaxMetaLength.W)
157  val ftb_entry = new FTBEntry
158}
159
160class Ftq_Pred_Info(implicit p: Parameters) extends XSBundle {
161  val target = UInt(VAddrBits.W)
162  val cfiIndex = ValidUndirectioned(UInt(log2Ceil(PredictWidth).W))
163}
164
165
166class FtqRead[T <: Data](private val gen: T)(implicit p: Parameters) extends XSBundle {
167  val valid = Output(Bool())
168  val ptr = Output(new FtqPtr)
169  val offset = Output(UInt(log2Ceil(PredictWidth).W))
170  val data = Input(gen)
171  def apply(valid: Bool, ptr: FtqPtr, offset: UInt) = {
172    this.valid := valid
173    this.ptr := ptr
174    this.offset := offset
175    this.data
176  }
177}
178
179
180class FtqToBpuIO(implicit p: Parameters) extends XSBundle {
181  val redirect = Valid(new BranchPredictionRedirect)
182  val update = Valid(new BranchPredictionUpdate)
183  val enq_ptr = Output(new FtqPtr)
184  val redirctFromIFU = Output(Bool())
185}
186
187class BpuFlushInfo(implicit p: Parameters) extends XSBundle with HasCircularQueuePtrHelper {
188  // when the ifu pipeline is not stalled,
189  // a packet from bpu s3 can have reached f1 at most
190  val s2 = Valid(new FtqPtr)
191  val s3 = Valid(new FtqPtr)
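  // a request already sent for ftq entry `idx_to_flush` must be flushed if a
  // later BPU stage redirects at that entry or an earlier one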
192  def shouldFlushBy(src: Valid[FtqPtr], idx_to_flush: FtqPtr) = {
193    src.valid && !isAfter(src.bits, idx_to_flush)
194  }
195  def shouldFlushByStage2(idx: FtqPtr) = shouldFlushBy(s2, idx)
196  def shouldFlushByStage3(idx: FtqPtr) = shouldFlushBy(s3, idx)
197}
198
199class FtqToIfuIO(implicit p: Parameters) extends XSBundle {
200  val req = Decoupled(new FetchRequestBundle)
201  val redirect = Valid(new BranchPredictionRedirect)
202  val topdown_redirect = Valid(new BranchPredictionRedirect)
203  val flushFromBpu = new BpuFlushInfo
204}
205
206class FtqToICacheIO(implicit p: Parameters) extends XSBundle {
207  // NOTE: req.bits must be prepared in cycle T,
208  // while req.valid is set true in cycle T + 1
209  val req = Decoupled(new FtqToICacheRequestBundle)
210}
211
212class FtqToPrefetchIO(implicit p: Parameters) extends XSBundle {
213  val req = Decoupled(new FtqICacheInfo)
214  val flushFromBpu = new BpuFlushInfo
215}
216
217trait HasBackendRedirectInfo extends HasXSParameter {
218  def isLoadReplay(r: Valid[Redirect]) = r.bits.flushItself()
219}
220
221class FtqToCtrlIO(implicit p: Parameters) extends XSBundle with HasBackendRedirectInfo {
222  // write to backend pc mem
223  val pc_mem_wen = Output(Bool())
224  val pc_mem_waddr = Output(UInt(log2Ceil(FtqSize).W))
225  val pc_mem_wdata = Output(new Ftq_RF_Components)
226  // newest target
227  val newest_entry_en = Output(Bool())
228  val newest_entry_target = Output(UInt(VAddrBits.W))
229  val newest_entry_ptr = Output(new FtqPtr)
230}
231
232class FTBEntryGen(implicit p: Parameters) extends XSModule with HasBackendRedirectInfo with HasBPUParameter {
233  val io = IO(new Bundle {
234    val start_addr = Input(UInt(VAddrBits.W))
235    val old_entry = Input(new FTBEntry)
236    val pd = Input(new Ftq_pd_Entry)
237    val cfiIndex = Flipped(Valid(UInt(log2Ceil(PredictWidth).W)))
238    val target = Input(UInt(VAddrBits.W))
239    val hit = Input(Bool())
240    val mispredict_vec = Input(Vec(PredictWidth, Bool()))
241
242    val new_entry = Output(new FTBEntry)
243    val new_br_insert_pos = Output(Vec(numBr, Bool()))
244    val taken_mask = Output(Vec(numBr, Bool()))
245    val jmp_taken = Output(Bool())
246    val mispred_mask = Output(Vec(numBr+1, Bool()))
247
248    // for perf counters
249    val is_init_entry = Output(Bool())
250    val is_old_entry = Output(Bool())
251    val is_new_br = Output(Bool())
252    val is_jalr_target_modified = Output(Bool())
253    val is_always_taken_modified = Output(Bool())
254    val is_br_full = Output(Bool())
255  })
256
257  // no mispredictions detected at predecode
258  val hit = io.hit
259  val pd = io.pd
260
261  val init_entry = WireInit(0.U.asTypeOf(new FTBEntry))
262
263
264  val cfi_is_br = pd.brMask(io.cfiIndex.bits) && io.cfiIndex.valid
265  val entry_has_jmp = pd.jmpInfo.valid
266  val new_jmp_is_jal  = entry_has_jmp && !pd.jmpInfo.bits(0) && io.cfiIndex.valid
267  val new_jmp_is_jalr = entry_has_jmp &&  pd.jmpInfo.bits(0) && io.cfiIndex.valid
268  val new_jmp_is_call = entry_has_jmp &&  pd.jmpInfo.bits(1) && io.cfiIndex.valid
269  val new_jmp_is_ret  = entry_has_jmp &&  pd.jmpInfo.bits(2) && io.cfiIndex.valid
270  val last_jmp_rvi = entry_has_jmp && pd.jmpOffset === (PredictWidth-1).U && !pd.rvcMask.last
271  // val last_br_rvi = cfi_is_br && io.cfiIndex.bits === (PredictWidth-1).U && !pd.rvcMask.last
272
273  val cfi_is_jal = io.cfiIndex.bits === pd.jmpOffset && new_jmp_is_jal
274  val cfi_is_jalr = io.cfiIndex.bits === pd.jmpOffset && new_jmp_is_jalr
275
276  def carryPos = log2Ceil(PredictWidth)+instOffsetBits
277  def getLower(pc: UInt) = pc(carryPos-1, instOffsetBits)
278  // if not hit, establish a new entry
279  init_entry.valid := true.B
280  // tag is left for ftb to assign
281
282  // case br
283  val init_br_slot = init_entry.getSlotForBr(0)
284  when (cfi_is_br) {
285    init_br_slot.valid := true.B
286    init_br_slot.offset := io.cfiIndex.bits
287    init_br_slot.setLowerStatByTarget(io.start_addr, io.target, numBr == 1)
288    init_entry.always_taken(0) := true.B // set to always taken on init
289  }
290
291  // case jmp
292  when (entry_has_jmp) {
293    init_entry.tailSlot.offset := pd.jmpOffset
294    init_entry.tailSlot.valid := new_jmp_is_jal || new_jmp_is_jalr
295    init_entry.tailSlot.setLowerStatByTarget(io.start_addr, Mux(cfi_is_jalr, io.target, pd.jalTarget), isShare=false)
296  }
297
298  val jmpPft = getLower(io.start_addr) +& pd.jmpOffset +& Mux(pd.rvcMask(pd.jmpOffset), 1.U, 2.U)
299  init_entry.pftAddr := Mux(entry_has_jmp && !last_jmp_rvi, jmpPft, getLower(io.start_addr))
300  init_entry.carry   := Mux(entry_has_jmp && !last_jmp_rvi, jmpPft(carryPos-instOffsetBits), true.B)
301  init_entry.isJalr := new_jmp_is_jalr
302  init_entry.isCall := new_jmp_is_call
303  init_entry.isRet  := new_jmp_is_ret
304  // a non-RVC jump in the last slot extends past the fetch block, so the fall-through address points to the middle of that inst (it may be an RVI call)
305  init_entry.last_may_be_rvi_call := pd.jmpOffset === (PredictWidth-1).U && !pd.rvcMask(pd.jmpOffset)
306
307  // if hit, check whether a new cfi(only br is possible) is detected
308  val oe = io.old_entry
309  val br_recorded_vec = oe.getBrRecordedVec(io.cfiIndex.bits)
310  val br_recorded = br_recorded_vec.asUInt.orR
311  val is_new_br = cfi_is_br && !br_recorded
312  val new_br_offset = io.cfiIndex.bits
313  // vec(i) means new br will be inserted BEFORE old br(i)
314  val allBrSlotsVec = oe.allSlotsForBr
315  val new_br_insert_onehot = VecInit((0 until numBr).map{
316    i => i match {
317      case 0 =>
318        !allBrSlotsVec(0).valid || new_br_offset < allBrSlotsVec(0).offset
319      case idx =>
320        allBrSlotsVec(idx-1).valid && new_br_offset > allBrSlotsVec(idx-1).offset &&
321        (!allBrSlotsVec(idx).valid || new_br_offset < allBrSlotsVec(idx).offset)
322    }
323  })
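  // for illustration (assuming the common numBr == 2 config): a new br below
  // slot 0's offset sets insert(0); one between slot 0 and slot 1, or after a
  // valid slot 0 when slot 1 is empty, sets insert(1); an all-zero vector means
  // the new br lies behind all recorded brs and no free slot is left for it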
324
325  val old_entry_modified = WireInit(io.old_entry)
326  for (i <- 0 until numBr) {
327    val slot = old_entry_modified.allSlotsForBr(i)
328    when (new_br_insert_onehot(i)) {
329      slot.valid := true.B
330      slot.offset := new_br_offset
331      slot.setLowerStatByTarget(io.start_addr, io.target, i == numBr-1)
332      old_entry_modified.always_taken(i) := true.B
333    }.elsewhen (new_br_offset > oe.allSlotsForBr(i).offset) {
334      old_entry_modified.always_taken(i) := false.B
335      // all other fields remain unchanged
336    }.otherwise {
337      // case i == 0, remain unchanged
338      if (i != 0) {
339        val noNeedToMoveFromFormerSlot = (i == numBr-1).B && !oe.brSlots.last.valid
340        when (!noNeedToMoveFromFormerSlot) {
341          slot.fromAnotherSlot(oe.allSlotsForBr(i-1))
342          old_entry_modified.always_taken(i) := oe.always_taken(i)
343        }
344      }
345    }
346  }
347
348  // two circumstances:
349  // 1. oe: | br | j  |, new br should be in front of j, thus addr of j should be new pft
350  // 2. oe: | br | br |, new br could be anywhere between, thus new pft is the addr of either
351  //        the previous last br or the new br
352  val may_have_to_replace = oe.noEmptySlotForNewBr
353  val pft_need_to_change = is_new_br && may_have_to_replace
354  // it should either be the given last br or the new br
355  when (pft_need_to_change) {
356    val new_pft_offset =
357      Mux(!new_br_insert_onehot.asUInt.orR,
358        new_br_offset, oe.allSlotsForBr.last.offset)
359
360    // set jmp to invalid
361    old_entry_modified.pftAddr := getLower(io.start_addr) + new_pft_offset
362    old_entry_modified.carry := (getLower(io.start_addr) +& new_pft_offset).head(1).asBool
363    old_entry_modified.last_may_be_rvi_call := false.B
364    old_entry_modified.isCall := false.B
365    old_entry_modified.isRet := false.B
366    old_entry_modified.isJalr := false.B
367  }
368
369  val old_entry_jmp_target_modified = WireInit(oe)
370  val old_target = oe.tailSlot.getTarget(io.start_addr) // may be wrong because we store only 20 lowest bits
371  val old_tail_is_jmp = !oe.tailSlot.sharing
372  val jalr_target_modified = cfi_is_jalr && (old_target =/= io.target) && old_tail_is_jmp // TODO: pass full jalr target
373  when (jalr_target_modified) {
374    old_entry_jmp_target_modified.setByJmpTarget(io.start_addr, io.target)
375    old_entry_jmp_target_modified.always_taken := 0.U.asTypeOf(Vec(numBr, Bool()))
376  }
377
378  val old_entry_always_taken = WireInit(oe)
379  val always_taken_modified_vec = Wire(Vec(numBr, Bool())) // whether modified or not
380  for (i <- 0 until numBr) {
381    old_entry_always_taken.always_taken(i) :=
382      oe.always_taken(i) && io.cfiIndex.valid && oe.brValids(i) && io.cfiIndex.bits === oe.brOffset(i)
383    always_taken_modified_vec(i) := oe.always_taken(i) && !old_entry_always_taken.always_taken(i)
384  }
385  val always_taken_modified = always_taken_modified_vec.reduce(_||_)
386
387
388
389  val derived_from_old_entry =
390    Mux(is_new_br, old_entry_modified,
391      Mux(jalr_target_modified, old_entry_jmp_target_modified, old_entry_always_taken))
392
393
394  io.new_entry := Mux(!hit, init_entry, derived_from_old_entry)
395
396  io.new_br_insert_pos := new_br_insert_onehot
397  io.taken_mask := VecInit((io.new_entry.brOffset zip io.new_entry.brValids).map{
398    case (off, v) => io.cfiIndex.bits === off && io.cfiIndex.valid && v
399  })
400  io.jmp_taken := io.new_entry.jmpValid && io.new_entry.tailSlot.offset === io.cfiIndex.bits
401  for (i <- 0 until numBr) {
402    io.mispred_mask(i) := io.new_entry.brValids(i) && io.mispredict_vec(io.new_entry.brOffset(i))
403  }
404  io.mispred_mask.last := io.new_entry.jmpValid && io.mispredict_vec(pd.jmpOffset)
405
406  // for perf counters
407  io.is_init_entry := !hit
408  io.is_old_entry := hit && !is_new_br && !jalr_target_modified && !always_taken_modified
409  io.is_new_br := hit && is_new_br
410  io.is_jalr_target_modified := hit && jalr_target_modified
411  io.is_always_taken_modified := hit && always_taken_modified
412  io.is_br_full := hit && is_new_br && may_have_to_replace
413}
414
415class FtqPcMemWrapper(numOtherReads: Int)(implicit p: Parameters) extends XSModule with HasBackendRedirectInfo {
416  val io = IO(new Bundle {
417    val ifuPtr_w       = Input(new FtqPtr)
418    val ifuPtrPlus1_w  = Input(new FtqPtr)
419    val ifuPtrPlus2_w  = Input(new FtqPtr)
420    val pfPtr_w        = Input(new FtqPtr)
421    val pfPtrPlus1_w   = Input(new FtqPtr)
422    val commPtr_w      = Input(new FtqPtr)
423    val commPtrPlus1_w = Input(new FtqPtr)
424    val ifuPtr_rdata       = Output(new Ftq_RF_Components)
425    val ifuPtrPlus1_rdata  = Output(new Ftq_RF_Components)
426    val ifuPtrPlus2_rdata  = Output(new Ftq_RF_Components)
427    val pfPtr_rdata        = Output(new Ftq_RF_Components)
428    val pfPtrPlus1_rdata   = Output(new Ftq_RF_Components)
429    val commPtr_rdata      = Output(new Ftq_RF_Components)
430    val commPtrPlus1_rdata = Output(new Ftq_RF_Components)
431
432    val wen = Input(Bool())
433    val waddr = Input(UInt(log2Ceil(FtqSize).W))
434    val wdata = Input(new Ftq_RF_Components)
435  })
436
437  val num_pc_read = numOtherReads + 5
438  val mem = Module(new SyncDataModuleTemplate(new Ftq_RF_Components, FtqSize,
439    num_pc_read, 1, "FtqPC"))
440  mem.io.wen(0)   := io.wen
441  mem.io.waddr(0) := io.waddr
442  mem.io.wdata(0) := io.wdata
443
444  // read one cycle ahead for ftq local reads
445  val raddr_vec = VecInit(Seq(io.ifuPtr_w.value, io.ifuPtrPlus1_w.value, io.ifuPtrPlus2_w.value,
446                              io.pfPtr_w.value, io.pfPtrPlus1_w.value,
447                              io.commPtrPlus1_w.value, io.commPtr_w.value))
448
449  mem.io.raddr := raddr_vec
450
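  // rdata comes back in raddr_vec order, so dropRight(6).last is element 0
  // (ifuPtr), dropRight(5).last element 1 (ifuPtrPlus1), ..., and .last is the
  // last element (commPtr)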
451  io.ifuPtr_rdata       := mem.io.rdata.dropRight(6).last
452  io.ifuPtrPlus1_rdata  := mem.io.rdata.dropRight(5).last
453  io.ifuPtrPlus2_rdata  := mem.io.rdata.dropRight(4).last
454  io.pfPtr_rdata        := mem.io.rdata.dropRight(3).last
455  io.pfPtrPlus1_rdata   := mem.io.rdata.dropRight(2).last
456  io.commPtrPlus1_rdata := mem.io.rdata.dropRight(1).last
457  io.commPtr_rdata      := mem.io.rdata.last
458}
459
460class Ftq(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelper
461  with HasBackendRedirectInfo with BPUUtils with HasBPUConst with HasPerfEvents
462  with HasICacheParameters{
463  val io = IO(new Bundle {
464    val fromBpu = Flipped(new BpuToFtqIO)
465    val fromIfu = Flipped(new IfuToFtqIO)
466    val fromBackend = Flipped(new CtrlToFtqIO)
467
468    val toBpu = new FtqToBpuIO
469    val toIfu = new FtqToIfuIO
470    val toICache = new FtqToICacheIO
471    val toBackend = new FtqToCtrlIO
472    val toPrefetch = new FtqToPrefetchIO
473    val icacheFlush = Output(Bool())
474
475    val bpuInfo = new Bundle {
476      val bpRight = Output(UInt(XLEN.W))
477      val bpWrong = Output(UInt(XLEN.W))
478    }
479
480    val mmioCommitRead = Flipped(new mmioCommitRead)
481
482    // for perf
483    val ControlBTBMissBubble = Output(Bool())
484    val TAGEMissBubble = Output(Bool())
485    val SCMissBubble = Output(Bool())
486    val ITTAGEMissBubble = Output(Bool())
487    val RASMissBubble = Output(Bool())
488  })
489  io.bpuInfo := DontCare
490
491  val topdown_stage = RegInit(0.U.asTypeOf(new FrontendTopDownBundle))
492  // only driven by clock, not valid-ready
493  topdown_stage := io.fromBpu.resp.bits.topdown_info
494  io.toIfu.req.bits.topdown_info := topdown_stage
495
496  val ifuRedirected = RegInit(VecInit(Seq.fill(FtqSize)(false.B)))
497
498
499  // io.fromBackend.ftqIdxAhead: bju(BjuCnt) + ldReplay + exception
500  val ftqIdxAhead = VecInit(Seq.tabulate(FtqRedirectAheadNum)(i => io.fromBackend.ftqIdxAhead(i))) // only bju
501  val ftqIdxSelOH = io.fromBackend.ftqIdxSelOH.bits(FtqRedirectAheadNum - 1, 0)
502
503  val aheadValid   = ftqIdxAhead.map(_.valid).reduce(_|_) && !io.fromBackend.redirect.valid
504  val realAhdValid = io.fromBackend.redirect.valid && (ftqIdxSelOH > 0.U) && RegNext(aheadValid)
505  val backendRedirect = Wire(Valid(new BranchPredictionRedirect))
506  val backendRedirectReg = Wire(Valid(new BranchPredictionRedirect))
507  backendRedirectReg.valid := RegNext(Mux(realAhdValid, false.B, backendRedirect.valid))
508  backendRedirectReg.bits := RegEnable(backendRedirect.bits, backendRedirect.valid)
509  val fromBackendRedirect = Wire(Valid(new BranchPredictionRedirect))
510  fromBackendRedirect := Mux(realAhdValid, backendRedirect, backendRedirectReg)
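  // redirect-ahead: the backend appears to send the redirecting ftqIdx one cycle
  // before the redirect itself (ftqIdxAhead/ftqIdxSelOH), so the memory reads
  // below can be issued early; when that happens (realAhdValid) the un-delayed
  // backendRedirect is used to line up with the early read, otherwise the
  // one-cycle-delayed backendRedirectReg matches the read latency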
511
512  val stage2Flush = backendRedirect.valid
513  val backendFlush = stage2Flush || RegNext(stage2Flush)
514  val ifuFlush = Wire(Bool())
515
516  val flush = stage2Flush || RegNext(stage2Flush)
517
518  val allowBpuIn, allowToIfu = WireInit(false.B)
519  val flushToIfu = !allowToIfu
520  allowBpuIn := !ifuFlush && !backendRedirect.valid && !backendRedirectReg.valid
521  allowToIfu := !ifuFlush && !backendRedirect.valid && !backendRedirectReg.valid
522
523  def copyNum = 5
524  val bpuPtr, ifuPtr, pfPtr, ifuWbPtr, commPtr, robCommPtr = RegInit(FtqPtr(false.B, 0.U))
525  val ifuPtrPlus1 = RegInit(FtqPtr(false.B, 1.U))
526  val ifuPtrPlus2 = RegInit(FtqPtr(false.B, 2.U))
527  val pfPtrPlus1  = RegInit(FtqPtr(false.B, 1.U))
528  val commPtrPlus1 = RegInit(FtqPtr(false.B, 1.U))
529  val copied_ifu_ptr = Seq.fill(copyNum)(RegInit(FtqPtr(false.B, 0.U)))
530  val copied_bpu_ptr = Seq.fill(copyNum)(RegInit(FtqPtr(false.B, 0.U)))
531  require(FtqSize >= 4)
532  val ifuPtr_write       = WireInit(ifuPtr)
533  val ifuPtrPlus1_write  = WireInit(ifuPtrPlus1)
534  val ifuPtrPlus2_write  = WireInit(ifuPtrPlus2)
535  val pfPtr_write        = WireInit(pfPtr)
536  val pfPtrPlus1_write   = WireInit(pfPtrPlus1)
537  val ifuWbPtr_write     = WireInit(ifuWbPtr)
538  val commPtr_write      = WireInit(commPtr)
539  val commPtrPlus1_write = WireInit(commPtrPlus1)
540  val robCommPtr_write   = WireInit(robCommPtr)
541  ifuPtr       := ifuPtr_write
542  ifuPtrPlus1  := ifuPtrPlus1_write
543  ifuPtrPlus2  := ifuPtrPlus2_write
544  pfPtr        := pfPtr_write
545  pfPtrPlus1   := pfPtrPlus1_write
546  ifuWbPtr     := ifuWbPtr_write
547  commPtr      := commPtr_write
548  commPtrPlus1 := commPtrPlus1_write
549  copied_ifu_ptr.map{ptr =>
550    ptr := ifuPtr_write
551    dontTouch(ptr)
552  }
553  robCommPtr   := robCommPtr_write
554  val validEntries = distanceBetween(bpuPtr, commPtr)
555  val canCommit = Wire(Bool())
556
557  // **********************************************************************
558  // **************************** enq from bpu ****************************
559  // **********************************************************************
560  val new_entry_ready = validEntries < FtqSize.U || canCommit
561  io.fromBpu.resp.ready := new_entry_ready
562
563  val bpu_s2_resp = io.fromBpu.resp.bits.s2
564  val bpu_s3_resp = io.fromBpu.resp.bits.s3
565  val bpu_s2_redirect = bpu_s2_resp.valid(3) && bpu_s2_resp.hasRedirect(3)
566  val bpu_s3_redirect = bpu_s3_resp.valid(3) && bpu_s3_resp.hasRedirect(3)
567
568  io.toBpu.enq_ptr := bpuPtr
569  val enq_fire = io.fromBpu.resp.fire && allowBpuIn // from bpu s1
570  val bpu_in_fire = (io.fromBpu.resp.fire || bpu_s2_redirect || bpu_s3_redirect) && allowBpuIn
571
572  val bpu_in_resp = io.fromBpu.resp.bits.selectedResp
573  val bpu_in_stage = io.fromBpu.resp.bits.selectedRespIdxForFtq
574  val bpu_in_resp_ptr = Mux(bpu_in_stage === BP_S1, bpuPtr, bpu_in_resp.ftq_idx)
575  val bpu_in_resp_idx = bpu_in_resp_ptr.value
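  // an s1 response allocates a new entry at bpuPtr, while s2/s3 redirect
  // responses overwrite the entry they allocated earlier (bpu_in_resp.ftq_idx)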
576
577  // read ports:      pfReq1 + pfReq2 + ifuReq1 + ifuReq2 + ifuReq3 + commitUpdate2 + commitUpdate
578  val ftq_pc_mem = Module(new FtqPcMemWrapper(2))
579  // resp from uBTB
580  ftq_pc_mem.io.wen := bpu_in_fire
581  ftq_pc_mem.io.waddr := bpu_in_resp_idx
582  ftq_pc_mem.io.wdata.fromBranchPrediction(bpu_in_resp)
583
584  //                                                            ifuRedirect + backendRedirect + commit
585  val ftq_redirect_mem = Module(new SyncDataModuleTemplate(new Ftq_Redirect_SRAMEntry,
586    FtqSize, IfuRedirectNum+FtqRedirectAheadNum+1, 1, hasRen = true))
587  // this info is intended to be enqueued at the last stage of bpu
588  ftq_redirect_mem.io.wen(0) := io.fromBpu.resp.bits.lastStage.valid(3)
589  ftq_redirect_mem.io.waddr(0) := io.fromBpu.resp.bits.lastStage.ftq_idx.value
590  ftq_redirect_mem.io.wdata(0) := io.fromBpu.resp.bits.last_stage_spec_info
591  println(f"ftq redirect MEM: entry ${ftq_redirect_mem.io.wdata(0).getWidth} * ${FtqSize} * 3")
592
593  val ftq_meta_1r_sram = Module(new FtqNRSRAM(new Ftq_1R_SRAMEntry, 1))
594  // this info is intended to be enqueued at the last stage of bpu
595  ftq_meta_1r_sram.io.wen := io.fromBpu.resp.bits.lastStage.valid(3)
596  ftq_meta_1r_sram.io.waddr := io.fromBpu.resp.bits.lastStage.ftq_idx.value
597  ftq_meta_1r_sram.io.wdata.meta := io.fromBpu.resp.bits.last_stage_meta
598  ftq_meta_1r_sram.io.wdata.ftb_entry := io.fromBpu.resp.bits.last_stage_ftb_entry
599  //                                                            ifuRedirect + backendRedirect (commit moved to ftq_meta_1r_sram)
600  val ftb_entry_mem = Module(new SyncDataModuleTemplate(new FTBEntry_FtqMem,
601    FtqSize, IfuRedirectNum+FtqRedirectAheadNum, 1, hasRen = true))
602  ftb_entry_mem.io.wen(0) := io.fromBpu.resp.bits.lastStage.valid(3)
603  ftb_entry_mem.io.waddr(0) := io.fromBpu.resp.bits.lastStage.ftq_idx.value
604  ftb_entry_mem.io.wdata(0) := io.fromBpu.resp.bits.last_stage_ftb_entry
605
606
607  // multi-write
608  val update_target = Reg(Vec(FtqSize, UInt(VAddrBits.W))) // could be taken target or fallThrough //TODO: remove this
609  val newest_entry_target = Reg(UInt(VAddrBits.W))
610  val newest_entry_target_modified = RegInit(false.B)
611  val newest_entry_ptr = Reg(new FtqPtr)
612  val newest_entry_ptr_modified = RegInit(false.B)
613  val cfiIndex_vec = Reg(Vec(FtqSize, ValidUndirectioned(UInt(log2Ceil(PredictWidth).W))))
614  val mispredict_vec = Reg(Vec(FtqSize, Vec(PredictWidth, Bool())))
615  val pred_stage = Reg(Vec(FtqSize, UInt(2.W)))
616  val pred_s1_cycle = if (!env.FPGAPlatform) Some(Reg(Vec(FtqSize, UInt(64.W)))) else None
617
618  val c_empty :: c_toCommit :: c_committed :: c_flushed :: Nil = Enum(4)
619  val commitStateQueueReg = RegInit(VecInit(Seq.fill(FtqSize) {
620    VecInit(Seq.fill(PredictWidth)(c_empty))
621  }))
622  val commitStateQueueEnable = WireInit(VecInit(Seq.fill(FtqSize)(false.B)))
623  val commitStateQueueNext = WireInit(commitStateQueueReg)
624
625  for (f <- 0 until FtqSize) {
626    when(commitStateQueueEnable(f)) {
627      commitStateQueueReg(f) := commitStateQueueNext(f)
628    }
629  }
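  // commit states: c_empty = slot holds no valid instruction, c_toCommit = ifu
  // wrote back and the slot awaits commit, c_committed = committed by the rob,
  // c_flushed = the redirected instruction itself was flushed; the enable/next
  // register pair gives each row its own write enable so it can be clock gated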
630
631  val f_to_send :: f_sent :: Nil = Enum(2)
632  val entry_fetch_status = RegInit(VecInit(Seq.fill(FtqSize)(f_sent)))
633
634  val h_not_hit :: h_false_hit :: h_hit :: Nil = Enum(3)
635  val entry_hit_status = RegInit(VecInit(Seq.fill(FtqSize)(h_not_hit)))
636
637  // modify registers one cycle later to cut critical path
638  val last_cycle_bpu_in = RegNext(bpu_in_fire)
639  val last_cycle_bpu_in_ptr = RegEnable(bpu_in_resp_ptr, bpu_in_fire)
640  val last_cycle_bpu_in_idx = last_cycle_bpu_in_ptr.value
641  val last_cycle_bpu_target = RegEnable(bpu_in_resp.getTarget(3), bpu_in_fire)
642  val last_cycle_cfiIndex = RegEnable(bpu_in_resp.cfiIndex(3), bpu_in_fire)
643  val last_cycle_bpu_in_stage = RegEnable(bpu_in_stage, bpu_in_fire)
644
645  def extra_copyNum_for_commitStateQueue = 2
646  val copied_last_cycle_bpu_in =
647    VecInit(Seq.fill(copyNum + extra_copyNum_for_commitStateQueue)(RegNext(bpu_in_fire)))
648  val copied_last_cycle_bpu_in_ptr_for_ftq =
649    VecInit(Seq.fill(extra_copyNum_for_commitStateQueue)(RegEnable(bpu_in_resp_ptr, bpu_in_fire)))
650
651  newest_entry_target_modified := false.B
652  newest_entry_ptr_modified := false.B
653  when (last_cycle_bpu_in) {
654    entry_fetch_status(last_cycle_bpu_in_idx) := f_to_send
655    cfiIndex_vec(last_cycle_bpu_in_idx) := last_cycle_cfiIndex
656    pred_stage(last_cycle_bpu_in_idx) := last_cycle_bpu_in_stage
657
658    update_target(last_cycle_bpu_in_idx) := last_cycle_bpu_target // TODO: remove this
659    newest_entry_target_modified := true.B
660    newest_entry_target := last_cycle_bpu_target
661    newest_entry_ptr_modified := true.B
662    newest_entry_ptr := last_cycle_bpu_in_ptr
663  }
664
665  // reduce fanout by delaying the write for a cycle
666  when (RegNext(last_cycle_bpu_in)) {
667    mispredict_vec(RegEnable(last_cycle_bpu_in_idx, last_cycle_bpu_in)) :=
668      WireInit(VecInit(Seq.fill(PredictWidth)(false.B)))
669  }
670
671  // record s1 pred cycles
672  pred_s1_cycle.map(vec => {
673    when (bpu_in_fire && (bpu_in_stage === BP_S1)) {
674      vec(bpu_in_resp_ptr.value) := bpu_in_resp.full_pred(0).predCycle.getOrElse(0.U)
675    }
676  })
677
678  // reduce fanout using copied last_cycle_bpu_in and copied last_cycle_bpu_in_ptr
679  val copied_last_cycle_bpu_in_for_ftq = copied_last_cycle_bpu_in.takeRight(extra_copyNum_for_commitStateQueue)
680  copied_last_cycle_bpu_in_for_ftq.zip(copied_last_cycle_bpu_in_ptr_for_ftq).zipWithIndex.map {
681    case ((in, ptr), i) =>
682      when (in) {
683        val perSetEntries = FtqSize / extra_copyNum_for_commitStateQueue // 32
684        require(FtqSize % extra_copyNum_for_commitStateQueue == 0)
685        for (j <- 0 until perSetEntries) {
686          when (ptr.value === (i * perSetEntries + j).U) {
687            commitStateQueueNext(i * perSetEntries + j) := VecInit(Seq.fill(PredictWidth)(c_empty))
688            // Clock gating optimization, use 1 gate cell to control a row
689            commitStateQueueEnable(i * perSetEntries + j) := true.B
690          }
691        }
692      }
693  }
694
695  bpuPtr := bpuPtr + enq_fire
696  copied_bpu_ptr.map(_ := bpuPtr + enq_fire)
697  when (io.toIfu.req.fire && allowToIfu) {
698    ifuPtr_write := ifuPtrPlus1
699    ifuPtrPlus1_write := ifuPtrPlus2
700    ifuPtrPlus2_write := ifuPtrPlus2 + 1.U
701  }
702  when (io.toPrefetch.req.fire && allowToIfu) {
703    pfPtr_write := pfPtrPlus1
704    pfPtrPlus1_write := pfPtrPlus1 + 1.U
705  }
706
707  // only use ftb result to assign hit status
708  when (bpu_s2_resp.valid(3)) {
709    entry_hit_status(bpu_s2_resp.ftq_idx.value) := Mux(bpu_s2_resp.full_pred(3).hit, h_hit, h_not_hit)
710  }
711
712
713  io.toIfu.flushFromBpu.s2.valid := bpu_s2_redirect
714  io.toIfu.flushFromBpu.s2.bits := bpu_s2_resp.ftq_idx
715  io.toPrefetch.flushFromBpu.s2.valid := bpu_s2_redirect
716  io.toPrefetch.flushFromBpu.s2.bits := bpu_s2_resp.ftq_idx
717  when (bpu_s2_redirect) {
718    bpuPtr := bpu_s2_resp.ftq_idx + 1.U
719    copied_bpu_ptr.map(_ := bpu_s2_resp.ftq_idx + 1.U)
720    // only when ifuPtr runs ahead of bpu s2 resp should we recover it
721    when (!isBefore(ifuPtr, bpu_s2_resp.ftq_idx)) {
722      ifuPtr_write := bpu_s2_resp.ftq_idx
723      ifuPtrPlus1_write := bpu_s2_resp.ftq_idx + 1.U
724      ifuPtrPlus2_write := bpu_s2_resp.ftq_idx + 2.U
725    }
726    when (!isBefore(pfPtr, bpu_s2_resp.ftq_idx)) {
727      pfPtr_write := bpu_s2_resp.ftq_idx
728      pfPtrPlus1_write := bpu_s2_resp.ftq_idx + 1.U
729    }
730  }
731
732  io.toIfu.flushFromBpu.s3.valid := bpu_s3_redirect
733  io.toIfu.flushFromBpu.s3.bits := bpu_s3_resp.ftq_idx
734  io.toPrefetch.flushFromBpu.s3.valid := bpu_s3_redirect
735  io.toPrefetch.flushFromBpu.s3.bits := bpu_s3_resp.ftq_idx
736  when (bpu_s3_redirect) {
737    bpuPtr := bpu_s3_resp.ftq_idx + 1.U
738    copied_bpu_ptr.map(_ := bpu_s3_resp.ftq_idx + 1.U)
739    // only when ifuPtr runs ahead of bpu s3 resp should we recover it
740    when (!isBefore(ifuPtr, bpu_s3_resp.ftq_idx)) {
741      ifuPtr_write := bpu_s3_resp.ftq_idx
742      ifuPtrPlus1_write := bpu_s3_resp.ftq_idx + 1.U
743      ifuPtrPlus2_write := bpu_s3_resp.ftq_idx + 2.U
744    }
745    when (!isBefore(pfPtr, bpu_s3_resp.ftq_idx)) {
746      pfPtr_write := bpu_s3_resp.ftq_idx
747      pfPtrPlus1_write := bpu_s3_resp.ftq_idx + 1.U
748    }
749  }
750
751  XSError(isBefore(bpuPtr, ifuPtr) && !isFull(bpuPtr, ifuPtr), "\nifuPtr is before bpuPtr!\n")
752  XSError(isBefore(bpuPtr, pfPtr) && !isFull(bpuPtr, pfPtr), "\npfPtr is before bpuPtr!\n")
753  XSError(isBefore(ifuWbPtr, commPtr) && !isFull(ifuWbPtr, commPtr), "\ncommPtr is before ifuWbPtr!\n")
754
755  (0 until copyNum).map{i =>
756    XSError(copied_bpu_ptr(i) =/= bpuPtr, "\ncopiedBpuPtr is different from bpuPtr!\n")
757  }
758
759  // ****************************************************************
760  // **************************** to ifu ****************************
761  // ****************************************************************
762  // 0  for ifu, and 1-4 for ICache
763  val bpu_in_bypass_buf = RegEnable(ftq_pc_mem.io.wdata, bpu_in_fire)
764  val copied_bpu_in_bypass_buf = VecInit(Seq.fill(copyNum)(RegEnable(ftq_pc_mem.io.wdata, bpu_in_fire)))
765  val bpu_in_bypass_buf_for_ifu = bpu_in_bypass_buf
766  val bpu_in_bypass_ptr = RegEnable(bpu_in_resp_ptr, bpu_in_fire)
767  val last_cycle_to_ifu_fire = RegNext(io.toIfu.req.fire)
768  val last_cycle_to_pf_fire = RegNext(io.toPrefetch.req.fire)
769
770  val copied_bpu_in_bypass_ptr = VecInit(Seq.fill(copyNum)(RegEnable(bpu_in_resp_ptr, bpu_in_fire)))
771  val copied_last_cycle_to_ifu_fire = VecInit(Seq.fill(copyNum)(RegNext(io.toIfu.req.fire)))
772
773  // read pc and target
774  ftq_pc_mem.io.ifuPtr_w       := ifuPtr_write
775  ftq_pc_mem.io.ifuPtrPlus1_w  := ifuPtrPlus1_write
776  ftq_pc_mem.io.ifuPtrPlus2_w  := ifuPtrPlus2_write
777  ftq_pc_mem.io.pfPtr_w        := pfPtr_write
778  ftq_pc_mem.io.pfPtrPlus1_w   := pfPtrPlus1_write
779  ftq_pc_mem.io.commPtr_w      := commPtr_write
780  ftq_pc_mem.io.commPtrPlus1_w := commPtrPlus1_write
781
782
783  io.toIfu.req.bits.ftqIdx := ifuPtr
784
785  val toICachePcBundle = Wire(Vec(copyNum,new Ftq_RF_Components))
786  val toICacheEntryToSend = Wire(Vec(copyNum,Bool()))
787  val toPrefetchPcBundle = Wire(new Ftq_RF_Components)
788  val toPrefetchEntryToSend = Wire(Bool())
789  val toIfuPcBundle = Wire(new Ftq_RF_Components)
790  val entry_is_to_send = WireInit(entry_fetch_status(ifuPtr.value) === f_to_send)
791  val entry_ftq_offset = WireInit(cfiIndex_vec(ifuPtr.value))
792  val entry_next_addr  = Wire(UInt(VAddrBits.W))
793
794  val pc_mem_ifu_ptr_rdata   = VecInit(Seq.fill(copyNum)(RegNext(ftq_pc_mem.io.ifuPtr_rdata)))
795  val pc_mem_ifu_plus1_rdata = VecInit(Seq.fill(copyNum)(RegNext(ftq_pc_mem.io.ifuPtrPlus1_rdata)))
796  val diff_entry_next_addr = WireInit(update_target(ifuPtr.value)) //TODO: remove this
797
798  val copied_ifu_plus1_to_send = VecInit(Seq.fill(copyNum)(RegNext(entry_fetch_status(ifuPtrPlus1.value) === f_to_send) || RegNext(last_cycle_bpu_in && bpu_in_bypass_ptr === (ifuPtrPlus1))))
799  val copied_ifu_ptr_to_send   = VecInit(Seq.fill(copyNum)(RegNext(entry_fetch_status(ifuPtr.value) === f_to_send) || RegNext(last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr)))
800
801  for(i <- 0 until copyNum){
802    when(copied_last_cycle_bpu_in(i) && copied_bpu_in_bypass_ptr(i) === copied_ifu_ptr(i)){
803      toICachePcBundle(i) := copied_bpu_in_bypass_buf(i)
804      toICacheEntryToSend(i)   := true.B
805    }.elsewhen(copied_last_cycle_to_ifu_fire(i)){
806      toICachePcBundle(i) := pc_mem_ifu_plus1_rdata(i)
807      toICacheEntryToSend(i)   := copied_ifu_plus1_to_send(i)
808    }.otherwise{
809      toICachePcBundle(i) := pc_mem_ifu_ptr_rdata(i)
810      toICacheEntryToSend(i)   := copied_ifu_ptr_to_send(i)
811    }
812  }
813
814  when(last_cycle_bpu_in && bpu_in_bypass_ptr === pfPtr){
815    toPrefetchPcBundle      := bpu_in_bypass_buf
816    toPrefetchEntryToSend   := true.B
817  }.elsewhen(last_cycle_to_pf_fire){
818    toPrefetchPcBundle      := RegNext(ftq_pc_mem.io.pfPtrPlus1_rdata)
819    toPrefetchEntryToSend   := RegNext(entry_fetch_status(pfPtrPlus1.value) === f_to_send) ||
820                               RegNext(last_cycle_bpu_in && bpu_in_bypass_ptr === (pfPtrPlus1))
821  }.otherwise{
822    toPrefetchPcBundle      := RegNext(ftq_pc_mem.io.pfPtr_rdata)
823    toPrefetchEntryToSend   := RegNext(entry_fetch_status(pfPtr.value) === f_to_send) ||
824                               RegNext(last_cycle_bpu_in && bpu_in_bypass_ptr === pfPtr) // reduce potential bubbles
825  }
826
827  // TODO: reconsider target address bypass logic
828  when (last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr) {
829    toIfuPcBundle := bpu_in_bypass_buf_for_ifu
830    entry_is_to_send := true.B
831    entry_next_addr := last_cycle_bpu_target
832    entry_ftq_offset := last_cycle_cfiIndex
833    diff_entry_next_addr := last_cycle_bpu_target // TODO: remove this
834  }.elsewhen (last_cycle_to_ifu_fire) {
835    toIfuPcBundle := RegNext(ftq_pc_mem.io.ifuPtrPlus1_rdata)
836    entry_is_to_send := RegNext(entry_fetch_status(ifuPtrPlus1.value) === f_to_send) ||
837                        RegNext(last_cycle_bpu_in && bpu_in_bypass_ptr === (ifuPtrPlus1)) // reduce potential bubbles
838    entry_next_addr := Mux(last_cycle_bpu_in && bpu_in_bypass_ptr === (ifuPtrPlus1),
839                          bpu_in_bypass_buf_for_ifu.startAddr,
840                          Mux(ifuPtr === newest_entry_ptr,
841                            newest_entry_target,
842                            RegNext(ftq_pc_mem.io.ifuPtrPlus2_rdata.startAddr))) // ifuPtr+2
843  }.otherwise {
844    toIfuPcBundle := RegNext(ftq_pc_mem.io.ifuPtr_rdata)
845    entry_is_to_send := RegNext(entry_fetch_status(ifuPtr.value) === f_to_send) ||
846                        RegNext(last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr) // reduce potential bubbles
847    entry_next_addr := Mux(last_cycle_bpu_in && bpu_in_bypass_ptr === (ifuPtrPlus1),
848                          bpu_in_bypass_buf_for_ifu.startAddr,
849                          Mux(ifuPtr === newest_entry_ptr,
850                            newest_entry_target,
851                            RegNext(ftq_pc_mem.io.ifuPtrPlus1_rdata.startAddr))) // ifuPtr+1
852  }
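  // three-way source select for the IFU request: (1) bypass the entry written by
  // BPU last cycle when it is exactly ifuPtr, (2) after a fire last cycle ifuPtr
  // has advanced, so use the data read at ifuPtrPlus1, (3) otherwise use the data
  // read at ifuPtr; entry_next_addr is bypassed likewise and falls back to
  // newest_entry_target when ifuPtr is already the newest entry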
853
854  io.toIfu.req.valid := entry_is_to_send && ifuPtr =/= bpuPtr
855  io.toIfu.req.bits.nextStartAddr := entry_next_addr
856  io.toIfu.req.bits.ftqOffset := entry_ftq_offset
857  io.toIfu.req.bits.fromFtqPcBundle(toIfuPcBundle)
858
859  io.toICache.req.valid := entry_is_to_send && ifuPtr =/= bpuPtr
860  io.toICache.req.bits.readValid.zipWithIndex.map{case(copy, i) => copy := toICacheEntryToSend(i) && copied_ifu_ptr(i) =/= copied_bpu_ptr(i)}
861  io.toICache.req.bits.pcMemRead.zipWithIndex.foreach{case(copy,i) =>
862    copy.fromFtqPcBundle(toICachePcBundle(i))
863    copy.ftqIdx := ifuPtr
864  }
865
866  io.toPrefetch.req.valid := toPrefetchEntryToSend && pfPtr =/= bpuPtr
867  io.toPrefetch.req.bits.fromFtqPcBundle(toPrefetchPcBundle)
868  io.toPrefetch.req.bits.ftqIdx := pfPtr
869  // io.toICache.req.bits.bypassSelect := last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr
870  // io.toICache.req.bits.bpuBypassWrite.zipWithIndex.map{case(bypassWrtie, i) =>
871  //   bypassWrtie.startAddr := bpu_in_bypass_buf.tail(i).startAddr
872  //   bypassWrtie.nextlineStart := bpu_in_bypass_buf.tail(i).nextLineAddr
873  // }
874
875  // TODO: remove this
876  XSError(io.toIfu.req.valid && diff_entry_next_addr =/= entry_next_addr,
877          p"\nifu_req_target wrong! ifuPtr: ${ifuPtr}, entry_next_addr: ${Hexadecimal(entry_next_addr)} diff_entry_next_addr: ${Hexadecimal(diff_entry_next_addr)}\n")
878
879  // when fall through is smaller in value than start address, there must be a false hit
880  when (toIfuPcBundle.fallThruError && entry_hit_status(ifuPtr.value) === h_hit) {
881    when (io.toIfu.req.fire &&
882      !(bpu_s2_redirect && bpu_s2_resp.ftq_idx === ifuPtr) &&
883      !(bpu_s3_redirect && bpu_s3_resp.ftq_idx === ifuPtr)
884    ) {
885      entry_hit_status(ifuPtr.value) := h_false_hit
886      // XSError(true.B, "FTB false hit by fallThroughError, startAddr: %x, fallTHru: %x\n", io.toIfu.req.bits.startAddr, io.toIfu.req.bits.nextStartAddr)
887    }
888    XSDebug(true.B, "fallThruError! start:%x, fallThru:%x\n", io.toIfu.req.bits.startAddr, io.toIfu.req.bits.nextStartAddr)
889  }
890
891  XSPerfAccumulate(f"fall_through_error_to_ifu", toIfuPcBundle.fallThruError && entry_hit_status(ifuPtr.value) === h_hit &&
892    io.toIfu.req.fire && !(bpu_s2_redirect && bpu_s2_resp.ftq_idx === ifuPtr) && !(bpu_s3_redirect && bpu_s3_resp.ftq_idx === ifuPtr))
893
894  val ifu_req_should_be_flushed =
895    io.toIfu.flushFromBpu.shouldFlushByStage2(io.toIfu.req.bits.ftqIdx) ||
896    io.toIfu.flushFromBpu.shouldFlushByStage3(io.toIfu.req.bits.ftqIdx)
897
898  when (io.toIfu.req.fire && !ifu_req_should_be_flushed) {
899    entry_fetch_status(ifuPtr.value) := f_sent
900  }
901
902  // *********************************************************************
903  // **************************** wb from ifu ****************************
904  // *********************************************************************
905  val pdWb = io.fromIfu.pdWb
906  val pds = pdWb.bits.pd
907  val ifu_wb_valid = pdWb.valid
908  val ifu_wb_idx = pdWb.bits.ftqIdx.value
909  // read ports:                                                         commit update
910  val ftq_pd_mem = Module(new SyncDataModuleTemplate(new Ftq_pd_Entry, FtqSize, FtqRedirectAheadNum+1, 1, hasRen = true))
911  ftq_pd_mem.io.wen(0) := ifu_wb_valid
912  ftq_pd_mem.io.waddr(0) := pdWb.bits.ftqIdx.value
913  ftq_pd_mem.io.wdata(0).fromPdWb(pdWb.bits)
914
915  val hit_pd_valid = entry_hit_status(ifu_wb_idx) === h_hit && ifu_wb_valid
916  val hit_pd_mispred = hit_pd_valid && pdWb.bits.misOffset.valid
917  val hit_pd_mispred_reg = RegNext(hit_pd_mispred, init=false.B)
918  val pd_reg       = RegEnable(pds,             pdWb.valid)
919  val start_pc_reg = RegEnable(pdWb.bits.pc(0), pdWb.valid)
920  val wb_idx_reg   = RegEnable(ifu_wb_idx,      pdWb.valid)
921
922  when (ifu_wb_valid) {
923    val comm_stq_wen = VecInit(pds.map(_.valid).zip(pdWb.bits.instrRange).map{
924      case (v, inRange) => v && inRange
925    })
926    commitStateQueueEnable(ifu_wb_idx) := true.B
927    (commitStateQueueNext(ifu_wb_idx) zip comm_stq_wen).map {
928      case (qe, v) => when(v) {
929        qe := c_toCommit
930      }
931    }
932  }
933
934  when (ifu_wb_valid) {
935    ifuWbPtr_write := ifuWbPtr + 1.U
936  }
937
938  XSError(ifu_wb_valid && isAfter(pdWb.bits.ftqIdx, ifuPtr), "IFU returned a predecode before its req, check IFU")
939
940  ftb_entry_mem.io.ren.get.head := ifu_wb_valid
941  ftb_entry_mem.io.raddr.head := ifu_wb_idx
942  val has_false_hit = WireInit(false.B)
943  when (RegNext(hit_pd_valid)) {
944    // check for false hit
945    val pred_ftb_entry = ftb_entry_mem.io.rdata.head
946    val brSlots = pred_ftb_entry.brSlots
947    val tailSlot = pred_ftb_entry.tailSlot
948    // we check cfis that bpu predicted
949
950    // bpu predicted branches but denied by predecode
951    val br_false_hit =
952      brSlots.map{
953        s => s.valid && !(pd_reg(s.offset).valid && pd_reg(s.offset).isBr)
954      }.reduce(_||_) ||
955      (tailSlot.valid && pred_ftb_entry.tailSlot.sharing &&
956        !(pd_reg(tailSlot.offset).valid && pd_reg(tailSlot.offset).isBr))
957
958    val jmpOffset = tailSlot.offset
959    val jmp_pd = pd_reg(jmpOffset)
960    val jal_false_hit = pred_ftb_entry.jmpValid &&
961      ((pred_ftb_entry.isJal  && !(jmp_pd.valid && jmp_pd.isJal)) ||
962       (pred_ftb_entry.isJalr && !(jmp_pd.valid && jmp_pd.isJalr)) ||
963       (pred_ftb_entry.isCall && !(jmp_pd.valid && jmp_pd.isCall)) ||
964       (pred_ftb_entry.isRet  && !(jmp_pd.valid && jmp_pd.isRet))
965      )
966
967    has_false_hit := br_false_hit || jal_false_hit || hit_pd_mispred_reg
968    XSDebug(has_false_hit, "FTB false hit by br or jal or hit_pd, startAddr: %x\n", pdWb.bits.pc(0))
969
970    // assert(!has_false_hit)
971  }
972
973  when (has_false_hit) {
974    entry_hit_status(wb_idx_reg) := h_false_hit
975  }
976
977  // *******************************************************************************
978  // **************************** redirect from backend ****************************
979  // *******************************************************************************
980
981  // redirect reads cfiInfo, coupled to redirectGen s2
982  // ftqIdxAhead(0-3) => ftq_redirect_mem(1-4), reuse ftq_redirect_mem(1)
983  val ftq_redirect_rdata = Wire(Vec(FtqRedirectAheadNum, new Ftq_Redirect_SRAMEntry))
984  val ftb_redirect_rdata = Wire(Vec(FtqRedirectAheadNum, new FTBEntry_FtqMem))
985
986  val ftq_pd_rdata = Wire(Vec(FtqRedirectAheadNum, new Ftq_pd_Entry))
987  for (i <- 1 until FtqRedirectAheadNum) {
988    ftq_redirect_mem.io.ren.get(i + IfuRedirectNum) := ftqIdxAhead(i).valid
989    ftq_redirect_mem.io.raddr(i + IfuRedirectNum) := ftqIdxAhead(i).bits.value
990    ftb_entry_mem.io.ren.get(i + IfuRedirectNum) := ftqIdxAhead(i).valid
991    ftb_entry_mem.io.raddr(i + IfuRedirectNum) := ftqIdxAhead(i).bits.value
992
993    ftq_pd_mem.io.ren.get(i)  := ftqIdxAhead(i).valid
994    ftq_pd_mem.io.raddr(i)    := ftqIdxAhead(i).bits.value
995  }
996  ftq_redirect_mem.io.ren.get(IfuRedirectNum) := Mux(aheadValid, ftqIdxAhead(0).valid, backendRedirect.valid)
997  ftq_redirect_mem.io.raddr(IfuRedirectNum) := Mux(aheadValid, ftqIdxAhead(0).bits.value, backendRedirect.bits.ftqIdx.value)
998  ftb_entry_mem.io.ren.get(IfuRedirectNum) := Mux(aheadValid, ftqIdxAhead(0).valid, backendRedirect.valid)
999  ftb_entry_mem.io.raddr(IfuRedirectNum) := Mux(aheadValid, ftqIdxAhead(0).bits.value, backendRedirect.bits.ftqIdx.value)
1000
1001  ftq_pd_mem.io.ren.get(0)  := Mux(aheadValid, ftqIdxAhead(0).valid, backendRedirect.valid)
1002  ftq_pd_mem.io.raddr(0)    := Mux(aheadValid, ftqIdxAhead(0).bits.value, backendRedirect.bits.ftqIdx.value)
1003
1004  for (i <- 0 until FtqRedirectAheadNum) {
1005    ftq_redirect_rdata(i) := ftq_redirect_mem.io.rdata(i + IfuRedirectNum)
1006    ftb_redirect_rdata(i) := ftb_entry_mem.io.rdata(i + IfuRedirectNum)
1007
1008    ftq_pd_rdata(i) := ftq_pd_mem.io.rdata(i)
1009  }
1010  val stage3CfiInfo = Mux(realAhdValid, Mux1H(ftqIdxSelOH, ftq_redirect_rdata), ftq_redirect_mem.io.rdata(IfuRedirectNum))
1011  val stage3PdInfo  = Mux(realAhdValid, Mux1H(ftqIdxSelOH, ftq_pd_rdata), ftq_pd_mem.io.rdata(0))
1012  val backendRedirectCfi = fromBackendRedirect.bits.cfiUpdate
1013  backendRedirectCfi.fromFtqRedirectSram(stage3CfiInfo)
1014  backendRedirectCfi.pd := stage3PdInfo.toPd(fromBackendRedirect.bits.ftqOffset)
1015
1016
1017  val r_ftb_entry = Mux(realAhdValid, Mux1H(ftqIdxSelOH, ftb_redirect_rdata), ftb_entry_mem.io.rdata(IfuRedirectNum))
1018  val r_ftqOffset = fromBackendRedirect.bits.ftqOffset
1019
1020  backendRedirectCfi.br_hit := r_ftb_entry.brIsSaved(r_ftqOffset)
1021  backendRedirectCfi.jr_hit := r_ftb_entry.isJalr && r_ftb_entry.tailSlot.offset === r_ftqOffset
1022  // FIXME: not portable
1023  val sc_disagree = stage3CfiInfo.sc_disagree.getOrElse(VecInit(Seq.fill(numBr)(false.B)))
1024  backendRedirectCfi.sc_hit := backendRedirectCfi.br_hit && Mux(r_ftb_entry.brSlots(0).offset === r_ftqOffset,
1025    sc_disagree(0), sc_disagree(1))
1026
1027  when (entry_hit_status(fromBackendRedirect.bits.ftqIdx.value) === h_hit) {
1028    backendRedirectCfi.shift := PopCount(r_ftb_entry.getBrMaskByOffset(r_ftqOffset)) +&
1029      (backendRedirectCfi.pd.isBr && !r_ftb_entry.brIsSaved(r_ftqOffset) &&
1030      !r_ftb_entry.newBrCanNotInsert(r_ftqOffset))
1031
1032    backendRedirectCfi.addIntoHist := backendRedirectCfi.pd.isBr && (r_ftb_entry.brIsSaved(r_ftqOffset) ||
1033        !r_ftb_entry.newBrCanNotInsert(r_ftqOffset))
1034  }.otherwise {
1035    backendRedirectCfi.shift := (backendRedirectCfi.pd.isBr && backendRedirectCfi.taken).asUInt
1036    backendRedirectCfi.addIntoHist := backendRedirectCfi.pd.isBr.asUInt
1037  }
1038
1039
1040  // ***************************************************************************
1041  // **************************** redirect from ifu ****************************
1042  // ***************************************************************************
1043  val fromIfuRedirect = WireInit(0.U.asTypeOf(Valid(new BranchPredictionRedirect)))
1044  fromIfuRedirect.valid := pdWb.valid && pdWb.bits.misOffset.valid && !backendFlush
1045  fromIfuRedirect.bits.ftqIdx := pdWb.bits.ftqIdx
1046  fromIfuRedirect.bits.ftqOffset := pdWb.bits.misOffset.bits
1047  fromIfuRedirect.bits.level := RedirectLevel.flushAfter
1048  fromIfuRedirect.bits.BTBMissBubble := true.B
1049  fromIfuRedirect.bits.debugIsMemVio := false.B
1050  fromIfuRedirect.bits.debugIsCtrl := false.B
1051
1052  val ifuRedirectCfiUpdate = fromIfuRedirect.bits.cfiUpdate
1053  ifuRedirectCfiUpdate.pc := pdWb.bits.pc(pdWb.bits.misOffset.bits)
1054  ifuRedirectCfiUpdate.pd := pdWb.bits.pd(pdWb.bits.misOffset.bits)
1055  ifuRedirectCfiUpdate.predTaken := cfiIndex_vec(pdWb.bits.ftqIdx.value).valid
1056  ifuRedirectCfiUpdate.target := pdWb.bits.target
1057  ifuRedirectCfiUpdate.taken := pdWb.bits.cfiOffset.valid
1058  ifuRedirectCfiUpdate.isMisPred := pdWb.bits.misOffset.valid
1059
1060  val ifuRedirectReg = RegNextWithEnable(fromIfuRedirect, hasInit = true)
1061  val ifuRedirectToBpu = WireInit(ifuRedirectReg)
1062  ifuFlush := fromIfuRedirect.valid || ifuRedirectToBpu.valid
1063
1064  ftq_redirect_mem.io.ren.get.head := fromIfuRedirect.valid
1065  ftq_redirect_mem.io.raddr.head := fromIfuRedirect.bits.ftqIdx.value
1066
1067  val toBpuCfi = ifuRedirectToBpu.bits.cfiUpdate
1068  toBpuCfi.fromFtqRedirectSram(ftq_redirect_mem.io.rdata.head)
1069  when (ifuRedirectReg.bits.cfiUpdate.pd.isRet && ifuRedirectReg.bits.cfiUpdate.pd.valid) {
1070    toBpuCfi.target := toBpuCfi.topAddr
1071  }
1072
1073  when (ifuRedirectReg.valid) {
1074    ifuRedirected(ifuRedirectReg.bits.ftqIdx.value) := true.B
1075  } .elsewhen(RegNext(pdWb.valid)) {
1076    // if pdWb and no redirect, set to false
1077    ifuRedirected(last_cycle_bpu_in_ptr.value) := false.B
1078  }
1079
1080  // **********************************************************************
1081  // ***************************** to backend *****************************
1082  // **********************************************************************
1083  // to backend pc mem / target
1084  io.toBackend.pc_mem_wen := RegNext(last_cycle_bpu_in)
1085  io.toBackend.pc_mem_waddr := RegEnable(last_cycle_bpu_in_idx, last_cycle_bpu_in)
1086  io.toBackend.pc_mem_wdata := RegEnable(bpu_in_bypass_buf_for_ifu, last_cycle_bpu_in)
1087
1088  // the number of cycles is fixed
1089  val newest_entry_en: Bool = RegNext(last_cycle_bpu_in || backendRedirect.valid || ifuRedirectToBpu.valid)
1090  io.toBackend.newest_entry_en := RegNext(newest_entry_en)
1091  io.toBackend.newest_entry_ptr := RegEnable(newest_entry_ptr, newest_entry_en)
1092  io.toBackend.newest_entry_target := RegEnable(newest_entry_target, newest_entry_en)
1093
1094  // *********************************************************************
1095  // **************************** wb from exu ****************************
1096  // *********************************************************************
1097
1098  backendRedirect.valid := io.fromBackend.redirect.valid
1099  backendRedirect.bits.connectRedirect(io.fromBackend.redirect.bits)
1100  backendRedirect.bits.BTBMissBubble := false.B
1101
1102
1103  def extractRedirectInfo(wb: Valid[Redirect]) = {
1104    val ftqPtr = wb.bits.ftqIdx
1105    val ftqOffset = wb.bits.ftqOffset
1106    val taken = wb.bits.cfiUpdate.taken
1107    val mispred = wb.bits.cfiUpdate.isMisPred
1108    (wb.valid, ftqPtr, ftqOffset, taken, mispred)
1109  }
1110
1111  // fix mispredict entry
1112  val lastIsMispredict = RegNext(
1113    backendRedirect.valid && backendRedirect.bits.level === RedirectLevel.flushAfter, init = false.B
1114  )
1115
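  // updateCfiInfo: a taken redirect at an offset before the recorded cfiIndex
  // moves the cfiIndex forward; a redirect at exactly the recorded offset
  // re-evaluates the valid bit with the actual taken result; a not-taken
  // redirect at any other offset clears the valid bit; mispredict_vec is only
  // written for backend redirects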
1116  def updateCfiInfo(redirect: Valid[Redirect], isBackend: Boolean = true) = {
1117    val (r_valid, r_ptr, r_offset, r_taken, r_mispred) = extractRedirectInfo(redirect)
1118    val r_idx = r_ptr.value
1119    val cfiIndex_bits_wen = r_valid && r_taken && r_offset < cfiIndex_vec(r_idx).bits
1120    val cfiIndex_valid_wen = r_valid && r_offset === cfiIndex_vec(r_idx).bits
1121    when (cfiIndex_bits_wen || cfiIndex_valid_wen) {
1122      cfiIndex_vec(r_idx).valid := cfiIndex_bits_wen || cfiIndex_valid_wen && r_taken
1123    } .elsewhen (r_valid && !r_taken && r_offset =/= cfiIndex_vec(r_idx).bits) {
1124      cfiIndex_vec(r_idx).valid := false.B
1125    }
1126    when (cfiIndex_bits_wen) {
1127      cfiIndex_vec(r_idx).bits := r_offset
1128    }
1129    newest_entry_target_modified := true.B
1130    newest_entry_target := redirect.bits.cfiUpdate.target
1131    newest_entry_ptr_modified := true.B
1132    newest_entry_ptr := r_ptr
1133
1134    update_target(r_idx) := redirect.bits.cfiUpdate.target // TODO: remove this
1135    if (isBackend) {
1136      mispredict_vec(r_idx)(r_offset) := r_mispred
1137    }
1138  }
1139
1140  when(fromBackendRedirect.valid) {
1141    updateCfiInfo(fromBackendRedirect)
1142  }.elsewhen (ifuRedirectToBpu.valid) {
1143    updateCfiInfo(ifuRedirectToBpu, isBackend=false)
1144  }
1145
1146  when (fromBackendRedirect.valid) {
1147    when (fromBackendRedirect.bits.ControlRedirectBubble) {
1148      when (fromBackendRedirect.bits.ControlBTBMissBubble) {
1149        topdown_stage.reasons(TopDownCounters.BTBMissBubble.id) := true.B
1150        io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.BTBMissBubble.id) := true.B
1151      } .elsewhen (fromBackendRedirect.bits.TAGEMissBubble) {
1152        topdown_stage.reasons(TopDownCounters.TAGEMissBubble.id) := true.B
1153        io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.TAGEMissBubble.id) := true.B
1154      } .elsewhen (fromBackendRedirect.bits.SCMissBubble) {
1155        topdown_stage.reasons(TopDownCounters.SCMissBubble.id) := true.B
1156        io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.SCMissBubble.id) := true.B
1157      } .elsewhen (fromBackendRedirect.bits.ITTAGEMissBubble) {
1158        topdown_stage.reasons(TopDownCounters.ITTAGEMissBubble.id) := true.B
1159        io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.ITTAGEMissBubble.id) := true.B
1160      } .elsewhen (fromBackendRedirect.bits.RASMissBubble) {
1161        topdown_stage.reasons(TopDownCounters.RASMissBubble.id) := true.B
1162        io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.RASMissBubble.id) := true.B
1163      }
1164
1165
1166    } .elsewhen (backendRedirect.bits.MemVioRedirectBubble) {
1167      topdown_stage.reasons(TopDownCounters.MemVioRedirectBubble.id) := true.B
1168      io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.MemVioRedirectBubble.id) := true.B
1169    } .otherwise {
1170      topdown_stage.reasons(TopDownCounters.OtherRedirectBubble.id) := true.B
1171      io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.OtherRedirectBubble.id) := true.B
1172    }
1173  } .elsewhen (ifuRedirectReg.valid) {
1174    topdown_stage.reasons(TopDownCounters.BTBMissBubble.id) := true.B
1175    io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.BTBMissBubble.id) := true.B
1176  }
1177
1178  io.ControlBTBMissBubble := fromBackendRedirect.bits.ControlBTBMissBubble
1179  io.TAGEMissBubble := fromBackendRedirect.bits.TAGEMissBubble
1180  io.SCMissBubble := fromBackendRedirect.bits.SCMissBubble
1181  io.ITTAGEMissBubble := fromBackendRedirect.bits.ITTAGEMissBubble
1182  io.RASMissBubble := fromBackendRedirect.bits.RASMissBubble
1183
1184  // ***********************************************************************************
1185  // **************************** flush ptr and state queue ****************************
1186  // ***********************************************************************************
1187
1188  val redirectVec = VecInit(backendRedirect, fromIfuRedirect)
1189
1190  // on a redirect, reset the pointers and status queues
1191  io.icacheFlush := redirectVec.map(r => r.valid).reduce(_||_)
1192  XSPerfAccumulate("icacheFlushFromBackend", backendRedirect.valid)
1193  XSPerfAccumulate("icacheFlushFromIFU", fromIfuRedirect.valid)
1194  when(redirectVec.map(r => r.valid).reduce(_||_)){
1195    val r = PriorityMux(redirectVec.map(r => (r.valid -> r.bits)))
1196    val notIfu = redirectVec.dropRight(1).map(r => r.valid).reduce(_||_)
1197    val (idx, offset, flushItSelf) = (r.ftqIdx, r.ftqOffset, RedirectLevel.flushItself(r.level))
1198    val next = idx + 1.U
1199    bpuPtr := next
1200    copied_bpu_ptr.map(_ := next)
1201    ifuPtr_write := next
1202    ifuWbPtr_write := next
1203    ifuPtrPlus1_write := idx + 2.U
1204    ifuPtrPlus2_write := idx + 3.U
1205    pfPtr_write := next
1206    pfPtrPlus1_write := idx + 2.U
1207  }
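  // For non-IFU redirects, one cycle later the commit states of the slots behind the
  // redirected instruction are cleared; the redirected slot itself is marked c_flushed
  // when the redirect also flushes that instruction (RedirectLevel.flushItself).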
1208  when(RegNext(redirectVec.map(r => r.valid).reduce(_||_))){
1209    val r = PriorityMux(redirectVec.map(r => (r.valid -> r.bits)))
1210    val notIfu = redirectVec.dropRight(1).map(r => r.valid).reduce(_||_)
1211    val (idx, offset, flushItSelf) = (r.ftqIdx, r.ftqOffset, RedirectLevel.flushItself(r.level))
1212    when (RegNext(notIfu)) {
1213      commitStateQueueEnable(RegNext(idx.value)) := true.B
1214      commitStateQueueNext(RegNext(idx.value)).zipWithIndex.foreach({ case (s, i) =>
1215        when(i.U > RegNext(offset)) {
1216          s := c_empty
1217        }
1218        when (i.U === RegNext(offset) && RegNext(flushItSelf)) {
1219          s := c_flushed
1220        }
1221      })
1222    }
1223  }
1224
1225
1226  // only the valid bit is actually needed
1227  io.toIfu.redirect.bits    := backendRedirect.bits
1228  io.toIfu.redirect.valid   := stage2Flush
1229  io.toIfu.topdown_redirect := fromBackendRedirect
1230
1231  // commit
1232  for (c <- io.fromBackend.rob_commits) {
1233    when(c.valid) {
1234      commitStateQueueEnable(c.bits.ftqIdx.value) := true.B
1235      commitStateQueueNext(c.bits.ftqIdx.value)(c.bits.ftqOffset) := c_committed
1236      // TODO: remove this
1237      // For instruction fusions, we also update the next instruction
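      // commitType values handled here: 4 commits ftqOffset+1 of the same entry as
      // well, 5 commits ftqOffset+2, 6 commits slot 0 of the next FTQ entry, and 7
      // commits slot 1 of the next entry.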
1238      when (c.bits.commitType === 4.U) {
1239        commitStateQueueNext(c.bits.ftqIdx.value)(c.bits.ftqOffset + 1.U) := c_committed
1240      }.elsewhen(c.bits.commitType === 5.U) {
1241        commitStateQueueNext(c.bits.ftqIdx.value)(c.bits.ftqOffset + 2.U) := c_committed
1242      }.elsewhen(c.bits.commitType === 6.U) {
1243        val index = (c.bits.ftqIdx + 1.U).value
1244        commitStateQueueEnable(index) := true.B
1245        commitStateQueueNext(index)(0) := c_committed
1246      }.elsewhen(c.bits.commitType === 7.U) {
1247        val index = (c.bits.ftqIdx + 1.U).value
1248        commitStateQueueEnable(index) := true.B
1249        commitStateQueueNext(index)(1) := c_committed
1250      }
1251    }
1252  }
1253
1254  // ****************************************************************
1255  // **************************** to bpu ****************************
1256  // ****************************************************************
1257
1258  io.toBpu.redirctFromIFU := ifuRedirectToBpu.valid
1259  io.toBpu.redirect := Mux(fromBackendRedirect.valid, fromBackendRedirect, ifuRedirectToBpu)
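  // Redirect latency: cycles from the s1 prediction of the redirected entry (when
  // pred_s1_cycle is recorded) to the redirect being sent back to the BPU.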
1260  val dummy_s1_pred_cycle_vec = VecInit(List.tabulate(FtqSize)(_=>0.U(64.W)))
1261  val redirect_latency = GTimer() - pred_s1_cycle.getOrElse(dummy_s1_pred_cycle_vec)(io.toBpu.redirect.bits.ftqIdx.value) + 1.U
1262  XSPerfHistogram("backend_redirect_latency", redirect_latency, fromBackendRedirect.valid, 0, 60, 1)
1263  XSPerfHistogram("ifu_redirect_latency", redirect_latency, !fromBackendRedirect.valid && ifuRedirectToBpu.valid, 0, 60, 1)
1264
1265  XSError(io.toBpu.redirect.valid && isBefore(io.toBpu.redirect.bits.ftqIdx, commPtr), "Ftq received a redirect after its commit, check backend or replay")
1266
1267  val may_have_stall_from_bpu = Wire(Bool())
1268  val bpu_ftb_update_stall = RegInit(0.U(2.W)) // 2-cycle stall, so we need 3 states
1269  may_have_stall_from_bpu := bpu_ftb_update_stall =/= 0.U
1270
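  // An entry may commit once IFU writeback has passed it and no FTB-update stall is
  // pending, provided the ROB commit pointer is already past it or its last valid
  // instruction has reached c_committed; commPtr may additionally move past an entry
  // whose first instruction was flushed.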
1271  val validInstructions = commitStateQueueReg(commPtr.value).map(s => s === c_toCommit || s === c_committed)
1272  val lastInstructionStatus = PriorityMux(validInstructions.reverse.zip(commitStateQueueReg(commPtr.value).reverse))
1273  val firstInstructionFlushed = commitStateQueueReg(commPtr.value)(0) === c_flushed
1274  canCommit := commPtr =/= ifuWbPtr && !may_have_stall_from_bpu &&
1275    (isAfter(robCommPtr, commPtr) ||
1276      validInstructions.reduce(_ || _) && lastInstructionStatus === c_committed)
1277  val canMoveCommPtr = commPtr =/= ifuWbPtr && !may_have_stall_from_bpu &&
1278    (isAfter(robCommPtr, commPtr) ||
1279      validInstructions.reduce(_ || _) && lastInstructionStatus === c_committed ||
1280      firstInstructionFlushed)
1281
1282  when (io.fromBackend.rob_commits.map(_.valid).reduce(_ | _)) {
1283    robCommPtr_write := ParallelPriorityMux(io.fromBackend.rob_commits.map(_.valid).reverse, io.fromBackend.rob_commits.map(_.bits.ftqIdx).reverse)
1284  } .elsewhen (isAfter(commPtr, robCommPtr)) {
1285    robCommPtr_write := commPtr
1286  } .otherwise {
1287    robCommPtr_write := robCommPtr
1288  }
1289
1290  /**
1291    *************************************************************************************
1292    * MMIO instruction fetch is allowed only if MMIO is the oldest instruction.
1293    *************************************************************************************
1294    */
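  // The MMIO fetch is the oldest once commPtr has passed its FTQ entry, or points at
  // that entry and the entry's last valid instruction has committed.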
1295  val mmioReadPtr = io.mmioCommitRead.mmioFtqPtr
1296  val mmioLastCommit = isAfter(commPtr, mmioReadPtr) ||
1297    commPtr === mmioReadPtr && validInstructions.reduce(_ || _) && lastInstructionStatus === c_committed
1298  io.mmioCommitRead.mmioLastCommit := RegNext(mmioLastCommit)
1299
1300  // commit reads
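  // The commit target is normally the next entry's start address; when committing the
  // newest entry (whose successor may not be in ftq_pc_mem yet), the latched
  // newest_entry_target is used instead.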
1301  val commit_pc_bundle = RegNext(ftq_pc_mem.io.commPtr_rdata)
1302  val commit_target =
1303    Mux(RegNext(commPtr === newest_entry_ptr),
1304      RegEnable(newest_entry_target, newest_entry_target_modified),
1305      RegNext(ftq_pc_mem.io.commPtrPlus1_rdata.startAddr))
1306  ftq_pd_mem.io.ren.get.last := canCommit
1307  ftq_pd_mem.io.raddr.last := commPtr.value
1308  val commit_pd = ftq_pd_mem.io.rdata.last
1309  ftq_redirect_mem.io.ren.get.last := canCommit
1310  ftq_redirect_mem.io.raddr.last := commPtr.value
1311  val commit_spec_meta = ftq_redirect_mem.io.rdata.last
1312  ftq_meta_1r_sram.io.ren(0) := canCommit
1313  ftq_meta_1r_sram.io.raddr(0) := commPtr.value
1314  val commit_meta = ftq_meta_1r_sram.io.rdata(0).meta
1315  val commit_ftb_entry = ftq_meta_1r_sram.io.rdata(0).ftb_entry
1316
1317  // need one cycle to read mem and srams
1318  val do_commit_ptr = RegEnable(commPtr, canCommit)
1319  val do_commit = RegNext(canCommit, init=false.B)
1320  when (canMoveCommPtr) {
1321    commPtr_write := commPtrPlus1
1322    commPtrPlus1_write := commPtrPlus1 + 1.U
1323  }
1324  val commit_state = RegEnable(commitStateQueueReg(commPtr.value), canCommit)
1325  val can_commit_cfi = WireInit(cfiIndex_vec(commPtr.value))
1326  val do_commit_cfi = WireInit(cfiIndex_vec(do_commit_ptr.value))
1327  //
1328  //when (commitStateQueue(commPtr.value)(can_commit_cfi.bits) =/= c_commited) {
1329  //  can_commit_cfi.valid := false.B
1330  //}
1331  val commit_cfi = RegEnable(can_commit_cfi, canCommit)
1332  val debug_cfi = commitStateQueueReg(do_commit_ptr.value)(do_commit_cfi.bits) =/= c_committed && do_commit_cfi.valid
1333
1334  val commit_mispredict  : Vec[Bool] = VecInit((RegEnable(mispredict_vec(commPtr.value), canCommit) zip commit_state).map {
1335    case (mis, state) => mis && state === c_committed
1336  })
1337  val commit_instCommitted: Vec[Bool] = VecInit(commit_state.map(_ === c_committed)) // [PredictWidth]
1338  val can_commit_hit                 = entry_hit_status(commPtr.value)
1339  val commit_hit                     = RegEnable(can_commit_hit, canCommit)
1340  val diff_commit_target             = RegEnable(update_target(commPtr.value), canCommit) // TODO: remove this
1341  val commit_stage                   = RegEnable(pred_stage(commPtr.value), canCommit)
1342  val commit_valid                   = commit_hit === h_hit || commit_cfi.valid // hit or taken
1343
1344  val to_bpu_hit = can_commit_hit === h_hit || can_commit_hit === h_false_hit
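  // FTB update stall: when a taken CFI commits without an FTB hit, stall further
  // commits for two cycles (2 -> 1 -> 0 via may_have_stall_from_bpu); state 3 is
  // unreachable and raises an assertion.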
1345  switch (bpu_ftb_update_stall) {
1346    is (0.U) {
1347      when (can_commit_cfi.valid && !to_bpu_hit && canCommit) {
1348        bpu_ftb_update_stall := 2.U // 2-cycle stall
1349      }
1350    }
1351    is (2.U) {
1352      bpu_ftb_update_stall := 1.U
1353    }
1354    is (1.U) {
1355      bpu_ftb_update_stall := 0.U
1356    }
1357    is (3.U) {
1358      XSError(true.B, "bpu_ftb_update_stall should be 0, 1 or 2")
1359    }
1360  }
1361
1362  // TODO: remove this
1363  XSError(do_commit && diff_commit_target =/= commit_target, "\ncommit target should be the same as update target\n")
1364
1365  // update latency stats
1366  val update_latency = GTimer() - pred_s1_cycle.getOrElse(dummy_s1_pred_cycle_vec)(do_commit_ptr.value) + 1.U
1367  XSPerfHistogram("bpu_update_latency", update_latency, io.toBpu.update.valid, 0, 64, 2)
1368
1369  io.toBpu.update := DontCare
1370  io.toBpu.update.valid := commit_valid && do_commit
1371  val update = io.toBpu.update.bits
1372  update.false_hit   := commit_hit === h_false_hit
1373  update.pc          := commit_pc_bundle.startAddr
1374  update.meta        := commit_meta
1375  update.cfi_idx     := commit_cfi
1376  update.full_target := commit_target
1377  update.from_stage  := commit_stage
1378  update.spec_info   := commit_spec_meta
1379  XSError(commit_valid && do_commit && debug_cfi, "\ncommitted cfi should not be in a non-committed state\n")
1380
1381  val commit_real_hit = commit_hit === h_hit
1382  val update_ftb_entry = update.ftb_entry
1383
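  // FTBEntryGen combines the committed entry's start address, old FTB entry, predecode
  // info, taken CFI, target, hit status and mispredict vector into the updated FTB
  // entry, plus the new-branch insert position, mispredict mask and whether the old
  // entry is reused unchanged.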
1384  val ftbEntryGen = Module(new FTBEntryGen).io
1385  ftbEntryGen.start_addr     := commit_pc_bundle.startAddr
1386  ftbEntryGen.old_entry      := commit_ftb_entry
1387  ftbEntryGen.pd             := commit_pd
1388  ftbEntryGen.cfiIndex       := commit_cfi
1389  ftbEntryGen.target         := commit_target
1390  ftbEntryGen.hit            := commit_real_hit
1391  ftbEntryGen.mispredict_vec := commit_mispredict
1392
1393  update_ftb_entry         := ftbEntryGen.new_entry
1394  update.new_br_insert_pos := ftbEntryGen.new_br_insert_pos
1395  update.mispred_mask      := ftbEntryGen.mispred_mask
1396  update.old_entry         := ftbEntryGen.is_old_entry
1397  update.pred_hit          := commit_hit === h_hit || commit_hit === h_false_hit
1398  update.br_taken_mask     := ftbEntryGen.taken_mask
1399  update.br_committed      := (ftbEntryGen.new_entry.brValids zip ftbEntryGen.new_entry.brOffset) map {
1400    case (valid, offset) => valid && commit_instCommitted(offset)
1401  }
1402  update.jmp_taken         := ftbEntryGen.jmp_taken
1403
1404  // update.full_pred.fromFtbEntry(ftbEntryGen.new_entry, update.pc)
1405  // update.full_pred.jalr_target := commit_target
1406  // update.full_pred.hit := true.B
1407  // when (update.full_pred.is_jalr) {
1408  //   update.full_pred.targets.last := commit_target
1409  // }
1410
1411  // ******************************************************************************
1412  // **************************** commit perf counters ****************************
1413  // ******************************************************************************
1414
1415  val commit_inst_mask    = VecInit(commit_state.map(c => c === c_committed && do_commit)).asUInt
1416  val commit_mispred_mask = commit_mispredict.asUInt
1417  val commit_not_mispred_mask = ~commit_mispred_mask
1418
1419  val commit_br_mask = commit_pd.brMask.asUInt
1420  val commit_jmp_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.jmpInfo.valid.asTypeOf(UInt(1.W)))
1421  val commit_cfi_mask = (commit_br_mask | commit_jmp_mask)
1422
1423  val mbpInstrs = commit_inst_mask & commit_cfi_mask
1424
1425  val mbpRights = mbpInstrs & commit_not_mispred_mask
1426  val mbpWrongs = mbpInstrs & commit_mispred_mask
1427
1428  io.bpuInfo.bpRight := PopCount(mbpRights)
1429  io.bpuInfo.bpWrong := PopCount(mbpWrongs)
1430
1431  val hartId = p(XSCoreParamsKey).HartId
1432  val isWriteFTQTable = Constantin.createRecord(s"isWriteFTQTable$hartId")
1433  val ftqBranchTraceDB = ChiselDB.createTable(s"FTQTable$hartId", new FtqDebugBundle)
1434  // Cfi Info
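  // For every committed CFI slot, log pc, target, type, direction, misprediction and
  // predicting stage to the FTQTable ChiselDB trace, gated by the isWriteFTQTable
  // constant.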
1435  for (i <- 0 until PredictWidth) {
1436    val pc = commit_pc_bundle.startAddr + (i * instBytes).U
1437    val v = commit_state(i) === c_committed
1438    val isBr = commit_pd.brMask(i)
1439    val isJmp = commit_pd.jmpInfo.valid && commit_pd.jmpOffset === i.U
1440    val isCfi = isBr || isJmp
1441    val isTaken = commit_cfi.valid && commit_cfi.bits === i.U
1442    val misPred = commit_mispredict(i)
1443    // val ghist = commit_spec_meta.ghist.predHist
1444    val histPtr = commit_spec_meta.histPtr
1445    val predCycle = commit_meta(63, 0)
1446    val target = commit_target
1447
1448    val brIdx = OHToUInt(Reverse(Cat(update_ftb_entry.brValids.zip(update_ftb_entry.brOffset).map{case(v, offset) => v && offset === i.U})))
1449    val inFtbEntry = update_ftb_entry.brValids.zip(update_ftb_entry.brOffset).map{case(v, offset) => v && offset === i.U}.reduce(_||_)
1450    val addIntoHist = ((commit_hit === h_hit) && inFtbEntry) || ((!(commit_hit === h_hit) && i.U === commit_cfi.bits && isBr && commit_cfi.valid))
1451    XSDebug(v && do_commit && isCfi, p"cfi_update: isBr(${isBr}) pc(${Hexadecimal(pc)}) " +
1452    p"taken(${isTaken}) mispred(${misPred}) cycle($predCycle) hist(${histPtr.value}) " +
1453    p"startAddr(${Hexadecimal(commit_pc_bundle.startAddr)}) AddIntoHist(${addIntoHist}) " +
1454    p"brInEntry(${inFtbEntry}) brIdx(${brIdx}) target(${Hexadecimal(target)})\n")
1455
1456    val logbundle = Wire(new FtqDebugBundle)
1457    logbundle.pc := pc
1458    logbundle.target := target
1459    logbundle.isBr := isBr
1460    logbundle.isJmp := isJmp
1461    logbundle.isCall := isJmp && commit_pd.hasCall
1462    logbundle.isRet := isJmp && commit_pd.hasRet
1463    logbundle.misPred := misPred
1464    logbundle.isTaken := isTaken
1465    logbundle.predStage := commit_stage
1466
1467    ftqBranchTraceDB.log(
1468      data = logbundle /* hardware of type T */,
1469      en = isWriteFTQTable.orR && v && do_commit && isCfi,
1470      site = "FTQ" + p(XSCoreParamsKey).HartId.toString,
1471      clock = clock,
1472      reset = reset
1473    )
1474  }
1475
1476  val enq = io.fromBpu.resp
1477  val perf_redirect = backendRedirect
1478
1479  XSPerfAccumulate("entry", validEntries)
1480  XSPerfAccumulate("bpu_to_ftq_stall", enq.valid && !enq.ready)
1481  XSPerfAccumulate("mispredictRedirect", perf_redirect.valid && RedirectLevel.flushAfter === perf_redirect.bits.level)
1482  XSPerfAccumulate("replayRedirect", perf_redirect.valid && RedirectLevel.flushItself(perf_redirect.bits.level))
1483  XSPerfAccumulate("predecodeRedirect", fromIfuRedirect.valid)
1484
1485  XSPerfAccumulate("to_ifu_bubble", io.toIfu.req.ready && !io.toIfu.req.valid)
1486
1487  XSPerfAccumulate("to_ifu_stall", io.toIfu.req.valid && !io.toIfu.req.ready)
1488  XSPerfAccumulate("from_bpu_real_bubble", !enq.valid && enq.ready && allowBpuIn)
1489  XSPerfAccumulate("bpu_to_ifu_bubble", bpuPtr === ifuPtr)
1490  XSPerfAccumulate("bpu_to_ifu_bubble_when_ftq_full", (bpuPtr === ifuPtr) && isFull(bpuPtr, commPtr) && io.toIfu.req.ready)
1491
1492  XSPerfAccumulate("redirectAhead_ValidNum", ftqIdxAhead.map(_.valid).reduce(_|_))
1493  XSPerfAccumulate("fromBackendRedirect_ValidNum", io.fromBackend.redirect.valid)
1494  XSPerfAccumulate("toBpuRedirect_ValidNum", io.toBpu.redirect.valid)
1495
1496  val from_bpu = io.fromBpu.resp.bits
1497  val to_ifu = io.toIfu.req.bits
1498
1499
1500  XSPerfHistogram("commit_num_inst", PopCount(commit_inst_mask), do_commit, 0, PredictWidth+1, 1)
1501
1502
1503
1504
1505  val commit_jal_mask  = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasJal.asTypeOf(UInt(1.W)))
1506  val commit_jalr_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasJalr.asTypeOf(UInt(1.W)))
1507  val commit_call_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasCall.asTypeOf(UInt(1.W)))
1508  val commit_ret_mask  = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasRet.asTypeOf(UInt(1.W)))
1509
1510
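  // Per-type right/wrong breakdown: B = conditional branch, J = jal, I = jalr
  // (indirect), C = call, R = ret.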
1511  val mbpBRights = mbpRights & commit_br_mask
1512  val mbpJRights = mbpRights & commit_jal_mask
1513  val mbpIRights = mbpRights & commit_jalr_mask
1514  val mbpCRights = mbpRights & commit_call_mask
1515  val mbpRRights = mbpRights & commit_ret_mask
1516
1517  val mbpBWrongs = mbpWrongs & commit_br_mask
1518  val mbpJWrongs = mbpWrongs & commit_jal_mask
1519  val mbpIWrongs = mbpWrongs & commit_jalr_mask
1520  val mbpCWrongs = mbpWrongs & commit_call_mask
1521  val mbpRWrongs = mbpWrongs & commit_ret_mask
1522
1523  val commit_pred_stage = RegNext(pred_stage(commPtr.value))
1524
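  // Build a per-BPU-stage breakdown of a commit mask, keyed "<name>_stage_<n>", based
  // on which stage produced the committed prediction.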
1525  def pred_stage_map(src: UInt, name: String) = {
1526    (0 until numBpStages).map(i =>
1527      f"${name}_stage_${i+1}" -> PopCount(src.asBools.map(_ && commit_pred_stage === BP_STAGES(i)))
1528    ).foldLeft(Map[String, UInt]())(_+_)
1529  }
1530
1531  val mispred_stage_map      = pred_stage_map(mbpWrongs,  "mispredict")
1532  val br_mispred_stage_map   = pred_stage_map(mbpBWrongs, "br_mispredict")
1533  val jalr_mispred_stage_map = pred_stage_map(mbpIWrongs, "jalr_mispredict")
1534  val correct_stage_map      = pred_stage_map(mbpRights,  "correct")
1535  val br_correct_stage_map   = pred_stage_map(mbpBRights, "br_correct")
1536  val jalr_correct_stage_map = pred_stage_map(mbpIRights, "jalr_correct")
1537
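  // FTB update statistics: classify each BPU update as a false hit, a hit, a brand-new
  // entry (br-only / jmp-only / both), an unchanged old entry, or a modified entry
  // (new branch inserted, jalr target corrected, branch slots full, always-taken bit
  // changed), and record entry fall-through lengths.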
1538  val update_valid = io.toBpu.update.valid
1539  def u(cond: Bool) = update_valid && cond
1540  val ftb_false_hit = u(update.false_hit)
1541  // assert(!ftb_false_hit)
1542  val ftb_hit = u(commit_hit === h_hit)
1543
1544  val ftb_new_entry = u(ftbEntryGen.is_init_entry)
1545  val ftb_new_entry_only_br = ftb_new_entry && !update_ftb_entry.jmpValid
1546  val ftb_new_entry_only_jmp = ftb_new_entry && !update_ftb_entry.brValids(0)
1547  val ftb_new_entry_has_br_and_jmp = ftb_new_entry && update_ftb_entry.brValids(0) && update_ftb_entry.jmpValid
1548
1549  val ftb_old_entry = u(ftbEntryGen.is_old_entry)
1550
1551  val ftb_modified_entry = u(ftbEntryGen.is_new_br || ftbEntryGen.is_jalr_target_modified || ftbEntryGen.is_always_taken_modified)
1552  val ftb_modified_entry_new_br = u(ftbEntryGen.is_new_br)
1553  val ftb_modified_entry_ifu_redirected = u(ifuRedirected(do_commit_ptr.value))
1554  val ftb_modified_entry_jalr_target_modified = u(ftbEntryGen.is_jalr_target_modified)
1555  val ftb_modified_entry_br_full = ftb_modified_entry && ftbEntryGen.is_br_full
1556  val ftb_modified_entry_always_taken = ftb_modified_entry && ftbEntryGen.is_always_taken_modified
1557
1558  def getFtbEntryLen(pc: UInt, entry: FTBEntry) = (entry.getFallThrough(pc) - pc) >> instOffsetBits
1559  val gen_ftb_entry_len = getFtbEntryLen(update.pc, ftbEntryGen.new_entry)
1560  XSPerfHistogram("ftb_init_entry_len", gen_ftb_entry_len, ftb_new_entry, 0, PredictWidth+1, 1)
1561  XSPerfHistogram("ftb_modified_entry_len", gen_ftb_entry_len, ftb_modified_entry, 0, PredictWidth+1, 1)
1562  val s3_ftb_entry_len = getFtbEntryLen(from_bpu.s3.pc(0), from_bpu.last_stage_ftb_entry)
1563  XSPerfHistogram("s3_ftb_entry_len", s3_ftb_entry_len, from_bpu.s3.valid(0), 0, PredictWidth+1, 1)
1564
1565  XSPerfHistogram("ftq_has_entry", validEntries, true.B, 0, FtqSize+1, 1)
1566
1567  val perfCountsMap = Map(
1568    "BpInstr" -> PopCount(mbpInstrs),
1569    "BpBInstr" -> PopCount(mbpBRights | mbpBWrongs),
1570    "BpRight"  -> PopCount(mbpRights),
1571    "BpWrong"  -> PopCount(mbpWrongs),
1572    "BpBRight" -> PopCount(mbpBRights),
1573    "BpBWrong" -> PopCount(mbpBWrongs),
1574    "BpJRight" -> PopCount(mbpJRights),
1575    "BpJWrong" -> PopCount(mbpJWrongs),
1576    "BpIRight" -> PopCount(mbpIRights),
1577    "BpIWrong" -> PopCount(mbpIWrongs),
1578    "BpCRight" -> PopCount(mbpCRights),
1579    "BpCWrong" -> PopCount(mbpCWrongs),
1580    "BpRRight" -> PopCount(mbpRRights),
1581    "BpRWrong" -> PopCount(mbpRWrongs),
1582
1583    "ftb_false_hit"                -> PopCount(ftb_false_hit),
1584    "ftb_hit"                      -> PopCount(ftb_hit),
1585    "ftb_new_entry"                -> PopCount(ftb_new_entry),
1586    "ftb_new_entry_only_br"        -> PopCount(ftb_new_entry_only_br),
1587    "ftb_new_entry_only_jmp"       -> PopCount(ftb_new_entry_only_jmp),
1588    "ftb_new_entry_has_br_and_jmp" -> PopCount(ftb_new_entry_has_br_and_jmp),
1589    "ftb_old_entry"                -> PopCount(ftb_old_entry),
1590    "ftb_modified_entry"           -> PopCount(ftb_modified_entry),
1591    "ftb_modified_entry_new_br"    -> PopCount(ftb_modified_entry_new_br),
1592    "ftb_jalr_target_modified"     -> PopCount(ftb_modified_entry_jalr_target_modified),
1593    "ftb_modified_entry_br_full"   -> PopCount(ftb_modified_entry_br_full),
1594    "ftb_modified_entry_always_taken" -> PopCount(ftb_modified_entry_always_taken)
1595  ) ++ mispred_stage_map ++ br_mispred_stage_map ++ jalr_mispred_stage_map ++
1596       correct_stage_map ++ br_correct_stage_map ++ jalr_correct_stage_map
1597
1598  for((key, value) <- perfCountsMap) {
1599    XSPerfAccumulate(key, value)
1600  }
1601
1602  // --------------------------- Debug --------------------------------
1603  // XSDebug(enq_fire, p"enq! " + io.fromBpu.resp.bits.toPrintable)
1604  XSDebug(io.toIfu.req.fire, p"fire to ifu " + io.toIfu.req.bits.toPrintable)
1605  XSDebug(do_commit, p"deq! [ptr] $do_commit_ptr\n")
1606  XSDebug(true.B, p"[bpuPtr] $bpuPtr, [ifuPtr] $ifuPtr, [ifuWbPtr] $ifuWbPtr [commPtr] $commPtr\n")
1607  XSDebug(true.B, p"[in] v:${io.fromBpu.resp.valid} r:${io.fromBpu.resp.ready} " +
1608    p"[out] v:${io.toIfu.req.valid} r:${io.toIfu.req.ready}\n")
1609  XSDebug(do_commit, p"[deq info] cfiIndex: $commit_cfi, $commit_pc_bundle, target: ${Hexadecimal(commit_target)}\n")
1610
1611  //   def ubtbCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1612  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1613  //       case (((valid, pd), ans), taken) =>
1614  //       Mux(valid && pd.isBr,
1615  //         isWrong ^ Mux(ans.hit.asBool,
1616  //           Mux(ans.taken.asBool, taken && ans.target === commitEntry.target,
1617  //           !taken),
1618  //         !taken),
1619  //       false.B)
1620  //     }
1621  //   }
1622
1623  //   def btbCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1624  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1625  //       case (((valid, pd), ans), taken) =>
1626  //       Mux(valid && pd.isBr,
1627  //         isWrong ^ Mux(ans.hit.asBool,
1628  //           Mux(ans.taken.asBool, taken && ans.target === commitEntry.target,
1629  //           !taken),
1630  //         !taken),
1631  //       false.B)
1632  //     }
1633  //   }
1634
1635  //   def tageCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1636  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1637  //       case (((valid, pd), ans), taken) =>
1638  //       Mux(valid && pd.isBr,
1639  //         isWrong ^ (ans.taken.asBool === taken),
1640  //       false.B)
1641  //     }
1642  //   }
1643
1644  //   def loopCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1645  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1646  //       case (((valid, pd), ans), taken) =>
1647  //       Mux(valid && (pd.isBr) && ans.hit.asBool,
1648  //         isWrong ^ (!taken),
1649  //           false.B)
1650  //     }
1651  //   }
1652
1653  //   def rasCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1654  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1655  //       case (((valid, pd), ans), taken) =>
1656  //       Mux(valid && pd.isRet.asBool /*&& taken*/ && ans.hit.asBool,
1657  //         isWrong ^ (ans.target === commitEntry.target),
1658  //           false.B)
1659  //     }
1660  //   }
1661
1662  //   val ubtbRights = ubtbCheck(commitEntry, commitEntry.metas.map(_.ubtbAns), false.B)
1663  //   val ubtbWrongs = ubtbCheck(commitEntry, commitEntry.metas.map(_.ubtbAns), true.B)
1664  //   // btb and ubtb pred jal and jalr as well
1665  //   val btbRights = btbCheck(commitEntry, commitEntry.metas.map(_.btbAns), false.B)
1666  //   val btbWrongs = btbCheck(commitEntry, commitEntry.metas.map(_.btbAns), true.B)
1667  //   val tageRights = tageCheck(commitEntry, commitEntry.metas.map(_.tageAns), false.B)
1668  //   val tageWrongs = tageCheck(commitEntry, commitEntry.metas.map(_.tageAns), true.B)
1669
1670  //   val loopRights = loopCheck(commitEntry, commitEntry.metas.map(_.loopAns), false.B)
1671  //   val loopWrongs = loopCheck(commitEntry, commitEntry.metas.map(_.loopAns), true.B)
1672
1673  //   val rasRights = rasCheck(commitEntry, commitEntry.metas.map(_.rasAns), false.B)
1674  //   val rasWrongs = rasCheck(commitEntry, commitEntry.metas.map(_.rasAns), true.B)
1675
1676  val perfEvents = Seq(
1677    ("bpu_s2_redirect        ", bpu_s2_redirect                                                             ),
1678    ("bpu_s3_redirect        ", bpu_s3_redirect                                                             ),
1679    ("bpu_to_ftq_stall       ", enq.valid && !enq.ready                                                     ),
1680    ("mispredictRedirect     ", perf_redirect.valid && RedirectLevel.flushAfter === perf_redirect.bits.level),
1681    ("replayRedirect         ", perf_redirect.valid && RedirectLevel.flushItself(perf_redirect.bits.level)  ),
1682    ("predecodeRedirect      ", fromIfuRedirect.valid                                                       ),
1683    ("to_ifu_bubble          ", io.toIfu.req.ready && !io.toIfu.req.valid                                   ),
1684    ("from_bpu_real_bubble   ", !enq.valid && enq.ready && allowBpuIn                                       ),
1685    ("BpInstr                ", PopCount(mbpInstrs)                                                         ),
1686    ("BpBInstr               ", PopCount(mbpBRights | mbpBWrongs)                                           ),
1687    ("BpRight                ", PopCount(mbpRights)                                                         ),
1688    ("BpWrong                ", PopCount(mbpWrongs)                                                         ),
1689    ("BpBRight               ", PopCount(mbpBRights)                                                        ),
1690    ("BpBWrong               ", PopCount(mbpBWrongs)                                                        ),
1691    ("BpJRight               ", PopCount(mbpJRights)                                                        ),
1692    ("BpJWrong               ", PopCount(mbpJWrongs)                                                        ),
1693    ("BpIRight               ", PopCount(mbpIRights)                                                        ),
1694    ("BpIWrong               ", PopCount(mbpIWrongs)                                                        ),
1695    ("BpCRight               ", PopCount(mbpCRights)                                                        ),
1696    ("BpCWrong               ", PopCount(mbpCWrongs)                                                        ),
1697    ("BpRRight               ", PopCount(mbpRRights)                                                        ),
1698    ("BpRWrong               ", PopCount(mbpRWrongs)                                                        ),
1699    ("ftb_false_hit          ", PopCount(ftb_false_hit)                                                     ),
1700    ("ftb_hit                ", PopCount(ftb_hit)                                                           ),
1701  )
1702  generatePerfEvent()
1703}
1704