xref: /XiangShan/src/main/scala/xiangshan/frontend/NewFtq.scala (revision 2caa7ef23d5d6566d68f5f98a59dc7ee9066b96a)
1/***************************************************************************************
2* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
3* Copyright (c) 2020-2021 Peng Cheng Laboratory
4*
5* XiangShan is licensed under Mulan PSL v2.
6* You can use this software according to the terms and conditions of the Mulan PSL v2.
7* You may obtain a copy of Mulan PSL v2 at:
8*          http://license.coscl.org.cn/MulanPSL2
9*
10* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13*
14* See the Mulan PSL v2 for more details.
15*
16*
17* Acknowledgement
18*
19* This implementation is inspired by several key papers:
20* [1] Glenn Reinman, Todd Austin, and Brad Calder. "[A scalable front-end architecture for fast instruction delivery.]
21* (https://doi.org/10.1109/ISCA.1999.765954)" 26th International Symposium on Computer Architecture (ISCA). 1999.
22*
23***************************************************************************************/
24
25package xiangshan.frontend
26
27import chisel3._
28import chisel3.util._
29import org.chipsalliance.cde.config.Parameters
30import utility._
31import utility.ChiselDB
32import utility.mbist.MbistPipeline
33import utils._
34import xiangshan._
35import xiangshan.backend.CtrlToFtqIO
36import xiangshan.frontend.icache._
37
38class FtqDebugBundle extends Bundle {
39  val pc        = UInt(39.W)
40  val target    = UInt(39.W)
41  val isBr      = Bool()
42  val isJmp     = Bool()
43  val isCall    = Bool()
44  val isRet     = Bool()
45  val misPred   = Bool()
46  val isTaken   = Bool()
47  val predStage = UInt(2.W)
48}
49
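// FtqPtr: pointer into the FTQ, implemented as a circular queue pointer
// (wrap flag + index) over FtqSize entries.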
50class FtqPtr(entries: Int) extends CircularQueuePtr[FtqPtr](
51      entries
52    ) {
53  def this()(implicit p: Parameters) = this(p(XSCoreParamsKey).FtqSize)
54}
55
56object FtqPtr {
57  def apply(f: Bool, v: UInt)(implicit p: Parameters): FtqPtr = {
58    val ptr = Wire(new FtqPtr)
59    ptr.flag  := f
60    ptr.value := v
61    ptr
62  }
63  def inverse(ptr: FtqPtr)(implicit p: Parameters): FtqPtr =
64    apply(!ptr.flag, ptr.value)
65}
66
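// FtqNRSRAM: numRead-read / 1-write SRAM for FTQ-sized payloads, built by
// replicating a 1R1W SRAMTemplate per read port; every replica receives the same writes.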
67class FtqNRSRAM[T <: Data](gen: T, numRead: Int)(implicit p: Parameters) extends XSModule {
68
69  val io = IO(new Bundle() {
70    val raddr = Input(Vec(numRead, UInt(log2Up(FtqSize).W)))
71    val ren   = Input(Vec(numRead, Bool()))
72    val rdata = Output(Vec(numRead, gen))
73    val waddr = Input(UInt(log2Up(FtqSize).W))
74    val wen   = Input(Bool())
75    val wdata = Input(gen)
76  })
77
78  for (i <- 0 until numRead) {
79    val sram = Module(new SRAMTemplate(gen, FtqSize, withClockGate = true, hasMbist = hasMbist))
80    sram.io.r.req.valid       := io.ren(i)
81    sram.io.r.req.bits.setIdx := io.raddr(i)
82    io.rdata(i)               := sram.io.r.resp.data(0)
83    sram.io.w.req.valid       := io.wen
84    sram.io.w.req.bits.setIdx := io.waddr
85    sram.io.w.req.bits.data   := VecInit(io.wdata)
86  }
87
88}
89
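// Ftq_RF_Components: per-entry PC information held in the FTQ PC memory: the fetch-block
// start address and the start address of the next line. getPc(offset) reconstructs the PC
// of the instruction at `offset`, selecting nextLineAddr when that instruction falls into
// the next line (isNextMask).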
90class Ftq_RF_Components(implicit p: Parameters) extends XSBundle with BPUUtils {
91  val startAddr     = UInt(VAddrBits.W)
92  val nextLineAddr  = UInt(VAddrBits.W)
93  val isNextMask    = Vec(PredictWidth, Bool())
94  val fallThruError = Bool()
95  // val carry = Bool()
96  def getPc(offset: UInt) = {
97    def getHigher(pc: UInt) = pc(VAddrBits - 1, log2Ceil(PredictWidth) + instOffsetBits + 1)
98    def getOffset(pc: UInt) = pc(log2Ceil(PredictWidth) + instOffsetBits, instOffsetBits)
99    Cat(
100      getHigher(Mux(isNextMask(offset) && startAddr(log2Ceil(PredictWidth) + instOffsetBits), nextLineAddr, startAddr)),
101      getOffset(startAddr) + offset,
102      0.U(instOffsetBits.W)
103    )
104  }
105  def fromBranchPrediction(resp: BranchPredictionBundle) = {
106    def carryPos(addr: UInt) = addr(instOffsetBits + log2Ceil(PredictWidth) + 1)
107    this.startAddr    := resp.pc(3)
108    this.nextLineAddr := resp.pc(3) + (FetchWidth * 4 * 2).U // may be broken on other configs
109    this.isNextMask := VecInit((0 until PredictWidth).map(i =>
110      (resp.pc(3)(log2Ceil(PredictWidth), 1) +& i.U)(log2Ceil(PredictWidth)).asBool
111    ))
112    this.fallThruError := resp.fallThruError(3)
113    this
114  }
115  override def toPrintable: Printable =
116    p"startAddr:${Hexadecimal(startAddr)}"
117}
118
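// Ftq_pd_Entry: compressed predecode info for one FTQ entry (branch mask, jump
// kind/offset/target, RVC mask), filled from the IFU writeback by fromPdWb and
// expanded back into a per-instruction PreDecodeInfo by toPd.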
119class Ftq_pd_Entry(implicit p: Parameters) extends XSBundle {
120  val brMask    = Vec(PredictWidth, Bool())
121  val jmpInfo   = ValidUndirectioned(Vec(3, Bool()))
122  val jmpOffset = UInt(log2Ceil(PredictWidth).W)
123  val jalTarget = UInt(VAddrBits.W)
124  val rvcMask   = Vec(PredictWidth, Bool())
125  def hasJal    = jmpInfo.valid && !jmpInfo.bits(0)
126  def hasJalr   = jmpInfo.valid && jmpInfo.bits(0)
127  def hasCall   = jmpInfo.valid && jmpInfo.bits(1)
128  def hasRet    = jmpInfo.valid && jmpInfo.bits(2)
129
130  def fromPdWb(pdWb: PredecodeWritebackBundle) = {
131    val pds = pdWb.pd
132    this.brMask        := VecInit(pds.map(pd => pd.isBr && pd.valid))
133    this.jmpInfo.valid := VecInit(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid)).asUInt.orR
134    this.jmpInfo.bits := ParallelPriorityMux(
135      pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid),
136      pds.map(pd => VecInit(pd.isJalr, pd.isCall, pd.isRet))
137    )
138    this.jmpOffset := ParallelPriorityEncoder(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid))
139    this.rvcMask   := VecInit(pds.map(pd => pd.isRVC))
140    this.jalTarget := pdWb.jalTarget
141  }
142
143  def toPd(offset: UInt) = {
144    require(offset.getWidth == log2Ceil(PredictWidth))
145    val pd = Wire(new PreDecodeInfo)
146    pd.valid := true.B
147    pd.isRVC := rvcMask(offset)
148    val isBr   = brMask(offset)
149    val isJalr = offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(0)
150    pd.brType := Cat(offset === jmpOffset && jmpInfo.valid, isJalr || isBr)
151    pd.isCall := offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(1)
152    pd.isRet  := offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(2)
153    pd
154  }
155}
156
157class PrefetchPtrDB(implicit p: Parameters) extends Bundle {
158  val fromFtqPtr = UInt(log2Up(p(XSCoreParamsKey).FtqSize).W)
159  val fromIfuPtr = UInt(log2Up(p(XSCoreParamsKey).FtqSize).W)
160}
161
162class Ftq_Redirect_SRAMEntry(implicit p: Parameters) extends SpeculativeInfo {
163  val sc_disagree = if (!env.FPGAPlatform) Some(Vec(numBr, Bool())) else None
164}
165
166class Ftq_1R_SRAMEntry(implicit p: Parameters) extends XSBundle with HasBPUConst {
167  val meta      = UInt(MaxMetaLength.W)
168  val ftb_entry = new FTBEntry
169}
170
171class Ftq_Pred_Info(implicit p: Parameters) extends XSBundle {
172  val target   = UInt(VAddrBits.W)
173  val cfiIndex = ValidUndirectioned(UInt(log2Ceil(PredictWidth).W))
174}
175
176class FtqRead[T <: Data](private val gen: T)(implicit p: Parameters) extends XSBundle {
177  val valid  = Output(Bool())
178  val ptr    = Output(new FtqPtr)
179  val offset = Output(UInt(log2Ceil(PredictWidth).W))
180  val data   = Input(gen)
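  // Usage sketch (illustrative names, not from this file): a consumer drives the request
  // fields and gets the read data back in one call, e.g.
  //   val rdata = ftqReadPort(reqValid, reqPtr, reqOffset)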
181  def apply(valid: Bool, ptr: FtqPtr, offset: UInt) = {
182    this.valid  := valid
183    this.ptr    := ptr
184    this.offset := offset
185    this.data
186  }
187}
188
189class FtqToBpuIO(implicit p: Parameters) extends XSBundle {
190  val redirect       = Valid(new BranchPredictionRedirect)
191  val update         = Valid(new BranchPredictionUpdate)
192  val enq_ptr        = Output(new FtqPtr)
193  val redirctFromIFU = Output(Bool())
194}
195
196class BpuFlushInfo(implicit p: Parameters) extends XSBundle with HasCircularQueuePtrHelper {
197  // when the ifu pipeline is not stalled,
198  // a packet from bpu s3 can have reached at most ifu f1
199  val s2 = Valid(new FtqPtr)
200  val s3 = Valid(new FtqPtr)
201  def shouldFlushBy(src: Valid[FtqPtr], idx_to_flush: FtqPtr) =
202    src.valid && !isAfter(src.bits, idx_to_flush)
203  def shouldFlushByStage2(idx: FtqPtr) = shouldFlushBy(s2, idx)
204  def shouldFlushByStage3(idx: FtqPtr) = shouldFlushBy(s3, idx)
205}
206
207class FtqToIfuIO(implicit p: Parameters) extends XSBundle {
208  val req              = Decoupled(new FetchRequestBundle)
209  val redirect         = Valid(new BranchPredictionRedirect)
210  val topdown_redirect = Valid(new BranchPredictionRedirect)
211  val flushFromBpu     = new BpuFlushInfo
212}
213
214class FtqToICacheIO(implicit p: Parameters) extends XSBundle {
215  // NOTE: req.bits must be prepared in cycle T,
216  // while req.valid is asserted in cycle T + 1
217  val req = Decoupled(new FtqToICacheRequestBundle)
218}
219
220class FtqToPrefetchIO(implicit p: Parameters) extends XSBundle {
221  val req              = Decoupled(new FtqICacheInfo)
222  val flushFromBpu     = new BpuFlushInfo
223  val backendException = UInt(ExceptionType.width.W)
224}
225
226trait HasBackendRedirectInfo extends HasXSParameter {
227  def isLoadReplay(r: Valid[Redirect]) = r.bits.flushItself()
228}
229
230class FtqToCtrlIO(implicit p: Parameters) extends XSBundle with HasBackendRedirectInfo {
231  // write to backend pc mem
232  val pc_mem_wen   = Output(Bool())
233  val pc_mem_waddr = Output(UInt(log2Ceil(FtqSize).W))
234  val pc_mem_wdata = Output(new Ftq_RF_Components)
235  // newest target
236  val newest_entry_en     = Output(Bool())
237  val newest_entry_target = Output(UInt(VAddrBits.W))
238  val newest_entry_ptr    = Output(new FtqPtr)
239}
240
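// FTBEntryGen: combinational logic that builds the FTB entry to write back on a BPU
// update. On a miss (hit = false) it creates a fresh entry from predecode and the actual
// taken CFI; on a hit it derives a modified copy of old_entry (inserting a newly
// discovered branch, correcting a jalr target, or weakening strong_bias).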
241class FTBEntryGen(implicit p: Parameters) extends XSModule with HasBackendRedirectInfo with HasBPUParameter {
242  val io = IO(new Bundle {
243    val start_addr     = Input(UInt(VAddrBits.W))
244    val old_entry      = Input(new FTBEntry)
245    val pd             = Input(new Ftq_pd_Entry)
246    val cfiIndex       = Flipped(Valid(UInt(log2Ceil(PredictWidth).W)))
247    val target         = Input(UInt(VAddrBits.W))
248    val hit            = Input(Bool())
249    val mispredict_vec = Input(Vec(PredictWidth, Bool()))
250
251    val new_entry         = Output(new FTBEntry)
252    val new_br_insert_pos = Output(Vec(numBr, Bool()))
253    val taken_mask        = Output(Vec(numBr, Bool()))
254    val jmp_taken         = Output(Bool())
255    val mispred_mask      = Output(Vec(numBr + 1, Bool()))
256
257    // for perf counters
258    val is_init_entry           = Output(Bool())
259    val is_old_entry            = Output(Bool())
260    val is_new_br               = Output(Bool())
261    val is_jalr_target_modified = Output(Bool())
262    val is_strong_bias_modified = Output(Bool())
263    val is_br_full              = Output(Bool())
264  })
265
266  // no mispredictions detected at predecode
267  val hit = io.hit
268  val pd  = io.pd
269
270  val init_entry = WireInit(0.U.asTypeOf(new FTBEntry))
271
272  val cfi_is_br       = pd.brMask(io.cfiIndex.bits) && io.cfiIndex.valid
273  val entry_has_jmp   = pd.jmpInfo.valid
274  val new_jmp_is_jal  = entry_has_jmp && !pd.jmpInfo.bits(0) && io.cfiIndex.valid
275  val new_jmp_is_jalr = entry_has_jmp && pd.jmpInfo.bits(0) && io.cfiIndex.valid
276  val new_jmp_is_call = entry_has_jmp && pd.jmpInfo.bits(1) && io.cfiIndex.valid
277  val new_jmp_is_ret  = entry_has_jmp && pd.jmpInfo.bits(2) && io.cfiIndex.valid
278  val last_jmp_rvi    = entry_has_jmp && pd.jmpOffset === (PredictWidth - 1).U && !pd.rvcMask.last
279  // val last_br_rvi = cfi_is_br && io.cfiIndex.bits === (PredictWidth-1).U && !pd.rvcMask.last
280
281  val cfi_is_jal  = io.cfiIndex.bits === pd.jmpOffset && new_jmp_is_jal
282  val cfi_is_jalr = io.cfiIndex.bits === pd.jmpOffset && new_jmp_is_jalr
283
284  def carryPos = log2Ceil(PredictWidth) + instOffsetBits
285  def getLower(pc: UInt) = pc(carryPos - 1, instOffsetBits)
286  // if not hit, establish a new entry
287  init_entry.valid := true.B
288  // tag is left for ftb to assign
289
290  // case br
291  val init_br_slot = init_entry.getSlotForBr(0)
292  when(cfi_is_br) {
293    init_br_slot.valid  := true.B
294    init_br_slot.offset := io.cfiIndex.bits
295    init_br_slot.setLowerStatByTarget(io.start_addr, io.target, numBr == 1)
296    init_entry.strong_bias(0) := true.B // set to strong bias on init
297  }
298
299  // case jmp
300  when(entry_has_jmp) {
301    init_entry.tailSlot.offset := pd.jmpOffset
302    init_entry.tailSlot.valid  := new_jmp_is_jal || new_jmp_is_jalr
303    init_entry.tailSlot.setLowerStatByTarget(io.start_addr, Mux(cfi_is_jalr, io.target, pd.jalTarget), isShare = false)
304    init_entry.strong_bias(numBr - 1) := new_jmp_is_jalr // set strong bias for the jalr on init
305  }
306
307  val jmpPft = getLower(io.start_addr) +& pd.jmpOffset +& Mux(pd.rvcMask(pd.jmpOffset), 1.U, 2.U)
308  init_entry.pftAddr := Mux(entry_has_jmp && !last_jmp_rvi, jmpPft, getLower(io.start_addr))
309  init_entry.carry   := Mux(entry_has_jmp && !last_jmp_rvi, jmpPft(carryPos - instOffsetBits), true.B)
310
311  require(
312    isPow2(PredictWidth),
313    "If PredictWidth does not satisfy the power of 2," +
314      "pftAddr := getLower(io.start_addr) and carry := true.B  not working!!"
315  )
316
317  init_entry.isJalr := new_jmp_is_jalr
318  init_entry.isCall := new_jmp_is_call
319  init_entry.isRet  := new_jmp_is_ret
320  // if the last slot holds a non-RVC jump, the fall-through address points into the middle of that instruction
321  init_entry.last_may_be_rvi_call := pd.jmpOffset === (PredictWidth - 1).U && !pd.rvcMask(pd.jmpOffset)
322
323  // if hit, check whether a new cfi(only br is possible) is detected
324  val oe              = io.old_entry
325  val br_recorded_vec = oe.getBrRecordedVec(io.cfiIndex.bits)
326  val br_recorded     = br_recorded_vec.asUInt.orR
327  val is_new_br       = cfi_is_br && !br_recorded
328  val new_br_offset   = io.cfiIndex.bits
329  // vec(i) means new br will be inserted BEFORE old br(i)
330  val allBrSlotsVec = oe.allSlotsForBr
331  val new_br_insert_onehot = VecInit((0 until numBr).map {
332    i =>
333      i match {
334        case 0 =>
335          !allBrSlotsVec(0).valid || new_br_offset < allBrSlotsVec(0).offset
336        case idx =>
337          allBrSlotsVec(idx - 1).valid && new_br_offset > allBrSlotsVec(idx - 1).offset &&
338          (!allBrSlotsVec(idx).valid || new_br_offset < allBrSlotsVec(idx).offset)
339      }
340  })
341
342  val old_entry_modified = WireInit(io.old_entry)
343  for (i <- 0 until numBr) {
344    val slot = old_entry_modified.allSlotsForBr(i)
345    when(new_br_insert_onehot(i)) {
346      slot.valid  := true.B
347      slot.offset := new_br_offset
348      slot.setLowerStatByTarget(io.start_addr, io.target, i == numBr - 1)
349      old_entry_modified.strong_bias(i) := true.B
350    }.elsewhen(new_br_offset > oe.allSlotsForBr(i).offset) {
351      old_entry_modified.strong_bias(i) := false.B
352      // all other fields remain unchanged
353    }.otherwise {
354      // case i == 0, remain unchanged
355      if (i != 0) {
356        val noNeedToMoveFromFormerSlot = (i == numBr - 1).B && !oe.brSlots.last.valid
357        when(!noNeedToMoveFromFormerSlot) {
358          slot.fromAnotherSlot(oe.allSlotsForBr(i - 1))
359          old_entry_modified.strong_bias(i) := oe.strong_bias(i)
360        }
361      }
362    }
363  }
364
365  // two circumstances:
366  // 1. oe: | br | j  |, new br should be in front of j, thus addr of j should be new pft
367  // 2. oe: | br | br |, new br could be anywhere between, thus new pft is the addr of either
368  //        the previous last br or the new br
369  val may_have_to_replace = oe.noEmptySlotForNewBr
370  val pft_need_to_change  = is_new_br && may_have_to_replace
371  // it should either be the given last br or the new br
372  when(pft_need_to_change) {
373    val new_pft_offset =
374      Mux(!new_br_insert_onehot.asUInt.orR, new_br_offset, oe.allSlotsForBr.last.offset)
375
376    // set jmp to invalid
377    old_entry_modified.pftAddr              := getLower(io.start_addr) + new_pft_offset
378    old_entry_modified.carry                := (getLower(io.start_addr) +& new_pft_offset).head(1).asBool
379    old_entry_modified.last_may_be_rvi_call := false.B
380    old_entry_modified.isCall               := false.B
381    old_entry_modified.isRet                := false.B
382    old_entry_modified.isJalr               := false.B
383  }
384
385  val old_entry_jmp_target_modified = WireInit(oe)
386  val old_target      = oe.tailSlot.getTarget(io.start_addr) // may be wrong because we store only 20 lowest bits
387  val old_tail_is_jmp = !oe.tailSlot.sharing
388  val jalr_target_modified = cfi_is_jalr && (old_target =/= io.target) && old_tail_is_jmp // TODO: pass full jalr target
389  when(jalr_target_modified) {
390    old_entry_jmp_target_modified.setByJmpTarget(io.start_addr, io.target)
391    old_entry_jmp_target_modified.strong_bias := 0.U.asTypeOf(Vec(numBr, Bool()))
392  }
393
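  // strong_bias is kept only while the recorded branch is still the taken CFI of this
  // update; otherwise it is dropped, and strong_bias_modified_vec marks that the entry
  // needs to be rewritten.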
394  val old_entry_strong_bias    = WireInit(oe)
395  val strong_bias_modified_vec = Wire(Vec(numBr, Bool())) // whether modified or not
396  for (i <- 0 until numBr) {
397    when(br_recorded_vec(0)) {
398      old_entry_strong_bias.strong_bias(0) :=
399        oe.strong_bias(0) && io.cfiIndex.valid && oe.brValids(0) && io.cfiIndex.bits === oe.brOffset(0)
400    }.elsewhen(br_recorded_vec(numBr - 1)) {
401      old_entry_strong_bias.strong_bias(0) := false.B
402      old_entry_strong_bias.strong_bias(numBr - 1) :=
403        oe.strong_bias(numBr - 1) && io.cfiIndex.valid && oe.brValids(numBr - 1) && io.cfiIndex.bits === oe.brOffset(
404          numBr - 1
405        )
406    }
407    strong_bias_modified_vec(i) := oe.strong_bias(i) && oe.brValids(i) && !old_entry_strong_bias.strong_bias(i)
408  }
409  val strong_bias_modified = strong_bias_modified_vec.reduce(_ || _)
410
411  val derived_from_old_entry =
412    Mux(is_new_br, old_entry_modified, Mux(jalr_target_modified, old_entry_jmp_target_modified, old_entry_strong_bias))
413
414  io.new_entry := Mux(!hit, init_entry, derived_from_old_entry)
415
416  io.new_br_insert_pos := new_br_insert_onehot
417  io.taken_mask := VecInit((io.new_entry.brOffset zip io.new_entry.brValids).map {
418    case (off, v) => io.cfiIndex.bits === off && io.cfiIndex.valid && v
419  })
420  io.jmp_taken := io.new_entry.jmpValid && io.new_entry.tailSlot.offset === io.cfiIndex.bits
421  for (i <- 0 until numBr) {
422    io.mispred_mask(i) := io.new_entry.brValids(i) && io.mispredict_vec(io.new_entry.brOffset(i))
423  }
424  io.mispred_mask.last := io.new_entry.jmpValid && io.mispredict_vec(pd.jmpOffset)
425
426  // for perf counters
427  io.is_init_entry           := !hit
428  io.is_old_entry            := hit && !is_new_br && !jalr_target_modified && !strong_bias_modified
429  io.is_new_br               := hit && is_new_br
430  io.is_jalr_target_modified := hit && jalr_target_modified
431  io.is_strong_bias_modified := hit && strong_bias_modified
432  io.is_br_full              := hit && is_new_br && may_have_to_replace
433}
434
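// FtqPcMemWrapper: synchronous PC memory (one Ftq_RF_Components per FTQ entry) with one
// write port and read ports for ifuPtr, ifuPtrPlus1, ifuPtrPlus2, pfPtr, pfPtrPlus1,
// commPtr and commPtrPlus1. Read addresses are driven by the next-state pointer values
// (*_w), so the synchronous read data is available as the pointers take their new values.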
435class FtqPcMemWrapper(numOtherReads: Int)(implicit p: Parameters) extends XSModule with HasBackendRedirectInfo {
436  val io = IO(new Bundle {
437    val ifuPtr_w           = Input(new FtqPtr)
438    val ifuPtrPlus1_w      = Input(new FtqPtr)
439    val ifuPtrPlus2_w      = Input(new FtqPtr)
440    val pfPtr_w            = Input(new FtqPtr)
441    val pfPtrPlus1_w       = Input(new FtqPtr)
442    val commPtr_w          = Input(new FtqPtr)
443    val commPtrPlus1_w     = Input(new FtqPtr)
444    val ifuPtr_rdata       = Output(new Ftq_RF_Components)
445    val ifuPtrPlus1_rdata  = Output(new Ftq_RF_Components)
446    val ifuPtrPlus2_rdata  = Output(new Ftq_RF_Components)
447    val pfPtr_rdata        = Output(new Ftq_RF_Components)
448    val pfPtrPlus1_rdata   = Output(new Ftq_RF_Components)
449    val commPtr_rdata      = Output(new Ftq_RF_Components)
450    val commPtrPlus1_rdata = Output(new Ftq_RF_Components)
451
452    val wen   = Input(Bool())
453    val waddr = Input(UInt(log2Ceil(FtqSize).W))
454    val wdata = Input(new Ftq_RF_Components)
455  })
456
457  val num_pc_read = numOtherReads + 5
458  val mem         = Module(new SyncDataModuleTemplate(new Ftq_RF_Components, FtqSize, num_pc_read, 1, "FtqPC"))
459  mem.io.wen(0)   := io.wen
460  mem.io.waddr(0) := io.waddr
461  mem.io.wdata(0) := io.wdata
462
463  // read one cycle ahead for ftq local reads
464  val raddr_vec = VecInit(Seq(
465    io.ifuPtr_w.value,
466    io.ifuPtrPlus1_w.value,
467    io.ifuPtrPlus2_w.value,
468    io.pfPtr_w.value,
469    io.pfPtrPlus1_w.value,
470    io.commPtrPlus1_w.value,
471    io.commPtr_w.value
472  ))
473
474  mem.io.raddr := raddr_vec
475
476  io.ifuPtr_rdata       := mem.io.rdata.dropRight(6).last
477  io.ifuPtrPlus1_rdata  := mem.io.rdata.dropRight(5).last
478  io.ifuPtrPlus2_rdata  := mem.io.rdata.dropRight(4).last
479  io.pfPtr_rdata        := mem.io.rdata.dropRight(3).last
480  io.pfPtrPlus1_rdata   := mem.io.rdata.dropRight(2).last
481  io.commPtrPlus1_rdata := mem.io.rdata.dropRight(1).last
482  io.commPtr_rdata      := mem.io.rdata.last
483}
484
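// Ftq (Fetch Target Queue): buffers fetch targets produced by the BPU, issues fetch
// requests to the IFU / ICache / instruction prefetcher, collects predecode writeback
// from the IFU, processes redirects from the IFU and the backend, and sends training
// updates and redirects back to the BPU.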
485class Ftq(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelper
486    with HasBackendRedirectInfo with BPUUtils with HasBPUConst with HasPerfEvents
487    with HasICacheParameters {
488  val io = IO(new Bundle {
489    val fromBpu     = Flipped(new BpuToFtqIO)
490    val fromIfu     = Flipped(new IfuToFtqIO)
491    val fromBackend = Flipped(new CtrlToFtqIO)
492
493    val toBpu       = new FtqToBpuIO
494    val toIfu       = new FtqToIfuIO
495    val toICache    = new FtqToICacheIO
496    val toBackend   = new FtqToCtrlIO
497    val toPrefetch  = new FtqToPrefetchIO
498    val icacheFlush = Output(Bool())
499
500    val bpuInfo = new Bundle {
501      val bpRight = Output(UInt(XLEN.W))
502      val bpWrong = Output(UInt(XLEN.W))
503    }
504
505    val mmioCommitRead = Flipped(new mmioCommitRead)
506
507    // for perf
508    val ControlBTBMissBubble = Output(Bool())
509    val TAGEMissBubble       = Output(Bool())
510    val SCMissBubble         = Output(Bool())
511    val ITTAGEMissBubble     = Output(Bool())
512    val RASMissBubble        = Output(Bool())
513  })
514  io.bpuInfo := DontCare
515
516  val topdown_stage = RegInit(0.U.asTypeOf(new FrontendTopDownBundle))
517  // only driven by clock, not valid-ready
518  topdown_stage                  := io.fromBpu.resp.bits.topdown_info
519  io.toIfu.req.bits.topdown_info := topdown_stage
520
521  val ifuRedirected = RegInit(VecInit(Seq.fill(FtqSize)(false.B)))
522
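  // The backend may provide the redirecting FTQ index one cycle ahead of the redirect
  // itself (ftqIdxAhead), which lets the redirect-related memories be read early. When
  // the real redirect then arrives (realAhdValid), the un-delayed backendRedirect is
  // forwarded; otherwise the one-cycle-delayed registered copy (backendRedirectReg) is used.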
523  // io.fromBackend.ftqIdxAhead: bju(BjuCnt) + ldReplay + exception
524  val ftqIdxAhead = VecInit(Seq.tabulate(FtqRedirectAheadNum)(i => io.fromBackend.ftqIdxAhead(i))) // only bju
525  val ftqIdxSelOH = io.fromBackend.ftqIdxSelOH.bits(FtqRedirectAheadNum - 1, 0)
526
527  val aheadValid         = ftqIdxAhead.map(_.valid).reduce(_ | _) && !io.fromBackend.redirect.valid
528  val realAhdValid       = io.fromBackend.redirect.valid && (ftqIdxSelOH > 0.U) && RegNext(aheadValid)
529  val backendRedirect    = Wire(Valid(new BranchPredictionRedirect))
530  val backendRedirectReg = Wire(Valid(new BranchPredictionRedirect))
531  backendRedirectReg.valid := RegNext(Mux(realAhdValid, false.B, backendRedirect.valid))
532  backendRedirectReg.bits  := RegEnable(backendRedirect.bits, backendRedirect.valid)
533  val fromBackendRedirect = Wire(Valid(new BranchPredictionRedirect))
534  fromBackendRedirect := Mux(realAhdValid, backendRedirect, backendRedirectReg)
535
536  val stage2Flush  = backendRedirect.valid
537  val backendFlush = stage2Flush || RegNext(stage2Flush)
538  val ifuFlush     = Wire(Bool())
539
540  val flush = stage2Flush || RegNext(stage2Flush)
541
542  val allowBpuIn, allowToIfu = WireInit(false.B)
543  val flushToIfu             = !allowToIfu
544  allowBpuIn := !ifuFlush && !backendRedirect.valid && !backendRedirectReg.valid
545  allowToIfu := !ifuFlush && !backendRedirect.valid && !backendRedirectReg.valid
546
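  // FTQ pointers: bpuPtr - next entry to be enqueued by the BPU; ifuPtr / pfPtr - next
  // entry to be sent to the IFU / prefetcher; ifuWbPtr - next entry expecting IFU
  // predecode writeback; commPtr - next entry to be committed; robCommPtr tracks entries
  // committed by the ROB. The Plus1/Plus2 copies and the *_write wires exist so that
  // ftq_pc_mem can be addressed with next-cycle pointer values.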
547  def copyNum                                              = 5
548  val bpuPtr, ifuPtr, pfPtr, ifuWbPtr, commPtr, robCommPtr = RegInit(FtqPtr(false.B, 0.U))
549  val ifuPtrPlus1                                          = RegInit(FtqPtr(false.B, 1.U))
550  val ifuPtrPlus2                                          = RegInit(FtqPtr(false.B, 2.U))
551  val pfPtrPlus1                                           = RegInit(FtqPtr(false.B, 1.U))
552  val commPtrPlus1                                         = RegInit(FtqPtr(false.B, 1.U))
553  val copied_ifu_ptr                                       = Seq.fill(copyNum)(RegInit(FtqPtr(false.B, 0.U)))
554  val copied_bpu_ptr                                       = Seq.fill(copyNum)(RegInit(FtqPtr(false.B, 0.U)))
555  require(FtqSize >= 4)
556  val ifuPtr_write       = WireInit(ifuPtr)
557  val ifuPtrPlus1_write  = WireInit(ifuPtrPlus1)
558  val ifuPtrPlus2_write  = WireInit(ifuPtrPlus2)
559  val pfPtr_write        = WireInit(pfPtr)
560  val pfPtrPlus1_write   = WireInit(pfPtrPlus1)
561  val ifuWbPtr_write     = WireInit(ifuWbPtr)
562  val commPtr_write      = WireInit(commPtr)
563  val commPtrPlus1_write = WireInit(commPtrPlus1)
564  val robCommPtr_write   = WireInit(robCommPtr)
565  ifuPtr       := ifuPtr_write
566  ifuPtrPlus1  := ifuPtrPlus1_write
567  ifuPtrPlus2  := ifuPtrPlus2_write
568  pfPtr        := pfPtr_write
569  pfPtrPlus1   := pfPtrPlus1_write
570  ifuWbPtr     := ifuWbPtr_write
571  commPtr      := commPtr_write
572  commPtrPlus1 := commPtrPlus1_write
573  copied_ifu_ptr.map { ptr =>
574    ptr := ifuPtr_write
575    dontTouch(ptr)
576  }
577  robCommPtr := robCommPtr_write
578  val validEntries = distanceBetween(bpuPtr, commPtr)
579  val canCommit    = Wire(Bool())
580
581  // Instruction page faults and instruction access faults are sent from the backend with redirect requests.
582  // When an IPF or IAF is sent, backendPcFaultPtr points to the FTQ entry whose first instruction
583  // raises the IPF or IAF, which is ifuWbPtr_write or ifuPtr_write.
584  // backendException is only cleared after IFU has written back that FTQ entry; this
585  // makes sure that IAF and IPF are correctly raised instead of being flushed by redirect requests.
586  val backendException  = RegInit(ExceptionType.none)
587  val backendPcFaultPtr = RegInit(FtqPtr(false.B, 0.U))
588  when(fromBackendRedirect.valid) {
589    backendException := ExceptionType.fromOH(
590      has_pf = fromBackendRedirect.bits.cfiUpdate.backendIPF,
591      has_gpf = fromBackendRedirect.bits.cfiUpdate.backendIGPF,
592      has_af = fromBackendRedirect.bits.cfiUpdate.backendIAF
593    )
594    when(
595      fromBackendRedirect.bits.cfiUpdate.backendIPF || fromBackendRedirect.bits.cfiUpdate.backendIGPF ||
596        fromBackendRedirect.bits.cfiUpdate.backendIAF
597    ) {
598      backendPcFaultPtr := ifuWbPtr_write
599    }
600  }.elsewhen(ifuWbPtr =/= backendPcFaultPtr) {
601    backendException := ExceptionType.none
602  }
603
604  // **********************************************************************
605  // **************************** enq from bpu ****************************
606  // **********************************************************************
607  val new_entry_ready = validEntries < FtqSize.U || canCommit
608  io.fromBpu.resp.ready := new_entry_ready
609
610  val bpu_s2_resp     = io.fromBpu.resp.bits.s2
611  val bpu_s3_resp     = io.fromBpu.resp.bits.s3
612  val bpu_s2_redirect = bpu_s2_resp.valid(3) && bpu_s2_resp.hasRedirect(3)
613  val bpu_s3_redirect = bpu_s3_resp.valid(3) && bpu_s3_resp.hasRedirect(3)
614
615  io.toBpu.enq_ptr := bpuPtr
616  val enq_fire    = io.fromBpu.resp.fire && allowBpuIn // from bpu s1
617  val bpu_in_fire = (io.fromBpu.resp.fire || bpu_s2_redirect || bpu_s3_redirect) && allowBpuIn
618
619  val bpu_in_resp     = io.fromBpu.resp.bits.selectedResp
620  val bpu_in_stage    = io.fromBpu.resp.bits.selectedRespIdxForFtq
621  val bpu_in_resp_ptr = Mux(bpu_in_stage === BP_S1, bpuPtr, bpu_in_resp.ftq_idx)
622  val bpu_in_resp_idx = bpu_in_resp_ptr.value
623
624  // read ports:      pfReq1 + pfReq2 ++  ifuReq1 + ifuReq2 + ifuReq3 + commitUpdate2 + commitUpdate
625  val ftq_pc_mem = Module(new FtqPcMemWrapper(2))
626  // resp from uBTB
627  ftq_pc_mem.io.wen   := bpu_in_fire
628  ftq_pc_mem.io.waddr := bpu_in_resp_idx
629  ftq_pc_mem.io.wdata.fromBranchPrediction(bpu_in_resp)
630
631  //                                                            ifuRedirect + backendRedirect + commit
632  val ftq_redirect_mem = Module(new SyncDataModuleTemplate(
633    new Ftq_Redirect_SRAMEntry,
634    FtqSize,
635    IfuRedirectNum + FtqRedirectAheadNum + 1,
636    1,
637    hasRen = true
638  ))
639  // this info is intended to be enqueued at the last stage of bpu
640  ftq_redirect_mem.io.wen(0)   := io.fromBpu.resp.bits.lastStage.valid(3)
641  ftq_redirect_mem.io.waddr(0) := io.fromBpu.resp.bits.lastStage.ftq_idx.value
642  ftq_redirect_mem.io.wdata(0) := io.fromBpu.resp.bits.last_stage_spec_info
643  println(f"ftq redirect MEM: entry ${ftq_redirect_mem.io.wdata(0).getWidth} * ${FtqSize} * 3")
644
645  val ftq_meta_1r_sram = Module(new FtqNRSRAM(new Ftq_1R_SRAMEntry, 1))
646  // this info is intended to be enqueued at the last stage of bpu
647  ftq_meta_1r_sram.io.wen             := io.fromBpu.resp.bits.lastStage.valid(3)
648  ftq_meta_1r_sram.io.waddr           := io.fromBpu.resp.bits.lastStage.ftq_idx.value
649  ftq_meta_1r_sram.io.wdata.meta      := io.fromBpu.resp.bits.last_stage_meta
650  ftq_meta_1r_sram.io.wdata.ftb_entry := io.fromBpu.resp.bits.last_stage_ftb_entry
651  //                                                            ifuRedirect + backendRedirect (commit moved to ftq_meta_1r_sram)
652  val ftb_entry_mem = Module(new SyncDataModuleTemplate(
653    new FTBEntry_FtqMem,
654    FtqSize,
655    IfuRedirectNum + FtqRedirectAheadNum,
656    1,
657    hasRen = true
658  ))
659  ftb_entry_mem.io.wen(0)   := io.fromBpu.resp.bits.lastStage.valid(3)
660  ftb_entry_mem.io.waddr(0) := io.fromBpu.resp.bits.lastStage.ftq_idx.value
661  ftb_entry_mem.io.wdata(0) := io.fromBpu.resp.bits.last_stage_ftb_entry
662  private val mbistPl = MbistPipeline.PlaceMbistPipeline(1, "MbistPipeFtq", hasMbist)
663
664  // multi-write
665  val update_target = Reg(Vec(FtqSize, UInt(VAddrBits.W))) // could be taken target or fallThrough //TODO: remove this
666  val newest_entry_target          = Reg(UInt(VAddrBits.W))
667  val newest_entry_target_modified = RegInit(false.B)
668  val newest_entry_ptr             = Reg(new FtqPtr)
669  val newest_entry_ptr_modified    = RegInit(false.B)
670  val cfiIndex_vec                 = Reg(Vec(FtqSize, ValidUndirectioned(UInt(log2Ceil(PredictWidth).W))))
671  val mispredict_vec               = Reg(Vec(FtqSize, Vec(PredictWidth, Bool())))
672  val pred_stage                   = Reg(Vec(FtqSize, UInt(2.W)))
673  val pred_s1_cycle                = if (!env.FPGAPlatform) Some(Reg(Vec(FtqSize, UInt(64.W)))) else None
674
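  // Per-entry, per-instruction commit state. Rows are updated through
  // commitStateQueueNext under per-row enables so that a single clock-gate cell can
  // control a whole row.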
675  val c_empty :: c_toCommit :: c_committed :: c_flushed :: Nil = Enum(4)
676  val commitStateQueueReg = RegInit(VecInit(Seq.fill(FtqSize) {
677    VecInit(Seq.fill(PredictWidth)(c_empty))
678  }))
679  val commitStateQueueEnable = WireInit(VecInit(Seq.fill(FtqSize)(false.B)))
680  val commitStateQueueNext   = WireInit(commitStateQueueReg)
681
682  for (f <- 0 until FtqSize) {
683    when(commitStateQueueEnable(f)) {
684      commitStateQueueReg(f) := commitStateQueueNext(f)
685    }
686  }
687
688  val f_to_send :: f_sent :: Nil = Enum(2)
689  val entry_fetch_status         = RegInit(VecInit(Seq.fill(FtqSize)(f_sent)))
690
691  val h_not_hit :: h_false_hit :: h_hit :: Nil = Enum(3)
692  val entry_hit_status                         = RegInit(VecInit(Seq.fill(FtqSize)(h_not_hit)))
693
694  // modify registers one cycle later to cut critical path
695  val last_cycle_bpu_in       = RegNext(bpu_in_fire)
696  val last_cycle_bpu_in_ptr   = RegEnable(bpu_in_resp_ptr, bpu_in_fire)
697  val last_cycle_bpu_in_idx   = last_cycle_bpu_in_ptr.value
698  val last_cycle_bpu_target   = RegEnable(bpu_in_resp.getTarget(3), bpu_in_fire)
699  val last_cycle_cfiIndex     = RegEnable(bpu_in_resp.cfiIndex(3), bpu_in_fire)
700  val last_cycle_bpu_in_stage = RegEnable(bpu_in_stage, bpu_in_fire)
701
702  def extra_copyNum_for_commitStateQueue = 2
703  val copied_last_cycle_bpu_in =
704    VecInit(Seq.fill(copyNum + extra_copyNum_for_commitStateQueue)(RegNext(bpu_in_fire)))
705  val copied_last_cycle_bpu_in_ptr_for_ftq =
706    VecInit(Seq.fill(extra_copyNum_for_commitStateQueue)(RegEnable(bpu_in_resp_ptr, bpu_in_fire)))
707
708  newest_entry_target_modified := false.B
709  newest_entry_ptr_modified    := false.B
710  when(last_cycle_bpu_in) {
711    entry_fetch_status(last_cycle_bpu_in_idx) := f_to_send
712    cfiIndex_vec(last_cycle_bpu_in_idx)       := last_cycle_cfiIndex
713    pred_stage(last_cycle_bpu_in_idx)         := last_cycle_bpu_in_stage
714
715    update_target(last_cycle_bpu_in_idx) := last_cycle_bpu_target // TODO: remove this
716    newest_entry_target_modified         := true.B
717    newest_entry_target                  := last_cycle_bpu_target
718    newest_entry_ptr_modified            := true.B
719    newest_entry_ptr                     := last_cycle_bpu_in_ptr
720  }
721
722  // reduce fanout by delaying the write for a cycle
723  when(RegNext(last_cycle_bpu_in)) {
724    mispredict_vec(RegEnable(last_cycle_bpu_in_idx, last_cycle_bpu_in)) :=
725      WireInit(VecInit(Seq.fill(PredictWidth)(false.B)))
726  }
727
728  // record s1 pred cycles
729  pred_s1_cycle.map { vec =>
730    when(bpu_in_fire && (bpu_in_stage === BP_S1)) {
731      vec(bpu_in_resp_ptr.value) := bpu_in_resp.full_pred(0).predCycle.getOrElse(0.U)
732    }
733  }
734
735  // reduce fanout using copied last_cycle_bpu_in and copied last_cycle_bpu_in_ptr
736  val copied_last_cycle_bpu_in_for_ftq = copied_last_cycle_bpu_in.takeRight(extra_copyNum_for_commitStateQueue)
737  copied_last_cycle_bpu_in_for_ftq.zip(copied_last_cycle_bpu_in_ptr_for_ftq).zipWithIndex.map {
738    case ((in, ptr), i) =>
739      when(in) {
740        val perSetEntries = FtqSize / extra_copyNum_for_commitStateQueue // 32
741        require(FtqSize % extra_copyNum_for_commitStateQueue == 0)
742        for (j <- 0 until perSetEntries) {
743          when(ptr.value === (i * perSetEntries + j).U) {
744            commitStateQueueNext(i * perSetEntries + j) := VecInit(Seq.fill(PredictWidth)(c_empty))
745            // Clock gating optimization, use 1 gate cell to control a row
746            commitStateQueueEnable(i * perSetEntries + j) := true.B
747          }
748        }
749      }
750  }
751
752  bpuPtr := bpuPtr + enq_fire
753  copied_bpu_ptr.map(_ := bpuPtr + enq_fire)
754  when(io.toIfu.req.fire && allowToIfu) {
755    ifuPtr_write      := ifuPtrPlus1
756    ifuPtrPlus1_write := ifuPtrPlus2
757    ifuPtrPlus2_write := ifuPtrPlus2 + 1.U
758  }
759  when(io.toPrefetch.req.fire && allowToIfu) {
760    pfPtr_write      := pfPtrPlus1
761    pfPtrPlus1_write := pfPtrPlus1 + 1.U
762  }
763
764  // only use ftb result to assign hit status
765  when(bpu_s2_resp.valid(3)) {
766    entry_hit_status(bpu_s2_resp.ftq_idx.value) := Mux(bpu_s2_resp.full_pred(3).hit, h_hit, h_not_hit)
767  }
768
769  io.toIfu.flushFromBpu.s2.valid      := bpu_s2_redirect
770  io.toIfu.flushFromBpu.s2.bits       := bpu_s2_resp.ftq_idx
771  io.toPrefetch.flushFromBpu.s2.valid := bpu_s2_redirect
772  io.toPrefetch.flushFromBpu.s2.bits  := bpu_s2_resp.ftq_idx
773  when(bpu_s2_redirect) {
774    bpuPtr := bpu_s2_resp.ftq_idx + 1.U
775    copied_bpu_ptr.map(_ := bpu_s2_resp.ftq_idx + 1.U)
776    // only when ifuPtr runs ahead of bpu s2 resp should we recover it
777    when(!isBefore(ifuPtr, bpu_s2_resp.ftq_idx)) {
778      ifuPtr_write      := bpu_s2_resp.ftq_idx
779      ifuPtrPlus1_write := bpu_s2_resp.ftq_idx + 1.U
780      ifuPtrPlus2_write := bpu_s2_resp.ftq_idx + 2.U
781    }
782    when(!isBefore(pfPtr, bpu_s2_resp.ftq_idx)) {
783      pfPtr_write      := bpu_s2_resp.ftq_idx
784      pfPtrPlus1_write := bpu_s2_resp.ftq_idx + 1.U
785    }
786  }
787
788  io.toIfu.flushFromBpu.s3.valid      := bpu_s3_redirect
789  io.toIfu.flushFromBpu.s3.bits       := bpu_s3_resp.ftq_idx
790  io.toPrefetch.flushFromBpu.s3.valid := bpu_s3_redirect
791  io.toPrefetch.flushFromBpu.s3.bits  := bpu_s3_resp.ftq_idx
792  when(bpu_s3_redirect) {
793    bpuPtr := bpu_s3_resp.ftq_idx + 1.U
794    copied_bpu_ptr.map(_ := bpu_s3_resp.ftq_idx + 1.U)
795    // only when ifuPtr runs ahead of bpu s3 resp should we recover it
796    when(!isBefore(ifuPtr, bpu_s3_resp.ftq_idx)) {
797      ifuPtr_write      := bpu_s3_resp.ftq_idx
798      ifuPtrPlus1_write := bpu_s3_resp.ftq_idx + 1.U
799      ifuPtrPlus2_write := bpu_s3_resp.ftq_idx + 2.U
800    }
801    when(!isBefore(pfPtr, bpu_s3_resp.ftq_idx)) {
802      pfPtr_write      := bpu_s3_resp.ftq_idx
803      pfPtrPlus1_write := bpu_s3_resp.ftq_idx + 1.U
804    }
805  }
806
807  XSError(isBefore(bpuPtr, ifuPtr) && !isFull(bpuPtr, ifuPtr), "\nifuPtr is before bpuPtr!\n")
808  XSError(isBefore(bpuPtr, pfPtr) && !isFull(bpuPtr, pfPtr), "\npfPtr is before bpuPtr!\n")
809  XSError(isBefore(ifuWbPtr, commPtr) && !isFull(ifuWbPtr, commPtr), "\ncommPtr is before ifuWbPtr!\n")
810
811  (0 until copyNum).map(i => XSError(copied_bpu_ptr(i) =/= bpuPtr, "\ncopiedBpuPtr is different from bpuPtr!\n"))
812
813  // ****************************************************************
814  // **************************** to ifu ****************************
815  // ****************************************************************
816  // 0  for ifu, and 1-4 for ICache
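  // Bypass path: when the BPU enqueues an entry, its pc-mem write data is also captured
  // into bpu_in_bypass_buf, so a fetch request for that very entry does not have to wait
  // for the ftq_pc_mem read latency.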
817  val bpu_in_bypass_buf         = RegEnable(ftq_pc_mem.io.wdata, bpu_in_fire)
818  val copied_bpu_in_bypass_buf  = VecInit(Seq.fill(copyNum)(RegEnable(ftq_pc_mem.io.wdata, bpu_in_fire)))
819  val bpu_in_bypass_buf_for_ifu = bpu_in_bypass_buf
820  val bpu_in_bypass_ptr         = RegEnable(bpu_in_resp_ptr, bpu_in_fire)
821  val last_cycle_to_ifu_fire    = RegNext(io.toIfu.req.fire)
822  val last_cycle_to_pf_fire     = RegNext(io.toPrefetch.req.fire)
823
824  val copied_bpu_in_bypass_ptr      = VecInit(Seq.fill(copyNum)(RegEnable(bpu_in_resp_ptr, bpu_in_fire)))
825  val copied_last_cycle_to_ifu_fire = VecInit(Seq.fill(copyNum)(RegNext(io.toIfu.req.fire)))
826
827  // read pc and target
828  ftq_pc_mem.io.ifuPtr_w       := ifuPtr_write
829  ftq_pc_mem.io.ifuPtrPlus1_w  := ifuPtrPlus1_write
830  ftq_pc_mem.io.ifuPtrPlus2_w  := ifuPtrPlus2_write
831  ftq_pc_mem.io.pfPtr_w        := pfPtr_write
832  ftq_pc_mem.io.pfPtrPlus1_w   := pfPtrPlus1_write
833  ftq_pc_mem.io.commPtr_w      := commPtr_write
834  ftq_pc_mem.io.commPtrPlus1_w := commPtrPlus1_write
835
836  io.toIfu.req.bits.ftqIdx := ifuPtr
837
838  val toICachePcBundle               = Wire(Vec(copyNum, new Ftq_RF_Components))
839  val toICacheEntryToSend            = Wire(Vec(copyNum, Bool()))
840  val nextCycleToPrefetchPcBundle    = Wire(new Ftq_RF_Components)
841  val nextCycleToPrefetchEntryToSend = Wire(Bool())
842  val toPrefetchPcBundle             = RegNext(nextCycleToPrefetchPcBundle)
843  val toPrefetchEntryToSend          = RegNext(nextCycleToPrefetchEntryToSend)
844  val toIfuPcBundle                  = Wire(new Ftq_RF_Components)
845  val entry_is_to_send               = WireInit(entry_fetch_status(ifuPtr.value) === f_to_send)
846  val entry_ftq_offset               = WireInit(cfiIndex_vec(ifuPtr.value))
847  val entry_next_addr                = Wire(UInt(VAddrBits.W))
848
849  val pc_mem_ifu_ptr_rdata   = VecInit(Seq.fill(copyNum)(RegNext(ftq_pc_mem.io.ifuPtr_rdata)))
850  val pc_mem_ifu_plus1_rdata = VecInit(Seq.fill(copyNum)(RegNext(ftq_pc_mem.io.ifuPtrPlus1_rdata)))
851  val diff_entry_next_addr   = WireInit(update_target(ifuPtr.value)) // TODO: remove this
852
853  val copied_ifu_plus1_to_send = VecInit(Seq.fill(copyNum)(RegNext(
854    entry_fetch_status(ifuPtrPlus1.value) === f_to_send
855  ) || RegNext(last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtrPlus1)))
856  val copied_ifu_ptr_to_send = VecInit(Seq.fill(copyNum)(RegNext(
857    entry_fetch_status(ifuPtr.value) === f_to_send
858  ) || RegNext(last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr)))
859
860  for (i <- 0 until copyNum) {
861    when(copied_last_cycle_bpu_in(i) && copied_bpu_in_bypass_ptr(i) === copied_ifu_ptr(i)) {
862      toICachePcBundle(i)    := copied_bpu_in_bypass_buf(i)
863      toICacheEntryToSend(i) := true.B
864    }.elsewhen(copied_last_cycle_to_ifu_fire(i)) {
865      toICachePcBundle(i)    := pc_mem_ifu_plus1_rdata(i)
866      toICacheEntryToSend(i) := copied_ifu_plus1_to_send(i)
867    }.otherwise {
868      toICachePcBundle(i)    := pc_mem_ifu_ptr_rdata(i)
869      toICacheEntryToSend(i) := copied_ifu_ptr_to_send(i)
870    }
871  }
872
873  // Calculate requests sent to prefetcher one cycle in advance to cut critical path
874  when(bpu_in_fire && bpu_in_resp_ptr === pfPtr_write) {
875    nextCycleToPrefetchPcBundle    := ftq_pc_mem.io.wdata
876    nextCycleToPrefetchEntryToSend := true.B
877  }.elsewhen(io.toPrefetch.req.fire) {
878    nextCycleToPrefetchPcBundle := ftq_pc_mem.io.pfPtrPlus1_rdata
879    nextCycleToPrefetchEntryToSend := entry_fetch_status(pfPtrPlus1.value) === f_to_send ||
880      last_cycle_bpu_in && bpu_in_bypass_ptr === pfPtrPlus1
881  }.otherwise {
882    nextCycleToPrefetchPcBundle := ftq_pc_mem.io.pfPtr_rdata
883    nextCycleToPrefetchEntryToSend := entry_fetch_status(pfPtr.value) === f_to_send ||
884      last_cycle_bpu_in && bpu_in_bypass_ptr === pfPtr // reduce potential bubbles
885  }
886
887  // TODO: reconsider target address bypass logic
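  // Select what is sent to the IFU this cycle: (1) the entry at ifuPtr was just written
  // by the BPU - use the bypass buffer; (2) a request fired last cycle, so ifuPtr has
  // advanced - use the pre-read ifuPtrPlus1 data; (3) otherwise use the pre-read ifuPtr
  // data. The next start address comes from the bypass buffer, from newest_entry_target
  // when ifuPtr points at the newest entry, or from the pc mem read of the following entry.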
888  when(last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr) {
889    toIfuPcBundle        := bpu_in_bypass_buf_for_ifu
890    entry_is_to_send     := true.B
891    entry_next_addr      := last_cycle_bpu_target
892    entry_ftq_offset     := last_cycle_cfiIndex
893    diff_entry_next_addr := last_cycle_bpu_target // TODO: remove this
894  }.elsewhen(last_cycle_to_ifu_fire) {
895    toIfuPcBundle := RegNext(ftq_pc_mem.io.ifuPtrPlus1_rdata)
896    entry_is_to_send := RegNext(entry_fetch_status(ifuPtrPlus1.value) === f_to_send) ||
897      RegNext(last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtrPlus1) // reduce potential bubbles
898    entry_next_addr := Mux(
899      last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtrPlus1,
900      bpu_in_bypass_buf_for_ifu.startAddr,
901      Mux(ifuPtr === newest_entry_ptr, newest_entry_target, RegNext(ftq_pc_mem.io.ifuPtrPlus2_rdata.startAddr))
902    ) // ifuPtr+2
903  }.otherwise {
904    toIfuPcBundle := RegNext(ftq_pc_mem.io.ifuPtr_rdata)
905    entry_is_to_send := RegNext(entry_fetch_status(ifuPtr.value) === f_to_send) ||
906      RegNext(last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr) // reduce potential bubbles
907    entry_next_addr := Mux(
908      last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtrPlus1,
909      bpu_in_bypass_buf_for_ifu.startAddr,
910      Mux(ifuPtr === newest_entry_ptr, newest_entry_target, RegNext(ftq_pc_mem.io.ifuPtrPlus1_rdata.startAddr))
911    ) // ifuPtr+1
912  }
913
914  io.toIfu.req.valid              := entry_is_to_send && ifuPtr =/= bpuPtr
915  io.toIfu.req.bits.nextStartAddr := entry_next_addr
916  io.toIfu.req.bits.ftqOffset     := entry_ftq_offset
917  io.toIfu.req.bits.fromFtqPcBundle(toIfuPcBundle)
918
919  io.toICache.req.valid := entry_is_to_send && ifuPtr =/= bpuPtr
920  io.toICache.req.bits.readValid.zipWithIndex.map { case (copy, i) =>
921    copy := toICacheEntryToSend(i) && copied_ifu_ptr(i) =/= copied_bpu_ptr(i)
922  }
923  io.toICache.req.bits.pcMemRead.zipWithIndex.foreach { case (copy, i) =>
924    copy.fromFtqPcBundle(toICachePcBundle(i))
925    copy.ftqIdx := ifuPtr
926  }
927  io.toICache.req.bits.backendException := ExceptionType.hasException(backendException) && backendPcFaultPtr === ifuPtr
928
929  io.toPrefetch.req.valid := toPrefetchEntryToSend && pfPtr =/= bpuPtr
930  io.toPrefetch.req.bits.fromFtqPcBundle(toPrefetchPcBundle)
931  io.toPrefetch.req.bits.ftqIdx  := pfPtr
932  io.toPrefetch.backendException := Mux(backendPcFaultPtr === pfPtr, backendException, ExceptionType.none)
933  // io.toICache.req.bits.bypassSelect := last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr
934  // io.toICache.req.bits.bpuBypassWrite.zipWithIndex.map{case(bypassWrite, i) =>
935  //   bypassWrite.startAddr := bpu_in_bypass_buf.tail(i).startAddr
936  //   bypassWrite.nextlineStart := bpu_in_bypass_buf.tail(i).nextLineAddr
937  // }
938
939  // TODO: remove this
940  XSError(
941    io.toIfu.req.valid && diff_entry_next_addr =/= entry_next_addr,
942    p"\nifu_req_target wrong! ifuPtr: ${ifuPtr}, entry_next_addr: ${Hexadecimal(entry_next_addr)} diff_entry_next_addr: ${Hexadecimal(diff_entry_next_addr)}\n"
943  )
944
945  // when fall through is smaller in value than start address, there must be a false hit
946  when(toIfuPcBundle.fallThruError && entry_hit_status(ifuPtr.value) === h_hit) {
947    when(io.toIfu.req.fire &&
948      !(bpu_s2_redirect && bpu_s2_resp.ftq_idx === ifuPtr) &&
949      !(bpu_s3_redirect && bpu_s3_resp.ftq_idx === ifuPtr)) {
950      entry_hit_status(ifuPtr.value) := h_false_hit
951      // XSError(true.B, "FTB false hit by fallThroughError, startAddr: %x, fallTHru: %x\n", io.toIfu.req.bits.startAddr, io.toIfu.req.bits.nextStartAddr)
952    }
953  }
954  XSDebug(
955    toIfuPcBundle.fallThruError && entry_hit_status(ifuPtr.value) === h_hit,
956    "fallThruError! start:%x, fallThru:%x\n",
957    io.toIfu.req.bits.startAddr,
958    io.toIfu.req.bits.nextStartAddr
959  )
960
961  XSPerfAccumulate(
962    f"fall_through_error_to_ifu",
963    toIfuPcBundle.fallThruError && entry_hit_status(ifuPtr.value) === h_hit &&
964      io.toIfu.req.fire && !(bpu_s2_redirect && bpu_s2_resp.ftq_idx === ifuPtr) && !(bpu_s3_redirect && bpu_s3_resp.ftq_idx === ifuPtr)
965  )
966
967  val ifu_req_should_be_flushed =
968    io.toIfu.flushFromBpu.shouldFlushByStage2(io.toIfu.req.bits.ftqIdx) ||
969      io.toIfu.flushFromBpu.shouldFlushByStage3(io.toIfu.req.bits.ftqIdx)
970
971  when(io.toIfu.req.fire && !ifu_req_should_be_flushed) {
972    entry_fetch_status(ifuPtr.value) := f_sent
973  }
974
975  // *********************************************************************
976  // **************************** wb from ifu ****************************
977  // *********************************************************************
978  val pdWb         = io.fromIfu.pdWb
979  val pds          = pdWb.bits.pd
980  val ifu_wb_valid = pdWb.valid
981  val ifu_wb_idx   = pdWb.bits.ftqIdx.value
982  // read ports:                                                         commit update
983  val ftq_pd_mem =
984    Module(new SyncDataModuleTemplate(new Ftq_pd_Entry, FtqSize, FtqRedirectAheadNum + 1, 1, hasRen = true))
985  ftq_pd_mem.io.wen(0)   := ifu_wb_valid
986  ftq_pd_mem.io.waddr(0) := pdWb.bits.ftqIdx.value
987  ftq_pd_mem.io.wdata(0).fromPdWb(pdWb.bits)
988
989  val hit_pd_valid       = entry_hit_status(ifu_wb_idx) === h_hit && ifu_wb_valid
990  val hit_pd_mispred     = hit_pd_valid && pdWb.bits.misOffset.valid
991  val hit_pd_mispred_reg = RegNext(hit_pd_mispred, init = false.B)
992  val pd_reg             = RegEnable(pds, pdWb.valid)
993  val start_pc_reg       = RegEnable(pdWb.bits.pc(0), pdWb.valid)
994  val wb_idx_reg         = RegEnable(ifu_wb_idx, pdWb.valid)
995
996  when(ifu_wb_valid) {
997    val comm_stq_wen = VecInit(pds.map(_.valid).zip(pdWb.bits.instrRange).map {
998      case (v, inRange) => v && inRange
999    })
1000    commitStateQueueEnable(ifu_wb_idx) := true.B
1001    (commitStateQueueNext(ifu_wb_idx) zip comm_stq_wen).map {
1002      case (qe, v) => when(v) {
1003          qe := c_toCommit
1004        }
1005    }
1006  }
1007
1008  when(ifu_wb_valid) {
1009    ifuWbPtr_write := ifuWbPtr + 1.U
1010  }
1011
1012  XSError(ifu_wb_valid && isAfter(pdWb.bits.ftqIdx, ifuPtr), "IFU returned a predecode before its req, check IFU")
1013
1014  ftb_entry_mem.io.ren.get.head := ifu_wb_valid
1015  ftb_entry_mem.io.raddr.head   := ifu_wb_idx
1016  val has_false_hit = WireInit(false.B)
1017  when(RegNext(hit_pd_valid)) {
1018    // check for false hit
1019    val pred_ftb_entry = ftb_entry_mem.io.rdata.head
1020    val brSlots        = pred_ftb_entry.brSlots
1021    val tailSlot       = pred_ftb_entry.tailSlot
1022    // we check cfis that bpu predicted
1023
1024    // bpu predicted branches but denied by predecode
1025    val br_false_hit =
1026      brSlots.map {
1027        s => s.valid && !(pd_reg(s.offset).valid && pd_reg(s.offset).isBr)
1028      }.reduce(_ || _) ||
1029        (tailSlot.valid && pred_ftb_entry.tailSlot.sharing &&
1030          !(pd_reg(tailSlot.offset).valid && pd_reg(tailSlot.offset).isBr))
1031
1032    val jmpOffset = tailSlot.offset
1033    val jmp_pd    = pd_reg(jmpOffset)
1034    val jal_false_hit = pred_ftb_entry.jmpValid &&
1035      ((pred_ftb_entry.isJal && !(jmp_pd.valid && jmp_pd.isJal)) ||
1036        (pred_ftb_entry.isJalr && !(jmp_pd.valid && jmp_pd.isJalr)) ||
1037        (pred_ftb_entry.isCall && !(jmp_pd.valid && jmp_pd.isCall)) ||
1038        (pred_ftb_entry.isRet && !(jmp_pd.valid && jmp_pd.isRet)))
1039
1040    has_false_hit := br_false_hit || jal_false_hit || hit_pd_mispred_reg
1041    // assert(!has_false_hit)
1042  }
1043  XSDebug(
1044    RegNext(hit_pd_valid) && has_false_hit,
1045    "FTB false hit by br or jal or hit_pd, startAddr: %x\n",
1046    pdWb.bits.pc(0)
1047  )
1048
1049  when(has_false_hit) {
1050    entry_hit_status(wb_idx_reg) := h_false_hit
1051  }
1052
1053  // *******************************************************************************
1054  // **************************** redirect from backend ****************************
1055  // *******************************************************************************
1056
1057  // redirect read cfiInfo, couples to redirectGen s2
1058  // ftqIdxAhead(0-3) => ftq_redirect_mem(1-4), reuse ftq_redirect_mem(1)
1059  val ftq_redirect_rdata = Wire(Vec(FtqRedirectAheadNum, new Ftq_Redirect_SRAMEntry))
1060  val ftb_redirect_rdata = Wire(Vec(FtqRedirectAheadNum, new FTBEntry_FtqMem))
1061
1062  val ftq_pd_rdata = Wire(Vec(FtqRedirectAheadNum, new Ftq_pd_Entry))
1063  for (i <- 1 until FtqRedirectAheadNum) {
1064    ftq_redirect_mem.io.ren.get(i + IfuRedirectNum) := ftqIdxAhead(i).valid
1065    ftq_redirect_mem.io.raddr(i + IfuRedirectNum)   := ftqIdxAhead(i).bits.value
1066    ftb_entry_mem.io.ren.get(i + IfuRedirectNum)    := ftqIdxAhead(i).valid
1067    ftb_entry_mem.io.raddr(i + IfuRedirectNum)      := ftqIdxAhead(i).bits.value
1068
1069    ftq_pd_mem.io.ren.get(i) := ftqIdxAhead(i).valid
1070    ftq_pd_mem.io.raddr(i)   := ftqIdxAhead(i).bits.value
1071  }
1072  ftq_redirect_mem.io.ren.get(IfuRedirectNum) := Mux(aheadValid, ftqIdxAhead(0).valid, backendRedirect.valid)
1073  ftq_redirect_mem.io.raddr(IfuRedirectNum) := Mux(
1074    aheadValid,
1075    ftqIdxAhead(0).bits.value,
1076    backendRedirect.bits.ftqIdx.value
1077  )
1078  ftb_entry_mem.io.ren.get(IfuRedirectNum) := Mux(aheadValid, ftqIdxAhead(0).valid, backendRedirect.valid)
1079  ftb_entry_mem.io.raddr(IfuRedirectNum) := Mux(
1080    aheadValid,
1081    ftqIdxAhead(0).bits.value,
1082    backendRedirect.bits.ftqIdx.value
1083  )
1084
1085  ftq_pd_mem.io.ren.get(0) := Mux(aheadValid, ftqIdxAhead(0).valid, backendRedirect.valid)
1086  ftq_pd_mem.io.raddr(0)   := Mux(aheadValid, ftqIdxAhead(0).bits.value, backendRedirect.bits.ftqIdx.value)
1087
1088  for (i <- 0 until FtqRedirectAheadNum) {
1089    ftq_redirect_rdata(i) := ftq_redirect_mem.io.rdata(i + IfuRedirectNum)
1090    ftb_redirect_rdata(i) := ftb_entry_mem.io.rdata(i + IfuRedirectNum)
1091
1092    ftq_pd_rdata(i) := ftq_pd_mem.io.rdata(i)
1093  }
1094  val stage3CfiInfo =
1095    Mux(realAhdValid, Mux1H(ftqIdxSelOH, ftq_redirect_rdata), ftq_redirect_mem.io.rdata(IfuRedirectNum))
1096  val stage3PdInfo       = Mux(realAhdValid, Mux1H(ftqIdxSelOH, ftq_pd_rdata), ftq_pd_mem.io.rdata(0))
1097  val backendRedirectCfi = fromBackendRedirect.bits.cfiUpdate
1098  backendRedirectCfi.fromFtqRedirectSram(stage3CfiInfo)
1099  backendRedirectCfi.pd := stage3PdInfo.toPd(fromBackendRedirect.bits.ftqOffset)
1100
1101  val r_ftb_entry = Mux(realAhdValid, Mux1H(ftqIdxSelOH, ftb_redirect_rdata), ftb_entry_mem.io.rdata(IfuRedirectNum))
1102  val r_ftqOffset = fromBackendRedirect.bits.ftqOffset
1103
1104  backendRedirectCfi.br_hit := r_ftb_entry.brIsSaved(r_ftqOffset)
1105  backendRedirectCfi.jr_hit := r_ftb_entry.isJalr && r_ftb_entry.tailSlot.offset === r_ftqOffset
1106  // FIXME: not portable
1107  val sc_disagree = stage3CfiInfo.sc_disagree.getOrElse(VecInit(Seq.fill(numBr)(false.B)))
1108  backendRedirectCfi.sc_hit := backendRedirectCfi.br_hit && Mux(
1109    r_ftb_entry.brSlots(0).offset === r_ftqOffset,
1110    sc_disagree(0),
1111    sc_disagree(1)
1112  )
1113
1114  when(entry_hit_status(fromBackendRedirect.bits.ftqIdx.value) === h_hit) {
1115    backendRedirectCfi.shift := PopCount(r_ftb_entry.getBrMaskByOffset(r_ftqOffset)) +&
1116      (backendRedirectCfi.pd.isBr && !r_ftb_entry.brIsSaved(r_ftqOffset) &&
1117        !r_ftb_entry.newBrCanNotInsert(r_ftqOffset))
1118
1119    backendRedirectCfi.addIntoHist := backendRedirectCfi.pd.isBr && (r_ftb_entry.brIsSaved(r_ftqOffset) ||
1120      !r_ftb_entry.newBrCanNotInsert(r_ftqOffset))
1121  }.otherwise {
1122    backendRedirectCfi.shift       := (backendRedirectCfi.pd.isBr && backendRedirectCfi.taken).asUInt
1123    backendRedirectCfi.addIntoHist := backendRedirectCfi.pd.isBr.asUInt
1124  }
1125
1126  // ***************************************************************************
1127  // **************************** redirect from ifu ****************************
1128  // ***************************************************************************
1129  val fromIfuRedirect = WireInit(0.U.asTypeOf(Valid(new BranchPredictionRedirect)))
1130  fromIfuRedirect.valid              := pdWb.valid && pdWb.bits.misOffset.valid && !backendFlush
1131  fromIfuRedirect.bits.ftqIdx        := pdWb.bits.ftqIdx
1132  fromIfuRedirect.bits.ftqOffset     := pdWb.bits.misOffset.bits
1133  fromIfuRedirect.bits.level         := RedirectLevel.flushAfter
1134  fromIfuRedirect.bits.BTBMissBubble := true.B
1135  fromIfuRedirect.bits.debugIsMemVio := false.B
1136  fromIfuRedirect.bits.debugIsCtrl   := false.B
1137
1138  val ifuRedirectCfiUpdate = fromIfuRedirect.bits.cfiUpdate
1139  ifuRedirectCfiUpdate.pc        := pdWb.bits.pc(pdWb.bits.misOffset.bits)
1140  ifuRedirectCfiUpdate.pd        := pdWb.bits.pd(pdWb.bits.misOffset.bits)
1141  ifuRedirectCfiUpdate.predTaken := cfiIndex_vec(pdWb.bits.ftqIdx.value).valid
1142  ifuRedirectCfiUpdate.target    := pdWb.bits.target
1143  ifuRedirectCfiUpdate.taken     := pdWb.bits.cfiOffset.valid
1144  ifuRedirectCfiUpdate.isMisPred := pdWb.bits.misOffset.valid
1145
1146  val ifuRedirectReg   = RegNextWithEnable(fromIfuRedirect, hasInit = true)
1147  val ifuRedirectToBpu = WireInit(ifuRedirectReg)
1148  ifuFlush := fromIfuRedirect.valid || ifuRedirectToBpu.valid
1149
1150  ftq_redirect_mem.io.ren.get.head := fromIfuRedirect.valid
1151  ftq_redirect_mem.io.raddr.head   := fromIfuRedirect.bits.ftqIdx.value
1152
1153  val toBpuCfi = ifuRedirectToBpu.bits.cfiUpdate
1154  toBpuCfi.fromFtqRedirectSram(ftq_redirect_mem.io.rdata.head)
1155  when(ifuRedirectReg.bits.cfiUpdate.pd.isRet && ifuRedirectReg.bits.cfiUpdate.pd.valid) {
1156    toBpuCfi.target := toBpuCfi.topAddr
1157  }
1158
1159  when(ifuRedirectReg.valid) {
1160    ifuRedirected(ifuRedirectReg.bits.ftqIdx.value) := true.B
1161  }.elsewhen(RegNext(pdWb.valid)) {
1162    // if pdWb and no redirect, set to false
1163    ifuRedirected(last_cycle_bpu_in_ptr.value) := false.B
1164  }
1165
1166  // **********************************************************************
1167  // ***************************** to backend *****************************
1168  // **********************************************************************
1169  // to backend pc mem / target
1170  io.toBackend.pc_mem_wen   := RegNext(last_cycle_bpu_in)
1171  io.toBackend.pc_mem_waddr := RegEnable(last_cycle_bpu_in_idx, last_cycle_bpu_in)
1172  io.toBackend.pc_mem_wdata := RegEnable(bpu_in_bypass_buf_for_ifu, last_cycle_bpu_in)
1173
1174  // the number of cycles is fixed
1175  val newest_entry_en: Bool = RegNext(last_cycle_bpu_in || backendRedirect.valid || ifuRedirectToBpu.valid)
1176  io.toBackend.newest_entry_en     := RegNext(newest_entry_en)
1177  io.toBackend.newest_entry_ptr    := RegEnable(newest_entry_ptr, newest_entry_en)
1178  io.toBackend.newest_entry_target := RegEnable(newest_entry_target, newest_entry_en)
1179
1180  // *********************************************************************
1181  // **************************** wb from exu ****************************
1182  // *********************************************************************
1183
1184  backendRedirect.valid := io.fromBackend.redirect.valid
1185  backendRedirect.bits.connectRedirect(io.fromBackend.redirect.bits)
1186  backendRedirect.bits.BTBMissBubble := false.B
1187
1188  def extractRedirectInfo(wb: Valid[Redirect]) = {
1189    val ftqPtr    = wb.bits.ftqIdx
1190    val ftqOffset = wb.bits.ftqOffset
1191    val taken     = wb.bits.cfiUpdate.taken
1192    val mispred   = wb.bits.cfiUpdate.isMisPred
1193    (wb.valid, ftqPtr, ftqOffset, taken, mispred)
1194  }
1195
1196  // fix mispredict entry
1197  val lastIsMispredict = RegNext(
1198    backendRedirect.valid && backendRedirect.bits.level === RedirectLevel.flushAfter,
1199    init = false.B
1200  )
1201
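  // Repair the per-entry CFI record after a redirect:
  //  - a taken redirect at an earlier offset than the recorded CFI moves the record there,
  //  - a redirect exactly at the recorded offset rewrites its valid bit with the real taken result,
  //  - a not-taken redirect at any other offset invalidates the recorded CFI.
  // It also refreshes newest_entry_target / newest_entry_ptr, and backend redirects
  // additionally update the per-slot mispredict vector.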
1202  def updateCfiInfo(redirect: Valid[Redirect], isBackend: Boolean = true) = {
1203    val (r_valid, r_ptr, r_offset, r_taken, r_mispred) = extractRedirectInfo(redirect)
1204    val r_idx                                          = r_ptr.value
1205    val cfiIndex_bits_wen                              = r_valid && r_taken && r_offset < cfiIndex_vec(r_idx).bits
1206    val cfiIndex_valid_wen                             = r_valid && r_offset === cfiIndex_vec(r_idx).bits
1207    when(cfiIndex_bits_wen || cfiIndex_valid_wen) {
1208      cfiIndex_vec(r_idx).valid := cfiIndex_bits_wen || cfiIndex_valid_wen && r_taken
1209    }.elsewhen(r_valid && !r_taken && r_offset =/= cfiIndex_vec(r_idx).bits) {
1210      cfiIndex_vec(r_idx).valid := false.B
1211    }
1212    when(cfiIndex_bits_wen) {
1213      cfiIndex_vec(r_idx).bits := r_offset
1214    }
1215    newest_entry_target_modified := true.B
1216    newest_entry_target          := redirect.bits.cfiUpdate.target
1217    newest_entry_ptr_modified    := true.B
1218    newest_entry_ptr             := r_ptr
1219
1220    update_target(r_idx) := redirect.bits.cfiUpdate.target // TODO: remove this
1221    if (isBackend) {
1222      mispredict_vec(r_idx)(r_offset) := r_mispred
1223    }
1224  }
1225
1226  when(fromBackendRedirect.valid) {
1227    updateCfiInfo(fromBackendRedirect)
1228  }.elsewhen(ifuRedirectToBpu.valid) {
1229    updateCfiInfo(ifuRedirectToBpu, isBackend = false)
1230  }
1231
1232  when(fromBackendRedirect.valid) {
1233    when(fromBackendRedirect.bits.ControlRedirectBubble) {
1234      when(fromBackendRedirect.bits.ControlBTBMissBubble) {
1235        topdown_stage.reasons(TopDownCounters.BTBMissBubble.id)                  := true.B
1236        io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.BTBMissBubble.id) := true.B
1237      }.elsewhen(fromBackendRedirect.bits.TAGEMissBubble) {
1238        topdown_stage.reasons(TopDownCounters.TAGEMissBubble.id)                  := true.B
1239        io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.TAGEMissBubble.id) := true.B
1240      }.elsewhen(fromBackendRedirect.bits.SCMissBubble) {
1241        topdown_stage.reasons(TopDownCounters.SCMissBubble.id)                  := true.B
1242        io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.SCMissBubble.id) := true.B
1243      }.elsewhen(fromBackendRedirect.bits.ITTAGEMissBubble) {
1244        topdown_stage.reasons(TopDownCounters.ITTAGEMissBubble.id)                  := true.B
1245        io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.ITTAGEMissBubble.id) := true.B
1246      }.elsewhen(fromBackendRedirect.bits.RASMissBubble) {
1247        topdown_stage.reasons(TopDownCounters.RASMissBubble.id)                  := true.B
1248        io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.RASMissBubble.id) := true.B
1249      }
1250
1251    }.elsewhen(backendRedirect.bits.MemVioRedirectBubble) {
1252      topdown_stage.reasons(TopDownCounters.MemVioRedirectBubble.id)                  := true.B
1253      io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.MemVioRedirectBubble.id) := true.B
1254    }.otherwise {
1255      topdown_stage.reasons(TopDownCounters.OtherRedirectBubble.id)                  := true.B
1256      io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.OtherRedirectBubble.id) := true.B
1257    }
1258  }.elsewhen(ifuRedirectReg.valid) {
1259    topdown_stage.reasons(TopDownCounters.BTBMissBubble.id)                  := true.B
1260    io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.BTBMissBubble.id) := true.B
1261  }
1262
1263  io.ControlBTBMissBubble := fromBackendRedirect.bits.ControlBTBMissBubble
1264  io.TAGEMissBubble       := fromBackendRedirect.bits.TAGEMissBubble
1265  io.SCMissBubble         := fromBackendRedirect.bits.SCMissBubble
1266  io.ITTAGEMissBubble     := fromBackendRedirect.bits.ITTAGEMissBubble
1267  io.RASMissBubble        := fromBackendRedirect.bits.RASMissBubble
1268
1269  // ***********************************************************************************
1270  // **************************** flush ptr and state queue ****************************
1271  // ***********************************************************************************
1272
1273  val redirectVec = VecInit(backendRedirect, fromIfuRedirect)
1274
1275  // on a redirect, the pointers and status queues must be reset
1276  io.icacheFlush := redirectVec.map(r => r.valid).reduce(_ || _)
1277  XSPerfAccumulate("icacheFlushFromBackend", backendRedirect.valid)
1278  XSPerfAccumulate("icacheFlushFromIFU", fromIfuRedirect.valid)
1279  when(redirectVec.map(r => r.valid).reduce(_ || _)) {
1280    val r                          = PriorityMux(redirectVec.map(r => r.valid -> r.bits))
1281    val notIfu                     = redirectVec.dropRight(1).map(r => r.valid).reduce(_ || _)
1282    val (idx, offset, flushItSelf) = (r.ftqIdx, r.ftqOffset, RedirectLevel.flushItself(r.level))
1283    val next                       = idx + 1.U
1284    bpuPtr := next
1285    copied_bpu_ptr.map(_ := next)
1286    ifuPtr_write      := next
1287    ifuWbPtr_write    := next
1288    ifuPtrPlus1_write := idx + 2.U
1289    ifuPtrPlus2_write := idx + 3.U
1290    pfPtr_write       := next
1291    pfPtrPlus1_write  := idx + 2.U
1292  }
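  // One cycle after a backend redirect (the notIfu guard below), repair the commit
  // state queue of the redirected entry: slots after the redirected offset become
  // empty, and the redirected slot itself is marked flushed when the redirect also
  // flushes that instruction.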
1293  when(RegNext(redirectVec.map(r => r.valid).reduce(_ || _))) {
1294    val r                          = PriorityMux(redirectVec.map(r => r.valid -> r.bits))
1295    val notIfu                     = redirectVec.dropRight(1).map(r => r.valid).reduce(_ || _)
1296    val (idx, offset, flushItSelf) = (r.ftqIdx, r.ftqOffset, RedirectLevel.flushItself(r.level))
1297    when(RegNext(notIfu)) {
1298      commitStateQueueEnable(RegNext(idx.value)) := true.B
1299      commitStateQueueNext(RegNext(idx.value)).zipWithIndex.foreach { case (s, i) =>
1300        when(i.U > RegNext(offset)) {
1301          s := c_empty
1302        }
1303        when(i.U === RegNext(offset) && RegNext(flushItSelf)) {
1304          s := c_flushed
1305        }
1306      }
1307    }
1308  }
1309
1310  // only the valid bit is actually needed
1311  io.toIfu.redirect.bits    := backendRedirect.bits
1312  io.toIfu.redirect.valid   := stage2Flush
1313  io.toIfu.topdown_redirect := fromBackendRedirect
1314
1315  // commit
1316  for (c <- io.fromBackend.rob_commits) {
1317    when(c.valid) {
1318      commitStateQueueEnable(c.bits.ftqIdx.value)                 := true.B
1319      commitStateQueueNext(c.bits.ftqIdx.value)(c.bits.ftqOffset) := c_committed
1320      // TODO: remove this
1321      // For instruction fusions, we also update the next instruction
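      // commitType values 4..7 appear to encode where the fused pair's second half
      // lives: offset+1, offset+2, or slot 0 / slot 1 of the next FTQ entry.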
1322      when(c.bits.commitType === 4.U) {
1323        commitStateQueueNext(c.bits.ftqIdx.value)(c.bits.ftqOffset + 1.U) := c_committed
1324      }.elsewhen(c.bits.commitType === 5.U) {
1325        commitStateQueueNext(c.bits.ftqIdx.value)(c.bits.ftqOffset + 2.U) := c_committed
1326      }.elsewhen(c.bits.commitType === 6.U) {
1327        val index = (c.bits.ftqIdx + 1.U).value
1328        commitStateQueueEnable(index)  := true.B
1329        commitStateQueueNext(index)(0) := c_committed
1330      }.elsewhen(c.bits.commitType === 7.U) {
1331        val index = (c.bits.ftqIdx + 1.U).value
1332        commitStateQueueEnable(index)  := true.B
1333        commitStateQueueNext(index)(1) := c_committed
1334      }
1335    }
1336  }
1337
1338  // ****************************************************************
1339  // **************************** to bpu ****************************
1340  // ****************************************************************
1341
1342  io.toBpu.redirctFromIFU := ifuRedirectToBpu.valid
1343  io.toBpu.redirect       := Mux(fromBackendRedirect.valid, fromBackendRedirect, ifuRedirectToBpu)
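  // Redirect latency: cycles from the s1 prediction of the redirected entry to now.
  // pred_s1_cycle is optional; when absent, the zero-filled dummy vector keeps this
  // code elaborating, though the histograms are then meaningless.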
1344  val dummy_s1_pred_cycle_vec = VecInit(List.tabulate(FtqSize)(_ => 0.U(64.W)))
1345  val redirect_latency =
1346    GTimer() - pred_s1_cycle.getOrElse(dummy_s1_pred_cycle_vec)(io.toBpu.redirect.bits.ftqIdx.value) + 1.U
1347  XSPerfHistogram("backend_redirect_latency", redirect_latency, fromBackendRedirect.valid, 0, 60, 1)
1348  XSPerfHistogram(
1349    "ifu_redirect_latency",
1350    redirect_latency,
1351    !fromBackendRedirect.valid && ifuRedirectToBpu.valid,
1352    0,
1353    60,
1354    1
1355  )
1356
1357  XSError(
1358    io.toBpu.redirect.valid && isBefore(io.toBpu.redirect.bits.ftqIdx, commPtr),
1359    "Ftq received a redirect after its commit, check backend or replay"
1360  )
1361
1362  val may_have_stall_from_bpu = Wire(Bool())
1363  val bpu_ftb_update_stall    = RegInit(0.U(2.W)) // 2-cycle stall, so we need 3 states
1364  may_have_stall_from_bpu := bpu_ftb_update_stall =/= 0.U
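  // When a committing CFI did not hit in the FTB, commit is held back for two cycles,
  // presumably to give the resulting FTB update time to complete (see the state
  // machine on bpu_ftb_update_stall below).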
1365
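  // An entry may commit once the IFU has written it back, no FTB-update stall is
  // pending, and either the ROB commit pointer has already passed it or its last
  // valid instruction has reached c_committed. canMoveCommPtr additionally allows
  // advancing past an entry whose leading instruction was flushed by a redirect.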
1366  val validInstructions     = commitStateQueueReg(commPtr.value).map(s => s === c_toCommit || s === c_committed)
1367  val lastInstructionStatus = PriorityMux(validInstructions.reverse.zip(commitStateQueueReg(commPtr.value).reverse))
1368  val firstInstructionFlushed = commitStateQueueReg(commPtr.value)(0) === c_flushed ||
1369    commitStateQueueReg(commPtr.value)(0) === c_empty && commitStateQueueReg(commPtr.value)(1) === c_flushed
1370  canCommit := commPtr =/= ifuWbPtr && !may_have_stall_from_bpu &&
1371    (isAfter(robCommPtr, commPtr) ||
1372      validInstructions.reduce(_ || _) && lastInstructionStatus === c_committed)
1373  val canMoveCommPtr = commPtr =/= ifuWbPtr && !may_have_stall_from_bpu &&
1374    (isAfter(robCommPtr, commPtr) ||
1375      validInstructions.reduce(_ || _) && lastInstructionStatus === c_committed ||
1376      firstInstructionFlushed)
1377
1378  when(io.fromBackend.rob_commits.map(_.valid).reduce(_ | _)) {
1379    robCommPtr_write := ParallelPriorityMux(
1380      io.fromBackend.rob_commits.map(_.valid).reverse,
1381      io.fromBackend.rob_commits.map(_.bits.ftqIdx).reverse
1382    )
1383  }.elsewhen(isAfter(commPtr, robCommPtr)) {
1384    robCommPtr_write := commPtr
1385  }.otherwise {
1386    robCommPtr_write := robCommPtr
1387  }
1388
1389  /**
1390    *************************************************************************************
1391    * MMIO instruction fetch is allowed only if MMIO is the oldest instruction.
1392    *************************************************************************************
1393    */
1394  val mmioReadPtr = io.mmioCommitRead.mmioFtqPtr
1395  val mmioLastCommit = isAfter(commPtr, mmioReadPtr) ||
1396    commPtr === mmioReadPtr && validInstructions.reduce(_ || _) && lastInstructionStatus === c_committed
1397  io.mmioCommitRead.mmioLastCommit := RegNext(mmioLastCommit)
1398
1399  // commit reads
1400  val commit_pc_bundle = RegNext(ftq_pc_mem.io.commPtr_rdata)
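  // Target of the committing entry: if it is the newest entry, use the most recently
  // recorded target (a late redirect may have changed it); otherwise use the start
  // address of the next FTQ entry.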
1401  val commit_target =
1402    Mux(
1403      RegNext(commPtr === newest_entry_ptr),
1404      RegEnable(newest_entry_target, newest_entry_target_modified),
1405      RegNext(ftq_pc_mem.io.commPtrPlus1_rdata.startAddr)
1406    )
1407  ftq_pd_mem.io.ren.get.last := canCommit
1408  ftq_pd_mem.io.raddr.last   := commPtr.value
1409  val commit_pd = ftq_pd_mem.io.rdata.last
1410  ftq_redirect_mem.io.ren.get.last := canCommit
1411  ftq_redirect_mem.io.raddr.last   := commPtr.value
1412  val commit_spec_meta = ftq_redirect_mem.io.rdata.last
1413  ftq_meta_1r_sram.io.ren(0)   := canCommit
1414  ftq_meta_1r_sram.io.raddr(0) := commPtr.value
1415  val commit_meta      = ftq_meta_1r_sram.io.rdata(0).meta
1416  val commit_ftb_entry = ftq_meta_1r_sram.io.rdata(0).ftb_entry
1417
1418  // reading the mems and SRAMs takes one cycle, so the committed pointer and valid are delayed accordingly
1419  val do_commit_ptr = RegEnable(commPtr, canCommit)
1420  val do_commit     = RegNext(canCommit, init = false.B)
1421  when(canMoveCommPtr) {
1422    commPtr_write      := commPtrPlus1
1423    commPtrPlus1_write := commPtrPlus1 + 1.U
1424  }
1425  val commit_state   = RegEnable(commitStateQueueReg(commPtr.value), canCommit)
1426  val can_commit_cfi = WireInit(cfiIndex_vec(commPtr.value))
1427  val do_commit_cfi  = WireInit(cfiIndex_vec(do_commit_ptr.value))
1428  //
1429  // when (commitStateQueue(commPtr.value)(can_commit_cfi.bits) =/= c_commited) {
1430  //  can_commit_cfi.valid := false.B
1431  // }
1432  val commit_cfi = RegEnable(can_commit_cfi, canCommit)
1433  val debug_cfi  = commitStateQueueReg(do_commit_ptr.value)(do_commit_cfi.bits) =/= c_committed && do_commit_cfi.valid
1434
1435  val commit_mispredict: Vec[Bool] =
1436    VecInit((RegEnable(mispredict_vec(commPtr.value), canCommit) zip commit_state).map {
1437      case (mis, state) => mis && state === c_committed
1438    })
1439  val commit_instCommited: Vec[Bool] = VecInit(commit_state.map(_ === c_committed)) // [PredictWidth]
1440  val can_commit_hit     = entry_hit_status(commPtr.value)
1441  val commit_hit         = RegEnable(can_commit_hit, canCommit)
1442  val diff_commit_target = RegEnable(update_target(commPtr.value), canCommit) // TODO: remove this
1443  val commit_stage       = RegEnable(pred_stage(commPtr.value), canCommit)
1444  val commit_valid       = commit_hit === h_hit || commit_cfi.valid           // hit or taken
1445
1446  val to_bpu_hit = can_commit_hit === h_hit || can_commit_hit === h_false_hit
1447  switch(bpu_ftb_update_stall) {
1448    is(0.U) {
1449      when(can_commit_cfi.valid && !to_bpu_hit && canCommit) {
1450        bpu_ftb_update_stall := 2.U // 2-cycle stall
1451      }
1452    }
1453    is(2.U) {
1454      bpu_ftb_update_stall := 1.U
1455    }
1456    is(1.U) {
1457      bpu_ftb_update_stall := 0.U
1458    }
1459    is(3.U) {
1460      // XSError below
1461    }
1462  }
1463  XSError(bpu_ftb_update_stall === 3.U, "bpu_ftb_update_stall should be 0, 1 or 2")
1464
1465  // TODO: remove this
1466  XSError(do_commit && diff_commit_target =/= commit_target, "\ncommit target should be the same as update target\n")
1467
1468  // update latency stats
1469  val update_latency = GTimer() - pred_s1_cycle.getOrElse(dummy_s1_pred_cycle_vec)(do_commit_ptr.value) + 1.U
1470  XSPerfHistogram("bpu_update_latency", update_latency, io.toBpu.update.valid, 0, 64, 2)
1471
1472  io.toBpu.update       := DontCare
1473  io.toBpu.update.valid := commit_valid && do_commit
1474  val update = io.toBpu.update.bits
1475  update.false_hit   := commit_hit === h_false_hit
1476  update.pc          := commit_pc_bundle.startAddr
1477  update.meta        := commit_meta
1478  update.cfi_idx     := commit_cfi
1479  update.full_target := commit_target
1480  update.from_stage  := commit_stage
1481  update.spec_info   := commit_spec_meta
1482  XSError(commit_valid && do_commit && debug_cfi, "\ncommit cfi should be in c_committed state\n")
1483
1484  val commit_real_hit  = commit_hit === h_hit
1485  val update_ftb_entry = update.ftb_entry
1486
1487  val ftbEntryGen = Module(new FTBEntryGen).io
1488  ftbEntryGen.start_addr     := commit_pc_bundle.startAddr
1489  ftbEntryGen.old_entry      := commit_ftb_entry
1490  ftbEntryGen.pd             := commit_pd
1491  ftbEntryGen.cfiIndex       := commit_cfi
1492  ftbEntryGen.target         := commit_target
1493  ftbEntryGen.hit            := commit_real_hit
1494  ftbEntryGen.mispredict_vec := commit_mispredict
1495
1496  update_ftb_entry         := ftbEntryGen.new_entry
1497  update.new_br_insert_pos := ftbEntryGen.new_br_insert_pos
1498  update.mispred_mask      := ftbEntryGen.mispred_mask
1499  update.old_entry         := ftbEntryGen.is_old_entry
1500  update.pred_hit          := commit_hit === h_hit || commit_hit === h_false_hit
1501  update.br_taken_mask     := ftbEntryGen.taken_mask
1502  update.br_committed := (ftbEntryGen.new_entry.brValids zip ftbEntryGen.new_entry.brOffset) map {
1503    case (valid, offset) => valid && commit_instCommited(offset)
1504  }
1505  update.jmp_taken := ftbEntryGen.jmp_taken
1506
1507  // update.full_pred.fromFtbEntry(ftbEntryGen.new_entry, update.pc)
1508  // update.full_pred.jalr_target := commit_target
1509  // update.full_pred.hit := true.B
1510  // when (update.full_pred.is_jalr) {
1511  //   update.full_pred.targets.last := commit_target
1512  // }
1513
1514  // ******************************************************************************
1515  // **************************** commit perf counters ****************************
1516  // ******************************************************************************
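  // Per-slot masks over the committing entry: committed instructions, the CFIs among
  // them, and their correctly / incorrectly predicted subsets feed the counters below.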
1517
1518  val commit_inst_mask        = VecInit(commit_state.map(c => c === c_committed && do_commit)).asUInt
1519  val commit_mispred_mask     = commit_mispredict.asUInt
1520  val commit_not_mispred_mask = ~commit_mispred_mask
1521
1522  val commit_br_mask  = commit_pd.brMask.asUInt
1523  val commit_jmp_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.jmpInfo.valid.asTypeOf(UInt(1.W)))
1524  val commit_cfi_mask = commit_br_mask | commit_jmp_mask
1525
1526  val mbpInstrs = commit_inst_mask & commit_cfi_mask
1527
1528  val mbpRights = mbpInstrs & commit_not_mispred_mask
1529  val mbpWrongs = mbpInstrs & commit_mispred_mask
1530
1531  io.bpuInfo.bpRight := PopCount(mbpRights)
1532  io.bpuInfo.bpWrong := PopCount(mbpWrongs)
1533
1534  val hartId           = p(XSCoreParamsKey).HartId
1535  val isWriteFTQTable  = Constantin.createRecord(s"isWriteFTQTable$hartId")
1536  val ftqBranchTraceDB = ChiselDB.createTable(s"FTQTable$hartId", new FtqDebugBundle)
1537  // Cfi Info
1538  for (i <- 0 until PredictWidth) {
1539    val pc      = commit_pc_bundle.startAddr + (i * instBytes).U
1540    val v       = commit_state(i) === c_committed
1541    val isBr    = commit_pd.brMask(i)
1542    val isJmp   = commit_pd.jmpInfo.valid && commit_pd.jmpOffset === i.U
1543    val isCfi   = isBr || isJmp
1544    val isTaken = commit_cfi.valid && commit_cfi.bits === i.U
1545    val misPred = commit_mispredict(i)
1546    // val ghist = commit_spec_meta.ghist.predHist
1547    val histPtr   = commit_spec_meta.histPtr
1548    val predCycle = commit_meta(63, 0)
1549    val target    = commit_target
1550
1551    val brIdx = OHToUInt(Reverse(Cat(update_ftb_entry.brValids.zip(update_ftb_entry.brOffset).map { case (v, offset) =>
1552      v && offset === i.U
1553    })))
1554    val inFtbEntry = update_ftb_entry.brValids.zip(update_ftb_entry.brOffset).map { case (v, offset) =>
1555      v && offset === i.U
1556    }.reduce(_ || _)
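    // addIntoHist: the branch counts toward the global history if the entry hit in
    // the FTB and the branch is present in the regenerated FTB entry, or, on an FTB
    // miss, if it is the taken CFI of this entry.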
1557    val addIntoHist =
1558      ((commit_hit === h_hit) && inFtbEntry) || (!(commit_hit === h_hit) && i.U === commit_cfi.bits && isBr && commit_cfi.valid)
1559    XSDebug(
1560      v && do_commit && isCfi,
1561      p"cfi_update: isBr(${isBr}) pc(${Hexadecimal(pc)}) " +
1562        p"taken(${isTaken}) mispred(${misPred}) cycle($predCycle) hist(${histPtr.value}) " +
1563        p"startAddr(${Hexadecimal(commit_pc_bundle.startAddr)}) AddIntoHist(${addIntoHist}) " +
1564        p"brInEntry(${inFtbEntry}) brIdx(${brIdx}) target(${Hexadecimal(target)})\n"
1565    )
1566
1567    val logbundle = Wire(new FtqDebugBundle)
1568    logbundle.pc        := pc
1569    logbundle.target    := target
1570    logbundle.isBr      := isBr
1571    logbundle.isJmp     := isJmp
1572    logbundle.isCall    := isJmp && commit_pd.hasCall
1573    logbundle.isRet     := isJmp && commit_pd.hasRet
1574    logbundle.misPred   := misPred
1575    logbundle.isTaken   := isTaken
1576    logbundle.predStage := commit_stage
1577
1578    ftqBranchTraceDB.log(
1579      data = logbundle /* hardware of type T */,
1580      en = isWriteFTQTable.orR && v && do_commit && isCfi,
1581      site = "FTQ" + p(XSCoreParamsKey).HartId.toString,
1582      clock = clock,
1583      reset = reset
1584    )
1585  }
1586
1587  val enq           = io.fromBpu.resp
1588  val perf_redirect = backendRedirect
1589
1590  XSPerfAccumulate("entry", validEntries)
1591  XSPerfAccumulate("bpu_to_ftq_stall", enq.valid && !enq.ready)
1592  XSPerfAccumulate("mispredictRedirect", perf_redirect.valid && RedirectLevel.flushAfter === perf_redirect.bits.level)
1593  XSPerfAccumulate("replayRedirect", perf_redirect.valid && RedirectLevel.flushItself(perf_redirect.bits.level))
1594  XSPerfAccumulate("predecodeRedirect", fromIfuRedirect.valid)
1595
1596  XSPerfAccumulate("to_ifu_bubble", io.toIfu.req.ready && !io.toIfu.req.valid)
1597
1598  XSPerfAccumulate("to_ifu_stall", io.toIfu.req.valid && !io.toIfu.req.ready)
1599  XSPerfAccumulate("from_bpu_real_bubble", !enq.valid && enq.ready && allowBpuIn)
1600  XSPerfAccumulate("bpu_to_ifu_bubble", bpuPtr === ifuPtr)
1601  XSPerfAccumulate(
1602    "bpu_to_ifu_bubble_when_ftq_full",
1603    (bpuPtr === ifuPtr) && isFull(bpuPtr, commPtr) && io.toIfu.req.ready
1604  )
1605
1606  XSPerfAccumulate("redirectAhead_ValidNum", ftqIdxAhead.map(_.valid).reduce(_ | _))
1607  XSPerfAccumulate("fromBackendRedirect_ValidNum", io.fromBackend.redirect.valid)
1608  XSPerfAccumulate("toBpuRedirect_ValidNum", io.toBpu.redirect.valid)
1609
1610  val from_bpu = io.fromBpu.resp.bits
1611  val to_ifu   = io.toIfu.req.bits
1612
1613  XSPerfHistogram("commit_num_inst", PopCount(commit_inst_mask), do_commit, 0, PredictWidth + 1, 1)
1614
1615  val commit_jal_mask  = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasJal.asTypeOf(UInt(1.W)))
1616  val commit_jalr_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasJalr.asTypeOf(UInt(1.W)))
1617  val commit_call_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasCall.asTypeOf(UInt(1.W)))
1618  val commit_ret_mask  = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasRet.asTypeOf(UInt(1.W)))
1619
1620  val mbpBRights = mbpRights & commit_br_mask
1621  val mbpJRights = mbpRights & commit_jal_mask
1622  val mbpIRights = mbpRights & commit_jalr_mask
1623  val mbpCRights = mbpRights & commit_call_mask
1624  val mbpRRights = mbpRights & commit_ret_mask
1625
1626  val mbpBWrongs = mbpWrongs & commit_br_mask
1627  val mbpJWrongs = mbpWrongs & commit_jal_mask
1628  val mbpIWrongs = mbpWrongs & commit_jalr_mask
1629  val mbpCWrongs = mbpWrongs & commit_call_mask
1630  val mbpRWrongs = mbpWrongs & commit_ret_mask
1631
1632  val commit_pred_stage = RegNext(pred_stage(commPtr.value))
1633
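  // Break a result mask down by the BPU stage that produced the committed prediction,
  // yielding one named counter per stage.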
1634  def pred_stage_map(src: UInt, name: String) =
1635    (0 until numBpStages).map(i =>
1636      f"${name}_stage_${i + 1}" -> PopCount(src.asBools.map(_ && commit_pred_stage === BP_STAGES(i)))
1637    ).foldLeft(Map[String, UInt]())(_ + _)
1638
1639  val mispred_stage_map      = pred_stage_map(mbpWrongs, "mispredict")
1640  val br_mispred_stage_map   = pred_stage_map(mbpBWrongs, "br_mispredict")
1641  val jalr_mispred_stage_map = pred_stage_map(mbpIWrongs, "jalr_mispredict")
1642  val correct_stage_map      = pred_stage_map(mbpRights, "correct")
1643  val br_correct_stage_map   = pred_stage_map(mbpBRights, "br_correct")
1644  val jalr_correct_stage_map = pred_stage_map(mbpIRights, "jalr_correct")
1645
1646  val update_valid = io.toBpu.update.valid
1647  def u(cond: Bool) = update_valid && cond
1648  val ftb_false_hit = u(update.false_hit)
1649  // assert(!ftb_false_hit)
1650  val ftb_hit = u(commit_hit === h_hit)
1651
1652  val ftb_new_entry                = u(ftbEntryGen.is_init_entry)
1653  val ftb_new_entry_only_br        = ftb_new_entry && !update_ftb_entry.jmpValid
1654  val ftb_new_entry_only_jmp       = ftb_new_entry && !update_ftb_entry.brValids(0)
1655  val ftb_new_entry_has_br_and_jmp = ftb_new_entry && update_ftb_entry.brValids(0) && update_ftb_entry.jmpValid
1656
1657  val ftb_old_entry = u(ftbEntryGen.is_old_entry)
1658
1659  val ftb_modified_entry =
1660    u(ftbEntryGen.is_new_br || ftbEntryGen.is_jalr_target_modified || ftbEntryGen.is_strong_bias_modified)
1661  val ftb_modified_entry_new_br               = u(ftbEntryGen.is_new_br)
1662  val ftb_modified_entry_ifu_redirected       = u(ifuRedirected(do_commit_ptr.value))
1663  val ftb_modified_entry_jalr_target_modified = u(ftbEntryGen.is_jalr_target_modified)
1664  val ftb_modified_entry_br_full              = ftb_modified_entry && ftbEntryGen.is_br_full
1665  val ftb_modified_entry_strong_bias          = ftb_modified_entry && ftbEntryGen.is_strong_bias_modified
1666
1667  def getFtbEntryLen(pc: UInt, entry: FTBEntry) = (entry.getFallThrough(pc) - pc) >> instOffsetBits
1668  val gen_ftb_entry_len = getFtbEntryLen(update.pc, ftbEntryGen.new_entry)
1669  XSPerfHistogram("ftb_init_entry_len", gen_ftb_entry_len, ftb_new_entry, 0, PredictWidth + 1, 1)
1670  XSPerfHistogram("ftb_modified_entry_len", gen_ftb_entry_len, ftb_modified_entry, 0, PredictWidth + 1, 1)
1671  val s3_ftb_entry_len = getFtbEntryLen(from_bpu.s3.pc(0), from_bpu.last_stage_ftb_entry)
1672  XSPerfHistogram("s3_ftb_entry_len", s3_ftb_entry_len, from_bpu.s3.valid(0), 0, PredictWidth + 1, 1)
1673
1674  XSPerfHistogram("ftq_has_entry", validEntries, true.B, 0, FtqSize + 1, 1)
1675
1676  val perfCountsMap = Map(
1677    "BpInstr"                        -> PopCount(mbpInstrs),
1678    "BpBInstr"                       -> PopCount(mbpBRights | mbpBWrongs),
1679    "BpRight"                        -> PopCount(mbpRights),
1680    "BpWrong"                        -> PopCount(mbpWrongs),
1681    "BpBRight"                       -> PopCount(mbpBRights),
1682    "BpBWrong"                       -> PopCount(mbpBWrongs),
1683    "BpJRight"                       -> PopCount(mbpJRights),
1684    "BpJWrong"                       -> PopCount(mbpJWrongs),
1685    "BpIRight"                       -> PopCount(mbpIRights),
1686    "BpIWrong"                       -> PopCount(mbpIWrongs),
1687    "BpCRight"                       -> PopCount(mbpCRights),
1688    "BpCWrong"                       -> PopCount(mbpCWrongs),
1689    "BpRRight"                       -> PopCount(mbpRRights),
1690    "BpRWrong"                       -> PopCount(mbpRWrongs),
1691    "ftb_false_hit"                  -> PopCount(ftb_false_hit),
1692    "ftb_hit"                        -> PopCount(ftb_hit),
1693    "ftb_new_entry"                  -> PopCount(ftb_new_entry),
1694    "ftb_new_entry_only_br"          -> PopCount(ftb_new_entry_only_br),
1695    "ftb_new_entry_only_jmp"         -> PopCount(ftb_new_entry_only_jmp),
1696    "ftb_new_entry_has_br_and_jmp"   -> PopCount(ftb_new_entry_has_br_and_jmp),
1697    "ftb_old_entry"                  -> PopCount(ftb_old_entry),
1698    "ftb_modified_entry"             -> PopCount(ftb_modified_entry),
1699    "ftb_modified_entry_new_br"      -> PopCount(ftb_modified_entry_new_br),
1700    "ftb_jalr_target_modified"       -> PopCount(ftb_modified_entry_jalr_target_modified),
1701    "ftb_modified_entry_br_full"     -> PopCount(ftb_modified_entry_br_full),
1702    "ftb_modified_entry_strong_bias" -> PopCount(ftb_modified_entry_strong_bias)
1703  ) ++ mispred_stage_map ++ br_mispred_stage_map ++ jalr_mispred_stage_map ++
1704    correct_stage_map ++ br_correct_stage_map ++ jalr_correct_stage_map
1705
1706  for ((key, value) <- perfCountsMap) {
1707    XSPerfAccumulate(key, value)
1708  }
1709
1710  // --------------------------- Debug --------------------------------
1711  // XSDebug(enq_fire, p"enq! " + io.fromBpu.resp.bits.toPrintable)
1712  XSDebug(io.toIfu.req.fire, p"fire to ifu " + io.toIfu.req.bits.toPrintable)
1713  XSDebug(do_commit, p"deq! [ptr] $do_commit_ptr\n")
1714  XSDebug(true.B, p"[bpuPtr] $bpuPtr, [ifuPtr] $ifuPtr, [ifuWbPtr] $ifuWbPtr [commPtr] $commPtr\n")
1715  XSDebug(
1716    true.B,
1717    p"[in] v:${io.fromBpu.resp.valid} r:${io.fromBpu.resp.ready} " +
1718      p"[out] v:${io.toIfu.req.valid} r:${io.toIfu.req.ready}\n"
1719  )
1720  XSDebug(do_commit, p"[deq info] cfiIndex: $commit_cfi, $commit_pc_bundle, target: ${Hexadecimal(commit_target)}\n")
1721
1722  //   def ubtbCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1723  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1724  //       case (((valid, pd), ans), taken) =>
1725  //       Mux(valid && pd.isBr,
1726  //         isWrong ^ Mux(ans.hit.asBool,
1727  //           Mux(ans.taken.asBool, taken && ans.target === commitEntry.target,
1728  //           !taken),
1729  //         !taken),
1730  //       false.B)
1731  //     }
1732  //   }
1733
1734  //   def btbCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1735  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1736  //       case (((valid, pd), ans), taken) =>
1737  //       Mux(valid && pd.isBr,
1738  //         isWrong ^ Mux(ans.hit.asBool,
1739  //           Mux(ans.taken.asBool, taken && ans.target === commitEntry.target,
1740  //           !taken),
1741  //         !taken),
1742  //       false.B)
1743  //     }
1744  //   }
1745
1746  //   def tageCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1747  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1748  //       case (((valid, pd), ans), taken) =>
1749  //       Mux(valid && pd.isBr,
1750  //         isWrong ^ (ans.taken.asBool === taken),
1751  //       false.B)
1752  //     }
1753  //   }
1754
1755  //   def loopCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1756  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1757  //       case (((valid, pd), ans), taken) =>
1758  //       Mux(valid && (pd.isBr) && ans.hit.asBool,
1759  //         isWrong ^ (!taken),
1760  //           false.B)
1761  //     }
1762  //   }
1763
1764  //   def rasCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1765  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1766  //       case (((valid, pd), ans), taken) =>
1767  //       Mux(valid && pd.isRet.asBool /*&& taken*/ && ans.hit.asBool,
1768  //         isWrong ^ (ans.target === commitEntry.target),
1769  //           false.B)
1770  //     }
1771  //   }
1772
1773  //   val ubtbRights = ubtbCheck(commitEntry, commitEntry.metas.map(_.ubtbAns), false.B)
1774  //   val ubtbWrongs = ubtbCheck(commitEntry, commitEntry.metas.map(_.ubtbAns), true.B)
1775  //   // btb and ubtb pred jal and jalr as well
1776  //   val btbRights = btbCheck(commitEntry, commitEntry.metas.map(_.btbAns), false.B)
1777  //   val btbWrongs = btbCheck(commitEntry, commitEntry.metas.map(_.btbAns), true.B)
1778  //   val tageRights = tageCheck(commitEntry, commitEntry.metas.map(_.tageAns), false.B)
1779  //   val tageWrongs = tageCheck(commitEntry, commitEntry.metas.map(_.tageAns), true.B)
1780
1781  //   val loopRights = loopCheck(commitEntry, commitEntry.metas.map(_.loopAns), false.B)
1782  //   val loopWrongs = loopCheck(commitEntry, commitEntry.metas.map(_.loopAns), true.B)
1783
1784  //   val rasRights = rasCheck(commitEntry, commitEntry.metas.map(_.rasAns), false.B)
1785  //   val rasWrongs = rasCheck(commitEntry, commitEntry.metas.map(_.rasAns), true.B)
1786
1787  val perfEvents = Seq(
1788    ("bpu_s2_redirect        ", bpu_s2_redirect),
1789    ("bpu_s3_redirect        ", bpu_s3_redirect),
1790    ("bpu_to_ftq_stall       ", enq.valid && ~enq.ready),
1791    ("mispredictRedirect     ", perf_redirect.valid && RedirectLevel.flushAfter === perf_redirect.bits.level),
1792    ("replayRedirect         ", perf_redirect.valid && RedirectLevel.flushItself(perf_redirect.bits.level)),
1793    ("predecodeRedirect      ", fromIfuRedirect.valid),
1794    ("to_ifu_bubble          ", io.toIfu.req.ready && !io.toIfu.req.valid),
1795    ("from_bpu_real_bubble   ", !enq.valid && enq.ready && allowBpuIn),
1796    ("BpInstr                ", PopCount(mbpInstrs)),
1797    ("BpBInstr               ", PopCount(mbpBRights | mbpBWrongs)),
1798    ("BpRight                ", PopCount(mbpRights)),
1799    ("BpWrong                ", PopCount(mbpWrongs)),
1800    ("BpBRight               ", PopCount(mbpBRights)),
1801    ("BpBWrong               ", PopCount(mbpBWrongs)),
1802    ("BpJRight               ", PopCount(mbpJRights)),
1803    ("BpJWrong               ", PopCount(mbpJWrongs)),
1804    ("BpIRight               ", PopCount(mbpIRights)),
1805    ("BpIWrong               ", PopCount(mbpIWrongs)),
1806    ("BpCRight               ", PopCount(mbpCRights)),
1807    ("BpCWrong               ", PopCount(mbpCWrongs)),
1808    ("BpRRight               ", PopCount(mbpRRights)),
1809    ("BpRWrong               ", PopCount(mbpRWrongs)),
1810    ("ftb_false_hit          ", PopCount(ftb_false_hit)),
1811    ("ftb_hit                ", PopCount(ftb_hit))
1812  )
1813  generatePerfEvent()
1814}
1815