xref: /XiangShan/src/main/scala/xiangshan/frontend/NewFtq.scala (revision 491c16ade93d4956fec6dde187943d72bb010bc4)
1/***************************************************************************************
2* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
3* Copyright (c) 2020-2021 Peng Cheng Laboratory
4*
5* XiangShan is licensed under Mulan PSL v2.
6* You can use this software according to the terms and conditions of the Mulan PSL v2.
7* You may obtain a copy of Mulan PSL v2 at:
8*          http://license.coscl.org.cn/MulanPSL2
9*
10* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13*
14* See the Mulan PSL v2 for more details.
15*
16*
17* Acknowledgement
18*
19* This implementation is inspired by several key papers:
20* [1] Glenn Reinman, Todd Austin, and Brad Calder. "[A scalable front-end architecture for fast instruction delivery.]
21* (https://doi.org/10.1109/ISCA.1999.765954)" 26th International Symposium on Computer Architecture (ISCA). 1999.
22*
23***************************************************************************************/
24
25package xiangshan.frontend
26
27import chisel3._
28import chisel3.util._
29import org.chipsalliance.cde.config.Parameters
30import utility._
31import utility.ChiselDB
32import utils._
33import xiangshan._
34import xiangshan.backend.CtrlToFtqIO
35import xiangshan.backend.decode.ImmUnion
36import xiangshan.frontend.icache._
37
38class FtqDebugBundle extends Bundle {
39  val pc        = UInt(39.W)
40  val target    = UInt(39.W)
41  val isBr      = Bool()
42  val isJmp     = Bool()
43  val isCall    = Bool()
44  val isRet     = Bool()
45  val misPred   = Bool()
46  val isTaken   = Bool()
47  val predStage = UInt(2.W)
48}
49
50class FtqPtr(entries: Int) extends CircularQueuePtr[FtqPtr](
51      entries
52    ) {
53  def this()(implicit p: Parameters) = this(p(XSCoreParamsKey).FtqSize)
54}
55
56object FtqPtr {
57  def apply(f: Bool, v: UInt)(implicit p: Parameters): FtqPtr = {
58    val ptr = Wire(new FtqPtr)
59    ptr.flag  := f
60    ptr.value := v
61    ptr
62  }
63  def inverse(ptr: FtqPtr)(implicit p: Parameters): FtqPtr =
64    apply(!ptr.flag, ptr.value)
65}
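// FtqPtr is a CircularQueuePtr: a {flag, value} pair whose flag flips each time the value wraps
// around the queue size. A minimal sketch, assuming the default FtqSize = 64 (hypothetical values):
//   val p = FtqPtr(false.B, 63.U)
//   val q = p + 1.U            // wraps: q.flag === true.B, q.value === 0.U
//   isAfter(q, p)              // true.B: q is logically newer even though q.value < p.value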
66
67class FtqNRSRAM[T <: Data](gen: T, numRead: Int)(implicit p: Parameters) extends XSModule {
68
69  val io = IO(new Bundle() {
70    val raddr = Input(Vec(numRead, UInt(log2Up(FtqSize).W)))
71    val ren   = Input(Vec(numRead, Bool()))
72    val rdata = Output(Vec(numRead, gen))
73    val waddr = Input(UInt(log2Up(FtqSize).W))
74    val wen   = Input(Bool())
75    val wdata = Input(gen)
76  })
77
78  for (i <- 0 until numRead) {
79    val sram = Module(new SRAMTemplate(gen, FtqSize, withClockGate = true))
80    sram.io.r.req.valid       := io.ren(i)
81    sram.io.r.req.bits.setIdx := io.raddr(i)
82    io.rdata(i)               := sram.io.r.resp.data(0)
83    sram.io.w.req.valid       := io.wen
84    sram.io.w.req.bits.setIdx := io.waddr
85    sram.io.w.req.bits.data   := VecInit(io.wdata)
86  }
87
88}
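// The numRead read ports are obtained by duplicating the SRAM once per port; every copy receives
// the same write, so reads are fully independent at the cost of numRead copies of the storage.
// A minimal usage sketch (hypothetical wiring; read data is typically valid one cycle after the request):
//   val meta_sram = Module(new FtqNRSRAM(new Ftq_1R_SRAMEntry, numRead = 1))
//   meta_sram.io.ren(0)   := readEnable
//   meta_sram.io.raddr(0) := readPtr.value
//   val meta_rdata        = meta_sram.io.rdata(0)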
89
90class Ftq_RF_Components(implicit p: Parameters) extends XSBundle with BPUUtils {
91  val startAddr     = UInt(VAddrBits.W)
92  val nextLineAddr  = UInt(VAddrBits.W)
93  val isNextMask    = Vec(PredictWidth, Bool())
94  val fallThruError = Bool()
95  // val carry = Bool()
96  def getPc(offset: UInt) = {
97    def getHigher(pc: UInt) = pc(VAddrBits - 1, log2Ceil(PredictWidth) + instOffsetBits + 1)
98    def getOffset(pc: UInt) = pc(log2Ceil(PredictWidth) + instOffsetBits, instOffsetBits)
99    Cat(
100      getHigher(Mux(isNextMask(offset) && startAddr(log2Ceil(PredictWidth) + instOffsetBits), nextLineAddr, startAddr)),
101      getOffset(startAddr) + offset,
102      0.U(instOffsetBits.W)
103    )
104  }
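  // Worked example, assuming the default PredictWidth = 16 and instOffsetBits = 1 (a 32 B fetch
  // block within a 64 B cache line): getPc(off) rebuilds the PC of slot `off` as
  //   Cat(high, startAddr(5, 1) + off, 0.U(1.W))
  // where `high` is startAddr(VAddrBits-1, 6), replaced by nextLineAddr's upper bits when slot
  // `off` spills into the next 32 B half (isNextMask(off)) while startAddr already sits in the
  // upper half of the line (bit 5 set), i.e. when the slot crosses the cache-line boundary.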
105  def fromBranchPrediction(resp: BranchPredictionBundle) = {
106    def carryPos(addr: UInt) = addr(instOffsetBits + log2Ceil(PredictWidth) + 1)
107    this.startAddr    := resp.pc(3)
108    this.nextLineAddr := resp.pc(3) + (FetchWidth * 4 * 2).U // may be broken on other configs
109    this.isNextMask := VecInit((0 until PredictWidth).map(i =>
110      (resp.pc(3)(log2Ceil(PredictWidth), 1) +& i.U)(log2Ceil(PredictWidth)).asBool
111    ))
112    this.fallThruError := resp.fallThruError(3)
113    this
114  }
115  override def toPrintable: Printable =
116    p"startAddr:${Hexadecimal(startAddr)}"
117}
118
119class Ftq_pd_Entry(implicit p: Parameters) extends XSBundle {
120  val brMask    = Vec(PredictWidth, Bool())
121  val jmpInfo   = ValidUndirectioned(Vec(3, Bool()))
122  val jmpOffset = UInt(log2Ceil(PredictWidth).W)
123  val jalTarget = UInt(VAddrBits.W)
124  val rvcMask   = Vec(PredictWidth, Bool())
125  def hasJal    = jmpInfo.valid && !jmpInfo.bits(0)
126  def hasJalr   = jmpInfo.valid && jmpInfo.bits(0)
127  def hasCall   = jmpInfo.valid && jmpInfo.bits(1)
128  def hasRet    = jmpInfo.valid && jmpInfo.bits(2)
129
130  def fromPdWb(pdWb: PredecodeWritebackBundle) = {
131    val pds = pdWb.pd
132    this.brMask        := VecInit(pds.map(pd => pd.isBr && pd.valid))
133    this.jmpInfo.valid := VecInit(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid)).asUInt.orR
134    this.jmpInfo.bits := ParallelPriorityMux(
135      pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid),
136      pds.map(pd => VecInit(pd.isJalr, pd.isCall, pd.isRet))
137    )
138    this.jmpOffset := ParallelPriorityEncoder(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid))
139    this.rvcMask   := VecInit(pds.map(pd => pd.isRVC))
140    this.jalTarget := pdWb.jalTarget
141  }
142
143  def toPd(offset: UInt) = {
144    require(offset.getWidth == log2Ceil(PredictWidth))
145    val pd = Wire(new PreDecodeInfo)
146    pd.valid := true.B
147    pd.isRVC := rvcMask(offset)
148    val isBr   = brMask(offset)
149    val isJalr = offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(0)
150    pd.brType := Cat(offset === jmpOffset && jmpInfo.valid, isJalr || isBr)
151    pd.isCall := offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(1)
152    pd.isRet  := offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(2)
153    pd
154  }
155}
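// jmpInfo packs the single jump of a fetch block as {valid, bits(0) = isJalr, bits(1) = isCall,
// bits(2) = isRet}; hasJal/hasJalr/hasCall/hasRet above simply decode these bits. A minimal
// round-trip sketch (hypothetical values):
//   val pdEntry = Wire(new Ftq_pd_Entry)
//   pdEntry.fromPdWb(pdWb.bits)                 // pack predecode info for the whole block
//   val pd = pdEntry.toPd(pdEntry.jmpOffset)    // recover PreDecodeInfo at the jump offset
//   // pd.isCall === pdEntry.hasCall, pd.isRet === pdEntry.hasRet, etc.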
156
157class PrefetchPtrDB(implicit p: Parameters) extends Bundle {
158  val fromFtqPtr = UInt(log2Up(p(XSCoreParamsKey).FtqSize).W)
159  val fromIfuPtr = UInt(log2Up(p(XSCoreParamsKey).FtqSize).W)
160}
161
162class Ftq_Redirect_SRAMEntry(implicit p: Parameters) extends SpeculativeInfo {
163  val sc_disagree = if (!env.FPGAPlatform) Some(Vec(numBr, Bool())) else None
164}
165
166class Ftq_1R_SRAMEntry(implicit p: Parameters) extends XSBundle with HasBPUConst {
167  val meta      = UInt(MaxMetaLength.W)
168  val ftb_entry = new FTBEntry
169}
170
171class Ftq_Pred_Info(implicit p: Parameters) extends XSBundle {
172  val target   = UInt(VAddrBits.W)
173  val cfiIndex = ValidUndirectioned(UInt(log2Ceil(PredictWidth).W))
174}
175
176class FtqRead[T <: Data](private val gen: T)(implicit p: Parameters) extends XSBundle {
177  val valid  = Output(Bool())
178  val ptr    = Output(new FtqPtr)
179  val offset = Output(UInt(log2Ceil(PredictWidth).W))
180  val data   = Input(gen)
181  def apply(valid: Bool, ptr: FtqPtr, offset: UInt) = {
182    this.valid  := valid
183    this.ptr    := ptr
184    this.offset := offset
185    this.data
186  }
187}
188
189class FtqToBpuIO(implicit p: Parameters) extends XSBundle {
190  val redirect       = Valid(new BranchPredictionRedirect)
191  val update         = Valid(new BranchPredictionUpdate)
192  val enq_ptr        = Output(new FtqPtr)
193  val redirctFromIFU = Output(Bool())
194}
195
196class BpuFlushInfo(implicit p: Parameters) extends XSBundle with HasCircularQueuePtrHelper {
197  // when the ifu pipeline is not stalled,
198  // a packet from bpu s3 can have reached f1 at most
199  val s2 = Valid(new FtqPtr)
200  val s3 = Valid(new FtqPtr)
201  def shouldFlushBy(src: Valid[FtqPtr], idx_to_flush: FtqPtr) =
202    src.valid && !isAfter(src.bits, idx_to_flush)
203  def shouldFlushByStage2(idx: FtqPtr) = shouldFlushBy(s2, idx)
204  def shouldFlushByStage3(idx: FtqPtr) = shouldFlushBy(s3, idx)
205}
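// shouldFlushBy(src, idx) is src.valid && !isAfter(src.bits, idx): an s2/s3 redirect flushes every
// in-flight request whose ftqIdx is at or after the redirected entry. Illustrative example with
// hypothetical pointer values: a request carrying ftqIdx = 5 is flushed by an s3 redirect at
// ftqPtr = 4 or 5, but not by one at ftqPtr = 6.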
206
207class FtqToIfuIO(implicit p: Parameters) extends XSBundle {
208  val req              = Decoupled(new FetchRequestBundle)
209  val redirect         = Valid(new BranchPredictionRedirect)
210  val topdown_redirect = Valid(new BranchPredictionRedirect)
211  val flushFromBpu     = new BpuFlushInfo
212}
213
214class FtqToICacheIO(implicit p: Parameters) extends XSBundle {
215  // NOTE: req.bits must be prepared in cycle T,
216  // while req.valid is asserted in cycle T + 1
217  val req = Decoupled(new FtqToICacheRequestBundle)
218}
219
220class FtqToPrefetchIO(implicit p: Parameters) extends XSBundle {
221  val req              = Decoupled(new FtqICacheInfo)
222  val flushFromBpu     = new BpuFlushInfo
223  val backendException = UInt(ExceptionType.width.W)
224}
225
226trait HasBackendRedirectInfo extends HasXSParameter {
227  def isLoadReplay(r: Valid[Redirect]) = r.bits.flushItself()
228}
229
230class FtqToCtrlIO(implicit p: Parameters) extends XSBundle with HasBackendRedirectInfo {
231  // write to backend pc mem
232  val pc_mem_wen   = Output(Bool())
233  val pc_mem_waddr = Output(UInt(log2Ceil(FtqSize).W))
234  val pc_mem_wdata = Output(new Ftq_RF_Components)
235  // newest target
236  val newest_entry_en     = Output(Bool())
237  val newest_entry_target = Output(UInt(VAddrBits.W))
238  val newest_entry_ptr    = Output(new FtqPtr)
239}
240
241class FTBEntryGen(implicit p: Parameters) extends XSModule with HasBackendRedirectInfo with HasBPUParameter {
242  val io = IO(new Bundle {
243    val start_addr     = Input(UInt(VAddrBits.W))
244    val old_entry      = Input(new FTBEntry)
245    val pd             = Input(new Ftq_pd_Entry)
246    val cfiIndex       = Flipped(Valid(UInt(log2Ceil(PredictWidth).W)))
247    val target         = Input(UInt(VAddrBits.W))
248    val hit            = Input(Bool())
249    val mispredict_vec = Input(Vec(PredictWidth, Bool()))
250
251    val new_entry         = Output(new FTBEntry)
252    val new_br_insert_pos = Output(Vec(numBr, Bool()))
253    val taken_mask        = Output(Vec(numBr, Bool()))
254    val jmp_taken         = Output(Bool())
255    val mispred_mask      = Output(Vec(numBr + 1, Bool()))
256
257    // for perf counters
258    val is_init_entry           = Output(Bool())
259    val is_old_entry            = Output(Bool())
260    val is_new_br               = Output(Bool())
261    val is_jalr_target_modified = Output(Bool())
262    val is_strong_bias_modified = Output(Bool())
263    val is_br_full              = Output(Bool())
264  })
265
266  // no mispredictions detected at predecode
267  val hit = io.hit
268  val pd  = io.pd
269
270  val init_entry = WireInit(0.U.asTypeOf(new FTBEntry))
271
272  val cfi_is_br       = pd.brMask(io.cfiIndex.bits) && io.cfiIndex.valid
273  val entry_has_jmp   = pd.jmpInfo.valid
274  val new_jmp_is_jal  = entry_has_jmp && !pd.jmpInfo.bits(0) && io.cfiIndex.valid
275  val new_jmp_is_jalr = entry_has_jmp && pd.jmpInfo.bits(0) && io.cfiIndex.valid
276  val new_jmp_is_call = entry_has_jmp && pd.jmpInfo.bits(1) && io.cfiIndex.valid
277  val new_jmp_is_ret  = entry_has_jmp && pd.jmpInfo.bits(2) && io.cfiIndex.valid
278  val last_jmp_rvi    = entry_has_jmp && pd.jmpOffset === (PredictWidth - 1).U && !pd.rvcMask.last
279  // val last_br_rvi = cfi_is_br && io.cfiIndex.bits === (PredictWidth-1).U && !pd.rvcMask.last
280
281  val cfi_is_jal  = io.cfiIndex.bits === pd.jmpOffset && new_jmp_is_jal
282  val cfi_is_jalr = io.cfiIndex.bits === pd.jmpOffset && new_jmp_is_jalr
283
284  def carryPos = log2Ceil(PredictWidth) + instOffsetBits
285  def getLower(pc: UInt) = pc(carryPos - 1, instOffsetBits)
286  // if not hit, establish a new entry
287  init_entry.valid := true.B
288  // tag is left for ftb to assign
289
290  // case br
291  val init_br_slot = init_entry.getSlotForBr(0)
292  when(cfi_is_br) {
293    init_br_slot.valid  := true.B
294    init_br_slot.offset := io.cfiIndex.bits
295    init_br_slot.setLowerStatByTarget(io.start_addr, io.target, numBr == 1)
296    init_entry.strong_bias(0) := true.B // set to strong bias on init
297  }
298
299  // case jmp
300  when(entry_has_jmp) {
301    init_entry.tailSlot.offset := pd.jmpOffset
302    init_entry.tailSlot.valid  := new_jmp_is_jal || new_jmp_is_jalr
303    init_entry.tailSlot.setLowerStatByTarget(io.start_addr, Mux(cfi_is_jalr, io.target, pd.jalTarget), isShare = false)
304    init_entry.strong_bias(numBr - 1) := new_jmp_is_jalr // set strong bias for the jalr on init
305  }
306
307  val jmpPft = getLower(io.start_addr) +& pd.jmpOffset +& Mux(pd.rvcMask(pd.jmpOffset), 1.U, 2.U)
308  init_entry.pftAddr := Mux(entry_has_jmp && !last_jmp_rvi, jmpPft, getLower(io.start_addr))
309  init_entry.carry   := Mux(entry_has_jmp && !last_jmp_rvi, jmpPft(carryPos - instOffsetBits), true.B)
310
311  require(
312    isPow2(PredictWidth),
313    "If PredictWidth does not satisfy the power of 2," +
314      "pftAddr := getLower(io.start_addr) and carry := true.B  not working!!"
315  )
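
  // Worked example, assuming PredictWidth = 16 and instOffsetBits = 1: for a 4-byte jal at
  // jmpOffset = 14 in a block whose startAddr is 32 B aligned (getLower = 0),
  //   jmpPft = 0 +& 14 +& 2 = 16  =>  pftAddr = 0, carry = 1
  // i.e. the fall-through (partial fall-through address) is the start of the next fetch block.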
316
317  init_entry.isJalr := new_jmp_is_jalr
318  init_entry.isCall := new_jmp_is_call
319  init_entry.isRet  := new_jmp_is_ret
320  // a 4-byte (RVI) jmp in the last slot means the fall-through address points into the middle of that instruction
321  init_entry.last_may_be_rvi_call := pd.jmpOffset === (PredictWidth - 1).U && !pd.rvcMask(pd.jmpOffset)
322
323  // if hit, check whether a new cfi(only br is possible) is detected
324  val oe              = io.old_entry
325  val br_recorded_vec = oe.getBrRecordedVec(io.cfiIndex.bits)
326  val br_recorded     = br_recorded_vec.asUInt.orR
327  val is_new_br       = cfi_is_br && !br_recorded
328  val new_br_offset   = io.cfiIndex.bits
329  // vec(i) means new br will be inserted BEFORE old br(i)
330  val allBrSlotsVec = oe.allSlotsForBr
331  val new_br_insert_onehot = VecInit((0 until numBr).map {
332    i =>
333      i match {
334        case 0 =>
335          !allBrSlotsVec(0).valid || new_br_offset < allBrSlotsVec(0).offset
336        case idx =>
337          allBrSlotsVec(idx - 1).valid && new_br_offset > allBrSlotsVec(idx - 1).offset &&
338          (!allBrSlotsVec(idx).valid || new_br_offset < allBrSlotsVec(idx).offset)
339      }
340  })
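
  // Illustrative case, assuming the default numBr = 2: if the old entry records one branch at
  // offset 6, a newly discovered taken branch at offset 3 yields new_br_insert_onehot = "b01"
  // (inserted into slot 0, the old branch is shifted toward the tail below), while a new branch
  // at offset 9 yields "b10" (inserted into the free tail slot).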
341
342  val old_entry_modified = WireInit(io.old_entry)
343  for (i <- 0 until numBr) {
344    val slot = old_entry_modified.allSlotsForBr(i)
345    when(new_br_insert_onehot(i)) {
346      slot.valid  := true.B
347      slot.offset := new_br_offset
348      slot.setLowerStatByTarget(io.start_addr, io.target, i == numBr - 1)
349      old_entry_modified.strong_bias(i) := true.B
350    }.elsewhen(new_br_offset > oe.allSlotsForBr(i).offset) {
351      old_entry_modified.strong_bias(i) := false.B
352      // all other fields remain unchanged
353    }.otherwise {
354      // case i == 0, remain unchanged
355      if (i != 0) {
356        val noNeedToMoveFromFormerSlot = (i == numBr - 1).B && !oe.brSlots.last.valid
357        when(!noNeedToMoveFromFormerSlot) {
358          slot.fromAnotherSlot(oe.allSlotsForBr(i - 1))
359          old_entry_modified.strong_bias(i) := oe.strong_bias(i)
360        }
361      }
362    }
363  }
364
365  // two circumstances:
366  // 1. oe: | br | j  |, new br should be in front of j, thus addr of j should be new pft
367  // 2. oe: | br | br |, new br could be anywhere between, thus new pft is the addr of either
368  //        the previous last br or the new br
369  val may_have_to_replace = oe.noEmptySlotForNewBr
370  val pft_need_to_change  = is_new_br && may_have_to_replace
371  // it should either be the given last br or the new br
372  when(pft_need_to_change) {
373    val new_pft_offset =
374      Mux(!new_br_insert_onehot.asUInt.orR, new_br_offset, oe.allSlotsForBr.last.offset)
375
376    // set jmp to invalid
377    old_entry_modified.pftAddr              := getLower(io.start_addr) + new_pft_offset
378    old_entry_modified.carry                := (getLower(io.start_addr) +& new_pft_offset).head(1).asBool
379    old_entry_modified.last_may_be_rvi_call := false.B
380    old_entry_modified.isCall               := false.B
381    old_entry_modified.isRet                := false.B
382    old_entry_modified.isJalr               := false.B
383  }
384
385  val old_entry_jmp_target_modified = WireInit(oe)
386  val old_target      = oe.tailSlot.getTarget(io.start_addr) // may be wrong because we store only 20 lowest bits
387  val old_tail_is_jmp = !oe.tailSlot.sharing
388  val jalr_target_modified = cfi_is_jalr && (old_target =/= io.target) && old_tail_is_jmp // TODO: pass full jalr target
389  when(jalr_target_modified) {
390    old_entry_jmp_target_modified.setByJmpTarget(io.start_addr, io.target)
391    old_entry_jmp_target_modified.strong_bias := 0.U.asTypeOf(Vec(numBr, Bool()))
392  }
393
394  val old_entry_strong_bias    = WireInit(oe)
395  val strong_bias_modified_vec = Wire(Vec(numBr, Bool())) // whether modified or not
396  for (i <- 0 until numBr) {
397    when(br_recorded_vec(0)) {
398      old_entry_strong_bias.strong_bias(0) :=
399        oe.strong_bias(0) && io.cfiIndex.valid && oe.brValids(0) && io.cfiIndex.bits === oe.brOffset(0)
400    }.elsewhen(br_recorded_vec(numBr - 1)) {
401      old_entry_strong_bias.strong_bias(0) := false.B
402      old_entry_strong_bias.strong_bias(numBr - 1) :=
403        oe.strong_bias(numBr - 1) && io.cfiIndex.valid && oe.brValids(numBr - 1) && io.cfiIndex.bits === oe.brOffset(
404          numBr - 1
405        )
406    }
407    strong_bias_modified_vec(i) := oe.strong_bias(i) && oe.brValids(i) && !old_entry_strong_bias.strong_bias(i)
408  }
409  val strong_bias_modified = strong_bias_modified_vec.reduce(_ || _)
410
411  val derived_from_old_entry =
412    Mux(is_new_br, old_entry_modified, Mux(jalr_target_modified, old_entry_jmp_target_modified, old_entry_strong_bias))
413
414  io.new_entry := Mux(!hit, init_entry, derived_from_old_entry)
415
416  io.new_br_insert_pos := new_br_insert_onehot
417  io.taken_mask := VecInit((io.new_entry.brOffset zip io.new_entry.brValids).map {
418    case (off, v) => io.cfiIndex.bits === off && io.cfiIndex.valid && v
419  })
420  io.jmp_taken := io.new_entry.jmpValid && io.new_entry.tailSlot.offset === io.cfiIndex.bits
421  for (i <- 0 until numBr) {
422    io.mispred_mask(i) := io.new_entry.brValids(i) && io.mispredict_vec(io.new_entry.brOffset(i))
423  }
424  io.mispred_mask.last := io.new_entry.jmpValid && io.mispredict_vec(pd.jmpOffset)
425
426  // for perf counters
427  io.is_init_entry           := !hit
428  io.is_old_entry            := hit && !is_new_br && !jalr_target_modified && !strong_bias_modified
429  io.is_new_br               := hit && is_new_br
430  io.is_jalr_target_modified := hit && jalr_target_modified
431  io.is_strong_bias_modified := hit && strong_bias_modified
432  io.is_br_full              := hit && is_new_br && may_have_to_replace
433}
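// Minimal usage sketch (hypothetical wiring; the actual instantiation is in Ftq's commit/update
// path further below): FTBEntryGen merges commit-time information into the entry that is sent
// back to the BPU as an FTB update.
//   val ftbEntryGen = Module(new FTBEntryGen)
//   ftbEntryGen.io.start_addr     := commit_pc_bundle.startAddr
//   ftbEntryGen.io.old_entry      := commit_ftb_entry
//   ftbEntryGen.io.pd             := commit_pd
//   ftbEntryGen.io.cfiIndex       := commit_cfi
//   ftbEntryGen.io.target         := commit_target
//   ftbEntryGen.io.hit            := commit_hit
//   ftbEntryGen.io.mispredict_vec := commit_mispredict
//   val updatedFtbEntry = ftbEntryGen.io.new_entry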
434
435class FtqPcMemWrapper(numOtherReads: Int)(implicit p: Parameters) extends XSModule with HasBackendRedirectInfo {
436  val io = IO(new Bundle {
437    val ifuPtr_w           = Input(new FtqPtr)
438    val ifuPtrPlus1_w      = Input(new FtqPtr)
439    val ifuPtrPlus2_w      = Input(new FtqPtr)
440    val pfPtr_w            = Input(new FtqPtr)
441    val pfPtrPlus1_w       = Input(new FtqPtr)
442    val commPtr_w          = Input(new FtqPtr)
443    val commPtrPlus1_w     = Input(new FtqPtr)
444    val ifuPtr_rdata       = Output(new Ftq_RF_Components)
445    val ifuPtrPlus1_rdata  = Output(new Ftq_RF_Components)
446    val ifuPtrPlus2_rdata  = Output(new Ftq_RF_Components)
447    val pfPtr_rdata        = Output(new Ftq_RF_Components)
448    val pfPtrPlus1_rdata   = Output(new Ftq_RF_Components)
449    val commPtr_rdata      = Output(new Ftq_RF_Components)
450    val commPtrPlus1_rdata = Output(new Ftq_RF_Components)
451
452    val wen   = Input(Bool())
453    val waddr = Input(UInt(log2Ceil(FtqSize).W))
454    val wdata = Input(new Ftq_RF_Components)
455  })
456
457  val num_pc_read = numOtherReads + 5
458  val mem         = Module(new SyncDataModuleTemplate(new Ftq_RF_Components, FtqSize, num_pc_read, 1, "FtqPC"))
459  mem.io.wen(0)   := io.wen
460  mem.io.waddr(0) := io.waddr
461  mem.io.wdata(0) := io.wdata
462
463  // read one cycle ahead for ftq local reads
464  val raddr_vec = VecInit(Seq(
465    io.ifuPtr_w.value,
466    io.ifuPtrPlus1_w.value,
467    io.ifuPtrPlus2_w.value,
468    io.pfPtr_w.value,
469    io.pfPtrPlus1_w.value,
470    io.commPtrPlus1_w.value,
471    io.commPtr_w.value
472  ))
473
474  mem.io.raddr := raddr_vec
475
476  io.ifuPtr_rdata       := mem.io.rdata.dropRight(6).last
477  io.ifuPtrPlus1_rdata  := mem.io.rdata.dropRight(5).last
478  io.ifuPtrPlus2_rdata  := mem.io.rdata.dropRight(4).last
479  io.pfPtr_rdata        := mem.io.rdata.dropRight(3).last
480  io.pfPtrPlus1_rdata   := mem.io.rdata.dropRight(2).last
481  io.commPtrPlus1_rdata := mem.io.rdata.dropRight(1).last
482  io.commPtr_rdata      := mem.io.rdata.last
483}
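// Read-port mapping: raddr_vec above is ordered
//   { ifuPtr, ifuPtr+1, ifuPtr+2, pfPtr, pfPtr+1, commPtr+1, commPtr },
// so `mem.io.rdata.dropRight(k).last` simply indexes that vector from the end:
// dropRight(6).last is the ifuPtr entry and rdata.last is the commPtr entry. The read addresses
// are presented one cycle early (the *_w "next" pointers), so rdata lines up with the registered
// pointers in the following cycle.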
484
485class Ftq(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelper
486    with HasBackendRedirectInfo with BPUUtils with HasBPUConst with HasPerfEvents
487    with HasICacheParameters {
488  val io = IO(new Bundle {
489    val fromBpu     = Flipped(new BpuToFtqIO)
490    val fromIfu     = Flipped(new IfuToFtqIO)
491    val fromBackend = Flipped(new CtrlToFtqIO)
492
493    val toBpu       = new FtqToBpuIO
494    val toIfu       = new FtqToIfuIO
495    val toICache    = new FtqToICacheIO
496    val toBackend   = new FtqToCtrlIO
497    val toPrefetch  = new FtqToPrefetchIO
498    val icacheFlush = Output(Bool())
499
500    val bpuInfo = new Bundle {
501      val bpRight = Output(UInt(XLEN.W))
502      val bpWrong = Output(UInt(XLEN.W))
503    }
504
505    val mmioCommitRead = Flipped(new mmioCommitRead)
506
507    // for perf
508    val ControlBTBMissBubble = Output(Bool())
509    val TAGEMissBubble       = Output(Bool())
510    val SCMissBubble         = Output(Bool())
511    val ITTAGEMissBubble     = Output(Bool())
512    val RASMissBubble        = Output(Bool())
513  })
514  io.bpuInfo := DontCare
515
516  val topdown_stage = RegInit(0.U.asTypeOf(new FrontendTopDownBundle))
517  // only driven by clock, not valid-ready
518  topdown_stage                  := io.fromBpu.resp.bits.topdown_info
519  io.toIfu.req.bits.topdown_info := topdown_stage
520
521  val ifuRedirected = RegInit(VecInit(Seq.fill(FtqSize)(false.B)))
522
523  // io.fromBackend.ftqIdxAhead: bju(BjuCnt) + ldReplay + exception
524  val ftqIdxAhead = VecInit(Seq.tabulate(FtqRedirectAheadNum)(i => io.fromBackend.ftqIdxAhead(i))) // only bju
525  val ftqIdxSelOH = io.fromBackend.ftqIdxSelOH.bits(FtqRedirectAheadNum - 1, 0)
526
527  val aheadValid         = ftqIdxAhead.map(_.valid).reduce(_ | _) && !io.fromBackend.redirect.valid
528  val realAhdValid       = io.fromBackend.redirect.valid && (ftqIdxSelOH > 0.U) && RegNext(aheadValid)
529  val backendRedirect    = Wire(Valid(new BranchPredictionRedirect))
530  val backendRedirectReg = Wire(Valid(new BranchPredictionRedirect))
531  backendRedirectReg.valid := RegNext(Mux(realAhdValid, false.B, backendRedirect.valid))
532  backendRedirectReg.bits  := RegEnable(backendRedirect.bits, backendRedirect.valid)
533  val fromBackendRedirect = Wire(Valid(new BranchPredictionRedirect))
534  fromBackendRedirect := Mux(realAhdValid, backendRedirect, backendRedirectReg)
535
536  val stage2Flush  = backendRedirect.valid
537  val backendFlush = stage2Flush || RegNext(stage2Flush)
538  val ifuFlush     = Wire(Bool())
539
540  val flush = stage2Flush || RegNext(stage2Flush)
541
542  val allowBpuIn, allowToIfu = WireInit(false.B)
543  val flushToIfu             = !allowToIfu
544  allowBpuIn := !ifuFlush && !backendRedirect.valid && !backendRedirectReg.valid
545  allowToIfu := !ifuFlush && !backendRedirect.valid && !backendRedirectReg.valid
546
547  def copyNum                                              = 5
548  val bpuPtr, ifuPtr, pfPtr, ifuWbPtr, commPtr, robCommPtr = RegInit(FtqPtr(false.B, 0.U))
549  val ifuPtrPlus1                                          = RegInit(FtqPtr(false.B, 1.U))
550  val ifuPtrPlus2                                          = RegInit(FtqPtr(false.B, 2.U))
551  val pfPtrPlus1                                           = RegInit(FtqPtr(false.B, 1.U))
552  val commPtrPlus1                                         = RegInit(FtqPtr(false.B, 1.U))
553  val copied_ifu_ptr                                       = Seq.fill(copyNum)(RegInit(FtqPtr(false.B, 0.U)))
554  val copied_bpu_ptr                                       = Seq.fill(copyNum)(RegInit(FtqPtr(false.B, 0.U)))
555  require(FtqSize >= 4)
556  val ifuPtr_write       = WireInit(ifuPtr)
557  val ifuPtrPlus1_write  = WireInit(ifuPtrPlus1)
558  val ifuPtrPlus2_write  = WireInit(ifuPtrPlus2)
559  val pfPtr_write        = WireInit(pfPtr)
560  val pfPtrPlus1_write   = WireInit(pfPtrPlus1)
561  val ifuWbPtr_write     = WireInit(ifuWbPtr)
562  val commPtr_write      = WireInit(commPtr)
563  val commPtrPlus1_write = WireInit(commPtrPlus1)
564  val robCommPtr_write   = WireInit(robCommPtr)
565  ifuPtr       := ifuPtr_write
566  ifuPtrPlus1  := ifuPtrPlus1_write
567  ifuPtrPlus2  := ifuPtrPlus2_write
568  pfPtr        := pfPtr_write
569  pfPtrPlus1   := pfPtrPlus1_write
570  ifuWbPtr     := ifuWbPtr_write
571  commPtr      := commPtr_write
572  commPtrPlus1 := commPtrPlus1_write
573  copied_ifu_ptr.map { ptr =>
574    ptr := ifuPtr_write
575    dontTouch(ptr)
576  }
577  robCommPtr := robCommPtr_write
578  val validEntries = distanceBetween(bpuPtr, commPtr)
579  val canCommit    = Wire(Bool())
580
581  // Instruction page faults and instruction access faults are sent from the backend along with redirect requests.
582  // When an IPF or IAF is sent, backendPcFaultPtr points to the FTQ entry whose first instruction
583  // raises the IPF or IAF, which is ifuWbPtr_write (or ifuPtr_write).
584  // backendException can only be cleared after IFU has written back that FTQ entry, because this
585  // makes sure that IAF and IPF are correctly raised instead of being flushed by redirect requests.
586  val backendException  = RegInit(ExceptionType.none)
587  val backendPcFaultPtr = RegInit(FtqPtr(false.B, 0.U))
588  when(fromBackendRedirect.valid) {
589    backendException := ExceptionType.fromOH(
590      has_pf = fromBackendRedirect.bits.cfiUpdate.backendIPF,
591      has_gpf = fromBackendRedirect.bits.cfiUpdate.backendIGPF,
592      has_af = fromBackendRedirect.bits.cfiUpdate.backendIAF
593    )
594    when(
595      fromBackendRedirect.bits.cfiUpdate.backendIPF || fromBackendRedirect.bits.cfiUpdate.backendIGPF ||
596        fromBackendRedirect.bits.cfiUpdate.backendIAF
597    ) {
598      backendPcFaultPtr := ifuWbPtr_write
599    }
600  }.elsewhen(ifuWbPtr =/= backendPcFaultPtr) {
601    backendException := ExceptionType.none
602  }
603
604  // **********************************************************************
605  // **************************** enq from bpu ****************************
606  // **********************************************************************
607  val new_entry_ready = validEntries < FtqSize.U || canCommit
608  io.fromBpu.resp.ready := new_entry_ready
609
610  val bpu_s2_resp     = io.fromBpu.resp.bits.s2
611  val bpu_s3_resp     = io.fromBpu.resp.bits.s3
612  val bpu_s2_redirect = bpu_s2_resp.valid(3) && bpu_s2_resp.hasRedirect(3)
613  val bpu_s3_redirect = bpu_s3_resp.valid(3) && bpu_s3_resp.hasRedirect(3)
614
615  io.toBpu.enq_ptr := bpuPtr
616  val enq_fire    = io.fromBpu.resp.fire && allowBpuIn // from bpu s1
617  val bpu_in_fire = (io.fromBpu.resp.fire || bpu_s2_redirect || bpu_s3_redirect) && allowBpuIn
618
619  val bpu_in_resp     = io.fromBpu.resp.bits.selectedResp
620  val bpu_in_stage    = io.fromBpu.resp.bits.selectedRespIdxForFtq
621  val bpu_in_resp_ptr = Mux(bpu_in_stage === BP_S1, bpuPtr, bpu_in_resp.ftq_idx)
622  val bpu_in_resp_idx = bpu_in_resp_ptr.value
623
624  // read ports:      pfReq1 + pfReq2 ++  ifuReq1 + ifuReq2 + ifuReq3 + commitUpdate2 + commitUpdate
625  val ftq_pc_mem = Module(new FtqPcMemWrapper(2))
626  // resp from uBTB
627  ftq_pc_mem.io.wen   := bpu_in_fire
628  ftq_pc_mem.io.waddr := bpu_in_resp_idx
629  ftq_pc_mem.io.wdata.fromBranchPrediction(bpu_in_resp)
630
631  //                                                            ifuRedirect + backendRedirect + commit
632  val ftq_redirect_mem = Module(new SyncDataModuleTemplate(
633    new Ftq_Redirect_SRAMEntry,
634    FtqSize,
635    IfuRedirectNum + FtqRedirectAheadNum + 1,
636    1,
637    hasRen = true
638  ))
639  // this info is intended to be enqueued at the last stage of bpu
640  ftq_redirect_mem.io.wen(0)   := io.fromBpu.resp.bits.lastStage.valid(3)
641  ftq_redirect_mem.io.waddr(0) := io.fromBpu.resp.bits.lastStage.ftq_idx.value
642  ftq_redirect_mem.io.wdata(0) := io.fromBpu.resp.bits.last_stage_spec_info
643  println(f"ftq redirect MEM: entry ${ftq_redirect_mem.io.wdata(0).getWidth} * ${FtqSize} * 3")
644
645  val ftq_meta_1r_sram = Module(new FtqNRSRAM(new Ftq_1R_SRAMEntry, 1))
646  // this info is intended to be enqueued at the last stage of bpu
647  ftq_meta_1r_sram.io.wen             := io.fromBpu.resp.bits.lastStage.valid(3)
648  ftq_meta_1r_sram.io.waddr           := io.fromBpu.resp.bits.lastStage.ftq_idx.value
649  ftq_meta_1r_sram.io.wdata.meta      := io.fromBpu.resp.bits.last_stage_meta
650  ftq_meta_1r_sram.io.wdata.ftb_entry := io.fromBpu.resp.bits.last_stage_ftb_entry
651  //                                                            ifuRedirect + backendRedirect (commit moved to ftq_meta_1r_sram)
652  val ftb_entry_mem = Module(new SyncDataModuleTemplate(
653    new FTBEntry_FtqMem,
654    FtqSize,
655    IfuRedirectNum + FtqRedirectAheadNum,
656    1,
657    hasRen = true
658  ))
659  ftb_entry_mem.io.wen(0)   := io.fromBpu.resp.bits.lastStage.valid(3)
660  ftb_entry_mem.io.waddr(0) := io.fromBpu.resp.bits.lastStage.ftq_idx.value
661  ftb_entry_mem.io.wdata(0) := io.fromBpu.resp.bits.last_stage_ftb_entry
662
663  // multi-write
664  val update_target = Reg(Vec(FtqSize, UInt(VAddrBits.W))) // could be taken target or fallThrough //TODO: remove this
665  val newest_entry_target          = Reg(UInt(VAddrBits.W))
666  val newest_entry_target_modified = RegInit(false.B)
667  val newest_entry_ptr             = Reg(new FtqPtr)
668  val newest_entry_ptr_modified    = RegInit(false.B)
669  val cfiIndex_vec                 = Reg(Vec(FtqSize, ValidUndirectioned(UInt(log2Ceil(PredictWidth).W))))
670  val mispredict_vec               = Reg(Vec(FtqSize, Vec(PredictWidth, Bool())))
671  val pred_stage                   = Reg(Vec(FtqSize, UInt(2.W)))
672  val pred_s1_cycle                = if (!env.FPGAPlatform) Some(Reg(Vec(FtqSize, UInt(64.W)))) else None
673
674  val c_empty :: c_toCommit :: c_committed :: c_flushed :: Nil = Enum(4)
675  val commitStateQueueReg = RegInit(VecInit(Seq.fill(FtqSize) {
676    VecInit(Seq.fill(PredictWidth)(c_empty))
677  }))
678  val commitStateQueueEnable = WireInit(VecInit(Seq.fill(FtqSize)(false.B)))
679  val commitStateQueueNext   = WireInit(commitStateQueueReg)
680
681  for (f <- 0 until FtqSize) {
682    when(commitStateQueueEnable(f)) {
683      commitStateQueueReg(f) := commitStateQueueNext(f)
684    }
685  }
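
  // Update pattern: writers drive commitStateQueueNext(row) and pulse commitStateQueueEnable(row)
  // instead of writing commitStateQueueReg directly, so each FTQ row has its own write enable and
  // can be clock-gated independently. A minimal writer sketch (hypothetical index/offset):
  //   commitStateQueueEnable(idx)       := true.B
  //   commitStateQueueNext(idx)(offset) := c_toCommit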
686
687  val f_to_send :: f_sent :: Nil = Enum(2)
688  val entry_fetch_status         = RegInit(VecInit(Seq.fill(FtqSize)(f_sent)))
689
690  val h_not_hit :: h_false_hit :: h_hit :: Nil = Enum(3)
691  val entry_hit_status                         = RegInit(VecInit(Seq.fill(FtqSize)(h_not_hit)))
692
693  // modify registers one cycle later to cut critical path
694  val last_cycle_bpu_in       = RegNext(bpu_in_fire)
695  val last_cycle_bpu_in_ptr   = RegEnable(bpu_in_resp_ptr, bpu_in_fire)
696  val last_cycle_bpu_in_idx   = last_cycle_bpu_in_ptr.value
697  val last_cycle_bpu_target   = RegEnable(bpu_in_resp.getTarget(3), bpu_in_fire)
698  val last_cycle_cfiIndex     = RegEnable(bpu_in_resp.cfiIndex(3), bpu_in_fire)
699  val last_cycle_bpu_in_stage = RegEnable(bpu_in_stage, bpu_in_fire)
700
701  def extra_copyNum_for_commitStateQueue = 2
702  val copied_last_cycle_bpu_in =
703    VecInit(Seq.fill(copyNum + extra_copyNum_for_commitStateQueue)(RegNext(bpu_in_fire)))
704  val copied_last_cycle_bpu_in_ptr_for_ftq =
705    VecInit(Seq.fill(extra_copyNum_for_commitStateQueue)(RegEnable(bpu_in_resp_ptr, bpu_in_fire)))
706
707  newest_entry_target_modified := false.B
708  newest_entry_ptr_modified    := false.B
709  when(last_cycle_bpu_in) {
710    entry_fetch_status(last_cycle_bpu_in_idx) := f_to_send
711    cfiIndex_vec(last_cycle_bpu_in_idx)       := last_cycle_cfiIndex
712    pred_stage(last_cycle_bpu_in_idx)         := last_cycle_bpu_in_stage
713
714    update_target(last_cycle_bpu_in_idx) := last_cycle_bpu_target // TODO: remove this
715    newest_entry_target_modified         := true.B
716    newest_entry_target                  := last_cycle_bpu_target
717    newest_entry_ptr_modified            := true.B
718    newest_entry_ptr                     := last_cycle_bpu_in_ptr
719  }
720
721  // reduce fanout by delaying the write for a cycle
722  when(RegNext(last_cycle_bpu_in)) {
723    mispredict_vec(RegEnable(last_cycle_bpu_in_idx, last_cycle_bpu_in)) :=
724      WireInit(VecInit(Seq.fill(PredictWidth)(false.B)))
725  }
726
727  // record s1 pred cycles
728  pred_s1_cycle.map { vec =>
729    when(bpu_in_fire && (bpu_in_stage === BP_S1)) {
730      vec(bpu_in_resp_ptr.value) := bpu_in_resp.full_pred(0).predCycle.getOrElse(0.U)
731    }
732  }
733
734  // reduce fanout using copied last_cycle_bpu_in and copied last_cycle_bpu_in_ptr
735  val copied_last_cycle_bpu_in_for_ftq = copied_last_cycle_bpu_in.takeRight(extra_copyNum_for_commitStateQueue)
736  copied_last_cycle_bpu_in_for_ftq.zip(copied_last_cycle_bpu_in_ptr_for_ftq).zipWithIndex.map {
737    case ((in, ptr), i) =>
738      when(in) {
739        val perSetEntries = FtqSize / extra_copyNum_for_commitStateQueue // 32
740        require(FtqSize % extra_copyNum_for_commitStateQueue == 0)
741        for (j <- 0 until perSetEntries) {
742          when(ptr.value === (i * perSetEntries + j).U) {
743            commitStateQueueNext(i * perSetEntries + j) := VecInit(Seq.fill(PredictWidth)(c_empty))
744            // Clock gating optimization, use 1 gate cell to control a row
745            commitStateQueueEnable(i * perSetEntries + j) := true.B
746          }
747        }
748      }
749  }
750
751  bpuPtr := bpuPtr + enq_fire
752  copied_bpu_ptr.map(_ := bpuPtr + enq_fire)
753  when(io.toIfu.req.fire && allowToIfu) {
754    ifuPtr_write      := ifuPtrPlus1
755    ifuPtrPlus1_write := ifuPtrPlus2
756    ifuPtrPlus2_write := ifuPtrPlus2 + 1.U
757  }
758  when(io.toPrefetch.req.fire && allowToIfu) {
759    pfPtr_write      := pfPtrPlus1
760    pfPtrPlus1_write := pfPtrPlus1 + 1.U
761  }
762
763  // only use ftb result to assign hit status
764  when(bpu_s2_resp.valid(3)) {
765    entry_hit_status(bpu_s2_resp.ftq_idx.value) := Mux(bpu_s2_resp.full_pred(3).hit, h_hit, h_not_hit)
766  }
767
768  io.toIfu.flushFromBpu.s2.valid      := bpu_s2_redirect
769  io.toIfu.flushFromBpu.s2.bits       := bpu_s2_resp.ftq_idx
770  io.toPrefetch.flushFromBpu.s2.valid := bpu_s2_redirect
771  io.toPrefetch.flushFromBpu.s2.bits  := bpu_s2_resp.ftq_idx
772  when(bpu_s2_redirect) {
773    bpuPtr := bpu_s2_resp.ftq_idx + 1.U
774    copied_bpu_ptr.map(_ := bpu_s2_resp.ftq_idx + 1.U)
775    // only when ifuPtr runs ahead of bpu s2 resp should we recover it
776    when(!isBefore(ifuPtr, bpu_s2_resp.ftq_idx)) {
777      ifuPtr_write      := bpu_s2_resp.ftq_idx
778      ifuPtrPlus1_write := bpu_s2_resp.ftq_idx + 1.U
779      ifuPtrPlus2_write := bpu_s2_resp.ftq_idx + 2.U
780    }
781    when(!isBefore(pfPtr, bpu_s2_resp.ftq_idx)) {
782      pfPtr_write      := bpu_s2_resp.ftq_idx
783      pfPtrPlus1_write := bpu_s2_resp.ftq_idx + 1.U
784    }
785  }
786
787  io.toIfu.flushFromBpu.s3.valid      := bpu_s3_redirect
788  io.toIfu.flushFromBpu.s3.bits       := bpu_s3_resp.ftq_idx
789  io.toPrefetch.flushFromBpu.s3.valid := bpu_s3_redirect
790  io.toPrefetch.flushFromBpu.s3.bits  := bpu_s3_resp.ftq_idx
791  when(bpu_s3_redirect) {
792    bpuPtr := bpu_s3_resp.ftq_idx + 1.U
793    copied_bpu_ptr.map(_ := bpu_s3_resp.ftq_idx + 1.U)
794    // only when ifuPtr runs ahead of bpu s3 resp should we recover it
795    when(!isBefore(ifuPtr, bpu_s3_resp.ftq_idx)) {
796      ifuPtr_write      := bpu_s3_resp.ftq_idx
797      ifuPtrPlus1_write := bpu_s3_resp.ftq_idx + 1.U
798      ifuPtrPlus2_write := bpu_s3_resp.ftq_idx + 2.U
799    }
800    when(!isBefore(pfPtr, bpu_s3_resp.ftq_idx)) {
801      pfPtr_write      := bpu_s3_resp.ftq_idx
802      pfPtrPlus1_write := bpu_s3_resp.ftq_idx + 1.U
803    }
804  }
805
806  XSError(isBefore(bpuPtr, ifuPtr) && !isFull(bpuPtr, ifuPtr), "\nifuPtr is before bpuPtr!\n")
807  XSError(isBefore(bpuPtr, pfPtr) && !isFull(bpuPtr, pfPtr), "\npfPtr is before bpuPtr!\n")
808  XSError(isBefore(ifuWbPtr, commPtr) && !isFull(ifuWbPtr, commPtr), "\ncommPtr is before ifuWbPtr!\n")
809
810  (0 until copyNum).map(i => XSError(copied_bpu_ptr(i) =/= bpuPtr, "\ncopiedBpuPtr is different from bpuPtr!\n"))
811
812  // ****************************************************************
813  // **************************** to ifu ****************************
814  // ****************************************************************
815  // 0  for ifu, and 1-4 for ICache
816  val bpu_in_bypass_buf         = RegEnable(ftq_pc_mem.io.wdata, bpu_in_fire)
817  val copied_bpu_in_bypass_buf  = VecInit(Seq.fill(copyNum)(RegEnable(ftq_pc_mem.io.wdata, bpu_in_fire)))
818  val bpu_in_bypass_buf_for_ifu = bpu_in_bypass_buf
819  val bpu_in_bypass_ptr         = RegEnable(bpu_in_resp_ptr, bpu_in_fire)
820  val last_cycle_to_ifu_fire    = RegNext(io.toIfu.req.fire)
821  val last_cycle_to_pf_fire     = RegNext(io.toPrefetch.req.fire)
822
823  val copied_bpu_in_bypass_ptr      = VecInit(Seq.fill(copyNum)(RegEnable(bpu_in_resp_ptr, bpu_in_fire)))
824  val copied_last_cycle_to_ifu_fire = VecInit(Seq.fill(copyNum)(RegNext(io.toIfu.req.fire)))
825
826  // read pc and target
827  ftq_pc_mem.io.ifuPtr_w       := ifuPtr_write
828  ftq_pc_mem.io.ifuPtrPlus1_w  := ifuPtrPlus1_write
829  ftq_pc_mem.io.ifuPtrPlus2_w  := ifuPtrPlus2_write
830  ftq_pc_mem.io.pfPtr_w        := pfPtr_write
831  ftq_pc_mem.io.pfPtrPlus1_w   := pfPtrPlus1_write
832  ftq_pc_mem.io.commPtr_w      := commPtr_write
833  ftq_pc_mem.io.commPtrPlus1_w := commPtrPlus1_write
834
835  io.toIfu.req.bits.ftqIdx := ifuPtr
836
837  val toICachePcBundle               = Wire(Vec(copyNum, new Ftq_RF_Components))
838  val toICacheEntryToSend            = Wire(Vec(copyNum, Bool()))
839  val nextCycleToPrefetchPcBundle    = Wire(new Ftq_RF_Components)
840  val nextCycleToPrefetchEntryToSend = Wire(Bool())
841  val toPrefetchPcBundle             = RegNext(nextCycleToPrefetchPcBundle)
842  val toPrefetchEntryToSend          = RegNext(nextCycleToPrefetchEntryToSend)
843  val toIfuPcBundle                  = Wire(new Ftq_RF_Components)
844  val entry_is_to_send               = WireInit(entry_fetch_status(ifuPtr.value) === f_to_send)
845  val entry_ftq_offset               = WireInit(cfiIndex_vec(ifuPtr.value))
846  val entry_next_addr                = Wire(UInt(VAddrBits.W))
847
848  val pc_mem_ifu_ptr_rdata   = VecInit(Seq.fill(copyNum)(RegNext(ftq_pc_mem.io.ifuPtr_rdata)))
849  val pc_mem_ifu_plus1_rdata = VecInit(Seq.fill(copyNum)(RegNext(ftq_pc_mem.io.ifuPtrPlus1_rdata)))
850  val diff_entry_next_addr   = WireInit(update_target(ifuPtr.value)) // TODO: remove this
851
852  val copied_ifu_plus1_to_send = VecInit(Seq.fill(copyNum)(RegNext(
853    entry_fetch_status(ifuPtrPlus1.value) === f_to_send
854  ) || RegNext(last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtrPlus1)))
855  val copied_ifu_ptr_to_send = VecInit(Seq.fill(copyNum)(RegNext(
856    entry_fetch_status(ifuPtr.value) === f_to_send
857  ) || RegNext(last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr)))
858
859  for (i <- 0 until copyNum) {
860    when(copied_last_cycle_bpu_in(i) && copied_bpu_in_bypass_ptr(i) === copied_ifu_ptr(i)) {
861      toICachePcBundle(i)    := copied_bpu_in_bypass_buf(i)
862      toICacheEntryToSend(i) := true.B
863    }.elsewhen(copied_last_cycle_to_ifu_fire(i)) {
864      toICachePcBundle(i)    := pc_mem_ifu_plus1_rdata(i)
865      toICacheEntryToSend(i) := copied_ifu_plus1_to_send(i)
866    }.otherwise {
867      toICachePcBundle(i)    := pc_mem_ifu_ptr_rdata(i)
868      toICacheEntryToSend(i) := copied_ifu_ptr_to_send(i)
869    }
870  }
871
872  // Calculate requests sent to prefetcher one cycle in advance to cut critical path
873  when(bpu_in_fire && bpu_in_resp_ptr === pfPtr_write) {
874    nextCycleToPrefetchPcBundle    := ftq_pc_mem.io.wdata
875    nextCycleToPrefetchEntryToSend := true.B
876  }.elsewhen(io.toPrefetch.req.fire) {
877    nextCycleToPrefetchPcBundle := ftq_pc_mem.io.pfPtrPlus1_rdata
878    nextCycleToPrefetchEntryToSend := entry_fetch_status(pfPtrPlus1.value) === f_to_send ||
879      last_cycle_bpu_in && bpu_in_bypass_ptr === pfPtrPlus1
880  }.otherwise {
881    nextCycleToPrefetchPcBundle := ftq_pc_mem.io.pfPtr_rdata
882    nextCycleToPrefetchEntryToSend := entry_fetch_status(pfPtr.value) === f_to_send ||
883      last_cycle_bpu_in && bpu_in_bypass_ptr === pfPtr // reduce potential bubbles
884  }
885
886  // TODO: reconsider target address bypass logic
887  when(last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr) {
888    toIfuPcBundle        := bpu_in_bypass_buf_for_ifu
889    entry_is_to_send     := true.B
890    entry_next_addr      := last_cycle_bpu_target
891    entry_ftq_offset     := last_cycle_cfiIndex
892    diff_entry_next_addr := last_cycle_bpu_target // TODO: remove this
893  }.elsewhen(last_cycle_to_ifu_fire) {
894    toIfuPcBundle := RegNext(ftq_pc_mem.io.ifuPtrPlus1_rdata)
895    entry_is_to_send := RegNext(entry_fetch_status(ifuPtrPlus1.value) === f_to_send) ||
896      RegNext(last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtrPlus1) // reduce potential bubbles
897    entry_next_addr := Mux(
898      last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtrPlus1,
899      bpu_in_bypass_buf_for_ifu.startAddr,
900      Mux(ifuPtr === newest_entry_ptr, newest_entry_target, RegNext(ftq_pc_mem.io.ifuPtrPlus2_rdata.startAddr))
901    ) // ifuPtr+2
902  }.otherwise {
903    toIfuPcBundle := RegNext(ftq_pc_mem.io.ifuPtr_rdata)
904    entry_is_to_send := RegNext(entry_fetch_status(ifuPtr.value) === f_to_send) ||
905      RegNext(last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr) // reduce potential bubbles
906    entry_next_addr := Mux(
907      last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtrPlus1,
908      bpu_in_bypass_buf_for_ifu.startAddr,
909      Mux(ifuPtr === newest_entry_ptr, newest_entry_target, RegNext(ftq_pc_mem.io.ifuPtrPlus1_rdata.startAddr))
910    ) // ifuPtr+1
911  }
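
  // Selection priority for the request sent to IFU: first the single-entry bypass buffer, when
  // last cycle's BPU write hit exactly ifuPtr; then the ifuPtrPlus1 read port, when a request was
  // sent to IFU last cycle (the pointer advanced); otherwise the ifuPtr read port. entry_next_addr
  // is likewise bypassed from last_cycle_bpu_target / newest_entry_target so nextStartAddr does
  // not have to wait for the pc mem read.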
912
913  io.toIfu.req.valid              := entry_is_to_send && ifuPtr =/= bpuPtr
914  io.toIfu.req.bits.nextStartAddr := entry_next_addr
915  io.toIfu.req.bits.ftqOffset     := entry_ftq_offset
916  io.toIfu.req.bits.fromFtqPcBundle(toIfuPcBundle)
917
918  io.toICache.req.valid := entry_is_to_send && ifuPtr =/= bpuPtr
919  io.toICache.req.bits.readValid.zipWithIndex.map { case (copy, i) =>
920    copy := toICacheEntryToSend(i) && copied_ifu_ptr(i) =/= copied_bpu_ptr(i)
921  }
922  io.toICache.req.bits.pcMemRead.zipWithIndex.foreach { case (copy, i) =>
923    copy.fromFtqPcBundle(toICachePcBundle(i))
924    copy.ftqIdx := ifuPtr
925  }
926  io.toICache.req.bits.backendException := ExceptionType.hasException(backendException) && backendPcFaultPtr === ifuPtr
927
928  io.toPrefetch.req.valid := toPrefetchEntryToSend && pfPtr =/= bpuPtr
929  io.toPrefetch.req.bits.fromFtqPcBundle(toPrefetchPcBundle)
930  io.toPrefetch.req.bits.ftqIdx  := pfPtr
931  io.toPrefetch.backendException := Mux(backendPcFaultPtr === pfPtr, backendException, ExceptionType.none)
932  // io.toICache.req.bits.bypassSelect := last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr
933  // io.toICache.req.bits.bpuBypassWrite.zipWithIndex.map{case(bypassWrite, i) =>
934  //   bypassWrite.startAddr := bpu_in_bypass_buf.tail(i).startAddr
935  //   bypassWrite.nextlineStart := bpu_in_bypass_buf.tail(i).nextLineAddr
936  // }
937
938  // TODO: remove this
939  XSError(
940    io.toIfu.req.valid && diff_entry_next_addr =/= entry_next_addr,
941    p"\nifu_req_target wrong! ifuPtr: ${ifuPtr}, entry_next_addr: ${Hexadecimal(entry_next_addr)} diff_entry_next_addr: ${Hexadecimal(diff_entry_next_addr)}\n"
942  )
943
944  // when fall through is smaller in value than start address, there must be a false hit
945  when(toIfuPcBundle.fallThruError && entry_hit_status(ifuPtr.value) === h_hit) {
946    when(io.toIfu.req.fire &&
947      !(bpu_s2_redirect && bpu_s2_resp.ftq_idx === ifuPtr) &&
948      !(bpu_s3_redirect && bpu_s3_resp.ftq_idx === ifuPtr)) {
949      entry_hit_status(ifuPtr.value) := h_false_hit
950      // XSError(true.B, "FTB false hit by fallThroughError, startAddr: %x, fallTHru: %x\n", io.toIfu.req.bits.startAddr, io.toIfu.req.bits.nextStartAddr)
951    }
952  }
953  XSDebug(
954    toIfuPcBundle.fallThruError && entry_hit_status(ifuPtr.value) === h_hit,
955    "fallThruError! start:%x, fallThru:%x\n",
956    io.toIfu.req.bits.startAddr,
957    io.toIfu.req.bits.nextStartAddr
958  )
959
960  XSPerfAccumulate(
961    f"fall_through_error_to_ifu",
962    toIfuPcBundle.fallThruError && entry_hit_status(ifuPtr.value) === h_hit &&
963      io.toIfu.req.fire && !(bpu_s2_redirect && bpu_s2_resp.ftq_idx === ifuPtr) && !(bpu_s3_redirect && bpu_s3_resp.ftq_idx === ifuPtr)
964  )
965
966  val ifu_req_should_be_flushed =
967    io.toIfu.flushFromBpu.shouldFlushByStage2(io.toIfu.req.bits.ftqIdx) ||
968      io.toIfu.flushFromBpu.shouldFlushByStage3(io.toIfu.req.bits.ftqIdx)
969
970  when(io.toIfu.req.fire && !ifu_req_should_be_flushed) {
971    entry_fetch_status(ifuPtr.value) := f_sent
972  }
973
974  // *********************************************************************
975  // **************************** wb from ifu ****************************
976  // *********************************************************************
977  val pdWb         = io.fromIfu.pdWb
978  val pds          = pdWb.bits.pd
979  val ifu_wb_valid = pdWb.valid
980  val ifu_wb_idx   = pdWb.bits.ftqIdx.value
981  // read ports:                                                         commit update
982  val ftq_pd_mem =
983    Module(new SyncDataModuleTemplate(new Ftq_pd_Entry, FtqSize, FtqRedirectAheadNum + 1, 1, hasRen = true))
984  ftq_pd_mem.io.wen(0)   := ifu_wb_valid
985  ftq_pd_mem.io.waddr(0) := pdWb.bits.ftqIdx.value
986  ftq_pd_mem.io.wdata(0).fromPdWb(pdWb.bits)
987
988  val hit_pd_valid       = entry_hit_status(ifu_wb_idx) === h_hit && ifu_wb_valid
989  val hit_pd_mispred     = hit_pd_valid && pdWb.bits.misOffset.valid
990  val hit_pd_mispred_reg = RegNext(hit_pd_mispred, init = false.B)
991  val pd_reg             = RegEnable(pds, pdWb.valid)
992  val start_pc_reg       = RegEnable(pdWb.bits.pc(0), pdWb.valid)
993  val wb_idx_reg         = RegEnable(ifu_wb_idx, pdWb.valid)
994
995  when(ifu_wb_valid) {
996    val comm_stq_wen = VecInit(pds.map(_.valid).zip(pdWb.bits.instrRange).map {
997      case (v, inRange) => v && inRange
998    })
999    commitStateQueueEnable(ifu_wb_idx) := true.B
1000    (commitStateQueueNext(ifu_wb_idx) zip comm_stq_wen).map {
1001      case (qe, v) => when(v) {
1002          qe := c_toCommit
1003        }
1004    }
1005  }
1006
1007  when(ifu_wb_valid) {
1008    ifuWbPtr_write := ifuWbPtr + 1.U
1009  }
1010
1011  XSError(ifu_wb_valid && isAfter(pdWb.bits.ftqIdx, ifuPtr), "IFU returned a predecode before its req, check IFU")
1012
1013  ftb_entry_mem.io.ren.get.head := ifu_wb_valid
1014  ftb_entry_mem.io.raddr.head   := ifu_wb_idx
1015  val has_false_hit = WireInit(false.B)
1016  when(RegNext(hit_pd_valid)) {
1017    // check for false hit
1018    val pred_ftb_entry = ftb_entry_mem.io.rdata.head
1019    val brSlots        = pred_ftb_entry.brSlots
1020    val tailSlot       = pred_ftb_entry.tailSlot
1021    // we check cfis that bpu predicted
1022
1023    // bpu predicted branches but denied by predecode
1024    val br_false_hit =
1025      brSlots.map {
1026        s => s.valid && !(pd_reg(s.offset).valid && pd_reg(s.offset).isBr)
1027      }.reduce(_ || _) ||
1028        (tailSlot.valid && pred_ftb_entry.tailSlot.sharing &&
1029          !(pd_reg(tailSlot.offset).valid && pd_reg(tailSlot.offset).isBr))
1030
1031    val jmpOffset = tailSlot.offset
1032    val jmp_pd    = pd_reg(jmpOffset)
1033    val jal_false_hit = pred_ftb_entry.jmpValid &&
1034      ((pred_ftb_entry.isJal && !(jmp_pd.valid && jmp_pd.isJal)) ||
1035        (pred_ftb_entry.isJalr && !(jmp_pd.valid && jmp_pd.isJalr)) ||
1036        (pred_ftb_entry.isCall && !(jmp_pd.valid && jmp_pd.isCall)) ||
1037        (pred_ftb_entry.isRet && !(jmp_pd.valid && jmp_pd.isRet)))
1038
1039    has_false_hit := br_false_hit || jal_false_hit || hit_pd_mispred_reg
1040    // assert(!has_false_hit)
1041  }
1042  XSDebug(
1043    RegNext(hit_pd_valid) && has_false_hit,
1044    "FTB false hit by br or jal or hit_pd, startAddr: %x\n",
1045    pdWb.bits.pc(0)
1046  )
1047
1048  when(has_false_hit) {
1049    entry_hit_status(wb_idx_reg) := h_false_hit
1050  }
1051
1052  // *******************************************************************************
1053  // **************************** redirect from backend ****************************
1054  // *******************************************************************************
1055
1056  // redirect read cfiInfo, couples to redirectGen s2
1057  // ftqIdxAhead(0-3) => ftq_redirect_mem(1-4), reuse ftq_redirect_mem(1)
1058  val ftq_redirect_rdata = Wire(Vec(FtqRedirectAheadNum, new Ftq_Redirect_SRAMEntry))
1059  val ftb_redirect_rdata = Wire(Vec(FtqRedirectAheadNum, new FTBEntry_FtqMem))
1060
1061  val ftq_pd_rdata = Wire(Vec(FtqRedirectAheadNum, new Ftq_pd_Entry))
1062  for (i <- 1 until FtqRedirectAheadNum) {
1063    ftq_redirect_mem.io.ren.get(i + IfuRedirectNum) := ftqIdxAhead(i).valid
1064    ftq_redirect_mem.io.raddr(i + IfuRedirectNum)   := ftqIdxAhead(i).bits.value
1065    ftb_entry_mem.io.ren.get(i + IfuRedirectNum)    := ftqIdxAhead(i).valid
1066    ftb_entry_mem.io.raddr(i + IfuRedirectNum)      := ftqIdxAhead(i).bits.value
1067
1068    ftq_pd_mem.io.ren.get(i) := ftqIdxAhead(i).valid
1069    ftq_pd_mem.io.raddr(i)   := ftqIdxAhead(i).bits.value
1070  }
1071  ftq_redirect_mem.io.ren.get(IfuRedirectNum) := Mux(aheadValid, ftqIdxAhead(0).valid, backendRedirect.valid)
1072  ftq_redirect_mem.io.raddr(IfuRedirectNum) := Mux(
1073    aheadValid,
1074    ftqIdxAhead(0).bits.value,
1075    backendRedirect.bits.ftqIdx.value
1076  )
1077  ftb_entry_mem.io.ren.get(IfuRedirectNum) := Mux(aheadValid, ftqIdxAhead(0).valid, backendRedirect.valid)
1078  ftb_entry_mem.io.raddr(IfuRedirectNum) := Mux(
1079    aheadValid,
1080    ftqIdxAhead(0).bits.value,
1081    backendRedirect.bits.ftqIdx.value
1082  )
1083
1084  ftq_pd_mem.io.ren.get(0) := Mux(aheadValid, ftqIdxAhead(0).valid, backendRedirect.valid)
1085  ftq_pd_mem.io.raddr(0)   := Mux(aheadValid, ftqIdxAhead(0).bits.value, backendRedirect.bits.ftqIdx.value)
1086
1087  for (i <- 0 until FtqRedirectAheadNum) {
1088    ftq_redirect_rdata(i) := ftq_redirect_mem.io.rdata(i + IfuRedirectNum)
1089    ftb_redirect_rdata(i) := ftb_entry_mem.io.rdata(i + IfuRedirectNum)
1090
1091    ftq_pd_rdata(i) := ftq_pd_mem.io.rdata(i)
1092  }
1093  val stage3CfiInfo =
1094    Mux(realAhdValid, Mux1H(ftqIdxSelOH, ftq_redirect_rdata), ftq_redirect_mem.io.rdata(IfuRedirectNum))
1095  val stage3PdInfo       = Mux(realAhdValid, Mux1H(ftqIdxSelOH, ftq_pd_rdata), ftq_pd_mem.io.rdata(0))
1096  val backendRedirectCfi = fromBackendRedirect.bits.cfiUpdate
1097  backendRedirectCfi.fromFtqRedirectSram(stage3CfiInfo)
1098  backendRedirectCfi.pd := stage3PdInfo.toPd(fromBackendRedirect.bits.ftqOffset)
1099
1100  val r_ftb_entry = Mux(realAhdValid, Mux1H(ftqIdxSelOH, ftb_redirect_rdata), ftb_entry_mem.io.rdata(IfuRedirectNum))
1101  val r_ftqOffset = fromBackendRedirect.bits.ftqOffset
1102
1103  backendRedirectCfi.br_hit := r_ftb_entry.brIsSaved(r_ftqOffset)
1104  backendRedirectCfi.jr_hit := r_ftb_entry.isJalr && r_ftb_entry.tailSlot.offset === r_ftqOffset
1105  // FIXME: not portable
1106  val sc_disagree = stage3CfiInfo.sc_disagree.getOrElse(VecInit(Seq.fill(numBr)(false.B)))
1107  backendRedirectCfi.sc_hit := backendRedirectCfi.br_hit && Mux(
1108    r_ftb_entry.brSlots(0).offset === r_ftqOffset,
1109    sc_disagree(0),
1110    sc_disagree(1)
1111  )
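  // The Mux above assumes numBr == 2: sc_disagree(0) is picked when the redirected branch sits in
  // br slot 0 and sc_disagree(1) otherwise; a different numBr would need a generic slot-matching
  // select here (hence the FIXME).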
1112
1113  when(entry_hit_status(fromBackendRedirect.bits.ftqIdx.value) === h_hit) {
1114    backendRedirectCfi.shift := PopCount(r_ftb_entry.getBrMaskByOffset(r_ftqOffset)) +&
1115      (backendRedirectCfi.pd.isBr && !r_ftb_entry.brIsSaved(r_ftqOffset) &&
1116        !r_ftb_entry.newBrCanNotInsert(r_ftqOffset))
1117
1118    backendRedirectCfi.addIntoHist := backendRedirectCfi.pd.isBr && (r_ftb_entry.brIsSaved(r_ftqOffset) ||
1119      !r_ftb_entry.newBrCanNotInsert(r_ftqOffset))
1120  }.otherwise {
1121    backendRedirectCfi.shift       := (backendRedirectCfi.pd.isBr && backendRedirectCfi.taken).asUInt
1122    backendRedirectCfi.addIntoHist := backendRedirectCfi.pd.isBr.asUInt
1123  }
1124
1125  // ***************************************************************************
1126  // **************************** redirect from ifu ****************************
1127  // ***************************************************************************
1128  val fromIfuRedirect = WireInit(0.U.asTypeOf(Valid(new BranchPredictionRedirect)))
1129  fromIfuRedirect.valid              := pdWb.valid && pdWb.bits.misOffset.valid && !backendFlush
1130  fromIfuRedirect.bits.ftqIdx        := pdWb.bits.ftqIdx
1131  fromIfuRedirect.bits.ftqOffset     := pdWb.bits.misOffset.bits
1132  fromIfuRedirect.bits.level         := RedirectLevel.flushAfter
1133  fromIfuRedirect.bits.BTBMissBubble := true.B
1134  fromIfuRedirect.bits.debugIsMemVio := false.B
1135  fromIfuRedirect.bits.debugIsCtrl   := false.B
1136
1137  val ifuRedirectCfiUpdate = fromIfuRedirect.bits.cfiUpdate
1138  ifuRedirectCfiUpdate.pc        := pdWb.bits.pc(pdWb.bits.misOffset.bits)
1139  ifuRedirectCfiUpdate.pd        := pdWb.bits.pd(pdWb.bits.misOffset.bits)
1140  ifuRedirectCfiUpdate.predTaken := cfiIndex_vec(pdWb.bits.ftqIdx.value).valid
1141  ifuRedirectCfiUpdate.target    := pdWb.bits.target
1142  ifuRedirectCfiUpdate.taken     := pdWb.bits.cfiOffset.valid
1143  ifuRedirectCfiUpdate.isMisPred := pdWb.bits.misOffset.valid
1144
1145  val ifuRedirectReg   = RegNextWithEnable(fromIfuRedirect, hasInit = true)
1146  val ifuRedirectToBpu = WireInit(ifuRedirectReg)
1147  ifuFlush := fromIfuRedirect.valid || ifuRedirectToBpu.valid
1148
1149  ftq_redirect_mem.io.ren.get.head := fromIfuRedirect.valid
1150  ftq_redirect_mem.io.raddr.head   := fromIfuRedirect.bits.ftqIdx.value
1151
1152  val toBpuCfi = ifuRedirectToBpu.bits.cfiUpdate
1153  toBpuCfi.fromFtqRedirectSram(ftq_redirect_mem.io.rdata.head)
1154  when(ifuRedirectReg.bits.cfiUpdate.pd.isRet && ifuRedirectReg.bits.cfiUpdate.pd.valid) {
1155    toBpuCfi.target := toBpuCfi.topAddr
1156  }
1157
1158  when(ifuRedirectReg.valid) {
1159    ifuRedirected(ifuRedirectReg.bits.ftqIdx.value) := true.B
1160  }.elsewhen(RegNext(pdWb.valid)) {
1161    // if pdWb fires and there is no redirect, clear the flag
1162    ifuRedirected(last_cycle_bpu_in_ptr.value) := false.B
1163  }
1164
1165  // **********************************************************************
1166  // ***************************** to backend *****************************
1167  // **********************************************************************
1168  // to backend pc mem / target
1169  io.toBackend.pc_mem_wen   := RegNext(last_cycle_bpu_in)
1170  io.toBackend.pc_mem_waddr := RegEnable(last_cycle_bpu_in_idx, last_cycle_bpu_in)
1171  io.toBackend.pc_mem_wdata := RegEnable(bpu_in_bypass_buf_for_ifu, last_cycle_bpu_in)
1172
1173  // the number of cycles is fixed
1174  val newest_entry_en: Bool = RegNext(last_cycle_bpu_in || backendRedirect.valid || ifuRedirectToBpu.valid)
1175  io.toBackend.newest_entry_en     := RegNext(newest_entry_en)
1176  io.toBackend.newest_entry_ptr    := RegEnable(newest_entry_ptr, newest_entry_en)
1177  io.toBackend.newest_entry_target := RegEnable(newest_entry_target, newest_entry_en)
1178
1179  // *********************************************************************
1180  // **************************** wb from exu ****************************
1181  // *********************************************************************
1182
1183  backendRedirect.valid := io.fromBackend.redirect.valid
1184  backendRedirect.bits.connectRedirect(io.fromBackend.redirect.bits)
1185  backendRedirect.bits.BTBMissBubble := false.B
1186
1187  def extractRedirectInfo(wb: Valid[Redirect]) = {
1188    val ftqPtr    = wb.bits.ftqIdx
1189    val ftqOffset = wb.bits.ftqOffset
1190    val taken     = wb.bits.cfiUpdate.taken
1191    val mispred   = wb.bits.cfiUpdate.isMisPred
1192    (wb.valid, ftqPtr, ftqOffset, taken, mispred)
1193  }
1194
1195  // fix mispredict entry
1196  val lastIsMispredict = RegNext(
1197    backendRedirect.valid && backendRedirect.bits.level === RedirectLevel.flushAfter,
1198    init = false.B
1199  )
1200
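  // Repair the recorded cfiIndex of the redirected FTQ entry:
  //  - a taken redirect at an earlier offset than the recorded cfi becomes the new cfi;
  //  - a redirect at the same offset keeps the bits but updates valid with its taken result;
  //  - a not-taken redirect at any other offset invalidates the recorded cfi.
  // Backend redirects additionally record the per-slot mispredict bit.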
1201  def updateCfiInfo(redirect: Valid[Redirect], isBackend: Boolean = true) = {
1202    val (r_valid, r_ptr, r_offset, r_taken, r_mispred) = extractRedirectInfo(redirect)
1203    val r_idx                                          = r_ptr.value
1204    val cfiIndex_bits_wen                              = r_valid && r_taken && r_offset < cfiIndex_vec(r_idx).bits
1205    val cfiIndex_valid_wen                             = r_valid && r_offset === cfiIndex_vec(r_idx).bits
1206    when(cfiIndex_bits_wen || cfiIndex_valid_wen) {
1207      cfiIndex_vec(r_idx).valid := cfiIndex_bits_wen || cfiIndex_valid_wen && r_taken
1208    }.elsewhen(r_valid && !r_taken && r_offset =/= cfiIndex_vec(r_idx).bits) {
1209      cfiIndex_vec(r_idx).valid := false.B
1210    }
1211    when(cfiIndex_bits_wen) {
1212      cfiIndex_vec(r_idx).bits := r_offset
1213    }
1214    newest_entry_target_modified := true.B
1215    newest_entry_target          := redirect.bits.cfiUpdate.target
1216    newest_entry_ptr_modified    := true.B
1217    newest_entry_ptr             := r_ptr
1218
1219    update_target(r_idx) := redirect.bits.cfiUpdate.target // TODO: remove this
1220    if (isBackend) {
1221      mispredict_vec(r_idx)(r_offset) := r_mispred
1222    }
1223  }
1224
1225  when(fromBackendRedirect.valid) {
1226    updateCfiInfo(fromBackendRedirect)
1227  }.elsewhen(ifuRedirectToBpu.valid) {
1228    updateCfiInfo(ifuRedirectToBpu, isBackend = false)
1229  }
1230
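  // Attribute the redirect bubble for top-down analysis: control redirects are broken down
  // by the predictor component that missed (BTB/TAGE/SC/ITTAGE/RAS), memory-violation
  // replays and other backend redirects get their own buckets, and IFU redirects always
  // count as BTB misses.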
1231  when(fromBackendRedirect.valid) {
1232    when(fromBackendRedirect.bits.ControlRedirectBubble) {
1233      when(fromBackendRedirect.bits.ControlBTBMissBubble) {
1234        topdown_stage.reasons(TopDownCounters.BTBMissBubble.id)                  := true.B
1235        io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.BTBMissBubble.id) := true.B
1236      }.elsewhen(fromBackendRedirect.bits.TAGEMissBubble) {
1237        topdown_stage.reasons(TopDownCounters.TAGEMissBubble.id)                  := true.B
1238        io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.TAGEMissBubble.id) := true.B
1239      }.elsewhen(fromBackendRedirect.bits.SCMissBubble) {
1240        topdown_stage.reasons(TopDownCounters.SCMissBubble.id)                  := true.B
1241        io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.SCMissBubble.id) := true.B
1242      }.elsewhen(fromBackendRedirect.bits.ITTAGEMissBubble) {
1243        topdown_stage.reasons(TopDownCounters.ITTAGEMissBubble.id)                  := true.B
1244        io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.ITTAGEMissBubble.id) := true.B
1245      }.elsewhen(fromBackendRedirect.bits.RASMissBubble) {
1246        topdown_stage.reasons(TopDownCounters.RASMissBubble.id)                  := true.B
1247        io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.RASMissBubble.id) := true.B
1248      }
1249
1250    }.elsewhen(backendRedirect.bits.MemVioRedirectBubble) {
1251      topdown_stage.reasons(TopDownCounters.MemVioRedirectBubble.id)                  := true.B
1252      io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.MemVioRedirectBubble.id) := true.B
1253    }.otherwise {
1254      topdown_stage.reasons(TopDownCounters.OtherRedirectBubble.id)                  := true.B
1255      io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.OtherRedirectBubble.id) := true.B
1256    }
1257  }.elsewhen(ifuRedirectReg.valid) {
1258    topdown_stage.reasons(TopDownCounters.BTBMissBubble.id)                  := true.B
1259    io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.BTBMissBubble.id) := true.B
1260  }
1261
1262  io.ControlBTBMissBubble := fromBackendRedirect.bits.ControlBTBMissBubble
1263  io.TAGEMissBubble       := fromBackendRedirect.bits.TAGEMissBubble
1264  io.SCMissBubble         := fromBackendRedirect.bits.SCMissBubble
1265  io.ITTAGEMissBubble     := fromBackendRedirect.bits.ITTAGEMissBubble
1266  io.RASMissBubble        := fromBackendRedirect.bits.RASMissBubble
1267
1268  // ***********************************************************************************
1269  // **************************** flush ptr and state queue ****************************
1270  // ***********************************************************************************
1271
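  // On any redirect, the fetch-side pointers (BPU, IFU, prefetch) are rewound to the entry
  // right after the redirected one, and the ICache is flushed. One cycle later, for backend
  // redirects only, the commit state of that entry is repaired: slots past the redirect
  // offset become c_empty, and the redirecting slot itself becomes c_flushed when the
  // redirect flushes that instruction as well.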
1272  val redirectVec = VecInit(backendRedirect, fromIfuRedirect)
1273
  // when a redirect arrives, reset the pointers and repair the status queues
1275  io.icacheFlush := redirectVec.map(r => r.valid).reduce(_ || _)
1276  XSPerfAccumulate("icacheFlushFromBackend", backendRedirect.valid)
1277  XSPerfAccumulate("icacheFlushFromIFU", fromIfuRedirect.valid)
1278  when(redirectVec.map(r => r.valid).reduce(_ || _)) {
1279    val r                          = PriorityMux(redirectVec.map(r => r.valid -> r.bits))
1280    val notIfu                     = redirectVec.dropRight(1).map(r => r.valid).reduce(_ || _)
1281    val (idx, offset, flushItSelf) = (r.ftqIdx, r.ftqOffset, RedirectLevel.flushItself(r.level))
1282    val next                       = idx + 1.U
1283    bpuPtr := next
1284    copied_bpu_ptr.map(_ := next)
1285    ifuPtr_write      := next
1286    ifuWbPtr_write    := next
1287    ifuPtrPlus1_write := idx + 2.U
1288    ifuPtrPlus2_write := idx + 3.U
1289    pfPtr_write       := next
1290    pfPtrPlus1_write  := idx + 2.U
1291  }
1292  when(RegNext(redirectVec.map(r => r.valid).reduce(_ || _))) {
1293    val r                          = PriorityMux(redirectVec.map(r => r.valid -> r.bits))
1294    val notIfu                     = redirectVec.dropRight(1).map(r => r.valid).reduce(_ || _)
1295    val (idx, offset, flushItSelf) = (r.ftqIdx, r.ftqOffset, RedirectLevel.flushItself(r.level))
1296    when(RegNext(notIfu)) {
1297      commitStateQueueEnable(RegNext(idx.value)) := true.B
1298      commitStateQueueNext(RegNext(idx.value)).zipWithIndex.foreach { case (s, i) =>
1299        when(i.U > RegNext(offset)) {
1300          s := c_empty
1301        }
1302        when(i.U === RegNext(offset) && RegNext(flushItSelf)) {
1303          s := c_flushed
1304        }
1305      }
1306    }
1307  }
1308
1309  // only the valid bit is actually needed
1310  io.toIfu.redirect.bits    := backendRedirect.bits
1311  io.toIfu.redirect.valid   := stage2Flush
1312  io.toIfu.topdown_redirect := fromBackendRedirect
1313
1314  // commit
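  // Mark the instructions reported by the ROB as committed in the per-entry commit state
  // queue; the fusion cases below (commit types 4-7) additionally commit the partner slot,
  // which may live in the next FTQ entry.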
1315  for (c <- io.fromBackend.rob_commits) {
1316    when(c.valid) {
1317      commitStateQueueEnable(c.bits.ftqIdx.value)                 := true.B
1318      commitStateQueueNext(c.bits.ftqIdx.value)(c.bits.ftqOffset) := c_committed
1319      // TODO: remove this
1320      // For instruction fusions, we also update the next instruction
1321      when(c.bits.commitType === 4.U) {
1322        commitStateQueueNext(c.bits.ftqIdx.value)(c.bits.ftqOffset + 1.U) := c_committed
1323      }.elsewhen(c.bits.commitType === 5.U) {
1324        commitStateQueueNext(c.bits.ftqIdx.value)(c.bits.ftqOffset + 2.U) := c_committed
1325      }.elsewhen(c.bits.commitType === 6.U) {
1326        val index = (c.bits.ftqIdx + 1.U).value
1327        commitStateQueueEnable(index)  := true.B
1328        commitStateQueueNext(index)(0) := c_committed
1329      }.elsewhen(c.bits.commitType === 7.U) {
1330        val index = (c.bits.ftqIdx + 1.U).value
1331        commitStateQueueEnable(index)  := true.B
1332        commitStateQueueNext(index)(1) := c_committed
1333      }
1334    }
1335  }
1336
1337  // ****************************************************************
1338  // **************************** to bpu ****************************
1339  // ****************************************************************
1340
1341  io.toBpu.redirctFromIFU := ifuRedirectToBpu.valid
1342  io.toBpu.redirect       := Mux(fromBackendRedirect.valid, fromBackendRedirect, ifuRedirectToBpu)
1343  val dummy_s1_pred_cycle_vec = VecInit(List.tabulate(FtqSize)(_ => 0.U(64.W)))
1344  val redirect_latency =
1345    GTimer() - pred_s1_cycle.getOrElse(dummy_s1_pred_cycle_vec)(io.toBpu.redirect.bits.ftqIdx.value) + 1.U
1346  XSPerfHistogram("backend_redirect_latency", redirect_latency, fromBackendRedirect.valid, 0, 60, 1)
1347  XSPerfHistogram(
1348    "ifu_redirect_latency",
1349    redirect_latency,
1350    !fromBackendRedirect.valid && ifuRedirectToBpu.valid,
1351    0,
1352    60,
1353    1
1354  )
1355
1356  XSError(
1357    io.toBpu.redirect.valid && isBefore(io.toBpu.redirect.bits.ftqIdx, commPtr),
1358    "Ftq received a redirect after its commit, check backend or replay"
1359  )
1360
1361  val may_have_stall_from_bpu = Wire(Bool())
1362  val bpu_ftb_update_stall    = RegInit(0.U(2.W)) // 2-cycle stall, so we need 3 states
1363  may_have_stall_from_bpu := bpu_ftb_update_stall =/= 0.U
1364
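  // An entry becomes committable once IFU has written it back, the BPU update path is not
  // stalled, and either the ROB commit pointer has already moved past it or at least one of
  // its slots is valid and the youngest valid slot has reached c_committed. The commit
  // pointer may additionally step over an entry whose leading slot was flushed by a
  // redirect, even though nothing in it is committed.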
1365  val validInstructions     = commitStateQueueReg(commPtr.value).map(s => s === c_toCommit || s === c_committed)
1366  val lastInstructionStatus = PriorityMux(validInstructions.reverse.zip(commitStateQueueReg(commPtr.value).reverse))
1367  val firstInstructionFlushed = commitStateQueueReg(commPtr.value)(0) === c_flushed ||
1368    commitStateQueueReg(commPtr.value)(0) === c_empty && commitStateQueueReg(commPtr.value)(1) === c_flushed
1369  canCommit := commPtr =/= ifuWbPtr && !may_have_stall_from_bpu &&
1370    (isAfter(robCommPtr, commPtr) ||
1371      validInstructions.reduce(_ || _) && lastInstructionStatus === c_committed)
1372  val canMoveCommPtr = commPtr =/= ifuWbPtr && !may_have_stall_from_bpu &&
1373    (isAfter(robCommPtr, commPtr) ||
1374      validInstructions.reduce(_ || _) && lastInstructionStatus === c_committed ||
1375      firstInstructionFlushed)
1376
1377  when(io.fromBackend.rob_commits.map(_.valid).reduce(_ | _)) {
1378    robCommPtr_write := ParallelPriorityMux(
1379      io.fromBackend.rob_commits.map(_.valid).reverse,
1380      io.fromBackend.rob_commits.map(_.bits.ftqIdx).reverse
1381    )
1382  }.elsewhen(isAfter(commPtr, robCommPtr)) {
1383    robCommPtr_write := commPtr
1384  }.otherwise {
1385    robCommPtr_write := robCommPtr
1386  }
1387
1388  /**
1389    *************************************************************************************
    * MMIO instruction fetch is allowed only when the MMIO instruction is the oldest in-flight instruction.
1391    *************************************************************************************
1392    */
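  // The MMIO fetch is the oldest when commPtr has already passed the MMIO entry, or points
  // at it and that entry's youngest valid slot has committed.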
1393  val mmioReadPtr = io.mmioCommitRead.mmioFtqPtr
1394  val mmioLastCommit = isAfter(commPtr, mmioReadPtr) ||
1395    commPtr === mmioReadPtr && validInstructions.reduce(_ || _) && lastInstructionStatus === c_committed
1396  io.mmioCommitRead.mmioLastCommit := RegNext(mmioLastCommit)
1397
1398  // commit reads
1399  val commit_pc_bundle = RegNext(ftq_pc_mem.io.commPtr_rdata)
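  // The commit target is normally the next entry's start address; if the committing entry
  // is the newest one in the FTQ, the most recently recorded target is used instead.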
1400  val commit_target =
1401    Mux(
1402      RegNext(commPtr === newest_entry_ptr),
1403      RegEnable(newest_entry_target, newest_entry_target_modified),
1404      RegNext(ftq_pc_mem.io.commPtrPlus1_rdata.startAddr)
1405    )
1406  ftq_pd_mem.io.ren.get.last := canCommit
1407  ftq_pd_mem.io.raddr.last   := commPtr.value
1408  val commit_pd = ftq_pd_mem.io.rdata.last
1409  ftq_redirect_mem.io.ren.get.last := canCommit
1410  ftq_redirect_mem.io.raddr.last   := commPtr.value
1411  val commit_spec_meta = ftq_redirect_mem.io.rdata.last
1412  ftq_meta_1r_sram.io.ren(0)   := canCommit
1413  ftq_meta_1r_sram.io.raddr(0) := commPtr.value
1414  val commit_meta      = ftq_meta_1r_sram.io.rdata(0).meta
1415  val commit_ftb_entry = ftq_meta_1r_sram.io.rdata(0).ftb_entry
1416
  // reading the pc mem and SRAMs takes one cycle, so commit pointer and state are registered to line up with the read data
1418  val do_commit_ptr = RegEnable(commPtr, canCommit)
1419  val do_commit     = RegNext(canCommit, init = false.B)
1420  when(canMoveCommPtr) {
1421    commPtr_write      := commPtrPlus1
1422    commPtrPlus1_write := commPtrPlus1 + 1.U
1423  }
1424  val commit_state   = RegEnable(commitStateQueueReg(commPtr.value), canCommit)
1425  val can_commit_cfi = WireInit(cfiIndex_vec(commPtr.value))
1426  val do_commit_cfi  = WireInit(cfiIndex_vec(do_commit_ptr.value))
1427  //
1428  // when (commitStateQueue(commPtr.value)(can_commit_cfi.bits) =/= c_commited) {
1429  //  can_commit_cfi.valid := false.B
1430  // }
1431  val commit_cfi = RegEnable(can_commit_cfi, canCommit)
1432  val debug_cfi  = commitStateQueueReg(do_commit_ptr.value)(do_commit_cfi.bits) =/= c_committed && do_commit_cfi.valid
1433
1434  val commit_mispredict: Vec[Bool] =
1435    VecInit((RegEnable(mispredict_vec(commPtr.value), canCommit) zip commit_state).map {
1436      case (mis, state) => mis && state === c_committed
1437    })
1438  val commit_instCommited: Vec[Bool] = VecInit(commit_state.map(_ === c_committed)) // [PredictWidth]
1439  val can_commit_hit     = entry_hit_status(commPtr.value)
1440  val commit_hit         = RegEnable(can_commit_hit, canCommit)
1441  val diff_commit_target = RegEnable(update_target(commPtr.value), canCommit) // TODO: remove this
1442  val commit_stage       = RegEnable(pred_stage(commPtr.value), canCommit)
1443  val commit_valid       = commit_hit === h_hit || commit_cfi.valid           // hit or taken
1444
1445  val to_bpu_hit = can_commit_hit === h_hit || can_commit_hit === h_false_hit
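  // If the committing entry has a taken cfi but the FTB had no entry for it (so a new FTB
  // entry has to be generated and written), commits are stalled for two cycles; the counter
  // steps 0 -> 2 -> 1 -> 0, and state 3 must never be reached.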
1446  switch(bpu_ftb_update_stall) {
1447    is(0.U) {
1448      when(can_commit_cfi.valid && !to_bpu_hit && canCommit) {
1449        bpu_ftb_update_stall := 2.U // 2-cycle stall
1450      }
1451    }
1452    is(2.U) {
1453      bpu_ftb_update_stall := 1.U
1454    }
1455    is(1.U) {
1456      bpu_ftb_update_stall := 0.U
1457    }
1458    is(3.U) {
1459      // XSError below
1460    }
1461  }
1462  XSError(bpu_ftb_update_stall === 3.U, "bpu_ftb_update_stall should be 0, 1 or 2")
1463
1464  // TODO: remove this
1465  XSError(do_commit && diff_commit_target =/= commit_target, "\ncommit target should be the same as update target\n")
1466
1467  // update latency stats
1468  val update_latency = GTimer() - pred_s1_cycle.getOrElse(dummy_s1_pred_cycle_vec)(do_commit_ptr.value) + 1.U
1469  XSPerfHistogram("bpu_update_latency", update_latency, io.toBpu.update.valid, 0, 64, 2)
1470
1471  io.toBpu.update       := DontCare
1472  io.toBpu.update.valid := commit_valid && do_commit
1473  val update = io.toBpu.update.bits
1474  update.false_hit   := commit_hit === h_false_hit
1475  update.pc          := commit_pc_bundle.startAddr
1476  update.meta        := commit_meta
1477  update.cfi_idx     := commit_cfi
1478  update.full_target := commit_target
1479  update.from_stage  := commit_stage
1480  update.spec_info   := commit_spec_meta
  XSError(commit_valid && do_commit && debug_cfi, "\ncommit cfi is not in c_committed state\n")
1482
1483  val commit_real_hit  = commit_hit === h_hit
1484  val update_ftb_entry = update.ftb_entry
1485
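  // FTBEntryGen merges the committed predecode info with the actual cfi and target into a
  // new or updated FTB entry; its outputs (insert positions, mispredict/taken masks) drive
  // the BPU update fields below.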
1486  val ftbEntryGen = Module(new FTBEntryGen).io
1487  ftbEntryGen.start_addr     := commit_pc_bundle.startAddr
1488  ftbEntryGen.old_entry      := commit_ftb_entry
1489  ftbEntryGen.pd             := commit_pd
1490  ftbEntryGen.cfiIndex       := commit_cfi
1491  ftbEntryGen.target         := commit_target
1492  ftbEntryGen.hit            := commit_real_hit
1493  ftbEntryGen.mispredict_vec := commit_mispredict
1494
1495  update_ftb_entry         := ftbEntryGen.new_entry
1496  update.new_br_insert_pos := ftbEntryGen.new_br_insert_pos
1497  update.mispred_mask      := ftbEntryGen.mispred_mask
1498  update.old_entry         := ftbEntryGen.is_old_entry
1499  update.pred_hit          := commit_hit === h_hit || commit_hit === h_false_hit
1500  update.br_taken_mask     := ftbEntryGen.taken_mask
1501  update.br_committed := (ftbEntryGen.new_entry.brValids zip ftbEntryGen.new_entry.brOffset) map {
1502    case (valid, offset) => valid && commit_instCommited(offset)
1503  }
1504  update.jmp_taken := ftbEntryGen.jmp_taken
1505
1506  // update.full_pred.fromFtbEntry(ftbEntryGen.new_entry, update.pc)
1507  // update.full_pred.jalr_target := commit_target
1508  // update.full_pred.hit := true.B
1509  // when (update.full_pred.is_jalr) {
1510  //   update.full_pred.targets.last := commit_target
1511  // }
1512
1513  // ******************************************************************************
1514  // **************************** commit perf counters ****************************
1515  // ******************************************************************************
1516
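  // Per-slot masks over the committing entry: an instruction contributes to the branch
  // prediction statistics only if it actually committed and is a cfi (a branch, or the
  // entry's jump instruction); rights and wrongs are further split by cfi kind below.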
1517  val commit_inst_mask        = VecInit(commit_state.map(c => c === c_committed && do_commit)).asUInt
1518  val commit_mispred_mask     = commit_mispredict.asUInt
1519  val commit_not_mispred_mask = ~commit_mispred_mask
1520
1521  val commit_br_mask  = commit_pd.brMask.asUInt
1522  val commit_jmp_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.jmpInfo.valid.asTypeOf(UInt(1.W)))
1523  val commit_cfi_mask = commit_br_mask | commit_jmp_mask
1524
1525  val mbpInstrs = commit_inst_mask & commit_cfi_mask
1526
1527  val mbpRights = mbpInstrs & commit_not_mispred_mask
1528  val mbpWrongs = mbpInstrs & commit_mispred_mask
1529
1530  io.bpuInfo.bpRight := PopCount(mbpRights)
1531  io.bpuInfo.bpWrong := PopCount(mbpWrongs)
1532
1533  val hartId           = p(XSCoreParamsKey).HartId
1534  val isWriteFTQTable  = Constantin.createRecord(s"isWriteFTQTable$hartId")
1535  val ftqBranchTraceDB = ChiselDB.createTable(s"FTQTable$hartId", new FtqDebugBundle)
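  // When the isWriteFTQTable constant is set, every committed cfi is also logged to a
  // per-hart ChiselDB table for offline branch-trace analysis.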
1536  // Cfi Info
1537  for (i <- 0 until PredictWidth) {
1538    val pc      = commit_pc_bundle.startAddr + (i * instBytes).U
1539    val v       = commit_state(i) === c_committed
1540    val isBr    = commit_pd.brMask(i)
1541    val isJmp   = commit_pd.jmpInfo.valid && commit_pd.jmpOffset === i.U
1542    val isCfi   = isBr || isJmp
1543    val isTaken = commit_cfi.valid && commit_cfi.bits === i.U
1544    val misPred = commit_mispredict(i)
1545    // val ghist = commit_spec_meta.ghist.predHist
1546    val histPtr   = commit_spec_meta.histPtr
1547    val predCycle = commit_meta(63, 0)
1548    val target    = commit_target
1549
1550    val brIdx = OHToUInt(Reverse(Cat(update_ftb_entry.brValids.zip(update_ftb_entry.brOffset).map { case (v, offset) =>
1551      v && offset === i.U
1552    })))
1553    val inFtbEntry = update_ftb_entry.brValids.zip(update_ftb_entry.brOffset).map { case (v, offset) =>
1554      v && offset === i.U
1555    }.reduce(_ || _)
1556    val addIntoHist =
1557      ((commit_hit === h_hit) && inFtbEntry) || (!(commit_hit === h_hit) && i.U === commit_cfi.bits && isBr && commit_cfi.valid)
1558    XSDebug(
1559      v && do_commit && isCfi,
1560      p"cfi_update: isBr(${isBr}) pc(${Hexadecimal(pc)}) " +
1561        p"taken(${isTaken}) mispred(${misPred}) cycle($predCycle) hist(${histPtr.value}) " +
1562        p"startAddr(${Hexadecimal(commit_pc_bundle.startAddr)}) AddIntoHist(${addIntoHist}) " +
1563        p"brInEntry(${inFtbEntry}) brIdx(${brIdx}) target(${Hexadecimal(target)})\n"
1564    )
1565
1566    val logbundle = Wire(new FtqDebugBundle)
1567    logbundle.pc        := pc
1568    logbundle.target    := target
1569    logbundle.isBr      := isBr
1570    logbundle.isJmp     := isJmp
1571    logbundle.isCall    := isJmp && commit_pd.hasCall
1572    logbundle.isRet     := isJmp && commit_pd.hasRet
1573    logbundle.misPred   := misPred
1574    logbundle.isTaken   := isTaken
1575    logbundle.predStage := commit_stage
1576
1577    ftqBranchTraceDB.log(
1578      data = logbundle /* hardware of type T */,
1579      en = isWriteFTQTable.orR && v && do_commit && isCfi,
1580      site = "FTQ" + p(XSCoreParamsKey).HartId.toString,
1581      clock = clock,
1582      reset = reset
1583    )
1584  }
1585
1586  val enq           = io.fromBpu.resp
1587  val perf_redirect = backendRedirect
1588
1589  XSPerfAccumulate("entry", validEntries)
1590  XSPerfAccumulate("bpu_to_ftq_stall", enq.valid && !enq.ready)
1591  XSPerfAccumulate("mispredictRedirect", perf_redirect.valid && RedirectLevel.flushAfter === perf_redirect.bits.level)
1592  XSPerfAccumulate("replayRedirect", perf_redirect.valid && RedirectLevel.flushItself(perf_redirect.bits.level))
1593  XSPerfAccumulate("predecodeRedirect", fromIfuRedirect.valid)
1594
1595  XSPerfAccumulate("to_ifu_bubble", io.toIfu.req.ready && !io.toIfu.req.valid)
1596
1597  XSPerfAccumulate("to_ifu_stall", io.toIfu.req.valid && !io.toIfu.req.ready)
1598  XSPerfAccumulate("from_bpu_real_bubble", !enq.valid && enq.ready && allowBpuIn)
1599  XSPerfAccumulate("bpu_to_ifu_bubble", bpuPtr === ifuPtr)
1600  XSPerfAccumulate(
1601    "bpu_to_ifu_bubble_when_ftq_full",
1602    (bpuPtr === ifuPtr) && isFull(bpuPtr, commPtr) && io.toIfu.req.ready
1603  )
1604
1605  XSPerfAccumulate("redirectAhead_ValidNum", ftqIdxAhead.map(_.valid).reduce(_ | _))
1606  XSPerfAccumulate("fromBackendRedirect_ValidNum", io.fromBackend.redirect.valid)
1607  XSPerfAccumulate("toBpuRedirect_ValidNum", io.toBpu.redirect.valid)
1608
1609  val from_bpu = io.fromBpu.resp.bits
1610  val to_ifu   = io.toIfu.req.bits
1611
1612  XSPerfHistogram("commit_num_inst", PopCount(commit_inst_mask), do_commit, 0, PredictWidth + 1, 1)
1613
1614  val commit_jal_mask  = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasJal.asTypeOf(UInt(1.W)))
1615  val commit_jalr_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasJalr.asTypeOf(UInt(1.W)))
1616  val commit_call_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasCall.asTypeOf(UInt(1.W)))
1617  val commit_ret_mask  = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasRet.asTypeOf(UInt(1.W)))
1618
1619  val mbpBRights = mbpRights & commit_br_mask
1620  val mbpJRights = mbpRights & commit_jal_mask
1621  val mbpIRights = mbpRights & commit_jalr_mask
1622  val mbpCRights = mbpRights & commit_call_mask
1623  val mbpRRights = mbpRights & commit_ret_mask
1624
1625  val mbpBWrongs = mbpWrongs & commit_br_mask
1626  val mbpJWrongs = mbpWrongs & commit_jal_mask
1627  val mbpIWrongs = mbpWrongs & commit_jalr_mask
1628  val mbpCWrongs = mbpWrongs & commit_call_mask
1629  val mbpRWrongs = mbpWrongs & commit_ret_mask
1630
1631  val commit_pred_stage = RegNext(pred_stage(commPtr.value))
1632
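  // Split a per-slot right/wrong mask by the BPU stage that produced the committed
  // prediction, yielding one named counter per stage.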
1633  def pred_stage_map(src: UInt, name: String) =
1634    (0 until numBpStages).map(i =>
1635      f"${name}_stage_${i + 1}" -> PopCount(src.asBools.map(_ && commit_pred_stage === BP_STAGES(i)))
1636    ).foldLeft(Map[String, UInt]())(_ + _)
1637
1638  val mispred_stage_map      = pred_stage_map(mbpWrongs, "mispredict")
1639  val br_mispred_stage_map   = pred_stage_map(mbpBWrongs, "br_mispredict")
1640  val jalr_mispred_stage_map = pred_stage_map(mbpIWrongs, "jalr_mispredict")
1641  val correct_stage_map      = pred_stage_map(mbpRights, "correct")
1642  val br_correct_stage_map   = pred_stage_map(mbpBRights, "br_correct")
1643  val jalr_correct_stage_map = pred_stage_map(mbpIRights, "jalr_correct")
1644
1645  val update_valid = io.toBpu.update.valid
1646  def u(cond: Bool) = update_valid && cond
1647  val ftb_false_hit = u(update.false_hit)
1648  // assert(!ftb_false_hit)
1649  val ftb_hit = u(commit_hit === h_hit)
1650
1651  val ftb_new_entry                = u(ftbEntryGen.is_init_entry)
1652  val ftb_new_entry_only_br        = ftb_new_entry && !update_ftb_entry.jmpValid
1653  val ftb_new_entry_only_jmp       = ftb_new_entry && !update_ftb_entry.brValids(0)
1654  val ftb_new_entry_has_br_and_jmp = ftb_new_entry && update_ftb_entry.brValids(0) && update_ftb_entry.jmpValid
1655
1656  val ftb_old_entry = u(ftbEntryGen.is_old_entry)
1657
1658  val ftb_modified_entry =
1659    u(ftbEntryGen.is_new_br || ftbEntryGen.is_jalr_target_modified || ftbEntryGen.is_strong_bias_modified)
1660  val ftb_modified_entry_new_br               = u(ftbEntryGen.is_new_br)
1661  val ftb_modified_entry_ifu_redirected       = u(ifuRedirected(do_commit_ptr.value))
1662  val ftb_modified_entry_jalr_target_modified = u(ftbEntryGen.is_jalr_target_modified)
1663  val ftb_modified_entry_br_full              = ftb_modified_entry && ftbEntryGen.is_br_full
1664  val ftb_modified_entry_strong_bias          = ftb_modified_entry && ftbEntryGen.is_strong_bias_modified
1665
1666  def getFtbEntryLen(pc: UInt, entry: FTBEntry) = (entry.getFallThrough(pc) - pc) >> instOffsetBits
1667  val gen_ftb_entry_len = getFtbEntryLen(update.pc, ftbEntryGen.new_entry)
1668  XSPerfHistogram("ftb_init_entry_len", gen_ftb_entry_len, ftb_new_entry, 0, PredictWidth + 1, 1)
1669  XSPerfHistogram("ftb_modified_entry_len", gen_ftb_entry_len, ftb_modified_entry, 0, PredictWidth + 1, 1)
1670  val s3_ftb_entry_len = getFtbEntryLen(from_bpu.s3.pc(0), from_bpu.last_stage_ftb_entry)
1671  XSPerfHistogram("s3_ftb_entry_len", s3_ftb_entry_len, from_bpu.s3.valid(0), 0, PredictWidth + 1, 1)
1672
1673  XSPerfHistogram("ftq_has_entry", validEntries, true.B, 0, FtqSize + 1, 1)
1674
1675  val perfCountsMap = Map(
1676    "BpInstr"                        -> PopCount(mbpInstrs),
1677    "BpBInstr"                       -> PopCount(mbpBRights | mbpBWrongs),
1678    "BpRight"                        -> PopCount(mbpRights),
1679    "BpWrong"                        -> PopCount(mbpWrongs),
1680    "BpBRight"                       -> PopCount(mbpBRights),
1681    "BpBWrong"                       -> PopCount(mbpBWrongs),
1682    "BpJRight"                       -> PopCount(mbpJRights),
1683    "BpJWrong"                       -> PopCount(mbpJWrongs),
1684    "BpIRight"                       -> PopCount(mbpIRights),
1685    "BpIWrong"                       -> PopCount(mbpIWrongs),
1686    "BpCRight"                       -> PopCount(mbpCRights),
1687    "BpCWrong"                       -> PopCount(mbpCWrongs),
1688    "BpRRight"                       -> PopCount(mbpRRights),
1689    "BpRWrong"                       -> PopCount(mbpRWrongs),
1690    "ftb_false_hit"                  -> PopCount(ftb_false_hit),
1691    "ftb_hit"                        -> PopCount(ftb_hit),
1692    "ftb_new_entry"                  -> PopCount(ftb_new_entry),
1693    "ftb_new_entry_only_br"          -> PopCount(ftb_new_entry_only_br),
1694    "ftb_new_entry_only_jmp"         -> PopCount(ftb_new_entry_only_jmp),
1695    "ftb_new_entry_has_br_and_jmp"   -> PopCount(ftb_new_entry_has_br_and_jmp),
1696    "ftb_old_entry"                  -> PopCount(ftb_old_entry),
1697    "ftb_modified_entry"             -> PopCount(ftb_modified_entry),
1698    "ftb_modified_entry_new_br"      -> PopCount(ftb_modified_entry_new_br),
1699    "ftb_jalr_target_modified"       -> PopCount(ftb_modified_entry_jalr_target_modified),
1700    "ftb_modified_entry_br_full"     -> PopCount(ftb_modified_entry_br_full),
1701    "ftb_modified_entry_strong_bias" -> PopCount(ftb_modified_entry_strong_bias)
1702  ) ++ mispred_stage_map ++ br_mispred_stage_map ++ jalr_mispred_stage_map ++
1703    correct_stage_map ++ br_correct_stage_map ++ jalr_correct_stage_map
1704
1705  for ((key, value) <- perfCountsMap) {
1706    XSPerfAccumulate(key, value)
1707  }
1708
1709  // --------------------------- Debug --------------------------------
1710  // XSDebug(enq_fire, p"enq! " + io.fromBpu.resp.bits.toPrintable)
1711  XSDebug(io.toIfu.req.fire, p"fire to ifu " + io.toIfu.req.bits.toPrintable)
1712  XSDebug(do_commit, p"deq! [ptr] $do_commit_ptr\n")
1713  XSDebug(true.B, p"[bpuPtr] $bpuPtr, [ifuPtr] $ifuPtr, [ifuWbPtr] $ifuWbPtr [commPtr] $commPtr\n")
1714  XSDebug(
1715    true.B,
1716    p"[in] v:${io.fromBpu.resp.valid} r:${io.fromBpu.resp.ready} " +
1717      p"[out] v:${io.toIfu.req.valid} r:${io.toIfu.req.ready}\n"
1718  )
1719  XSDebug(do_commit, p"[deq info] cfiIndex: $commit_cfi, $commit_pc_bundle, target: ${Hexadecimal(commit_target)}\n")
1720
1721  //   def ubtbCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1722  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1723  //       case (((valid, pd), ans), taken) =>
1724  //       Mux(valid && pd.isBr,
1725  //         isWrong ^ Mux(ans.hit.asBool,
1726  //           Mux(ans.taken.asBool, taken && ans.target === commitEntry.target,
1727  //           !taken),
1728  //         !taken),
1729  //       false.B)
1730  //     }
1731  //   }
1732
1733  //   def btbCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1734  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1735  //       case (((valid, pd), ans), taken) =>
1736  //       Mux(valid && pd.isBr,
1737  //         isWrong ^ Mux(ans.hit.asBool,
1738  //           Mux(ans.taken.asBool, taken && ans.target === commitEntry.target,
1739  //           !taken),
1740  //         !taken),
1741  //       false.B)
1742  //     }
1743  //   }
1744
1745  //   def tageCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1746  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1747  //       case (((valid, pd), ans), taken) =>
1748  //       Mux(valid && pd.isBr,
1749  //         isWrong ^ (ans.taken.asBool === taken),
1750  //       false.B)
1751  //     }
1752  //   }
1753
1754  //   def loopCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1755  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1756  //       case (((valid, pd), ans), taken) =>
1757  //       Mux(valid && (pd.isBr) && ans.hit.asBool,
1758  //         isWrong ^ (!taken),
1759  //           false.B)
1760  //     }
1761  //   }
1762
1763  //   def rasCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1764  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1765  //       case (((valid, pd), ans), taken) =>
1766  //       Mux(valid && pd.isRet.asBool /*&& taken*/ && ans.hit.asBool,
1767  //         isWrong ^ (ans.target === commitEntry.target),
1768  //           false.B)
1769  //     }
1770  //   }
1771
1772  //   val ubtbRights = ubtbCheck(commitEntry, commitEntry.metas.map(_.ubtbAns), false.B)
1773  //   val ubtbWrongs = ubtbCheck(commitEntry, commitEntry.metas.map(_.ubtbAns), true.B)
1774  //   // btb and ubtb pred jal and jalr as well
1775  //   val btbRights = btbCheck(commitEntry, commitEntry.metas.map(_.btbAns), false.B)
1776  //   val btbWrongs = btbCheck(commitEntry, commitEntry.metas.map(_.btbAns), true.B)
1777  //   val tageRights = tageCheck(commitEntry, commitEntry.metas.map(_.tageAns), false.B)
1778  //   val tageWrongs = tageCheck(commitEntry, commitEntry.metas.map(_.tageAns), true.B)
1779
1780  //   val loopRights = loopCheck(commitEntry, commitEntry.metas.map(_.loopAns), false.B)
1781  //   val loopWrongs = loopCheck(commitEntry, commitEntry.metas.map(_.loopAns), true.B)
1782
1783  //   val rasRights = rasCheck(commitEntry, commitEntry.metas.map(_.rasAns), false.B)
1784  //   val rasWrongs = rasCheck(commitEntry, commitEntry.metas.map(_.rasAns), true.B)
1785
1786  val perfEvents = Seq(
1787    ("bpu_s2_redirect        ", bpu_s2_redirect),
1788    ("bpu_s3_redirect        ", bpu_s3_redirect),
1789    ("bpu_to_ftq_stall       ", enq.valid && ~enq.ready),
1790    ("mispredictRedirect     ", perf_redirect.valid && RedirectLevel.flushAfter === perf_redirect.bits.level),
1791    ("replayRedirect         ", perf_redirect.valid && RedirectLevel.flushItself(perf_redirect.bits.level)),
1792    ("predecodeRedirect      ", fromIfuRedirect.valid),
1793    ("to_ifu_bubble          ", io.toIfu.req.ready && !io.toIfu.req.valid),
1794    ("from_bpu_real_bubble   ", !enq.valid && enq.ready && allowBpuIn),
1795    ("BpInstr                ", PopCount(mbpInstrs)),
1796    ("BpBInstr               ", PopCount(mbpBRights | mbpBWrongs)),
1797    ("BpRight                ", PopCount(mbpRights)),
1798    ("BpWrong                ", PopCount(mbpWrongs)),
1799    ("BpBRight               ", PopCount(mbpBRights)),
1800    ("BpBWrong               ", PopCount(mbpBWrongs)),
1801    ("BpJRight               ", PopCount(mbpJRights)),
1802    ("BpJWrong               ", PopCount(mbpJWrongs)),
1803    ("BpIRight               ", PopCount(mbpIRights)),
1804    ("BpIWrong               ", PopCount(mbpIWrongs)),
1805    ("BpCRight               ", PopCount(mbpCRights)),
1806    ("BpCWrong               ", PopCount(mbpCWrongs)),
1807    ("BpRRight               ", PopCount(mbpRRights)),
1808    ("BpRWrong               ", PopCount(mbpRWrongs)),
1809    ("ftb_false_hit          ", PopCount(ftb_false_hit)),
1810    ("ftb_hit                ", PopCount(ftb_hit))
1811  )
1812  generatePerfEvent()
1813}
1814