xref: /XiangShan/src/main/scala/xiangshan/frontend/NewFtq.scala (revision 94aa21c6009c2f39c5c5dae9c87260c78887efcc)
1/***************************************************************************************
2* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
3* Copyright (c) 2020-2021 Peng Cheng Laboratory
4*
5* XiangShan is licensed under Mulan PSL v2.
6* You can use this software according to the terms and conditions of the Mulan PSL v2.
7* You may obtain a copy of Mulan PSL v2 at:
8*          http://license.coscl.org.cn/MulanPSL2
9*
10* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13*
14* See the Mulan PSL v2 for more details.
15*
16*
17* Acknowledgement
18*
19* This implementation is inspired by several key papers:
20* [1] Glenn Reinman, Todd Austin, and Brad Calder. "[A scalable front-end architecture for fast instruction delivery.]
21* (https://doi.org/10.1109/ISCA.1999.765954)" 26th International Symposium on Computer Architecture (ISCA). 1999.
22*
23***************************************************************************************/
24
25package xiangshan.frontend
26
27import chisel3._
28import chisel3.util._
29import org.chipsalliance.cde.config.Parameters
30import utility._
31import utility.ChiselDB
32import utils._
33import xiangshan._
34import xiangshan.backend.CtrlToFtqIO
35import xiangshan.backend.decode.ImmUnion
36import xiangshan.frontend.icache._
37
38class FtqDebugBundle extends Bundle {
39  val pc        = UInt(39.W)
40  val target    = UInt(39.W)
41  val isBr      = Bool()
42  val isJmp     = Bool()
43  val isCall    = Bool()
44  val isRet     = Bool()
45  val misPred   = Bool()
46  val isTaken   = Bool()
47  val predStage = UInt(2.W)
48}
49
50class FtqPtr(entries: Int) extends CircularQueuePtr[FtqPtr](
51      entries
52    ) {
53  def this()(implicit p: Parameters) = this(p(XSCoreParamsKey).FtqSize)
54}
55
56object FtqPtr {
57  def apply(f: Bool, v: UInt)(implicit p: Parameters): FtqPtr = {
58    val ptr = Wire(new FtqPtr)
59    ptr.flag  := f
60    ptr.value := v
61    ptr
62  }
63  def inverse(ptr: FtqPtr)(implicit p: Parameters): FtqPtr =
64    apply(!ptr.flag, ptr.value)
65}
66
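// A multi-read, single-write memory: one SRAMTemplate instance is duplicated per read
// port, and every copy is written with the same data so any port can serve a read.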
67class FtqNRSRAM[T <: Data](gen: T, numRead: Int)(implicit p: Parameters) extends XSModule {
68
69  val io = IO(new Bundle() {
70    val raddr = Input(Vec(numRead, UInt(log2Up(FtqSize).W)))
71    val ren   = Input(Vec(numRead, Bool()))
72    val rdata = Output(Vec(numRead, gen))
73    val waddr = Input(UInt(log2Up(FtqSize).W))
74    val wen   = Input(Bool())
75    val wdata = Input(gen)
76  })
77
78  for (i <- 0 until numRead) {
79    val sram = Module(new SRAMTemplate(gen, FtqSize, withClockGate = true))
80    sram.io.r.req.valid       := io.ren(i)
81    sram.io.r.req.bits.setIdx := io.raddr(i)
82    io.rdata(i)               := sram.io.r.resp.data(0)
83    sram.io.w.req.valid       := io.wen
84    sram.io.w.req.bits.setIdx := io.waddr
85    sram.io.w.req.bits.data   := VecInit(io.wdata)
86  }
87
88}
89
90class Ftq_RF_Components(implicit p: Parameters) extends XSBundle with BPUUtils {
91  val startAddr     = UInt(VAddrBits.W)
92  val nextLineAddr  = UInt(VAddrBits.W)
93  val isNextMask    = Vec(PredictWidth, Bool())
94  val fallThruError = Bool()
95  // val carry = Bool()
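  // Reconstruct the PC of the instruction at `offset` within this fetch block: the low bits
  // come from startAddr plus the offset; the high bits come from nextLineAddr when adding
  // the offset crosses into the next line (isNextMask), otherwise from startAddr.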
96  def getPc(offset: UInt) = {
97    def getHigher(pc: UInt) = pc(VAddrBits - 1, log2Ceil(PredictWidth) + instOffsetBits + 1)
98    def getOffset(pc: UInt) = pc(log2Ceil(PredictWidth) + instOffsetBits, instOffsetBits)
99    Cat(
100      getHigher(Mux(isNextMask(offset) && startAddr(log2Ceil(PredictWidth) + instOffsetBits), nextLineAddr, startAddr)),
101      getOffset(startAddr) + offset,
102      0.U(instOffsetBits.W)
103    )
104  }
105  def fromBranchPrediction(resp: BranchPredictionBundle) = {
106    def carryPos(addr: UInt) = addr(instOffsetBits + log2Ceil(PredictWidth) + 1)
107    this.startAddr    := resp.pc(3)
108    this.nextLineAddr := resp.pc(3) + (FetchWidth * 4 * 2).U // may be broken on other configs
109    this.isNextMask := VecInit((0 until PredictWidth).map(i =>
110      (resp.pc(3)(log2Ceil(PredictWidth), 1) +& i.U)(log2Ceil(PredictWidth)).asBool
111    ))
112    this.fallThruError := resp.fallThruError(3)
113    this
114  }
115  override def toPrintable: Printable =
116    p"startAddr:${Hexadecimal(startAddr)}"
117}
118
119class Ftq_pd_Entry(implicit p: Parameters) extends XSBundle {
120  val brMask    = Vec(PredictWidth, Bool())
121  val jmpInfo   = ValidUndirectioned(Vec(3, Bool()))
122  val jmpOffset = UInt(log2Ceil(PredictWidth).W)
123  val jalTarget = UInt(VAddrBits.W)
124  val rvcMask   = Vec(PredictWidth, Bool())
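  // jmpInfo.bits encoding: bit 0 = isJalr, bit 1 = isCall, bit 2 = isRet (see fromPdWb below)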
125  def hasJal    = jmpInfo.valid && !jmpInfo.bits(0)
126  def hasJalr   = jmpInfo.valid && jmpInfo.bits(0)
127  def hasCall   = jmpInfo.valid && jmpInfo.bits(1)
128  def hasRet    = jmpInfo.valid && jmpInfo.bits(2)
129
130  def fromPdWb(pdWb: PredecodeWritebackBundle) = {
131    val pds = pdWb.pd
132    this.brMask        := VecInit(pds.map(pd => pd.isBr && pd.valid))
133    this.jmpInfo.valid := VecInit(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid)).asUInt.orR
134    this.jmpInfo.bits := ParallelPriorityMux(
135      pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid),
136      pds.map(pd => VecInit(pd.isJalr, pd.isCall, pd.isRet))
137    )
138    this.jmpOffset := ParallelPriorityEncoder(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid))
139    this.rvcMask   := VecInit(pds.map(pd => pd.isRVC))
140    this.jalTarget := pdWb.jalTarget
141  }
142
143  def toPd(offset: UInt) = {
144    require(offset.getWidth == log2Ceil(PredictWidth))
145    val pd = Wire(new PreDecodeInfo)
146    pd.valid := true.B
147    pd.isRVC := rvcMask(offset)
148    val isBr   = brMask(offset)
149    val isJalr = offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(0)
150    pd.brType := Cat(offset === jmpOffset && jmpInfo.valid, isJalr || isBr)
151    pd.isCall := offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(1)
152    pd.isRet  := offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(2)
153    pd
154  }
155}
156
157class PrefetchPtrDB(implicit p: Parameters) extends Bundle {
158  val fromFtqPtr = UInt(log2Up(p(XSCoreParamsKey).FtqSize).W)
159  val fromIfuPtr = UInt(log2Up(p(XSCoreParamsKey).FtqSize).W)
160}
161
162class Ftq_Redirect_SRAMEntry(implicit p: Parameters) extends SpeculativeInfo {
163  val sc_disagree = if (!env.FPGAPlatform) Some(Vec(numBr, Bool())) else None
164}
165
166class Ftq_1R_SRAMEntry(implicit p: Parameters) extends XSBundle with HasBPUConst {
167  val meta      = UInt(MaxMetaLength.W)
168  val ftb_entry = new FTBEntry
169}
170
171class Ftq_Pred_Info(implicit p: Parameters) extends XSBundle {
172  val target   = UInt(VAddrBits.W)
173  val cfiIndex = ValidUndirectioned(UInt(log2Ceil(PredictWidth).W))
174}
175
176class FtqRead[T <: Data](private val gen: T)(implicit p: Parameters) extends XSBundle {
177  val valid  = Output(Bool())
178  val ptr    = Output(new FtqPtr)
179  val offset = Output(UInt(log2Ceil(PredictWidth).W))
180  val data   = Input(gen)
181  def apply(valid: Bool, ptr: FtqPtr, offset: UInt) = {
182    this.valid  := valid
183    this.ptr    := ptr
184    this.offset := offset
185    this.data
186  }
187}
188
189class FtqToBpuIO(implicit p: Parameters) extends XSBundle {
190  val redirect       = Valid(new BranchPredictionRedirect)
191  val update         = Valid(new BranchPredictionUpdate)
192  val enq_ptr        = Output(new FtqPtr)
193  val redirctFromIFU = Output(Bool())
194}
195
196class BpuFlushInfo(implicit p: Parameters) extends XSBundle with HasCircularQueuePtrHelper {
197  // when the ifu pipeline is not stalled,
198  // a packet from bpu s3 can have reached f1 at most
199  val s2 = Valid(new FtqPtr)
200  val s3 = Valid(new FtqPtr)
201  def shouldFlushBy(src: Valid[FtqPtr], idx_to_flush: FtqPtr) =
202    src.valid && !isAfter(src.bits, idx_to_flush)
203  def shouldFlushByStage2(idx: FtqPtr) = shouldFlushBy(s2, idx)
204  def shouldFlushByStage3(idx: FtqPtr) = shouldFlushBy(s3, idx)
205}
206
207class FtqToIfuIO(implicit p: Parameters) extends XSBundle {
208  val req              = Decoupled(new FetchRequestBundle)
209  val redirect         = Valid(new BranchPredictionRedirect)
210  val topdown_redirect = Valid(new BranchPredictionRedirect)
211  val flushFromBpu     = new BpuFlushInfo
212}
213
214class FtqToICacheIO(implicit p: Parameters) extends XSBundle {
215  // NOTE: req.bits must be prepared in cycle T,
216  // while req.valid is set to true in cycle T + 1
217  val req = Decoupled(new FtqToICacheRequestBundle)
218}
219
220class FtqToPrefetchIO(implicit p: Parameters) extends XSBundle {
221  val req              = Decoupled(new FtqICacheInfo)
222  val flushFromBpu     = new BpuFlushInfo
223  val backendException = UInt(ExceptionType.width.W)
224}
225
226trait HasBackendRedirectInfo extends HasXSParameter {
227  def isLoadReplay(r: Valid[Redirect]) = r.bits.flushItself()
228}
229
230class FtqToCtrlIO(implicit p: Parameters) extends XSBundle with HasBackendRedirectInfo {
231  // write to backend pc mem
232  val pc_mem_wen   = Output(Bool())
233  val pc_mem_waddr = Output(UInt(log2Ceil(FtqSize).W))
234  val pc_mem_wdata = Output(new Ftq_RF_Components)
235  // newest target
236  val newest_entry_en     = Output(Bool())
237  val newest_entry_target = Output(UInt(VAddrBits.W))
238  val newest_entry_ptr    = Output(new FtqPtr)
239}
240
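// Generates the FTB entry to write back for an FTQ entry being updated: a freshly
// initialized entry when the prediction missed in the FTB, or otherwise the old entry
// modified by a newly detected branch, an updated jalr target, or a weakened strong bias.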
241class FTBEntryGen(implicit p: Parameters) extends XSModule with HasBackendRedirectInfo with HasBPUParameter {
242  val io = IO(new Bundle {
243    val start_addr     = Input(UInt(VAddrBits.W))
244    val old_entry      = Input(new FTBEntry)
245    val pd             = Input(new Ftq_pd_Entry)
246    val cfiIndex       = Flipped(Valid(UInt(log2Ceil(PredictWidth).W)))
247    val target         = Input(UInt(VAddrBits.W))
248    val hit            = Input(Bool())
249    val mispredict_vec = Input(Vec(PredictWidth, Bool()))
250
251    val new_entry         = Output(new FTBEntry)
252    val new_br_insert_pos = Output(Vec(numBr, Bool()))
253    val taken_mask        = Output(Vec(numBr, Bool()))
254    val jmp_taken         = Output(Bool())
255    val mispred_mask      = Output(Vec(numBr + 1, Bool()))
256
257    // for perf counters
258    val is_init_entry           = Output(Bool())
259    val is_old_entry            = Output(Bool())
260    val is_new_br               = Output(Bool())
261    val is_jalr_target_modified = Output(Bool())
262    val is_strong_bias_modified = Output(Bool())
263    val is_br_full              = Output(Bool())
264  })
265
266  // no mispredictions detected at predecode
267  val hit = io.hit
268  val pd  = io.pd
269
270  val init_entry = WireInit(0.U.asTypeOf(new FTBEntry))
271
272  val cfi_is_br       = pd.brMask(io.cfiIndex.bits) && io.cfiIndex.valid
273  val entry_has_jmp   = pd.jmpInfo.valid
274  val new_jmp_is_jal  = entry_has_jmp && !pd.jmpInfo.bits(0) && io.cfiIndex.valid
275  val new_jmp_is_jalr = entry_has_jmp && pd.jmpInfo.bits(0) && io.cfiIndex.valid
276  val new_jmp_is_call = entry_has_jmp && pd.jmpInfo.bits(1) && io.cfiIndex.valid
277  val new_jmp_is_ret  = entry_has_jmp && pd.jmpInfo.bits(2) && io.cfiIndex.valid
278  val last_jmp_rvi    = entry_has_jmp && pd.jmpOffset === (PredictWidth - 1).U && !pd.rvcMask.last
279  // val last_br_rvi = cfi_is_br && io.cfiIndex.bits === (PredictWidth-1).U && !pd.rvcMask.last
280
281  val cfi_is_jal  = io.cfiIndex.bits === pd.jmpOffset && new_jmp_is_jal
282  val cfi_is_jalr = io.cfiIndex.bits === pd.jmpOffset && new_jmp_is_jalr
283
284  def carryPos = log2Ceil(PredictWidth) + instOffsetBits
285  def getLower(pc: UInt) = pc(carryPos - 1, instOffsetBits)
286  // if not hit, establish a new entry
287  init_entry.valid := true.B
288  // tag is left for ftb to assign
289
290  // case br
291  val init_br_slot = init_entry.getSlotForBr(0)
292  when(cfi_is_br) {
293    init_br_slot.valid  := true.B
294    init_br_slot.offset := io.cfiIndex.bits
295    init_br_slot.setLowerStatByTarget(io.start_addr, io.target, numBr == 1)
296    init_entry.strong_bias(0) := true.B // set to strong bias on init
297  }
298
299  // case jmp
300  when(entry_has_jmp) {
301    init_entry.tailSlot.offset := pd.jmpOffset
302    init_entry.tailSlot.valid  := new_jmp_is_jal || new_jmp_is_jalr
303    init_entry.tailSlot.setLowerStatByTarget(io.start_addr, Mux(cfi_is_jalr, io.target, pd.jalTarget), isShare = false)
304    init_entry.strong_bias(numBr - 1) := new_jmp_is_jalr // set strong bias for the jalr on init
305  }
306
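  // Partial fall-through address: when the block ends with a (non-cross-boundary) jump,
  // fall through right after the jump; otherwise fall through to the next aligned block
  // (pftAddr = getLower(start_addr) with carry set).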
307  val jmpPft = getLower(io.start_addr) +& pd.jmpOffset +& Mux(pd.rvcMask(pd.jmpOffset), 1.U, 2.U)
308  init_entry.pftAddr := Mux(entry_has_jmp && !last_jmp_rvi, jmpPft, getLower(io.start_addr))
309  init_entry.carry   := Mux(entry_has_jmp && !last_jmp_rvi, jmpPft(carryPos - instOffsetBits), true.B)
310
311  require(
312    isPow2(PredictWidth),
313    "If PredictWidth does not satisfy the power of 2," +
314      "pftAddr := getLower(io.start_addr) and carry := true.B  not working!!"
315  )
316
317  init_entry.isJalr := new_jmp_is_jalr
318  init_entry.isCall := new_jmp_is_call
319  init_entry.isRet  := new_jmp_is_ret
320  // when the jump sits in the last slot and is RVI, the fall-through address points to the middle of that instruction
321  init_entry.last_may_be_rvi_call := pd.jmpOffset === (PredictWidth - 1).U && !pd.rvcMask(pd.jmpOffset)
322
323  // if hit, check whether a new cfi(only br is possible) is detected
324  val oe              = io.old_entry
325  val br_recorded_vec = oe.getBrRecordedVec(io.cfiIndex.bits)
326  val br_recorded     = br_recorded_vec.asUInt.orR
327  val is_new_br       = cfi_is_br && !br_recorded
328  val new_br_offset   = io.cfiIndex.bits
329  // vec(i) means new br will be inserted BEFORE old br(i)
330  val allBrSlotsVec = oe.allSlotsForBr
331  val new_br_insert_onehot = VecInit((0 until numBr).map {
332    i =>
333      i match {
334        case 0 =>
335          !allBrSlotsVec(0).valid || new_br_offset < allBrSlotsVec(0).offset
336        case idx =>
337          allBrSlotsVec(idx - 1).valid && new_br_offset > allBrSlotsVec(idx - 1).offset &&
338          (!allBrSlotsVec(idx).valid || new_br_offset < allBrSlotsVec(idx).offset)
339      }
340  })
341
342  val old_entry_modified = WireInit(io.old_entry)
343  for (i <- 0 until numBr) {
344    val slot = old_entry_modified.allSlotsForBr(i)
345    when(new_br_insert_onehot(i)) {
346      slot.valid  := true.B
347      slot.offset := new_br_offset
348      slot.setLowerStatByTarget(io.start_addr, io.target, i == numBr - 1)
349      old_entry_modified.strong_bias(i) := true.B
350    }.elsewhen(new_br_offset > oe.allSlotsForBr(i).offset) {
351      old_entry_modified.strong_bias(i) := false.B
352      // all other fields remain unchanged
353    }.otherwise {
354      // case i == 0, remain unchanged
355      if (i != 0) {
356        val noNeedToMoveFromFormerSlot = (i == numBr - 1).B && !oe.brSlots.last.valid
357        when(!noNeedToMoveFromFormerSlot) {
358          slot.fromAnotherSlot(oe.allSlotsForBr(i - 1))
359          old_entry_modified.strong_bias(i) := oe.strong_bias(i)
360        }
361      }
362    }
363  }
364
365  // two circumstances:
366  // 1. oe: | br | j  |, new br should be in front of j, thus addr of j should be new pft
367  // 2. oe: | br | br |, new br could be anywhere between, thus new pft is the addr of either
368  //        the previous last br or the new br
369  val may_have_to_replace = oe.noEmptySlotForNewBr
370  val pft_need_to_change  = is_new_br && may_have_to_replace
371  // it should either be the given last br or the new br
372  when(pft_need_to_change) {
373    val new_pft_offset =
374      Mux(!new_br_insert_onehot.asUInt.orR, new_br_offset, oe.allSlotsForBr.last.offset)
375
376    // set jmp to invalid
377    old_entry_modified.pftAddr              := getLower(io.start_addr) + new_pft_offset
378    old_entry_modified.carry                := (getLower(io.start_addr) +& new_pft_offset).head(1).asBool
379    old_entry_modified.last_may_be_rvi_call := false.B
380    old_entry_modified.isCall               := false.B
381    old_entry_modified.isRet                := false.B
382    old_entry_modified.isJalr               := false.B
383  }
384
385  val old_entry_jmp_target_modified = WireInit(oe)
386  val old_target      = oe.tailSlot.getTarget(io.start_addr) // may be wrong because we store only 20 lowest bits
387  val old_tail_is_jmp = !oe.tailSlot.sharing
388  val jalr_target_modified = cfi_is_jalr && (old_target =/= io.target) && old_tail_is_jmp // TODO: pass full jalr target
389  when(jalr_target_modified) {
390    old_entry_jmp_target_modified.setByJmpTarget(io.start_addr, io.target)
391    old_entry_jmp_target_modified.strong_bias := 0.U.asTypeOf(Vec(numBr, Bool()))
392  }
393
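  // Weaken strong bias: a branch slot keeps its strong bias only when that very branch is
  // the taken cfi of this block; strong_bias_modified_vec flags slots whose bias is cleared here.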
394  val old_entry_strong_bias    = WireInit(oe)
395  val strong_bias_modified_vec = Wire(Vec(numBr, Bool())) // whether modified or not
396  for (i <- 0 until numBr) {
397    when(br_recorded_vec(0)) {
398      old_entry_strong_bias.strong_bias(0) :=
399        oe.strong_bias(0) && io.cfiIndex.valid && oe.brValids(0) && io.cfiIndex.bits === oe.brOffset(0)
400    }.elsewhen(br_recorded_vec(numBr - 1)) {
401      old_entry_strong_bias.strong_bias(0) := false.B
402      old_entry_strong_bias.strong_bias(numBr - 1) :=
403        oe.strong_bias(numBr - 1) && io.cfiIndex.valid && oe.brValids(numBr - 1) && io.cfiIndex.bits === oe.brOffset(
404          numBr - 1
405        )
406    }
407    strong_bias_modified_vec(i) := oe.strong_bias(i) && oe.brValids(i) && !old_entry_strong_bias.strong_bias(i)
408  }
409  val strong_bias_modified = strong_bias_modified_vec.reduce(_ || _)
410
411  val derived_from_old_entry =
412    Mux(is_new_br, old_entry_modified, Mux(jalr_target_modified, old_entry_jmp_target_modified, old_entry_strong_bias))
413
414  io.new_entry := Mux(!hit, init_entry, derived_from_old_entry)
415
416  io.new_br_insert_pos := new_br_insert_onehot
417  io.taken_mask := VecInit((io.new_entry.brOffset zip io.new_entry.brValids).map {
418    case (off, v) => io.cfiIndex.bits === off && io.cfiIndex.valid && v
419  })
420  io.jmp_taken := io.new_entry.jmpValid && io.new_entry.tailSlot.offset === io.cfiIndex.bits
421  for (i <- 0 until numBr) {
422    io.mispred_mask(i) := io.new_entry.brValids(i) && io.mispredict_vec(io.new_entry.brOffset(i))
423  }
424  io.mispred_mask.last := io.new_entry.jmpValid && io.mispredict_vec(pd.jmpOffset)
425
426  // for perf counters
427  io.is_init_entry           := !hit
428  io.is_old_entry            := hit && !is_new_br && !jalr_target_modified && !strong_bias_modified
429  io.is_new_br               := hit && is_new_br
430  io.is_jalr_target_modified := hit && jalr_target_modified
431  io.is_strong_bias_modified := hit && strong_bias_modified
432  io.is_br_full              := hit && is_new_br && may_have_to_replace
433}
434
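// Synchronous PC memory holding one Ftq_RF_Components per FTQ entry, with a single write
// port and dedicated read ports for ifuPtr(+1, +2), pfPtr(+1) and commPtr(+1); read
// addresses are applied one cycle before the data is consumed.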
435class FtqPcMemWrapper(numOtherReads: Int)(implicit p: Parameters) extends XSModule with HasBackendRedirectInfo {
436  val io = IO(new Bundle {
437    val ifuPtr_w           = Input(new FtqPtr)
438    val ifuPtrPlus1_w      = Input(new FtqPtr)
439    val ifuPtrPlus2_w      = Input(new FtqPtr)
440    val pfPtr_w            = Input(new FtqPtr)
441    val pfPtrPlus1_w       = Input(new FtqPtr)
442    val commPtr_w          = Input(new FtqPtr)
443    val commPtrPlus1_w     = Input(new FtqPtr)
444    val ifuPtr_rdata       = Output(new Ftq_RF_Components)
445    val ifuPtrPlus1_rdata  = Output(new Ftq_RF_Components)
446    val ifuPtrPlus2_rdata  = Output(new Ftq_RF_Components)
447    val pfPtr_rdata        = Output(new Ftq_RF_Components)
448    val pfPtrPlus1_rdata   = Output(new Ftq_RF_Components)
449    val commPtr_rdata      = Output(new Ftq_RF_Components)
450    val commPtrPlus1_rdata = Output(new Ftq_RF_Components)
451
452    val wen   = Input(Bool())
453    val waddr = Input(UInt(log2Ceil(FtqSize).W))
454    val wdata = Input(new Ftq_RF_Components)
455  })
456
457  val num_pc_read = numOtherReads + 5
458  val mem         = Module(new SyncDataModuleTemplate(new Ftq_RF_Components, FtqSize, num_pc_read, 1, "FtqPC"))
459  mem.io.wen(0)   := io.wen
460  mem.io.waddr(0) := io.waddr
461  mem.io.wdata(0) := io.wdata
462
463  // read one cycle ahead for ftq local reads
464  val raddr_vec = VecInit(Seq(
465    io.ifuPtr_w.value,
466    io.ifuPtrPlus1_w.value,
467    io.ifuPtrPlus2_w.value,
468    io.pfPtr_w.value,
469    io.pfPtrPlus1_w.value,
470    io.commPtrPlus1_w.value,
471    io.commPtr_w.value
472  ))
473
474  mem.io.raddr := raddr_vec
475
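  // rdata follows raddr_vec order (ifuPtr, ifuPtrPlus1, ifuPtrPlus2, pfPtr, pfPtrPlus1,
  // commPtrPlus1, commPtr), hence the dropRight(n).last indexing below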
476  io.ifuPtr_rdata       := mem.io.rdata.dropRight(6).last
477  io.ifuPtrPlus1_rdata  := mem.io.rdata.dropRight(5).last
478  io.ifuPtrPlus2_rdata  := mem.io.rdata.dropRight(4).last
479  io.pfPtr_rdata        := mem.io.rdata.dropRight(3).last
480  io.pfPtrPlus1_rdata   := mem.io.rdata.dropRight(2).last
481  io.commPtrPlus1_rdata := mem.io.rdata.dropRight(1).last
482  io.commPtr_rdata      := mem.io.rdata.last
483}
484
485class Ftq(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelper
486    with HasBackendRedirectInfo with BPUUtils with HasBPUConst with HasPerfEvents
487    with HasICacheParameters {
488  val io = IO(new Bundle {
489    val fromBpu     = Flipped(new BpuToFtqIO)
490    val fromIfu     = Flipped(new IfuToFtqIO)
491    val fromBackend = Flipped(new CtrlToFtqIO)
492
493    val toBpu       = new FtqToBpuIO
494    val toIfu       = new FtqToIfuIO
495    val toICache    = new FtqToICacheIO
496    val toBackend   = new FtqToCtrlIO
497    val toPrefetch  = new FtqToPrefetchIO
498    val icacheFlush = Output(Bool())
499
500    val bpuInfo = new Bundle {
501      val bpRight = Output(UInt(XLEN.W))
502      val bpWrong = Output(UInt(XLEN.W))
503    }
504
505    val mmioCommitRead = Flipped(new mmioCommitRead)
506
507    // for perf
508    val ControlBTBMissBubble = Output(Bool())
509    val TAGEMissBubble       = Output(Bool())
510    val SCMissBubble         = Output(Bool())
511    val ITTAGEMissBubble     = Output(Bool())
512    val RASMissBubble        = Output(Bool())
513  })
514  io.bpuInfo := DontCare
515
516  val topdown_stage = RegInit(0.U.asTypeOf(new FrontendTopDownBundle))
517  // updated every cycle by the clock, not gated by a valid-ready handshake
518  topdown_stage                  := io.fromBpu.resp.bits.topdown_info
519  io.toIfu.req.bits.topdown_info := topdown_stage
520
521  val ifuRedirected = RegInit(VecInit(Seq.fill(FtqSize)(false.B)))
522
523  // io.fromBackend.ftqIdxAhead: bju(BjuCnt) + ldReplay + exception
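  // The backend can provide the redirect's ftqIdx one cycle before the redirect itself
  // (ftqIdxAhead / ftqIdxSelOH), so FTQ can start its memory reads early; realAhdValid
  // below indicates that such an early index belongs to the redirect that actually fires.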
524  val ftqIdxAhead = VecInit(Seq.tabulate(FtqRedirectAheadNum)(i => io.fromBackend.ftqIdxAhead(i))) // only bju
525  val ftqIdxSelOH = io.fromBackend.ftqIdxSelOH.bits(FtqRedirectAheadNum - 1, 0)
526
527  val aheadValid         = ftqIdxAhead.map(_.valid).reduce(_ | _) && !io.fromBackend.redirect.valid
528  val realAhdValid       = io.fromBackend.redirect.valid && (ftqIdxSelOH > 0.U) && RegNext(aheadValid)
529  val backendRedirect    = Wire(Valid(new BranchPredictionRedirect))
530  val backendRedirectReg = Wire(Valid(new BranchPredictionRedirect))
531  backendRedirectReg.valid := RegNext(Mux(realAhdValid, false.B, backendRedirect.valid))
532  backendRedirectReg.bits  := RegEnable(backendRedirect.bits, backendRedirect.valid)
533  val fromBackendRedirect = Wire(Valid(new BranchPredictionRedirect))
534  fromBackendRedirect := Mux(realAhdValid, backendRedirect, backendRedirectReg)
535
536  val stage2Flush  = backendRedirect.valid
537  val backendFlush = stage2Flush || RegNext(stage2Flush)
538  val ifuFlush     = Wire(Bool())
539
540  val flush = stage2Flush || RegNext(stage2Flush)
541
542  val allowBpuIn, allowToIfu = WireInit(false.B)
543  val flushToIfu             = !allowToIfu
544  allowBpuIn := !ifuFlush && !backendRedirect.valid && !backendRedirectReg.valid
545  allowToIfu := !ifuFlush && !backendRedirect.valid && !backendRedirectReg.valid
546
547  def copyNum                                              = 5
548  val bpuPtr, ifuPtr, pfPtr, ifuWbPtr, commPtr, robCommPtr = RegInit(FtqPtr(false.B, 0.U))
549  val ifuPtrPlus1                                          = RegInit(FtqPtr(false.B, 1.U))
550  val ifuPtrPlus2                                          = RegInit(FtqPtr(false.B, 2.U))
551  val pfPtrPlus1                                           = RegInit(FtqPtr(false.B, 1.U))
552  val commPtrPlus1                                         = RegInit(FtqPtr(false.B, 1.U))
553  val copied_ifu_ptr                                       = Seq.fill(copyNum)(RegInit(FtqPtr(false.B, 0.U)))
554  val copied_bpu_ptr                                       = Seq.fill(copyNum)(RegInit(FtqPtr(false.B, 0.U)))
555  require(FtqSize >= 4)
556  val ifuPtr_write       = WireInit(ifuPtr)
557  val ifuPtrPlus1_write  = WireInit(ifuPtrPlus1)
558  val ifuPtrPlus2_write  = WireInit(ifuPtrPlus2)
559  val pfPtr_write        = WireInit(pfPtr)
560  val pfPtrPlus1_write   = WireInit(pfPtrPlus1)
561  val ifuWbPtr_write     = WireInit(ifuWbPtr)
562  val commPtr_write      = WireInit(commPtr)
563  val commPtrPlus1_write = WireInit(commPtrPlus1)
564  val robCommPtr_write   = WireInit(robCommPtr)
565  ifuPtr       := ifuPtr_write
566  ifuPtrPlus1  := ifuPtrPlus1_write
567  ifuPtrPlus2  := ifuPtrPlus2_write
568  pfPtr        := pfPtr_write
569  pfPtrPlus1   := pfPtrPlus1_write
570  ifuWbPtr     := ifuWbPtr_write
571  commPtr      := commPtr_write
572  commPtrPlus1 := commPtrPlus1_write
573  copied_ifu_ptr.map { ptr =>
574    ptr := ifuPtr_write
575    dontTouch(ptr)
576  }
577  robCommPtr := robCommPtr_write
578  val validEntries = distanceBetween(bpuPtr, commPtr)
579  val canCommit    = Wire(Bool())
580
581  // Instruction page fault and instruction access fault are sent from backend with redirect requests.
582  // When IPF and IAF are sent, backendPcFaultPtr points to the FTQ entry whose first instruction
583  // raises IPF or IAF, which is ifuWbPtr_write or ifuPtr_write.
584  // backendException can only be cleared after IFU has written back that FTQ entry, because this
585  // makes sure that IAF and IPF are correctly raised instead of being flushed by redirect requests.
586  val backendException  = RegInit(ExceptionType.none)
587  val backendPcFaultPtr = RegInit(FtqPtr(false.B, 0.U))
588  when(fromBackendRedirect.valid) {
589    backendException := ExceptionType.fromOH(
590      has_pf = fromBackendRedirect.bits.cfiUpdate.backendIPF,
591      has_gpf = fromBackendRedirect.bits.cfiUpdate.backendIGPF,
592      has_af = fromBackendRedirect.bits.cfiUpdate.backendIAF
593    )
594    when(
595      fromBackendRedirect.bits.cfiUpdate.backendIPF || fromBackendRedirect.bits.cfiUpdate.backendIGPF ||
596        fromBackendRedirect.bits.cfiUpdate.backendIAF
597    ) {
598      backendPcFaultPtr := ifuWbPtr_write
599    }
600  }.elsewhen(ifuWbPtr =/= backendPcFaultPtr) {
601    backendException := ExceptionType.none
602  }
603
604  // **********************************************************************
605  // **************************** enq from bpu ****************************
606  // **********************************************************************
607  val new_entry_ready = validEntries < FtqSize.U || canCommit
608  io.fromBpu.resp.ready := new_entry_ready
609
610  val bpu_s2_resp     = io.fromBpu.resp.bits.s2
611  val bpu_s3_resp     = io.fromBpu.resp.bits.s3
612  val bpu_s2_redirect = bpu_s2_resp.valid(3) && bpu_s2_resp.hasRedirect(3)
613  val bpu_s3_redirect = bpu_s3_resp.valid(3) && bpu_s3_resp.hasRedirect(3)
614
615  io.toBpu.enq_ptr := bpuPtr
616  val enq_fire    = io.fromBpu.resp.fire && allowBpuIn // from bpu s1
617  val bpu_in_fire = (io.fromBpu.resp.fire || bpu_s2_redirect || bpu_s3_redirect) && allowBpuIn
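  // enq_fire allocates a new FTQ entry at bpuPtr (s1 response); bpu_in_fire also covers
  // s2/s3 overriding responses, which rewrite the entry at their own ftq_idx.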
618
619  val bpu_in_resp     = io.fromBpu.resp.bits.selectedResp
620  val bpu_in_stage    = io.fromBpu.resp.bits.selectedRespIdxForFtq
621  val bpu_in_resp_ptr = Mux(bpu_in_stage === BP_S1, bpuPtr, bpu_in_resp.ftq_idx)
622  val bpu_in_resp_idx = bpu_in_resp_ptr.value
623
624  // read ports:      pfReq1 + pfReq2 ++  ifuReq1 + ifuReq2 + ifuReq3 + commitUpdate2 + commitUpdate
625  val ftq_pc_mem = Module(new FtqPcMemWrapper(2))
626  // resp from uBTB
627  ftq_pc_mem.io.wen   := bpu_in_fire
628  ftq_pc_mem.io.waddr := bpu_in_resp_idx
629  ftq_pc_mem.io.wdata.fromBranchPrediction(bpu_in_resp)
630
631  //                                                            ifuRedirect + backendRedirect + commit
632  val ftq_redirect_mem = Module(new SyncDataModuleTemplate(
633    new Ftq_Redirect_SRAMEntry,
634    FtqSize,
635    IfuRedirectNum + FtqRedirectAheadNum + 1,
636    1,
637    hasRen = true
638  ))
639  // this info is intended to be enqueued at the last stage of bpu
640  ftq_redirect_mem.io.wen(0)   := io.fromBpu.resp.bits.lastStage.valid(3)
641  ftq_redirect_mem.io.waddr(0) := io.fromBpu.resp.bits.lastStage.ftq_idx.value
642  ftq_redirect_mem.io.wdata(0) := io.fromBpu.resp.bits.last_stage_spec_info
643  println(f"ftq redirect MEM: entry ${ftq_redirect_mem.io.wdata(0).getWidth} * ${FtqSize} * 3")
644
645  val ftq_meta_1r_sram = Module(new FtqNRSRAM(new Ftq_1R_SRAMEntry, 1))
646  // this info is intended to be enqueued at the last stage of bpu
647  ftq_meta_1r_sram.io.wen             := io.fromBpu.resp.bits.lastStage.valid(3)
648  ftq_meta_1r_sram.io.waddr           := io.fromBpu.resp.bits.lastStage.ftq_idx.value
649  ftq_meta_1r_sram.io.wdata.meta      := io.fromBpu.resp.bits.last_stage_meta
650  ftq_meta_1r_sram.io.wdata.ftb_entry := io.fromBpu.resp.bits.last_stage_ftb_entry
651  //                                                            ifuRedirect + backendRedirect (commit moved to ftq_meta_1r_sram)
652  val ftb_entry_mem = Module(new SyncDataModuleTemplate(
653    new FTBEntry_FtqMem,
654    FtqSize,
655    IfuRedirectNum + FtqRedirectAheadNum,
656    1,
657    hasRen = true
658  ))
659  ftb_entry_mem.io.wen(0)   := io.fromBpu.resp.bits.lastStage.valid(3)
660  ftb_entry_mem.io.waddr(0) := io.fromBpu.resp.bits.lastStage.ftq_idx.value
661  ftb_entry_mem.io.wdata(0) := io.fromBpu.resp.bits.last_stage_ftb_entry
662
663  // multi-write
664  val update_target = Reg(Vec(FtqSize, UInt(VAddrBits.W))) // could be taken target or fallThrough //TODO: remove this
665  val newest_entry_target          = Reg(UInt(VAddrBits.W))
666  val newest_entry_target_modified = RegInit(false.B)
667  val newest_entry_ptr             = Reg(new FtqPtr)
668  val newest_entry_ptr_modified    = RegInit(false.B)
669  val cfiIndex_vec                 = Reg(Vec(FtqSize, ValidUndirectioned(UInt(log2Ceil(PredictWidth).W))))
670  val mispredict_vec               = Reg(Vec(FtqSize, Vec(PredictWidth, Bool())))
671  val pred_stage                   = Reg(Vec(FtqSize, UInt(2.W)))
672  val pred_s1_cycle                = if (!env.FPGAPlatform) Some(Reg(Vec(FtqSize, UInt(64.W)))) else None
673
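  // Per-instruction commit state for every FTQ entry: c_empty on allocation, c_toCommit
  // once IFU writes the instruction back, c_committed once it has been committed;
  // c_flushed presumably marks slots squashed by a redirect.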
674  val c_empty :: c_toCommit :: c_committed :: c_flushed :: Nil = Enum(4)
675  val commitStateQueueReg = RegInit(VecInit(Seq.fill(FtqSize) {
676    VecInit(Seq.fill(PredictWidth)(c_empty))
677  }))
678  val commitStateQueueEnable = WireInit(VecInit(Seq.fill(FtqSize)(false.B)))
679  val commitStateQueueNext   = WireInit(commitStateQueueReg)
680
681  for (f <- 0 until FtqSize) {
682    when(commitStateQueueEnable(f)) {
683      commitStateQueueReg(f) := commitStateQueueNext(f)
684    }
685  }
686
687  val f_to_send :: f_sent :: Nil = Enum(2)
688  val entry_fetch_status         = RegInit(VecInit(Seq.fill(FtqSize)(f_sent)))
689
690  val h_not_hit :: h_false_hit :: h_hit :: Nil = Enum(3)
691  val entry_hit_status                         = RegInit(VecInit(Seq.fill(FtqSize)(h_not_hit)))
692
693  // modify registers one cycle later to cut critical path
694  val last_cycle_bpu_in       = RegNext(bpu_in_fire)
695  val last_cycle_bpu_in_ptr   = RegEnable(bpu_in_resp_ptr, bpu_in_fire)
696  val last_cycle_bpu_in_idx   = last_cycle_bpu_in_ptr.value
697  val last_cycle_bpu_target   = RegEnable(bpu_in_resp.getTarget(3), bpu_in_fire)
698  val last_cycle_cfiIndex     = RegEnable(bpu_in_resp.cfiIndex(3), bpu_in_fire)
699  val last_cycle_bpu_in_stage = RegEnable(bpu_in_stage, bpu_in_fire)
700
701  def extra_copyNum_for_commitStateQueue = 2
702  val copied_last_cycle_bpu_in =
703    VecInit(Seq.fill(copyNum + extra_copyNum_for_commitStateQueue)(RegNext(bpu_in_fire)))
704  val copied_last_cycle_bpu_in_ptr_for_ftq =
705    VecInit(Seq.fill(extra_copyNum_for_commitStateQueue)(RegEnable(bpu_in_resp_ptr, bpu_in_fire)))
706
707  newest_entry_target_modified := false.B
708  newest_entry_ptr_modified    := false.B
709  when(last_cycle_bpu_in) {
710    entry_fetch_status(last_cycle_bpu_in_idx) := f_to_send
711    cfiIndex_vec(last_cycle_bpu_in_idx)       := last_cycle_cfiIndex
712    pred_stage(last_cycle_bpu_in_idx)         := last_cycle_bpu_in_stage
713
714    update_target(last_cycle_bpu_in_idx) := last_cycle_bpu_target // TODO: remove this
715    newest_entry_target_modified         := true.B
716    newest_entry_target                  := last_cycle_bpu_target
717    newest_entry_ptr_modified            := true.B
718    newest_entry_ptr                     := last_cycle_bpu_in_ptr
719  }
720
721  // reduce fanout by delaying the write for a cycle
722  when(RegNext(last_cycle_bpu_in)) {
723    mispredict_vec(RegEnable(last_cycle_bpu_in_idx, last_cycle_bpu_in)) :=
724      WireInit(VecInit(Seq.fill(PredictWidth)(false.B)))
725  }
726
727  // record s1 pred cycles
728  pred_s1_cycle.map { vec =>
729    when(bpu_in_fire && (bpu_in_stage === BP_S1)) {
730      vec(bpu_in_resp_ptr.value) := bpu_in_resp.full_pred(0).predCycle.getOrElse(0.U)
731    }
732  }
733
734  // reduce fanout using copied last_cycle_bpu_in and copied last_cycle_bpu_in_ptr
735  val copied_last_cycle_bpu_in_for_ftq = copied_last_cycle_bpu_in.takeRight(extra_copyNum_for_commitStateQueue)
736  copied_last_cycle_bpu_in_for_ftq.zip(copied_last_cycle_bpu_in_ptr_for_ftq).zipWithIndex.map {
737    case ((in, ptr), i) =>
738      when(in) {
739        val perSetEntries = FtqSize / extra_copyNum_for_commitStateQueue // 32
740        require(FtqSize % extra_copyNum_for_commitStateQueue == 0)
741        for (j <- 0 until perSetEntries) {
742          when(ptr.value === (i * perSetEntries + j).U) {
743            commitStateQueueNext(i * perSetEntries + j) := VecInit(Seq.fill(PredictWidth)(c_empty))
744            // Clock gating optimization, use 1 gate cell to control a row
745            commitStateQueueEnable(i * perSetEntries + j) := true.B
746          }
747        }
748      }
749  }
750
751  bpuPtr := bpuPtr + enq_fire
752  copied_bpu_ptr.map(_ := bpuPtr + enq_fire)
753  when(io.toIfu.req.fire && allowToIfu) {
754    ifuPtr_write      := ifuPtrPlus1
755    ifuPtrPlus1_write := ifuPtrPlus2
756    ifuPtrPlus2_write := ifuPtrPlus2 + 1.U
757  }
758  when(io.toPrefetch.req.fire && allowToIfu) {
759    pfPtr_write      := pfPtrPlus1
760    pfPtrPlus1_write := pfPtrPlus1 + 1.U
761  }
762
763  // only use ftb result to assign hit status
764  when(bpu_s2_resp.valid(3)) {
765    entry_hit_status(bpu_s2_resp.ftq_idx.value) := Mux(bpu_s2_resp.full_pred(3).hit, h_hit, h_not_hit)
766  }
767
768  io.toIfu.flushFromBpu.s2.valid      := bpu_s2_redirect
769  io.toIfu.flushFromBpu.s2.bits       := bpu_s2_resp.ftq_idx
770  io.toPrefetch.flushFromBpu.s2.valid := bpu_s2_redirect
771  io.toPrefetch.flushFromBpu.s2.bits  := bpu_s2_resp.ftq_idx
772  when(bpu_s2_redirect) {
773    bpuPtr := bpu_s2_resp.ftq_idx + 1.U
774    copied_bpu_ptr.map(_ := bpu_s2_resp.ftq_idx + 1.U)
775    // only when ifuPtr runs ahead of bpu s2 resp should we recover it
776    when(!isBefore(ifuPtr, bpu_s2_resp.ftq_idx)) {
777      ifuPtr_write      := bpu_s2_resp.ftq_idx
778      ifuPtrPlus1_write := bpu_s2_resp.ftq_idx + 1.U
779      ifuPtrPlus2_write := bpu_s2_resp.ftq_idx + 2.U
780    }
781    when(!isBefore(pfPtr, bpu_s2_resp.ftq_idx)) {
782      pfPtr_write      := bpu_s2_resp.ftq_idx
783      pfPtrPlus1_write := bpu_s2_resp.ftq_idx + 1.U
784    }
785  }
786
787  io.toIfu.flushFromBpu.s3.valid      := bpu_s3_redirect
788  io.toIfu.flushFromBpu.s3.bits       := bpu_s3_resp.ftq_idx
789  io.toPrefetch.flushFromBpu.s3.valid := bpu_s3_redirect
790  io.toPrefetch.flushFromBpu.s3.bits  := bpu_s3_resp.ftq_idx
791  when(bpu_s3_redirect) {
792    bpuPtr := bpu_s3_resp.ftq_idx + 1.U
793    copied_bpu_ptr.map(_ := bpu_s3_resp.ftq_idx + 1.U)
794    // only when ifuPtr runs ahead of bpu s3 resp should we recover it
795    when(!isBefore(ifuPtr, bpu_s3_resp.ftq_idx)) {
796      ifuPtr_write      := bpu_s3_resp.ftq_idx
797      ifuPtrPlus1_write := bpu_s3_resp.ftq_idx + 1.U
798      ifuPtrPlus2_write := bpu_s3_resp.ftq_idx + 2.U
799    }
800    when(!isBefore(pfPtr, bpu_s3_resp.ftq_idx)) {
801      pfPtr_write      := bpu_s3_resp.ftq_idx
802      pfPtrPlus1_write := bpu_s3_resp.ftq_idx + 1.U
803    }
804  }
805
806  XSError(isBefore(bpuPtr, ifuPtr) && !isFull(bpuPtr, ifuPtr), "\nifuPtr is before bpuPtr!\n")
807  XSError(isBefore(bpuPtr, pfPtr) && !isFull(bpuPtr, pfPtr), "\npfPtr is before bpuPtr!\n")
808  XSError(isBefore(ifuWbPtr, commPtr) && !isFull(ifuWbPtr, commPtr), "\ncommPtr is before ifuWbPtr!\n")
809
810  (0 until copyNum).map(i => XSError(copied_bpu_ptr(i) =/= bpuPtr, "\ncopiedBpuPtr is different from bpuPtr!\n"))
811
812  // ****************************************************************
813  // **************************** to ifu ****************************
814  // ****************************************************************
815  // 0  for ifu, and 1-4 for ICache
816  val bpu_in_bypass_buf         = RegEnable(ftq_pc_mem.io.wdata, bpu_in_fire)
817  val copied_bpu_in_bypass_buf  = VecInit(Seq.fill(copyNum)(RegEnable(ftq_pc_mem.io.wdata, bpu_in_fire)))
818  val bpu_in_bypass_buf_for_ifu = bpu_in_bypass_buf
819  val bpu_in_bypass_ptr         = RegEnable(bpu_in_resp_ptr, bpu_in_fire)
820  val last_cycle_to_ifu_fire    = RegNext(io.toIfu.req.fire)
821  val last_cycle_to_pf_fire     = RegNext(io.toPrefetch.req.fire)
822
823  val copied_bpu_in_bypass_ptr      = VecInit(Seq.fill(copyNum)(RegEnable(bpu_in_resp_ptr, bpu_in_fire)))
824  val copied_last_cycle_to_ifu_fire = VecInit(Seq.fill(copyNum)(RegNext(io.toIfu.req.fire)))
825
826  // read pc and target
827  ftq_pc_mem.io.ifuPtr_w       := ifuPtr_write
828  ftq_pc_mem.io.ifuPtrPlus1_w  := ifuPtrPlus1_write
829  ftq_pc_mem.io.ifuPtrPlus2_w  := ifuPtrPlus2_write
830  ftq_pc_mem.io.pfPtr_w        := pfPtr_write
831  ftq_pc_mem.io.pfPtrPlus1_w   := pfPtrPlus1_write
832  ftq_pc_mem.io.commPtr_w      := commPtr_write
833  ftq_pc_mem.io.commPtrPlus1_w := commPtrPlus1_write
834
835  io.toIfu.req.bits.ftqIdx := ifuPtr
836
837  val toICachePcBundle               = Wire(Vec(copyNum, new Ftq_RF_Components))
838  val toICacheEntryToSend            = Wire(Vec(copyNum, Bool()))
839  val nextCycleToPrefetchPcBundle    = Wire(new Ftq_RF_Components)
840  val nextCycleToPrefetchEntryToSend = Wire(Bool())
841  val toPrefetchPcBundle             = RegNext(nextCycleToPrefetchPcBundle)
842  val toPrefetchEntryToSend          = RegNext(nextCycleToPrefetchEntryToSend)
843  val toIfuPcBundle                  = Wire(new Ftq_RF_Components)
844  val entry_is_to_send               = WireInit(entry_fetch_status(ifuPtr.value) === f_to_send)
845  val entry_ftq_offset               = WireInit(cfiIndex_vec(ifuPtr.value))
846  val entry_next_addr                = Wire(UInt(VAddrBits.W))
847
848  val pc_mem_ifu_ptr_rdata   = VecInit(Seq.fill(copyNum)(RegNext(ftq_pc_mem.io.ifuPtr_rdata)))
849  val pc_mem_ifu_plus1_rdata = VecInit(Seq.fill(copyNum)(RegNext(ftq_pc_mem.io.ifuPtrPlus1_rdata)))
850  val diff_entry_next_addr   = WireInit(update_target(ifuPtr.value)) // TODO: remove this
851
852  val copied_ifu_plus1_to_send = VecInit(Seq.fill(copyNum)(RegNext(
853    entry_fetch_status(ifuPtrPlus1.value) === f_to_send
854  ) || RegNext(last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtrPlus1)))
855  val copied_ifu_ptr_to_send = VecInit(Seq.fill(copyNum)(RegNext(
856    entry_fetch_status(ifuPtr.value) === f_to_send
857  ) || RegNext(last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr)))
858
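  // Each ICache copy independently picks between the bypass buffer (entry just written by
  // BPU), the ifuPtrPlus1 read (a request was sent to IFU last cycle), and the ifuPtr
  // read, mirroring the IFU-side selection below to reduce fanout.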
859  for (i <- 0 until copyNum) {
860    when(copied_last_cycle_bpu_in(i) && copied_bpu_in_bypass_ptr(i) === copied_ifu_ptr(i)) {
861      toICachePcBundle(i)    := copied_bpu_in_bypass_buf(i)
862      toICacheEntryToSend(i) := true.B
863    }.elsewhen(copied_last_cycle_to_ifu_fire(i)) {
864      toICachePcBundle(i)    := pc_mem_ifu_plus1_rdata(i)
865      toICacheEntryToSend(i) := copied_ifu_plus1_to_send(i)
866    }.otherwise {
867      toICachePcBundle(i)    := pc_mem_ifu_ptr_rdata(i)
868      toICacheEntryToSend(i) := copied_ifu_ptr_to_send(i)
869    }
870  }
871
872  // Calculate requests sent to prefetcher one cycle in advance to cut critical path
873  when(bpu_in_fire && bpu_in_resp_ptr === pfPtr_write) {
874    nextCycleToPrefetchPcBundle    := ftq_pc_mem.io.wdata
875    nextCycleToPrefetchEntryToSend := true.B
876  }.elsewhen(io.toPrefetch.req.fire) {
877    nextCycleToPrefetchPcBundle := ftq_pc_mem.io.pfPtrPlus1_rdata
878    nextCycleToPrefetchEntryToSend := entry_fetch_status(pfPtrPlus1.value) === f_to_send ||
879      last_cycle_bpu_in && bpu_in_bypass_ptr === pfPtrPlus1
880  }.otherwise {
881    nextCycleToPrefetchPcBundle := ftq_pc_mem.io.pfPtr_rdata
882    nextCycleToPrefetchEntryToSend := entry_fetch_status(pfPtr.value) === f_to_send ||
883      last_cycle_bpu_in && bpu_in_bypass_ptr === pfPtr // reduce potential bubbles
884  }
885
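  // Select the fetch request for this cycle: bypass the entry BPU wrote last cycle if it
  // is the one ifuPtr points to; otherwise use the pc_mem read of ifuPtrPlus1 (when a
  // request was sent last cycle, so ifuPtr has advanced) or of ifuPtr.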
886  // TODO: reconsider target address bypass logic
887  when(last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr) {
888    toIfuPcBundle        := bpu_in_bypass_buf_for_ifu
889    entry_is_to_send     := true.B
890    entry_next_addr      := last_cycle_bpu_target
891    entry_ftq_offset     := last_cycle_cfiIndex
892    diff_entry_next_addr := last_cycle_bpu_target // TODO: remove this
893  }.elsewhen(last_cycle_to_ifu_fire) {
894    toIfuPcBundle := RegNext(ftq_pc_mem.io.ifuPtrPlus1_rdata)
895    entry_is_to_send := RegNext(entry_fetch_status(ifuPtrPlus1.value) === f_to_send) ||
896      RegNext(last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtrPlus1) // reduce potential bubbles
897    entry_next_addr := Mux(
898      last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtrPlus1,
899      bpu_in_bypass_buf_for_ifu.startAddr,
900      Mux(ifuPtr === newest_entry_ptr, newest_entry_target, RegNext(ftq_pc_mem.io.ifuPtrPlus2_rdata.startAddr))
901    ) // ifuPtr+2
902  }.otherwise {
903    toIfuPcBundle := RegNext(ftq_pc_mem.io.ifuPtr_rdata)
904    entry_is_to_send := RegNext(entry_fetch_status(ifuPtr.value) === f_to_send) ||
905      RegNext(last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr) // reduce potential bubbles
906    entry_next_addr := Mux(
907      last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtrPlus1,
908      bpu_in_bypass_buf_for_ifu.startAddr,
909      Mux(ifuPtr === newest_entry_ptr, newest_entry_target, RegNext(ftq_pc_mem.io.ifuPtrPlus1_rdata.startAddr))
910    ) // ifuPtr+1
911  }
912
913  io.toIfu.req.valid              := entry_is_to_send && ifuPtr =/= bpuPtr
914  io.toIfu.req.bits.nextStartAddr := entry_next_addr
915  io.toIfu.req.bits.ftqOffset     := entry_ftq_offset
916  io.toIfu.req.bits.fromFtqPcBundle(toIfuPcBundle)
917
918  io.toICache.req.valid := entry_is_to_send && ifuPtr =/= bpuPtr
919  io.toICache.req.bits.readValid.zipWithIndex.map { case (copy, i) =>
920    copy := toICacheEntryToSend(i) && copied_ifu_ptr(i) =/= copied_bpu_ptr(i)
921  }
922  io.toICache.req.bits.pcMemRead.zipWithIndex.foreach { case (copy, i) =>
923    copy.fromFtqPcBundle(toICachePcBundle(i))
924    copy.ftqIdx := ifuPtr
925  }
926  io.toICache.req.bits.backendException := ExceptionType.hasException(backendException) && backendPcFaultPtr === ifuPtr
927
928  io.toPrefetch.req.valid := toPrefetchEntryToSend && pfPtr =/= bpuPtr
929  io.toPrefetch.req.bits.fromFtqPcBundle(toPrefetchPcBundle)
930  io.toPrefetch.req.bits.ftqIdx  := pfPtr
931  io.toPrefetch.backendException := Mux(backendPcFaultPtr === pfPtr, backendException, ExceptionType.none)
932  // io.toICache.req.bits.bypassSelect := last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr
933  // io.toICache.req.bits.bpuBypassWrite.zipWithIndex.map{case(bypassWrtie, i) =>
934  //   bypassWrtie.startAddr := bpu_in_bypass_buf.tail(i).startAddr
935  //   bypassWrtie.nextlineStart := bpu_in_bypass_buf.tail(i).nextLineAddr
936  // }
937
938  // TODO: remove this
939  XSError(
940    io.toIfu.req.valid && diff_entry_next_addr =/= entry_next_addr,
941    p"\nifu_req_target wrong! ifuPtr: ${ifuPtr}, entry_next_addr: ${Hexadecimal(entry_next_addr)} diff_entry_next_addr: ${Hexadecimal(diff_entry_next_addr)}\n"
942  )
943
944  // when fall through is smaller in value than start address, there must be a false hit
945  when(toIfuPcBundle.fallThruError && entry_hit_status(ifuPtr.value) === h_hit) {
946    when(io.toIfu.req.fire &&
947      !(bpu_s2_redirect && bpu_s2_resp.ftq_idx === ifuPtr) &&
948      !(bpu_s3_redirect && bpu_s3_resp.ftq_idx === ifuPtr)) {
949      entry_hit_status(ifuPtr.value) := h_false_hit
950      // XSError(true.B, "FTB false hit by fallThroughError, startAddr: %x, fallTHru: %x\n", io.toIfu.req.bits.startAddr, io.toIfu.req.bits.nextStartAddr)
951    }
952  }
953  XSDebug(
954    toIfuPcBundle.fallThruError && entry_hit_status(ifuPtr.value) === h_hit,
955    "fallThruError! start:%x, fallThru:%x\n",
956    io.toIfu.req.bits.startAddr,
957    io.toIfu.req.bits.nextStartAddr
958  )
959
960  XSPerfAccumulate(
961    f"fall_through_error_to_ifu",
962    toIfuPcBundle.fallThruError && entry_hit_status(ifuPtr.value) === h_hit &&
963      io.toIfu.req.fire && !(bpu_s2_redirect && bpu_s2_resp.ftq_idx === ifuPtr) && !(bpu_s3_redirect && bpu_s3_resp.ftq_idx === ifuPtr)
964  )
965
966  val ifu_req_should_be_flushed =
967    io.toIfu.flushFromBpu.shouldFlushByStage2(io.toIfu.req.bits.ftqIdx) ||
968      io.toIfu.flushFromBpu.shouldFlushByStage3(io.toIfu.req.bits.ftqIdx)
969
970  when(io.toIfu.req.fire && !ifu_req_should_be_flushed) {
971    entry_fetch_status(ifuPtr.value) := f_sent
972  }
973
974  // *********************************************************************
975  // **************************** wb from ifu ****************************
976  // *********************************************************************
977  val pdWb         = io.fromIfu.pdWb
978  val pds          = pdWb.bits.pd
979  val ifu_wb_valid = pdWb.valid
980  val ifu_wb_idx   = pdWb.bits.ftqIdx.value
981  // read ports:                                                         commit update
982  val ftq_pd_mem =
983    Module(new SyncDataModuleTemplate(new Ftq_pd_Entry, FtqSize, FtqRedirectAheadNum + 1, 1, hasRen = true))
984  ftq_pd_mem.io.wen(0)   := ifu_wb_valid
985  ftq_pd_mem.io.waddr(0) := pdWb.bits.ftqIdx.value
986  ftq_pd_mem.io.wdata(0).fromPdWb(pdWb.bits)
987
988  val hit_pd_valid       = entry_hit_status(ifu_wb_idx) === h_hit && ifu_wb_valid
989  val hit_pd_mispred     = hit_pd_valid && pdWb.bits.misOffset.valid
990  val hit_pd_mispred_reg = RegNext(hit_pd_mispred, init = false.B)
991  val pd_reg             = RegEnable(pds, pdWb.valid)
992  val start_pc_reg       = RegEnable(pdWb.bits.pc(0), pdWb.valid)
993  val wb_idx_reg         = RegEnable(ifu_wb_idx, pdWb.valid)
994
995  when(ifu_wb_valid) {
996    val comm_stq_wen = VecInit(pds.map(_.valid).zip(pdWb.bits.instrRange).map {
997      case (v, inRange) => v && inRange
998    })
999    commitStateQueueEnable(ifu_wb_idx) := true.B
1000    (commitStateQueueNext(ifu_wb_idx) zip comm_stq_wen).map {
1001      case (qe, v) => when(v) {
1002          qe := c_toCommit
1003        }
1004    }
1005  }
1006
1007  when(ifu_wb_valid) {
1008    ifuWbPtr_write := ifuWbPtr + 1.U
1009  }
1010
1011  XSError(ifu_wb_valid && isAfter(pdWb.bits.ftqIdx, ifuPtr), "IFU returned a predecode before its req, check IFU")
1012
1013  ftb_entry_mem.io.ren.get.head := ifu_wb_valid
1014  ftb_entry_mem.io.raddr.head   := ifu_wb_idx
1015  val has_false_hit = WireInit(false.B)
1016  when(RegNext(hit_pd_valid)) {
1017    // check for false hit
1018    val pred_ftb_entry = ftb_entry_mem.io.rdata.head
1019    val brSlots        = pred_ftb_entry.brSlots
1020    val tailSlot       = pred_ftb_entry.tailSlot
1021    // we check cfis that bpu predicted
1022
1023    // bpu predicted branches but denied by predecode
1024    val br_false_hit =
1025      brSlots.map {
1026        s => s.valid && !(pd_reg(s.offset).valid && pd_reg(s.offset).isBr)
1027      }.reduce(_ || _) ||
1028        (tailSlot.valid && pred_ftb_entry.tailSlot.sharing &&
1029          !(pd_reg(tailSlot.offset).valid && pd_reg(tailSlot.offset).isBr))
1030
1031    val jmpOffset = tailSlot.offset
1032    val jmp_pd    = pd_reg(jmpOffset)
1033    val jal_false_hit = pred_ftb_entry.jmpValid &&
1034      ((pred_ftb_entry.isJal && !(jmp_pd.valid && jmp_pd.isJal)) ||
1035        (pred_ftb_entry.isJalr && !(jmp_pd.valid && jmp_pd.isJalr)) ||
1036        (pred_ftb_entry.isCall && !(jmp_pd.valid && jmp_pd.isCall)) ||
1037        (pred_ftb_entry.isRet && !(jmp_pd.valid && jmp_pd.isRet)))
1038
1039    has_false_hit := br_false_hit || jal_false_hit || hit_pd_mispred_reg
1040    // assert(!has_false_hit)
1041  }
1042  XSDebug(
1043    RegNext(hit_pd_valid) && has_false_hit,
1044    "FTB false hit by br or jal or hit_pd, startAddr: %x\n",
1045    pdWb.bits.pc(0)
1046  )
1047
1048  when(has_false_hit) {
1049    entry_hit_status(wb_idx_reg) := h_false_hit
1050  }
1051
1052  // *******************************************************************************
1053  // **************************** redirect from backend ****************************
1054  // *******************************************************************************
1055
1056  // redirect read cfiInfo, couples to redirectGen s2
1057  // ftqIdxAhead(0-3) => ftq_redirect_mem(1-4), reuse ftq_redirect_mem(1)
1058  val ftq_redirect_rdata = Wire(Vec(FtqRedirectAheadNum, new Ftq_Redirect_SRAMEntry))
1059  val ftb_redirect_rdata = Wire(Vec(FtqRedirectAheadNum, new FTBEntry_FtqMem))
1060
1061  val ftq_pd_rdata = Wire(Vec(FtqRedirectAheadNum, new Ftq_pd_Entry))
1062  for (i <- 1 until FtqRedirectAheadNum) {
1063    ftq_redirect_mem.io.ren.get(i + IfuRedirectNum) := ftqIdxAhead(i).valid
1064    ftq_redirect_mem.io.raddr(i + IfuRedirectNum)   := ftqIdxAhead(i).bits.value
1065    ftb_entry_mem.io.ren.get(i + IfuRedirectNum)    := ftqIdxAhead(i).valid
1066    ftb_entry_mem.io.raddr(i + IfuRedirectNum)      := ftqIdxAhead(i).bits.value
1067
1068    ftq_pd_mem.io.ren.get(i) := ftqIdxAhead(i).valid
1069    ftq_pd_mem.io.raddr(i)   := ftqIdxAhead(i).bits.value
1070  }
1071  ftq_redirect_mem.io.ren.get(IfuRedirectNum) := Mux(aheadValid, ftqIdxAhead(0).valid, backendRedirect.valid)
1072  ftq_redirect_mem.io.raddr(IfuRedirectNum) := Mux(
1073    aheadValid,
1074    ftqIdxAhead(0).bits.value,
1075    backendRedirect.bits.ftqIdx.value
1076  )
1077  ftb_entry_mem.io.ren.get(IfuRedirectNum) := Mux(aheadValid, ftqIdxAhead(0).valid, backendRedirect.valid)
1078  ftb_entry_mem.io.raddr(IfuRedirectNum) := Mux(
1079    aheadValid,
1080    ftqIdxAhead(0).bits.value,
1081    backendRedirect.bits.ftqIdx.value
1082  )
1083
1084  ftq_pd_mem.io.ren.get(0) := Mux(aheadValid, ftqIdxAhead(0).valid, backendRedirect.valid)
1085  ftq_pd_mem.io.raddr(0)   := Mux(aheadValid, ftqIdxAhead(0).bits.value, backendRedirect.bits.ftqIdx.value)
1086
1087  for (i <- 0 until FtqRedirectAheadNum) {
1088    ftq_redirect_rdata(i) := ftq_redirect_mem.io.rdata(i + IfuRedirectNum)
1089    ftb_redirect_rdata(i) := ftb_entry_mem.io.rdata(i + IfuRedirectNum)
1090
1091    ftq_pd_rdata(i) := ftq_pd_mem.io.rdata(i)
1092  }
1093  val stage3CfiInfo =
1094    Mux(realAhdValid, Mux1H(ftqIdxSelOH, ftq_redirect_rdata), ftq_redirect_mem.io.rdata(IfuRedirectNum))
1095  val stage3PdInfo       = Mux(realAhdValid, Mux1H(ftqIdxSelOH, ftq_pd_rdata), ftq_pd_mem.io.rdata(0))
1096  val backendRedirectCfi = fromBackendRedirect.bits.cfiUpdate
1097  backendRedirectCfi.fromFtqRedirectSram(stage3CfiInfo)
1098  backendRedirectCfi.pd := stage3PdInfo.toPd(fromBackendRedirect.bits.ftqOffset)
1099
1100  val r_ftb_entry = Mux(realAhdValid, Mux1H(ftqIdxSelOH, ftb_redirect_rdata), ftb_entry_mem.io.rdata(IfuRedirectNum))
1101  val r_ftqOffset = fromBackendRedirect.bits.ftqOffset
1102
1103  backendRedirectCfi.br_hit := r_ftb_entry.brIsSaved(r_ftqOffset)
1104  backendRedirectCfi.jr_hit := r_ftb_entry.isJalr && r_ftb_entry.tailSlot.offset === r_ftqOffset
1105  // FIXME: not portable
1106  val sc_disagree = stage3CfiInfo.sc_disagree.getOrElse(VecInit(Seq.fill(numBr)(false.B)))
1107  backendRedirectCfi.sc_hit := backendRedirectCfi.br_hit && Mux(
1108    r_ftb_entry.brSlots(0).offset === r_ftqOffset,
1109    sc_disagree(0),
1110    sc_disagree(1)
1111  )
1112
1113  when(entry_hit_status(fromBackendRedirect.bits.ftqIdx.value) === h_hit) {
1114    backendRedirectCfi.shift := PopCount(r_ftb_entry.getBrMaskByOffset(r_ftqOffset)) +&
1115      (backendRedirectCfi.pd.isBr && !r_ftb_entry.brIsSaved(r_ftqOffset) &&
1116        !r_ftb_entry.newBrCanNotInsert(r_ftqOffset))
1117
1118    backendRedirectCfi.addIntoHist := backendRedirectCfi.pd.isBr && (r_ftb_entry.brIsSaved(r_ftqOffset) ||
1119      !r_ftb_entry.newBrCanNotInsert(r_ftqOffset))
1120  }.otherwise {
1121    backendRedirectCfi.shift       := (backendRedirectCfi.pd.isBr && backendRedirectCfi.taken).asUInt
1122    backendRedirectCfi.addIntoHist := backendRedirectCfi.pd.isBr.asUInt
1123  }
1124
1125  // ***************************************************************************
1126  // **************************** redirect from ifu ****************************
1127  // ***************************************************************************
1128  val fromIfuRedirect = WireInit(0.U.asTypeOf(Valid(new BranchPredictionRedirect)))
1129  fromIfuRedirect.valid              := pdWb.valid && pdWb.bits.misOffset.valid && !backendFlush
1130  fromIfuRedirect.bits.ftqIdx        := pdWb.bits.ftqIdx
1131  fromIfuRedirect.bits.ftqOffset     := pdWb.bits.misOffset.bits
1132  fromIfuRedirect.bits.level         := RedirectLevel.flushAfter
1133  fromIfuRedirect.bits.BTBMissBubble := true.B
1134  fromIfuRedirect.bits.debugIsMemVio := false.B
1135  fromIfuRedirect.bits.debugIsCtrl   := false.B
1136
1137  val ifuRedirectCfiUpdate = fromIfuRedirect.bits.cfiUpdate
1138  ifuRedirectCfiUpdate.pc        := pdWb.bits.pc(pdWb.bits.misOffset.bits)
1139  ifuRedirectCfiUpdate.pd        := pdWb.bits.pd(pdWb.bits.misOffset.bits)
1140  ifuRedirectCfiUpdate.predTaken := cfiIndex_vec(pdWb.bits.ftqIdx.value).valid
1141  ifuRedirectCfiUpdate.target    := pdWb.bits.target
1142  ifuRedirectCfiUpdate.taken     := pdWb.bits.cfiOffset.valid
1143  ifuRedirectCfiUpdate.isMisPred := pdWb.bits.misOffset.valid
1144
1145  val ifuRedirectReg   = RegNextWithEnable(fromIfuRedirect, hasInit = true)
1146  val ifuRedirectToBpu = WireInit(ifuRedirectReg)
1147  ifuFlush := fromIfuRedirect.valid || ifuRedirectToBpu.valid
1148
1149  ftq_redirect_mem.io.ren.get.head := fromIfuRedirect.valid
1150  ftq_redirect_mem.io.raddr.head   := fromIfuRedirect.bits.ftqIdx.value
1151
1152  val toBpuCfi = ifuRedirectToBpu.bits.cfiUpdate
1153  toBpuCfi.fromFtqRedirectSram(ftq_redirect_mem.io.rdata.head)
1154  when(ifuRedirectReg.bits.cfiUpdate.pd.isRet && ifuRedirectReg.bits.cfiUpdate.pd.valid) {
1155    toBpuCfi.target := toBpuCfi.topAddr
1156  }
1157
1158  when(ifuRedirectReg.valid) {
1159    ifuRedirected(ifuRedirectReg.bits.ftqIdx.value) := true.B
1160  }.elsewhen(RegNext(pdWb.valid)) {
1161    // if pdWb and no redirect, set to false
1162    ifuRedirected(last_cycle_bpu_in_ptr.value) := false.B
1163  }
1164
1165  // **********************************************************************
1166  // ***************************** to backend *****************************
1167  // **********************************************************************
1168  // to backend pc mem / target
1169  io.toBackend.pc_mem_wen   := RegNext(last_cycle_bpu_in)
1170  io.toBackend.pc_mem_waddr := RegEnable(last_cycle_bpu_in_idx, last_cycle_bpu_in)
1171  io.toBackend.pc_mem_wdata := RegEnable(bpu_in_bypass_buf_for_ifu, last_cycle_bpu_in)
1172
1173  // the latency of these signals is a fixed number of cycles
1174  val newest_entry_en: Bool = RegNext(last_cycle_bpu_in || backendRedirect.valid || ifuRedirectToBpu.valid)
1175  io.toBackend.newest_entry_en     := RegNext(newest_entry_en)
1176  io.toBackend.newest_entry_ptr    := RegEnable(newest_entry_ptr, newest_entry_en)
1177  io.toBackend.newest_entry_target := RegEnable(newest_entry_target, newest_entry_en)
1178
1179  // *********************************************************************
1180  // **************************** wb from exu ****************************
1181  // *********************************************************************
1182
1183  backendRedirect.valid := io.fromBackend.redirect.valid
1184  backendRedirect.bits.connectRedirect(io.fromBackend.redirect.bits)
1185  backendRedirect.bits.BTBMissBubble := false.B
1186
1187  def extractRedirectInfo(wb: Valid[Redirect]) = {
1188    val ftqPtr    = wb.bits.ftqIdx
1189    val ftqOffset = wb.bits.ftqOffset
1190    val taken     = wb.bits.cfiUpdate.taken
1191    val mispred   = wb.bits.cfiUpdate.isMisPred
1192    (wb.valid, ftqPtr, ftqOffset, taken, mispred)
1193  }
1194
1195  // fix mispredict entry
1196  val lastIsMispredict = RegNext(
1197    backendRedirect.valid && backendRedirect.bits.level === RedirectLevel.flushAfter,
1198    init = false.B
1199  )
1200
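  // On a redirect, fix up the recorded taken-cfi information of the target FTQ entry:
  //  - a taken cfi at a smaller offset than the recorded one overwrites it,
  //  - a cfi at exactly the recorded offset sets or clears the valid bit according to taken,
  //  - a not-taken cfi at a different offset clears the valid bit.
  // The redirect target also becomes the newest entry target, and backend redirects
  // additionally update the per-slot mispredict vector.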
1201  def updateCfiInfo(redirect: Valid[Redirect], isBackend: Boolean = true) = {
1202    val (r_valid, r_ptr, r_offset, r_taken, r_mispred) = extractRedirectInfo(redirect)
1203    val r_idx                                          = r_ptr.value
1204    val cfiIndex_bits_wen                              = r_valid && r_taken && r_offset < cfiIndex_vec(r_idx).bits
1205    val cfiIndex_valid_wen                             = r_valid && r_offset === cfiIndex_vec(r_idx).bits
1206    when(cfiIndex_bits_wen || cfiIndex_valid_wen) {
1207      cfiIndex_vec(r_idx).valid := cfiIndex_bits_wen || cfiIndex_valid_wen && r_taken
1208    }.elsewhen(r_valid && !r_taken && r_offset =/= cfiIndex_vec(r_idx).bits) {
1209      cfiIndex_vec(r_idx).valid := false.B
1210    }
1211    when(cfiIndex_bits_wen) {
1212      cfiIndex_vec(r_idx).bits := r_offset
1213    }
1214    newest_entry_target_modified := true.B
1215    newest_entry_target          := redirect.bits.cfiUpdate.target
1216    newest_entry_ptr_modified    := true.B
1217    newest_entry_ptr             := r_ptr
1218
1219    update_target(r_idx) := redirect.bits.cfiUpdate.target // TODO: remove this
1220    if (isBackend) {
1221      mispredict_vec(r_idx)(r_offset) := r_mispred
1222    }
1223  }
1224
1225  when(fromBackendRedirect.valid) {
1226    updateCfiInfo(fromBackendRedirect)
1227  }.elsewhen(ifuRedirectToBpu.valid) {
1228    updateCfiInfo(ifuRedirectToBpu, isBackend = false)
1229  }
1230
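  // Attribute the bubble caused by this redirect to a top-down counter, written into
  // both topdown_stage and the request sent toward the IFU. Control-flow redirects are
  // classified by the predictor component that failed (BTB, then TAGE, SC, ITTAGE, RAS,
  // in priority order); memory-violation and other redirects get their own categories,
  // and IFU (predecode) redirects are always attributed to BTB misses.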
1231  when(fromBackendRedirect.valid) {
1232    when(fromBackendRedirect.bits.ControlRedirectBubble) {
1233      when(fromBackendRedirect.bits.ControlBTBMissBubble) {
1234        topdown_stage.reasons(TopDownCounters.BTBMissBubble.id)                  := true.B
1235        io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.BTBMissBubble.id) := true.B
1236      }.elsewhen(fromBackendRedirect.bits.TAGEMissBubble) {
1237        topdown_stage.reasons(TopDownCounters.TAGEMissBubble.id)                  := true.B
1238        io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.TAGEMissBubble.id) := true.B
1239      }.elsewhen(fromBackendRedirect.bits.SCMissBubble) {
1240        topdown_stage.reasons(TopDownCounters.SCMissBubble.id)                  := true.B
1241        io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.SCMissBubble.id) := true.B
1242      }.elsewhen(fromBackendRedirect.bits.ITTAGEMissBubble) {
1243        topdown_stage.reasons(TopDownCounters.ITTAGEMissBubble.id)                  := true.B
1244        io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.ITTAGEMissBubble.id) := true.B
1245      }.elsewhen(fromBackendRedirect.bits.RASMissBubble) {
1246        topdown_stage.reasons(TopDownCounters.RASMissBubble.id)                  := true.B
1247        io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.RASMissBubble.id) := true.B
1248      }
1249
1250    }.elsewhen(backendRedirect.bits.MemVioRedirectBubble) {
1251      topdown_stage.reasons(TopDownCounters.MemVioRedirectBubble.id)                  := true.B
1252      io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.MemVioRedirectBubble.id) := true.B
1253    }.otherwise {
1254      topdown_stage.reasons(TopDownCounters.OtherRedirectBubble.id)                  := true.B
1255      io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.OtherRedirectBubble.id) := true.B
1256    }
1257  }.elsewhen(ifuRedirectReg.valid) {
1258    topdown_stage.reasons(TopDownCounters.BTBMissBubble.id)                  := true.B
1259    io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.BTBMissBubble.id) := true.B
1260  }
1261
1262  io.ControlBTBMissBubble := fromBackendRedirect.bits.ControlBTBMissBubble
1263  io.TAGEMissBubble       := fromBackendRedirect.bits.TAGEMissBubble
1264  io.SCMissBubble         := fromBackendRedirect.bits.SCMissBubble
1265  io.ITTAGEMissBubble     := fromBackendRedirect.bits.ITTAGEMissBubble
1266  io.RASMissBubble        := fromBackendRedirect.bits.RASMissBubble
1267
1268  // ***********************************************************************************
1269  // **************************** flush ptr and state queue ****************************
1270  // ***********************************************************************************
1271
1272  val redirectVec = VecInit(backendRedirect, fromIfuRedirect)
1273
1274  // on a redirect, reset the pointers and the status queues
1275  io.icacheFlush := redirectVec.map(r => r.valid).reduce(_ || _)
1276  XSPerfAccumulate("icacheFlushFromBackend", backendRedirect.valid)
1277  XSPerfAccumulate("icacheFlushFromIFU", fromIfuRedirect.valid)
1278  when(redirectVec.map(r => r.valid).reduce(_ || _)) {
1279    val r                          = PriorityMux(redirectVec.map(r => r.valid -> r.bits))
1280    val notIfu                     = redirectVec.dropRight(1).map(r => r.valid).reduce(_ || _)
1281    val (idx, offset, flushItSelf) = (r.ftqIdx, r.ftqOffset, RedirectLevel.flushItself(r.level))
1282    val next                       = idx + 1.U
1283    bpuPtr := next
1284    copied_bpu_ptr.map(_ := next)
1285    ifuPtr_write      := next
1286    ifuWbPtr_write    := next
1287    ifuPtrPlus1_write := idx + 2.U
1288    ifuPtrPlus2_write := idx + 3.U
1289    pfPtr_write       := next
1290    pfPtrPlus1_write  := idx + 2.U
1291  }
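  // One cycle later (registered to ease timing), flush the commit state queue of the
  // redirected entry for backend redirects: slots after the redirect offset become
  // c_empty, and the redirecting slot itself becomes c_flushed when the redirect level
  // flushes the instruction itself.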
1292  when(RegNext(redirectVec.map(r => r.valid).reduce(_ || _))) {
1293    val r                          = PriorityMux(redirectVec.map(r => r.valid -> r.bits))
1294    val notIfu                     = redirectVec.dropRight(1).map(r => r.valid).reduce(_ || _)
1295    val (idx, offset, flushItSelf) = (r.ftqIdx, r.ftqOffset, RedirectLevel.flushItself(r.level))
1296    when(RegNext(notIfu)) {
1297      commitStateQueueEnable(RegNext(idx.value)) := true.B
1298      commitStateQueueNext(RegNext(idx.value)).zipWithIndex.foreach { case (s, i) =>
1299        when(i.U > RegNext(offset)) {
1300          s := c_empty
1301        }
1302        when(i.U === RegNext(offset) && RegNext(flushItSelf)) {
1303          s := c_flushed
1304        }
1305      }
1306    }
1307  }
1308
1309  // only the valid bit is actually needed
1310  io.toIfu.redirect.bits    := backendRedirect.bits
1311  io.toIfu.redirect.valid   := stage2Flush
1312  io.toIfu.topdown_redirect := fromBackendRedirect
1313
1314  // commit
1315  for (c <- io.fromBackend.rob_commits) {
1316    when(c.valid) {
1317      commitStateQueueEnable(c.bits.ftqIdx.value)                 := true.B
1318      commitStateQueueNext(c.bits.ftqIdx.value)(c.bits.ftqOffset) := c_committed
1319      // TODO: remove this
1320      // For instruction fusions, we also update the next instruction
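      // (commitType values 4-7 presumably mark fused pairs whose second half lies at
      //  offset+1, offset+2, or in slot 0/1 of the next FTQ entry)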
1321      when(c.bits.commitType === 4.U) {
1322        commitStateQueueNext(c.bits.ftqIdx.value)(c.bits.ftqOffset + 1.U) := c_committed
1323      }.elsewhen(c.bits.commitType === 5.U) {
1324        commitStateQueueNext(c.bits.ftqIdx.value)(c.bits.ftqOffset + 2.U) := c_committed
1325      }.elsewhen(c.bits.commitType === 6.U) {
1326        val index = (c.bits.ftqIdx + 1.U).value
1327        commitStateQueueEnable(index)  := true.B
1328        commitStateQueueNext(index)(0) := c_committed
1329      }.elsewhen(c.bits.commitType === 7.U) {
1330        val index = (c.bits.ftqIdx + 1.U).value
1331        commitStateQueueEnable(index)  := true.B
1332        commitStateQueueNext(index)(1) := c_committed
1333      }
1334    }
1335  }
1336
1337  // ****************************************************************
1338  // **************************** to bpu ****************************
1339  // ****************************************************************
1340
1341  io.toBpu.redirctFromIFU := ifuRedirectToBpu.valid
1342  io.toBpu.redirect       := Mux(fromBackendRedirect.valid, fromBackendRedirect, ifuRedirectToBpu)
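  // Redirect latency: cycles from the s1 prediction of the redirected entry to the
  // redirect reaching the BPU (falls back to a zeroed vector when s1 prediction
  // cycles are not recorded).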
1343  val dummy_s1_pred_cycle_vec = VecInit(List.tabulate(FtqSize)(_ => 0.U(64.W)))
1344  val redirect_latency =
1345    GTimer() - pred_s1_cycle.getOrElse(dummy_s1_pred_cycle_vec)(io.toBpu.redirect.bits.ftqIdx.value) + 1.U
1346  XSPerfHistogram("backend_redirect_latency", redirect_latency, fromBackendRedirect.valid, 0, 60, 1)
1347  XSPerfHistogram(
1348    "ifu_redirect_latency",
1349    redirect_latency,
1350    !fromBackendRedirect.valid && ifuRedirectToBpu.valid,
1351    0,
1352    60,
1353    1
1354  )
1355
1356  XSError(
1357    io.toBpu.redirect.valid && isBefore(io.toBpu.redirect.bits.ftqIdx, commPtr),
1358    "Ftq received a redirect after its commit, check backend or replay"
1359  )
1360
1361  val may_have_stall_from_bpu = Wire(Bool())
1362  val bpu_ftb_update_stall    = RegInit(0.U(2.W)) // 2-cycle stall, so we need 3 states
1363  may_have_stall_from_bpu := bpu_ftb_update_stall =/= 0.U
1364
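  // An entry can be committed once the IFU has written it back, the BPU is not
  // stalled for an FTB update, and either the ROB commit pointer has already passed
  // it or it has valid (to-commit/committed) slots and the last of them has reached
  // c_committed. commPtr may additionally advance past an entry whose first
  // instruction was flushed.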
1365  val validInstructions       = commitStateQueueReg(commPtr.value).map(s => s === c_toCommit || s === c_committed)
1366  val lastInstructionStatus   = PriorityMux(validInstructions.reverse.zip(commitStateQueueReg(commPtr.value).reverse))
1367  val firstInstructionFlushed = commitStateQueueReg(commPtr.value)(0) === c_flushed
1368  canCommit := commPtr =/= ifuWbPtr && !may_have_stall_from_bpu &&
1369    (isAfter(robCommPtr, commPtr) ||
1370      validInstructions.reduce(_ || _) && lastInstructionStatus === c_committed)
1371  val canMoveCommPtr = commPtr =/= ifuWbPtr && !may_have_stall_from_bpu &&
1372    (isAfter(robCommPtr, commPtr) ||
1373      validInstructions.reduce(_ || _) && lastInstructionStatus === c_committed ||
1374      firstInstructionFlushed)
1375
1376  when(io.fromBackend.rob_commits.map(_.valid).reduce(_ | _)) {
1377    robCommPtr_write := ParallelPriorityMux(
1378      io.fromBackend.rob_commits.map(_.valid).reverse,
1379      io.fromBackend.rob_commits.map(_.bits.ftqIdx).reverse
1380    )
1381  }.elsewhen(isAfter(commPtr, robCommPtr)) {
1382    robCommPtr_write := commPtr
1383  }.otherwise {
1384    robCommPtr_write := robCommPtr
1385  }
1386
1387  /**
1388    *************************************************************************************
1389    * MMIO instruction fetch is allowed only if MMIO is the oldest instruction.
1390    *************************************************************************************
1391    */
1392  val mmioReadPtr = io.mmioCommitRead.mmioFtqPtr
1393  val mmioLastCommit = isAfter(commPtr, mmioReadPtr) ||
1394    commPtr === mmioReadPtr && validInstructions.reduce(_ || _) && lastInstructionStatus === c_committed
1395  io.mmioCommitRead.mmioLastCommit := RegNext(mmioLastCommit)
1396
1397  // commit reads
1398  val commit_pc_bundle = RegNext(ftq_pc_mem.io.commPtr_rdata)
1399  val commit_target =
1400    Mux(
1401      RegNext(commPtr === newest_entry_ptr),
1402      RegEnable(newest_entry_target, newest_entry_target_modified),
1403      RegNext(ftq_pc_mem.io.commPtrPlus1_rdata.startAddr)
1404    )
1405  ftq_pd_mem.io.ren.get.last := canCommit
1406  ftq_pd_mem.io.raddr.last   := commPtr.value
1407  val commit_pd = ftq_pd_mem.io.rdata.last
1408  ftq_redirect_mem.io.ren.get.last := canCommit
1409  ftq_redirect_mem.io.raddr.last   := commPtr.value
1410  val commit_spec_meta = ftq_redirect_mem.io.rdata.last
1411  ftq_meta_1r_sram.io.ren(0)   := canCommit
1412  ftq_meta_1r_sram.io.raddr(0) := commPtr.value
1413  val commit_meta      = ftq_meta_1r_sram.io.rdata(0).meta
1414  val commit_ftb_entry = ftq_meta_1r_sram.io.rdata(0).ftb_entry
1415
1416  // need one cycle to read the mem and SRAMs
1417  val do_commit_ptr = RegEnable(commPtr, canCommit)
1418  val do_commit     = RegNext(canCommit, init = false.B)
1419  when(canMoveCommPtr) {
1420    commPtr_write      := commPtrPlus1
1421    commPtrPlus1_write := commPtrPlus1 + 1.U
1422  }
1423  val commit_state   = RegEnable(commitStateQueueReg(commPtr.value), canCommit)
1424  val can_commit_cfi = WireInit(cfiIndex_vec(commPtr.value))
1425  val do_commit_cfi  = WireInit(cfiIndex_vec(do_commit_ptr.value))
1426  //
1427  // when (commitStateQueue(commPtr.value)(can_commit_cfi.bits) =/= c_commited) {
1428  //  can_commit_cfi.valid := false.B
1429  // }
1430  val commit_cfi = RegEnable(can_commit_cfi, canCommit)
1431  val debug_cfi  = commitStateQueueReg(do_commit_ptr.value)(do_commit_cfi.bits) =/= c_committed && do_commit_cfi.valid
1432
1433  val commit_mispredict: Vec[Bool] =
1434    VecInit((RegEnable(mispredict_vec(commPtr.value), canCommit) zip commit_state).map {
1435      case (mis, state) => mis && state === c_committed
1436    })
1437  val commit_instCommited: Vec[Bool] = VecInit(commit_state.map(_ === c_committed)) // [PredictWidth]
1438  val can_commit_hit     = entry_hit_status(commPtr.value)
1439  val commit_hit         = RegEnable(can_commit_hit, canCommit)
1440  val diff_commit_target = RegEnable(update_target(commPtr.value), canCommit) // TODO: remove this
1441  val commit_stage       = RegEnable(pred_stage(commPtr.value), canCommit)
1442  val commit_valid       = commit_hit === h_hit || commit_cfi.valid           // hit or taken
1443
1444  val to_bpu_hit = can_commit_hit === h_hit || can_commit_hit === h_false_hit
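  // Count down the FTB-update stall: when a taken cfi commits for an entry that
  // missed in the FTB, stall further commits for two cycles (2 -> 1 -> 0), presumably
  // to give the resulting FTB update time to take effect; state 3 should never occur
  // (checked by the XSError below).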
1445  switch(bpu_ftb_update_stall) {
1446    is(0.U) {
1447      when(can_commit_cfi.valid && !to_bpu_hit && canCommit) {
1448        bpu_ftb_update_stall := 2.U // 2-cycle stall
1449      }
1450    }
1451    is(2.U) {
1452      bpu_ftb_update_stall := 1.U
1453    }
1454    is(1.U) {
1455      bpu_ftb_update_stall := 0.U
1456    }
1457    is(3.U) {
1458      // XSError below
1459    }
1460  }
1461  XSError(bpu_ftb_update_stall === 3.U, "bpu_ftb_update_stall should be 0, 1 or 2")
1462
1463  // TODO: remove this
1464  XSError(do_commit && diff_commit_target =/= commit_target, "\ncommit target should be the same as update target\n")
1465
1466  // update latency stats
1467  val update_latency = GTimer() - pred_s1_cycle.getOrElse(dummy_s1_pred_cycle_vec)(do_commit_ptr.value) + 1.U
1468  XSPerfHistogram("bpu_update_latency", update_latency, io.toBpu.update.valid, 0, 64, 2)
1469
1470  io.toBpu.update       := DontCare
1471  io.toBpu.update.valid := commit_valid && do_commit
1472  val update = io.toBpu.update.bits
1473  update.false_hit   := commit_hit === h_false_hit
1474  update.pc          := commit_pc_bundle.startAddr
1475  update.meta        := commit_meta
1476  update.cfi_idx     := commit_cfi
1477  update.full_target := commit_target
1478  update.from_stage  := commit_stage
1479  update.spec_info   := commit_spec_meta
1480  XSError(commit_valid && do_commit && debug_cfi, "\ncommitted cfi must be in the c_committed state\n")
1481
1482  val commit_real_hit  = commit_hit === h_hit
1483  val update_ftb_entry = update.ftb_entry
1484
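  // Regenerate the FTB entry from the committed predecode information, the committed
  // cfi and target, and the previously stored entry, so the BPU update carries an
  // up-to-date entry (new branches inserted, jalr target refreshed, strong-bias bits
  // adjusted).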
1485  val ftbEntryGen = Module(new FTBEntryGen).io
1486  ftbEntryGen.start_addr     := commit_pc_bundle.startAddr
1487  ftbEntryGen.old_entry      := commit_ftb_entry
1488  ftbEntryGen.pd             := commit_pd
1489  ftbEntryGen.cfiIndex       := commit_cfi
1490  ftbEntryGen.target         := commit_target
1491  ftbEntryGen.hit            := commit_real_hit
1492  ftbEntryGen.mispredict_vec := commit_mispredict
1493
1494  update_ftb_entry         := ftbEntryGen.new_entry
1495  update.new_br_insert_pos := ftbEntryGen.new_br_insert_pos
1496  update.mispred_mask      := ftbEntryGen.mispred_mask
1497  update.old_entry         := ftbEntryGen.is_old_entry
1498  update.pred_hit          := commit_hit === h_hit || commit_hit === h_false_hit
1499  update.br_taken_mask     := ftbEntryGen.taken_mask
1500  update.br_committed := (ftbEntryGen.new_entry.brValids zip ftbEntryGen.new_entry.brOffset) map {
1501    case (valid, offset) => valid && commit_instCommited(offset)
1502  }
1503  update.jmp_taken := ftbEntryGen.jmp_taken
1504
1505  // update.full_pred.fromFtbEntry(ftbEntryGen.new_entry, update.pc)
1506  // update.full_pred.jalr_target := commit_target
1507  // update.full_pred.hit := true.B
1508  // when (update.full_pred.is_jalr) {
1509  //   update.full_pred.targets.last := commit_target
1510  // }
1511
1512  // ******************************************************************************
1513  // **************************** commit perf counters ****************************
1514  // ******************************************************************************
1515
1516  val commit_inst_mask        = VecInit(commit_state.map(c => c === c_committed && do_commit)).asUInt
1517  val commit_mispred_mask     = commit_mispredict.asUInt
1518  val commit_not_mispred_mask = ~commit_mispred_mask
1519
1520  val commit_br_mask  = commit_pd.brMask.asUInt
1521  val commit_jmp_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.jmpInfo.valid.asTypeOf(UInt(1.W)))
1522  val commit_cfi_mask = commit_br_mask | commit_jmp_mask
1523
1524  val mbpInstrs = commit_inst_mask & commit_cfi_mask
1525
1526  val mbpRights = mbpInstrs & commit_not_mispred_mask
1527  val mbpWrongs = mbpInstrs & commit_mispred_mask
1528
1529  io.bpuInfo.bpRight := PopCount(mbpRights)
1530  io.bpuInfo.bpWrong := PopCount(mbpWrongs)
1531
1532  val hartId           = p(XSCoreParamsKey).HartId
1533  val isWriteFTQTable  = Constantin.createRecord(s"isWriteFTQTable$hartId")
1534  val ftqBranchTraceDB = ChiselDB.createTable(s"FTQTable$hartId", new FtqDebugBundle)
1535  // Cfi Info
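  // For each committed CFI slot, emit a debug line and, when enabled through the
  // Constantin knob isWriteFTQTable, log a FtqDebugBundle row into the per-hart
  // ChiselDB table FTQTable<hartId>.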
1536  for (i <- 0 until PredictWidth) {
1537    val pc      = commit_pc_bundle.startAddr + (i * instBytes).U
1538    val v       = commit_state(i) === c_committed
1539    val isBr    = commit_pd.brMask(i)
1540    val isJmp   = commit_pd.jmpInfo.valid && commit_pd.jmpOffset === i.U
1541    val isCfi   = isBr || isJmp
1542    val isTaken = commit_cfi.valid && commit_cfi.bits === i.U
1543    val misPred = commit_mispredict(i)
1544    // val ghist = commit_spec_meta.ghist.predHist
1545    val histPtr   = commit_spec_meta.histPtr
1546    val predCycle = commit_meta(63, 0)
1547    val target    = commit_target
1548
1549    val brIdx = OHToUInt(Reverse(Cat(update_ftb_entry.brValids.zip(update_ftb_entry.brOffset).map { case (v, offset) =>
1550      v && offset === i.U
1551    })))
1552    val inFtbEntry = update_ftb_entry.brValids.zip(update_ftb_entry.brOffset).map { case (v, offset) =>
1553      v && offset === i.U
1554    }.reduce(_ || _)
1555    val addIntoHist =
1556      ((commit_hit === h_hit) && inFtbEntry) || (!(commit_hit === h_hit) && i.U === commit_cfi.bits && isBr && commit_cfi.valid)
1557    XSDebug(
1558      v && do_commit && isCfi,
1559      p"cfi_update: isBr(${isBr}) pc(${Hexadecimal(pc)}) " +
1560        p"taken(${isTaken}) mispred(${misPred}) cycle($predCycle) hist(${histPtr.value}) " +
1561        p"startAddr(${Hexadecimal(commit_pc_bundle.startAddr)}) AddIntoHist(${addIntoHist}) " +
1562        p"brInEntry(${inFtbEntry}) brIdx(${brIdx}) target(${Hexadecimal(target)})\n"
1563    )
1564
1565    val logbundle = Wire(new FtqDebugBundle)
1566    logbundle.pc        := pc
1567    logbundle.target    := target
1568    logbundle.isBr      := isBr
1569    logbundle.isJmp     := isJmp
1570    logbundle.isCall    := isJmp && commit_pd.hasCall
1571    logbundle.isRet     := isJmp && commit_pd.hasRet
1572    logbundle.misPred   := misPred
1573    logbundle.isTaken   := isTaken
1574    logbundle.predStage := commit_stage
1575
1576    ftqBranchTraceDB.log(
1577      data = logbundle /* hardware of type T */,
1578      en = isWriteFTQTable.orR && v && do_commit && isCfi,
1579      site = "FTQ" + p(XSCoreParamsKey).HartId.toString,
1580      clock = clock,
1581      reset = reset
1582    )
1583  }
1584
1585  val enq           = io.fromBpu.resp
1586  val perf_redirect = backendRedirect
1587
1588  XSPerfAccumulate("entry", validEntries)
1589  XSPerfAccumulate("bpu_to_ftq_stall", enq.valid && !enq.ready)
1590  XSPerfAccumulate("mispredictRedirect", perf_redirect.valid && RedirectLevel.flushAfter === perf_redirect.bits.level)
1591  XSPerfAccumulate("replayRedirect", perf_redirect.valid && RedirectLevel.flushItself(perf_redirect.bits.level))
1592  XSPerfAccumulate("predecodeRedirect", fromIfuRedirect.valid)
1593
1594  XSPerfAccumulate("to_ifu_bubble", io.toIfu.req.ready && !io.toIfu.req.valid)
1595
1596  XSPerfAccumulate("to_ifu_stall", io.toIfu.req.valid && !io.toIfu.req.ready)
1597  XSPerfAccumulate("from_bpu_real_bubble", !enq.valid && enq.ready && allowBpuIn)
1598  XSPerfAccumulate("bpu_to_ifu_bubble", bpuPtr === ifuPtr)
1599  XSPerfAccumulate(
1600    "bpu_to_ifu_bubble_when_ftq_full",
1601    (bpuPtr === ifuPtr) && isFull(bpuPtr, commPtr) && io.toIfu.req.ready
1602  )
1603
1604  XSPerfAccumulate("redirectAhead_ValidNum", ftqIdxAhead.map(_.valid).reduce(_ | _))
1605  XSPerfAccumulate("fromBackendRedirect_ValidNum", io.fromBackend.redirect.valid)
1606  XSPerfAccumulate("toBpuRedirect_ValidNum", io.toBpu.redirect.valid)
1607
1608  val from_bpu = io.fromBpu.resp.bits
1609  val to_ifu   = io.toIfu.req.bits
1610
1611  XSPerfHistogram("commit_num_inst", PopCount(commit_inst_mask), do_commit, 0, PredictWidth + 1, 1)
1612
1613  val commit_jal_mask  = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasJal.asTypeOf(UInt(1.W)))
1614  val commit_jalr_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasJalr.asTypeOf(UInt(1.W)))
1615  val commit_call_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasCall.asTypeOf(UInt(1.W)))
1616  val commit_ret_mask  = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasRet.asTypeOf(UInt(1.W)))
1617
1618  val mbpBRights = mbpRights & commit_br_mask
1619  val mbpJRights = mbpRights & commit_jal_mask
1620  val mbpIRights = mbpRights & commit_jalr_mask
1621  val mbpCRights = mbpRights & commit_call_mask
1622  val mbpRRights = mbpRights & commit_ret_mask
1623
1624  val mbpBWrongs = mbpWrongs & commit_br_mask
1625  val mbpJWrongs = mbpWrongs & commit_jal_mask
1626  val mbpIWrongs = mbpWrongs & commit_jalr_mask
1627  val mbpCWrongs = mbpWrongs & commit_call_mask
1628  val mbpRWrongs = mbpWrongs & commit_ret_mask
1629
1630  val commit_pred_stage = RegNext(pred_stage(commPtr.value))
1631
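  // Expand a commit mask into one counter per BPU prediction stage, keyed
  // "<name>_stage_<n>", by matching against the recorded prediction stage.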
1632  def pred_stage_map(src: UInt, name: String) =
1633    (0 until numBpStages).map(i =>
1634      f"${name}_stage_${i + 1}" -> PopCount(src.asBools.map(_ && commit_pred_stage === BP_STAGES(i)))
1635    ).foldLeft(Map[String, UInt]())(_ + _)
1636
1637  val mispred_stage_map      = pred_stage_map(mbpWrongs, "mispredict")
1638  val br_mispred_stage_map   = pred_stage_map(mbpBWrongs, "br_mispredict")
1639  val jalr_mispred_stage_map = pred_stage_map(mbpIWrongs, "jalr_mispredict")
1640  val correct_stage_map      = pred_stage_map(mbpRights, "correct")
1641  val br_correct_stage_map   = pred_stage_map(mbpBRights, "br_correct")
1642  val jalr_correct_stage_map = pred_stage_map(mbpIRights, "jalr_correct")
1643
1644  val update_valid = io.toBpu.update.valid
1645  def u(cond: Bool) = update_valid && cond
1646  val ftb_false_hit = u(update.false_hit)
1647  // assert(!ftb_false_hit)
1648  val ftb_hit = u(commit_hit === h_hit)
1649
1650  val ftb_new_entry                = u(ftbEntryGen.is_init_entry)
1651  val ftb_new_entry_only_br        = ftb_new_entry && !update_ftb_entry.jmpValid
1652  val ftb_new_entry_only_jmp       = ftb_new_entry && !update_ftb_entry.brValids(0)
1653  val ftb_new_entry_has_br_and_jmp = ftb_new_entry && update_ftb_entry.brValids(0) && update_ftb_entry.jmpValid
1654
1655  val ftb_old_entry = u(ftbEntryGen.is_old_entry)
1656
1657  val ftb_modified_entry =
1658    u(ftbEntryGen.is_new_br || ftbEntryGen.is_jalr_target_modified || ftbEntryGen.is_strong_bias_modified)
1659  val ftb_modified_entry_new_br               = u(ftbEntryGen.is_new_br)
1660  val ftb_modified_entry_ifu_redirected       = u(ifuRedirected(do_commit_ptr.value))
1661  val ftb_modified_entry_jalr_target_modified = u(ftbEntryGen.is_jalr_target_modified)
1662  val ftb_modified_entry_br_full              = ftb_modified_entry && ftbEntryGen.is_br_full
1663  val ftb_modified_entry_strong_bias          = ftb_modified_entry && ftbEntryGen.is_strong_bias_modified
1664
1665  def getFtbEntryLen(pc: UInt, entry: FTBEntry) = (entry.getFallThrough(pc) - pc) >> instOffsetBits
1666  val gen_ftb_entry_len = getFtbEntryLen(update.pc, ftbEntryGen.new_entry)
1667  XSPerfHistogram("ftb_init_entry_len", gen_ftb_entry_len, ftb_new_entry, 0, PredictWidth + 1, 1)
1668  XSPerfHistogram("ftb_modified_entry_len", gen_ftb_entry_len, ftb_modified_entry, 0, PredictWidth + 1, 1)
1669  val s3_ftb_entry_len = getFtbEntryLen(from_bpu.s3.pc(0), from_bpu.last_stage_ftb_entry)
1670  XSPerfHistogram("s3_ftb_entry_len", s3_ftb_entry_len, from_bpu.s3.valid(0), 0, PredictWidth + 1, 1)
1671
1672  XSPerfHistogram("ftq_has_entry", validEntries, true.B, 0, FtqSize + 1, 1)
1673
1674  val perfCountsMap = Map(
1675    "BpInstr"                        -> PopCount(mbpInstrs),
1676    "BpBInstr"                       -> PopCount(mbpBRights | mbpBWrongs),
1677    "BpRight"                        -> PopCount(mbpRights),
1678    "BpWrong"                        -> PopCount(mbpWrongs),
1679    "BpBRight"                       -> PopCount(mbpBRights),
1680    "BpBWrong"                       -> PopCount(mbpBWrongs),
1681    "BpJRight"                       -> PopCount(mbpJRights),
1682    "BpJWrong"                       -> PopCount(mbpJWrongs),
1683    "BpIRight"                       -> PopCount(mbpIRights),
1684    "BpIWrong"                       -> PopCount(mbpIWrongs),
1685    "BpCRight"                       -> PopCount(mbpCRights),
1686    "BpCWrong"                       -> PopCount(mbpCWrongs),
1687    "BpRRight"                       -> PopCount(mbpRRights),
1688    "BpRWrong"                       -> PopCount(mbpRWrongs),
1689    "ftb_false_hit"                  -> PopCount(ftb_false_hit),
1690    "ftb_hit"                        -> PopCount(ftb_hit),
1691    "ftb_new_entry"                  -> PopCount(ftb_new_entry),
1692    "ftb_new_entry_only_br"          -> PopCount(ftb_new_entry_only_br),
1693    "ftb_new_entry_only_jmp"         -> PopCount(ftb_new_entry_only_jmp),
1694    "ftb_new_entry_has_br_and_jmp"   -> PopCount(ftb_new_entry_has_br_and_jmp),
1695    "ftb_old_entry"                  -> PopCount(ftb_old_entry),
1696    "ftb_modified_entry"             -> PopCount(ftb_modified_entry),
1697    "ftb_modified_entry_new_br"      -> PopCount(ftb_modified_entry_new_br),
1698    "ftb_jalr_target_modified"       -> PopCount(ftb_modified_entry_jalr_target_modified),
1699    "ftb_modified_entry_br_full"     -> PopCount(ftb_modified_entry_br_full),
1700    "ftb_modified_entry_strong_bias" -> PopCount(ftb_modified_entry_strong_bias)
1701  ) ++ mispred_stage_map ++ br_mispred_stage_map ++ jalr_mispred_stage_map ++
1702    correct_stage_map ++ br_correct_stage_map ++ jalr_correct_stage_map
1703
1704  for ((key, value) <- perfCountsMap) {
1705    XSPerfAccumulate(key, value)
1706  }
1707
1708  // --------------------------- Debug --------------------------------
1709  // XSDebug(enq_fire, p"enq! " + io.fromBpu.resp.bits.toPrintable)
1710  XSDebug(io.toIfu.req.fire, p"fire to ifu " + io.toIfu.req.bits.toPrintable)
1711  XSDebug(do_commit, p"deq! [ptr] $do_commit_ptr\n")
1712  XSDebug(true.B, p"[bpuPtr] $bpuPtr, [ifuPtr] $ifuPtr, [ifuWbPtr] $ifuWbPtr [commPtr] $commPtr\n")
1713  XSDebug(
1714    true.B,
1715    p"[in] v:${io.fromBpu.resp.valid} r:${io.fromBpu.resp.ready} " +
1716      p"[out] v:${io.toIfu.req.valid} r:${io.toIfu.req.ready}\n"
1717  )
1718  XSDebug(do_commit, p"[deq info] cfiIndex: $commit_cfi, $commit_pc_bundle, target: ${Hexadecimal(commit_target)}\n")
1719
1720  //   def ubtbCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1721  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1722  //       case (((valid, pd), ans), taken) =>
1723  //       Mux(valid && pd.isBr,
1724  //         isWrong ^ Mux(ans.hit.asBool,
1725  //           Mux(ans.taken.asBool, taken && ans.target === commitEntry.target,
1726  //           !taken),
1727  //         !taken),
1728  //       false.B)
1729  //     }
1730  //   }
1731
1732  //   def btbCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1733  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1734  //       case (((valid, pd), ans), taken) =>
1735  //       Mux(valid && pd.isBr,
1736  //         isWrong ^ Mux(ans.hit.asBool,
1737  //           Mux(ans.taken.asBool, taken && ans.target === commitEntry.target,
1738  //           !taken),
1739  //         !taken),
1740  //       false.B)
1741  //     }
1742  //   }
1743
1744  //   def tageCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1745  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1746  //       case (((valid, pd), ans), taken) =>
1747  //       Mux(valid && pd.isBr,
1748  //         isWrong ^ (ans.taken.asBool === taken),
1749  //       false.B)
1750  //     }
1751  //   }
1752
1753  //   def loopCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1754  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1755  //       case (((valid, pd), ans), taken) =>
1756  //       Mux(valid && (pd.isBr) && ans.hit.asBool,
1757  //         isWrong ^ (!taken),
1758  //           false.B)
1759  //     }
1760  //   }
1761
1762  //   def rasCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1763  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1764  //       case (((valid, pd), ans), taken) =>
1765  //       Mux(valid && pd.isRet.asBool /*&& taken*/ && ans.hit.asBool,
1766  //         isWrong ^ (ans.target === commitEntry.target),
1767  //           false.B)
1768  //     }
1769  //   }
1770
1771  //   val ubtbRights = ubtbCheck(commitEntry, commitEntry.metas.map(_.ubtbAns), false.B)
1772  //   val ubtbWrongs = ubtbCheck(commitEntry, commitEntry.metas.map(_.ubtbAns), true.B)
1773  //   // btb and ubtb pred jal and jalr as well
1774  //   val btbRights = btbCheck(commitEntry, commitEntry.metas.map(_.btbAns), false.B)
1775  //   val btbWrongs = btbCheck(commitEntry, commitEntry.metas.map(_.btbAns), true.B)
1776  //   val tageRights = tageCheck(commitEntry, commitEntry.metas.map(_.tageAns), false.B)
1777  //   val tageWrongs = tageCheck(commitEntry, commitEntry.metas.map(_.tageAns), true.B)
1778
1779  //   val loopRights = loopCheck(commitEntry, commitEntry.metas.map(_.loopAns), false.B)
1780  //   val loopWrongs = loopCheck(commitEntry, commitEntry.metas.map(_.loopAns), true.B)
1781
1782  //   val rasRights = rasCheck(commitEntry, commitEntry.metas.map(_.rasAns), false.B)
1783  //   val rasWrongs = rasCheck(commitEntry, commitEntry.metas.map(_.rasAns), true.B)
1784
1785  val perfEvents = Seq(
1786    ("bpu_s2_redirect        ", bpu_s2_redirect),
1787    ("bpu_s3_redirect        ", bpu_s3_redirect),
1788    ("bpu_to_ftq_stall       ", enq.valid && !enq.ready),
1789    ("mispredictRedirect     ", perf_redirect.valid && RedirectLevel.flushAfter === perf_redirect.bits.level),
1790    ("replayRedirect         ", perf_redirect.valid && RedirectLevel.flushItself(perf_redirect.bits.level)),
1791    ("predecodeRedirect      ", fromIfuRedirect.valid),
1792    ("to_ifu_bubble          ", io.toIfu.req.ready && !io.toIfu.req.valid),
1793    ("from_bpu_real_bubble   ", !enq.valid && enq.ready && allowBpuIn),
1794    ("BpInstr                ", PopCount(mbpInstrs)),
1795    ("BpBInstr               ", PopCount(mbpBRights | mbpBWrongs)),
1796    ("BpRight                ", PopCount(mbpRights)),
1797    ("BpWrong                ", PopCount(mbpWrongs)),
1798    ("BpBRight               ", PopCount(mbpBRights)),
1799    ("BpBWrong               ", PopCount(mbpBWrongs)),
1800    ("BpJRight               ", PopCount(mbpJRights)),
1801    ("BpJWrong               ", PopCount(mbpJWrongs)),
1802    ("BpIRight               ", PopCount(mbpIRights)),
1803    ("BpIWrong               ", PopCount(mbpIWrongs)),
1804    ("BpCRight               ", PopCount(mbpCRights)),
1805    ("BpCWrong               ", PopCount(mbpCWrongs)),
1806    ("BpRRight               ", PopCount(mbpRRights)),
1807    ("BpRWrong               ", PopCount(mbpRWrongs)),
1808    ("ftb_false_hit          ", PopCount(ftb_false_hit)),
1809    ("ftb_hit                ", PopCount(ftb_hit))
1810  )
1811  generatePerfEvent()
1812}
1813