xref: /XiangShan/src/main/scala/xiangshan/frontend/NewFtq.scala (revision 26814fb32c74ea890558d59192df1fd44c6f8ec6)
1/***************************************************************************************
2* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
3* Copyright (c) 2020-2021 Peng Cheng Laboratory
4*
5* XiangShan is licensed under Mulan PSL v2.
6* You can use this software according to the terms and conditions of the Mulan PSL v2.
7* You may obtain a copy of Mulan PSL v2 at:
8*          http://license.coscl.org.cn/MulanPSL2
9*
10* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13*
14* See the Mulan PSL v2 for more details.
15*
16*
17* Acknowledgement
18*
19* This implementation is inspired by several key papers:
20* [1] Glenn Reinman, Todd Austin, and Brad Calder. "[A scalable front-end architecture for fast instruction delivery.]
21* (https://doi.org/10.1109/ISCA.1999.765954)" 26th International Symposium on Computer Architecture (ISCA). 1999.
22*
23***************************************************************************************/
24
25package xiangshan.frontend
26
27import chisel3._
28import chisel3.util._
29import org.chipsalliance.cde.config.Parameters
30import utility._
31import utility.ChiselDB
32import utility.mbist.MbistPipeline
33import utility.sram.SplittedSRAMTemplate
34import utils._
35import xiangshan._
36import xiangshan.backend.CtrlToFtqIO
37import xiangshan.frontend.icache._
38
39class FtqDebugBundle extends Bundle {
40  val pc        = UInt(39.W)
41  val target    = UInt(39.W)
42  val isBr      = Bool()
43  val isJmp     = Bool()
44  val isCall    = Bool()
45  val isRet     = Bool()
46  val misPred   = Bool()
47  val isTaken   = Bool()
48  val predStage = UInt(2.W)
49}
50
51class FtqPtr(entries: Int) extends CircularQueuePtr[FtqPtr](
52      entries
53    ) {
54  def this()(implicit p: Parameters) = this(p(XSCoreParamsKey).FtqSize)
55}
56
57object FtqPtr {
58  def apply(f: Bool, v: UInt)(implicit p: Parameters): FtqPtr = {
59    val ptr = Wire(new FtqPtr)
60    ptr.flag  := f
61    ptr.value := v
62    ptr
63  }
64  def inverse(ptr: FtqPtr)(implicit p: Parameters): FtqPtr =
65    apply(!ptr.flag, ptr.value)
66}
67
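// FtqNRSRAM: a numRead-read, 1-write memory built from one single-read SRAM per read port;
// every write is broadcast to all replicas, so each read port observes the same contents.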
68class FtqNRSRAM[T <: Data](gen: T, numRead: Int)(implicit p: Parameters) extends XSModule {
69
70  val io = IO(new Bundle() {
71    val raddr = Input(Vec(numRead, UInt(log2Up(FtqSize).W)))
72    val ren   = Input(Vec(numRead, Bool()))
73    val rdata = Output(Vec(numRead, gen))
74    val waddr = Input(UInt(log2Up(FtqSize).W))
75    val wen   = Input(Bool())
76    val wdata = Input(gen)
77  })
78
79  for (i <- 0 until numRead) {
80    val sram = Module(new SplittedSRAMTemplate(
81      gen,
82      set = FtqSize,
83      way = 1,
84      dataSplit = 4,
85      singlePort = false,
86      withClockGate = true,
87      hasMbist = hasMbist,
88      hasSramCtl = hasSramCtl
89    ))
90    sram.io.r.req.valid       := io.ren(i)
91    sram.io.r.req.bits.setIdx := io.raddr(i)
92    io.rdata(i)               := sram.io.r.resp.data(0)
93    sram.io.w.req.valid       := io.wen
94    sram.io.w.req.bits.setIdx := io.waddr
95    sram.io.w.req.bits.data   := VecInit(io.wdata)
96  }
97
98}
99
100class Ftq_RF_Components(implicit p: Parameters) extends XSBundle with BPUUtils {
101  val startAddr     = UInt(VAddrBits.W)
102  val nextLineAddr  = UInt(VAddrBits.W)
103  val isNextMask    = Vec(PredictWidth, Bool())
104  val fallThruError = Bool()
105  // val carry = Bool()
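  // getPc(offset): rebuild the PC of the instruction at `offset` within this fetch block.
  // Roughly: the upper bits come from nextLineAddr when the instruction at `offset` falls into
  // the next fetch line (isNextMask), otherwise from startAddr; they are concatenated with the
  // in-block offset (getOffset(startAddr) + offset) and the instruction-alignment zero bits.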
106  def getPc(offset: UInt) = {
107    def getHigher(pc: UInt) = pc(VAddrBits - 1, log2Ceil(PredictWidth) + instOffsetBits + 1)
108    def getOffset(pc: UInt) = pc(log2Ceil(PredictWidth) + instOffsetBits, instOffsetBits)
109    Cat(
110      getHigher(Mux(isNextMask(offset) && startAddr(log2Ceil(PredictWidth) + instOffsetBits), nextLineAddr, startAddr)),
111      getOffset(startAddr) + offset,
112      0.U(instOffsetBits.W)
113    )
114  }
115  def fromBranchPrediction(resp: BranchPredictionBundle) = {
116    def carryPos(addr: UInt) = addr(instOffsetBits + log2Ceil(PredictWidth) + 1)
117    this.startAddr    := resp.pc(3)
118    this.nextLineAddr := resp.pc(3) + (FetchWidth * 4 * 2).U // may be broken on other configs
119    this.isNextMask := VecInit((0 until PredictWidth).map(i =>
120      (resp.pc(3)(log2Ceil(PredictWidth), 1) +& i.U)(log2Ceil(PredictWidth)).asBool
121    ))
122    this.fallThruError := resp.fallThruError(3)
123    this
124  }
125  override def toPrintable: Printable =
126    p"startAddr:${Hexadecimal(startAddr)}"
127}
128
129class Ftq_pd_Entry(implicit p: Parameters) extends XSBundle {
130  val brMask    = Vec(PredictWidth, Bool())
131  val jmpInfo   = ValidUndirectioned(Vec(3, Bool()))
132  val jmpOffset = UInt(log2Ceil(PredictWidth).W)
133  val jalTarget = UInt(VAddrBits.W)
134  val rvcMask   = Vec(PredictWidth, Bool())
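  // jmpInfo: valid when the block contains a jal/jalr;
  // bits(0) = isJalr, bits(1) = isCall, bits(2) = isRet of that jump (see fromPdWb below)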
135  def hasJal    = jmpInfo.valid && !jmpInfo.bits(0)
136  def hasJalr   = jmpInfo.valid && jmpInfo.bits(0)
137  def hasCall   = jmpInfo.valid && jmpInfo.bits(1)
138  def hasRet    = jmpInfo.valid && jmpInfo.bits(2)
139
140  def fromPdWb(pdWb: PredecodeWritebackBundle) = {
141    val pds = pdWb.pd
142    this.brMask        := VecInit(pds.map(pd => pd.isBr && pd.valid))
143    this.jmpInfo.valid := VecInit(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid)).asUInt.orR
144    this.jmpInfo.bits := ParallelPriorityMux(
145      pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid),
146      pds.map(pd => VecInit(pd.isJalr, pd.isCall, pd.isRet))
147    )
148    this.jmpOffset := ParallelPriorityEncoder(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid))
149    this.rvcMask   := VecInit(pds.map(pd => pd.isRVC))
150    this.jalTarget := pdWb.jalTarget
151  }
152
153  def toPd(offset: UInt) = {
154    require(offset.getWidth == log2Ceil(PredictWidth))
155    val pd = Wire(new PreDecodeInfo)
156    pd.valid := true.B
157    pd.isRVC := rvcMask(offset)
158    val isBr   = brMask(offset)
159    val isJalr = offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(0)
160    pd.brType := Cat(offset === jmpOffset && jmpInfo.valid, isJalr || isBr)
161    pd.isCall := offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(1)
162    pd.isRet  := offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(2)
163    pd
164  }
165}
166
167class PrefetchPtrDB(implicit p: Parameters) extends Bundle {
168  val fromFtqPtr = UInt(log2Up(p(XSCoreParamsKey).FtqSize).W)
169  val fromIfuPtr = UInt(log2Up(p(XSCoreParamsKey).FtqSize).W)
170}
171
172class Ftq_Redirect_SRAMEntry(implicit p: Parameters) extends SpeculativeInfo {
173  val sc_disagree = if (!env.FPGAPlatform) Some(Vec(numBr, Bool())) else None
174}
175
176class Ftq_1R_SRAMEntry(implicit p: Parameters) extends XSBundle with HasBPUConst {
177  val meta      = UInt(MaxMetaLength.W)
178  val ftb_entry = new FTBEntry
179}
180
181class Ftq_Pred_Info(implicit p: Parameters) extends XSBundle {
182  val target   = UInt(VAddrBits.W)
183  val cfiIndex = ValidUndirectioned(UInt(log2Ceil(PredictWidth).W))
184}
185
186class FtqRead[T <: Data](private val gen: T)(implicit p: Parameters) extends XSBundle {
187  val valid  = Output(Bool())
188  val ptr    = Output(new FtqPtr)
189  val offset = Output(UInt(log2Ceil(PredictWidth).W))
190  val data   = Input(gen)
191  def apply(valid: Bool, ptr: FtqPtr, offset: UInt) = {
192    this.valid  := valid
193    this.ptr    := ptr
194    this.offset := offset
195    this.data
196  }
197}
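// Usage sketch (hypothetical names): a consumer drives one FtqRead port and gets the read data
// back from the same call:
//   val pcRead = Wire(new FtqRead(UInt(VAddrBits.W)))
//   val pc     = pcRead(readValid, readFtqPtr, readOffset) // sets valid/ptr/offset, returns data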
198
199class FtqToBpuIO(implicit p: Parameters) extends XSBundle {
200  val redirect       = Valid(new BranchPredictionRedirect)
201  val update         = Valid(new BranchPredictionUpdate)
202  val enq_ptr        = Output(new FtqPtr)
203  val redirctFromIFU = Output(Bool())
204}
205
206class BpuFlushInfo(implicit p: Parameters) extends XSBundle with HasCircularQueuePtrHelper {
207  // when the ifu pipeline is not stalled,
208  // a packet from bpu s3 can have reached f1 at most

209  val s2 = Valid(new FtqPtr)
210  val s3 = Valid(new FtqPtr)
211  def shouldFlushBy(src: Valid[FtqPtr], idx_to_flush: FtqPtr) =
212    src.valid && !isAfter(src.bits, idx_to_flush)
213  def shouldFlushByStage2(idx: FtqPtr) = shouldFlushBy(s2, idx)
214  def shouldFlushByStage3(idx: FtqPtr) = shouldFlushBy(s3, idx)
215}
216
217class FtqToIfuIO(implicit p: Parameters) extends XSBundle {
218  val req              = Decoupled(new FetchRequestBundle)
219  val redirect         = Valid(new BranchPredictionRedirect)
220  val topdown_redirect = Valid(new BranchPredictionRedirect)
221  val flushFromBpu     = new BpuFlushInfo
222}
223
224class FtqToICacheIO(implicit p: Parameters) extends XSBundle {
225  // NOTE: req.bits must be prepared in cycle T,
226  // while req.valid is set true in cycle T + 1
227  val req = Decoupled(new FtqToICacheRequestBundle)
228}
229
230class FtqToPrefetchIO(implicit p: Parameters) extends XSBundle {
231  val req              = Decoupled(new FtqICacheInfo)
232  val flushFromBpu     = new BpuFlushInfo
233  val backendException = UInt(ExceptionType.width.W)
234}
235
236trait HasBackendRedirectInfo extends HasXSParameter {
237  def isLoadReplay(r: Valid[Redirect]) = r.bits.flushItself()
238}
239
240class FtqToCtrlIO(implicit p: Parameters) extends XSBundle with HasBackendRedirectInfo {
241  // write to backend pc mem
242  val pc_mem_wen   = Output(Bool())
243  val pc_mem_waddr = Output(UInt(log2Ceil(FtqSize).W))
244  val pc_mem_wdata = Output(new Ftq_RF_Components)
245  // newest target
246  val newest_entry_en     = Output(Bool())
247  val newest_entry_target = Output(UInt(VAddrBits.W))
248  val newest_entry_ptr    = Output(new FtqPtr)
249}
250
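// FTBEntryGen derives the FTB entry to be written back at update time:
// - on a miss (!hit), a fresh entry is built from the predecoded block;
// - on a hit, the old entry is selectively modified: a newly detected conditional branch is
//   inserted, a changed jalr target is rewritten, or the strong_bias bits are weakened.
// It also reports which of these cases applied, for perf counters.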
251class FTBEntryGen(implicit p: Parameters) extends XSModule with HasBackendRedirectInfo with HasBPUParameter {
252  val io = IO(new Bundle {
253    val start_addr     = Input(UInt(VAddrBits.W))
254    val old_entry      = Input(new FTBEntry)
255    val pd             = Input(new Ftq_pd_Entry)
256    val cfiIndex       = Flipped(Valid(UInt(log2Ceil(PredictWidth).W)))
257    val target         = Input(UInt(VAddrBits.W))
258    val hit            = Input(Bool())
259    val mispredict_vec = Input(Vec(PredictWidth, Bool()))
260
261    val new_entry         = Output(new FTBEntry)
262    val new_br_insert_pos = Output(Vec(numBr, Bool()))
263    val taken_mask        = Output(Vec(numBr, Bool()))
264    val jmp_taken         = Output(Bool())
265    val mispred_mask      = Output(Vec(numBr + 1, Bool()))
266
267    // for perf counters
268    val is_init_entry           = Output(Bool())
269    val is_old_entry            = Output(Bool())
270    val is_new_br               = Output(Bool())
271    val is_jalr_target_modified = Output(Bool())
272    val is_strong_bias_modified = Output(Bool())
273    val is_br_full              = Output(Bool())
274  })
275
276  // no mispredictions detected at predecode
277  val hit = io.hit
278  val pd  = io.pd
279
280  val init_entry = WireInit(0.U.asTypeOf(new FTBEntry))
281
282  val cfi_is_br       = pd.brMask(io.cfiIndex.bits) && io.cfiIndex.valid
283  val entry_has_jmp   = pd.jmpInfo.valid
284  val new_jmp_is_jal  = entry_has_jmp && !pd.jmpInfo.bits(0) && io.cfiIndex.valid
285  val new_jmp_is_jalr = entry_has_jmp && pd.jmpInfo.bits(0) && io.cfiIndex.valid
286  val new_jmp_is_call = entry_has_jmp && pd.jmpInfo.bits(1) && io.cfiIndex.valid
287  val new_jmp_is_ret  = entry_has_jmp && pd.jmpInfo.bits(2) && io.cfiIndex.valid
288  val last_jmp_rvi    = entry_has_jmp && pd.jmpOffset === (PredictWidth - 1).U && !pd.rvcMask.last
289  // val last_br_rvi = cfi_is_br && io.cfiIndex.bits === (PredictWidth-1).U && !pd.rvcMask.last
290
291  val cfi_is_jal  = io.cfiIndex.bits === pd.jmpOffset && new_jmp_is_jal
292  val cfi_is_jalr = io.cfiIndex.bits === pd.jmpOffset && new_jmp_is_jalr
293
294  def carryPos = log2Ceil(PredictWidth) + instOffsetBits
295  def getLower(pc: UInt) = pc(carryPos - 1, instOffsetBits)
296  // if not hit, establish a new entry
297  init_entry.valid := true.B
298  // tag is left for ftb to assign
299
300  // case br
301  val init_br_slot = init_entry.getSlotForBr(0)
302  when(cfi_is_br) {
303    init_br_slot.valid  := true.B
304    init_br_slot.offset := io.cfiIndex.bits
305    init_br_slot.setLowerStatByTarget(io.start_addr, io.target, numBr == 1)
306    init_entry.strong_bias(0) := true.B // set to strong bias on init
307  }
308
309  // case jmp
310  when(entry_has_jmp) {
311    init_entry.tailSlot.offset := pd.jmpOffset
312    init_entry.tailSlot.valid  := new_jmp_is_jal || new_jmp_is_jalr
313    init_entry.tailSlot.setLowerStatByTarget(io.start_addr, Mux(cfi_is_jalr, io.target, pd.jalTarget), isShare = false)
314    init_entry.strong_bias(numBr - 1) := new_jmp_is_jalr // set strong bias for the jalr on init
315  }
316
317  val jmpPft = getLower(io.start_addr) +& pd.jmpOffset +& Mux(pd.rvcMask(pd.jmpOffset), 1.U, 2.U)
318  init_entry.pftAddr := Mux(entry_has_jmp && !last_jmp_rvi, jmpPft, getLower(io.start_addr))
319  init_entry.carry   := Mux(entry_has_jmp && !last_jmp_rvi, jmpPft(carryPos - instOffsetBits), true.B)
320
321  require(
322    isPow2(PredictWidth),
323    "If PredictWidth is not a power of 2, " +
324      "pftAddr := getLower(io.start_addr) and carry := true.B do not work!"
325  )
326
327  init_entry.isJalr := new_jmp_is_jalr
328  init_entry.isCall := new_jmp_is_call
329  init_entry.isRet  := new_jmp_is_ret
330  // the jump is a 4-byte (RVI) instruction in the last slot, so the fall-through address points to the middle of it
331  init_entry.last_may_be_rvi_call := pd.jmpOffset === (PredictWidth - 1).U && !pd.rvcMask(pd.jmpOffset)
332
333  // if hit, check whether a new cfi(only br is possible) is detected
334  val oe              = io.old_entry
335  val br_recorded_vec = oe.getBrRecordedVec(io.cfiIndex.bits)
336  val br_recorded     = br_recorded_vec.asUInt.orR
337  val is_new_br       = cfi_is_br && !br_recorded
338  val new_br_offset   = io.cfiIndex.bits
339  // vec(i) means new br will be inserted BEFORE old br(i)
340  val allBrSlotsVec = oe.allSlotsForBr
341  val new_br_insert_onehot = VecInit((0 until numBr).map {
342    i =>
343      i match {
344        case 0 =>
345          !allBrSlotsVec(0).valid || new_br_offset < allBrSlotsVec(0).offset
346        case idx =>
347          allBrSlotsVec(idx - 1).valid && new_br_offset > allBrSlotsVec(idx - 1).offset &&
348          (!allBrSlotsVec(idx).valid || new_br_offset < allBrSlotsVec(idx).offset)
349      }
350  })
351
352  val old_entry_modified = WireInit(io.old_entry)
353  for (i <- 0 until numBr) {
354    val slot = old_entry_modified.allSlotsForBr(i)
355    when(new_br_insert_onehot(i)) {
356      slot.valid  := true.B
357      slot.offset := new_br_offset
358      slot.setLowerStatByTarget(io.start_addr, io.target, i == numBr - 1)
359      old_entry_modified.strong_bias(i) := true.B
360    }.elsewhen(new_br_offset > oe.allSlotsForBr(i).offset) {
361      old_entry_modified.strong_bias(i) := false.B
362      // all other fields remain unchanged
363    }.otherwise {
364      // case i == 0, remain unchanged
365      if (i != 0) {
366        val noNeedToMoveFromFormerSlot = (i == numBr - 1).B && !oe.brSlots.last.valid
367        when(!noNeedToMoveFromFormerSlot) {
368          slot.fromAnotherSlot(oe.allSlotsForBr(i - 1))
369          old_entry_modified.strong_bias(i) := oe.strong_bias(i)
370        }
371      }
372    }
373  }
374
375  // two circumstances:
376  // 1. oe: | br | j  |, new br should be in front of j, thus addr of j should be new pft
377  // 2. oe: | br | br |, new br could be anywhere between, thus new pft is the addr of either
378  //        the previous last br or the new br
379  val may_have_to_replace = oe.noEmptySlotForNewBr
380  val pft_need_to_change  = is_new_br && may_have_to_replace
381  // it should either be the given last br or the new br
382  when(pft_need_to_change) {
383    val new_pft_offset =
384      Mux(!new_br_insert_onehot.asUInt.orR, new_br_offset, oe.allSlotsForBr.last.offset)
385
386    // set jmp to invalid
387    old_entry_modified.pftAddr              := getLower(io.start_addr) + new_pft_offset
388    old_entry_modified.carry                := (getLower(io.start_addr) +& new_pft_offset).head(1).asBool
389    old_entry_modified.last_may_be_rvi_call := false.B
390    old_entry_modified.isCall               := false.B
391    old_entry_modified.isRet                := false.B
392    old_entry_modified.isJalr               := false.B
393  }
394
395  val old_entry_jmp_target_modified = WireInit(oe)
396  val old_target      = oe.tailSlot.getTarget(io.start_addr) // may be wrong because we store only 20 lowest bits
397  val old_tail_is_jmp = !oe.tailSlot.sharing
398  val jalr_target_modified = cfi_is_jalr && (old_target =/= io.target) && old_tail_is_jmp // TODO: pass full jalr target
399  when(jalr_target_modified) {
400    old_entry_jmp_target_modified.setByJmpTarget(io.start_addr, io.target)
401    old_entry_jmp_target_modified.strong_bias := 0.U.asTypeOf(Vec(numBr, Bool()))
402  }
403
404  val old_entry_strong_bias    = WireInit(oe)
405  val strong_bias_modified_vec = Wire(Vec(numBr, Bool())) // whether modified or not
406  for (i <- 0 until numBr) {
407    when(br_recorded_vec(0)) {
408      old_entry_strong_bias.strong_bias(0) :=
409        oe.strong_bias(0) && io.cfiIndex.valid && oe.brValids(0) && io.cfiIndex.bits === oe.brOffset(0)
410    }.elsewhen(br_recorded_vec(numBr - 1)) {
411      old_entry_strong_bias.strong_bias(0) := false.B
412      old_entry_strong_bias.strong_bias(numBr - 1) :=
413        oe.strong_bias(numBr - 1) && io.cfiIndex.valid && oe.brValids(numBr - 1) && io.cfiIndex.bits === oe.brOffset(
414          numBr - 1
415        )
416    }
417    strong_bias_modified_vec(i) := oe.strong_bias(i) && oe.brValids(i) && !old_entry_strong_bias.strong_bias(i)
418  }
419  val strong_bias_modified = strong_bias_modified_vec.reduce(_ || _)
420
421  val derived_from_old_entry =
422    Mux(is_new_br, old_entry_modified, Mux(jalr_target_modified, old_entry_jmp_target_modified, old_entry_strong_bias))
423
424  io.new_entry := Mux(!hit, init_entry, derived_from_old_entry)
425
426  io.new_br_insert_pos := new_br_insert_onehot
427  io.taken_mask := VecInit((io.new_entry.brOffset zip io.new_entry.brValids).map {
428    case (off, v) => io.cfiIndex.bits === off && io.cfiIndex.valid && v
429  })
430  io.jmp_taken := io.new_entry.jmpValid && io.new_entry.tailSlot.offset === io.cfiIndex.bits
431  for (i <- 0 until numBr) {
432    io.mispred_mask(i) := io.new_entry.brValids(i) && io.mispredict_vec(io.new_entry.brOffset(i))
433  }
434  io.mispred_mask.last := io.new_entry.jmpValid && io.mispredict_vec(pd.jmpOffset)
435
436  // for perf counters
437  io.is_init_entry           := !hit
438  io.is_old_entry            := hit && !is_new_br && !jalr_target_modified && !strong_bias_modified
439  io.is_new_br               := hit && is_new_br
440  io.is_jalr_target_modified := hit && jalr_target_modified
441  io.is_strong_bias_modified := hit && strong_bias_modified
442  io.is_br_full              := hit && is_new_br && may_have_to_replace
443}
444
445class FtqPcMemWrapper(numOtherReads: Int)(implicit p: Parameters) extends XSModule with HasBackendRedirectInfo {
446  val io = IO(new Bundle {
447    val ifuPtr_w           = Input(new FtqPtr)
448    val ifuPtrPlus1_w      = Input(new FtqPtr)
449    val ifuPtrPlus2_w      = Input(new FtqPtr)
450    val pfPtr_w            = Input(new FtqPtr)
451    val pfPtrPlus1_w       = Input(new FtqPtr)
452    val commPtr_w          = Input(new FtqPtr)
453    val commPtrPlus1_w     = Input(new FtqPtr)
454    val ifuPtr_rdata       = Output(new Ftq_RF_Components)
455    val ifuPtrPlus1_rdata  = Output(new Ftq_RF_Components)
456    val ifuPtrPlus2_rdata  = Output(new Ftq_RF_Components)
457    val pfPtr_rdata        = Output(new Ftq_RF_Components)
458    val pfPtrPlus1_rdata   = Output(new Ftq_RF_Components)
459    val commPtr_rdata      = Output(new Ftq_RF_Components)
460    val commPtrPlus1_rdata = Output(new Ftq_RF_Components)
461
462    val wen   = Input(Bool())
463    val waddr = Input(UInt(log2Ceil(FtqSize).W))
464    val wdata = Input(new Ftq_RF_Components)
465  })
466
467  val num_pc_read = numOtherReads + 5
468  val mem         = Module(new SyncDataModuleTemplate(new Ftq_RF_Components, FtqSize, num_pc_read, 1, "FtqPC"))
469  mem.io.wen(0)   := io.wen
470  mem.io.waddr(0) := io.waddr
471  mem.io.wdata(0) := io.wdata
472
473  // read one cycle ahead for ftq local reads
474  val raddr_vec = VecInit(Seq(
475    io.ifuPtr_w.value,
476    io.ifuPtrPlus1_w.value,
477    io.ifuPtrPlus2_w.value,
478    io.pfPtr_w.value,
479    io.pfPtrPlus1_w.value,
480    io.commPtrPlus1_w.value,
481    io.commPtr_w.value
482  ))
483
484  mem.io.raddr := raddr_vec
485
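  // read data comes back in raddr_vec order: ifuPtr, ifuPtrPlus1, ifuPtrPlus2, pfPtr, pfPtrPlus1,
  // commPtrPlus1, commPtr; dropRight(k).last indexes from the end of that list.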
486  io.ifuPtr_rdata       := mem.io.rdata.dropRight(6).last
487  io.ifuPtrPlus1_rdata  := mem.io.rdata.dropRight(5).last
488  io.ifuPtrPlus2_rdata  := mem.io.rdata.dropRight(4).last
489  io.pfPtr_rdata        := mem.io.rdata.dropRight(3).last
490  io.pfPtrPlus1_rdata   := mem.io.rdata.dropRight(2).last
491  io.commPtrPlus1_rdata := mem.io.rdata.dropRight(1).last
492  io.commPtr_rdata      := mem.io.rdata.last
493}
494
495class Ftq(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelper
496    with HasBackendRedirectInfo with BPUUtils with HasBPUConst with HasPerfEvents
497    with HasICacheParameters {
498  val io = IO(new Bundle {
499    val fromBpu     = Flipped(new BpuToFtqIO)
500    val fromIfu     = Flipped(new IfuToFtqIO)
501    val fromBackend = Flipped(new CtrlToFtqIO)
502
503    val toBpu       = new FtqToBpuIO
504    val toIfu       = new FtqToIfuIO
505    val toICache    = new FtqToICacheIO
506    val toBackend   = new FtqToCtrlIO
507    val toPrefetch  = new FtqToPrefetchIO
508    val icacheFlush = Output(Bool())
509
510    val bpuInfo = new Bundle {
511      val bpRight = Output(UInt(XLEN.W))
512      val bpWrong = Output(UInt(XLEN.W))
513    }
514
515    val mmioCommitRead = Flipped(new mmioCommitRead)
516
517    // for perf
518    val ControlBTBMissBubble = Output(Bool())
519    val TAGEMissBubble       = Output(Bool())
520    val SCMissBubble         = Output(Bool())
521    val ITTAGEMissBubble     = Output(Bool())
522    val RASMissBubble        = Output(Bool())
523  })
524  io.bpuInfo := DontCare
525
526  val topdown_stage = RegInit(0.U.asTypeOf(new FrontendTopDownBundle))
527  // only driven by clock, not valid-ready
528  topdown_stage                  := io.fromBpu.resp.bits.topdown_info
529  io.toIfu.req.bits.topdown_info := topdown_stage
530
531  val ifuRedirected = RegInit(VecInit(Seq.fill(FtqSize)(false.B)))
532
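  // Redirect-ahead: the backend may provide the redirecting FTQ index (ftqIdxAhead, selected by
  // ftqIdxSelOH) one cycle before the redirect itself, so the redirect-related memories below can
  // be read one cycle early; realAhdValid marks that last cycle's ahead read matches this cycle's
  // redirect, in which case the un-delayed backendRedirect is used instead of the registered copy.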
533  // io.fromBackend.ftqIdxAhead: bju(BjuCnt) + ldReplay + exception
534  val ftqIdxAhead = VecInit(Seq.tabulate(FtqRedirectAheadNum)(i => io.fromBackend.ftqIdxAhead(i))) // only bju
535  val ftqIdxSelOH = io.fromBackend.ftqIdxSelOH.bits(FtqRedirectAheadNum - 1, 0)
536
537  val aheadValid         = ftqIdxAhead.map(_.valid).reduce(_ | _) && !io.fromBackend.redirect.valid
538  val realAhdValid       = io.fromBackend.redirect.valid && (ftqIdxSelOH > 0.U) && RegNext(aheadValid)
539  val backendRedirect    = Wire(Valid(new BranchPredictionRedirect))
540  val backendRedirectReg = Wire(Valid(new BranchPredictionRedirect))
541  backendRedirectReg.valid := RegNext(Mux(realAhdValid, false.B, backendRedirect.valid))
542  backendRedirectReg.bits  := RegEnable(backendRedirect.bits, backendRedirect.valid)
543  val fromBackendRedirect = Wire(Valid(new BranchPredictionRedirect))
544  fromBackendRedirect := Mux(realAhdValid, backendRedirect, backendRedirectReg)
545
546  val stage2Flush  = backendRedirect.valid
547  val backendFlush = stage2Flush || RegNext(stage2Flush)
548  val ifuFlush     = Wire(Bool())
549
550  val flush = stage2Flush || RegNext(stage2Flush)
551
552  val allowBpuIn, allowToIfu = WireInit(false.B)
553  val flushToIfu             = !allowToIfu
554  allowBpuIn := !ifuFlush && !backendRedirect.valid && !backendRedirectReg.valid
555  allowToIfu := !ifuFlush && !backendRedirect.valid && !backendRedirectReg.valid
556
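  // FTQ pointers: bpuPtr - enqueue from BPU; ifuPtr/pfPtr - next entry to be sent to IFU / the
  // instruction prefetcher; ifuWbPtr - next entry expected to be written back by IFU predecode;
  // commPtr - next entry to be committed; robCommPtr - presumably tracks ROB commit progress.
  // The *Plus1/*Plus2 registers are pre-incremented copies kept so ftq_pc_mem can be read one
  // cycle ahead; the copied_* pointers only duplicate state to reduce fanout.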
557  def copyNum                                              = 5
558  val bpuPtr, ifuPtr, pfPtr, ifuWbPtr, commPtr, robCommPtr = RegInit(FtqPtr(false.B, 0.U))
559  val ifuPtrPlus1                                          = RegInit(FtqPtr(false.B, 1.U))
560  val ifuPtrPlus2                                          = RegInit(FtqPtr(false.B, 2.U))
561  val pfPtrPlus1                                           = RegInit(FtqPtr(false.B, 1.U))
562  val commPtrPlus1                                         = RegInit(FtqPtr(false.B, 1.U))
563  val copied_ifu_ptr                                       = Seq.fill(copyNum)(RegInit(FtqPtr(false.B, 0.U)))
564  val copied_bpu_ptr                                       = Seq.fill(copyNum)(RegInit(FtqPtr(false.B, 0.U)))
565  require(FtqSize >= 4)
566  val ifuPtr_write       = WireInit(ifuPtr)
567  val ifuPtrPlus1_write  = WireInit(ifuPtrPlus1)
568  val ifuPtrPlus2_write  = WireInit(ifuPtrPlus2)
569  val pfPtr_write        = WireInit(pfPtr)
570  val pfPtrPlus1_write   = WireInit(pfPtrPlus1)
571  val ifuWbPtr_write     = WireInit(ifuWbPtr)
572  val commPtr_write      = WireInit(commPtr)
573  val commPtrPlus1_write = WireInit(commPtrPlus1)
574  val robCommPtr_write   = WireInit(robCommPtr)
575  ifuPtr       := ifuPtr_write
576  ifuPtrPlus1  := ifuPtrPlus1_write
577  ifuPtrPlus2  := ifuPtrPlus2_write
578  pfPtr        := pfPtr_write
579  pfPtrPlus1   := pfPtrPlus1_write
580  ifuWbPtr     := ifuWbPtr_write
581  commPtr      := commPtr_write
582  commPtrPlus1 := commPtrPlus1_write
583  copied_ifu_ptr.map { ptr =>
584    ptr := ifuPtr_write
585    dontTouch(ptr)
586  }
587  robCommPtr := robCommPtr_write
588  val validEntries = distanceBetween(bpuPtr, commPtr)
589  val canCommit    = Wire(Bool())
590
591  // Instruction page faults and instruction access faults are sent from the backend together with redirect requests.
592  // When an IPF or IAF is sent, backendPcFaultPtr points to the FTQ entry whose first instruction
593  // raises the IPF or IAF, which is ifuWbPtr_write or ifuPtr_write.
594  // backendException can only be cleared after IFU has written back that FTQ entry, which makes sure
595  // that the IAF or IPF is correctly raised instead of being flushed by redirect requests.
596  val backendException  = RegInit(ExceptionType.none)
597  val backendPcFaultPtr = RegInit(FtqPtr(false.B, 0.U))
598  when(fromBackendRedirect.valid) {
599    backendException := ExceptionType.fromOH(
600      has_pf = fromBackendRedirect.bits.cfiUpdate.backendIPF,
601      has_gpf = fromBackendRedirect.bits.cfiUpdate.backendIGPF,
602      has_af = fromBackendRedirect.bits.cfiUpdate.backendIAF
603    )
604    when(
605      fromBackendRedirect.bits.cfiUpdate.backendIPF || fromBackendRedirect.bits.cfiUpdate.backendIGPF ||
606        fromBackendRedirect.bits.cfiUpdate.backendIAF
607    ) {
608      backendPcFaultPtr := ifuWbPtr_write
609    }
610  }.elsewhen(ifuWbPtr =/= backendPcFaultPtr) {
611    backendException := ExceptionType.none
612  }
613
614  // **********************************************************************
615  // **************************** enq from bpu ****************************
616  // **********************************************************************
617  val new_entry_ready = validEntries < FtqSize.U || canCommit
618  io.fromBpu.resp.ready := new_entry_ready
619
620  val bpu_s2_resp     = io.fromBpu.resp.bits.s2
621  val bpu_s3_resp     = io.fromBpu.resp.bits.s3
622  val bpu_s2_redirect = bpu_s2_resp.valid(3) && bpu_s2_resp.hasRedirect(3)
623  val bpu_s3_redirect = bpu_s3_resp.valid(3) && bpu_s3_resp.hasRedirect(3)
624
625  io.toBpu.enq_ptr := bpuPtr
626  val enq_fire    = io.fromBpu.resp.fire && allowBpuIn // from bpu s1
627  val bpu_in_fire = (io.fromBpu.resp.fire || bpu_s2_redirect || bpu_s3_redirect) && allowBpuIn
628
629  val bpu_in_resp     = io.fromBpu.resp.bits.selectedResp
630  val bpu_in_stage    = io.fromBpu.resp.bits.selectedRespIdxForFtq
631  val bpu_in_resp_ptr = Mux(bpu_in_stage === BP_S1, bpuPtr, bpu_in_resp.ftq_idx)
632  val bpu_in_resp_idx = bpu_in_resp_ptr.value
633
634  // read ports:      pfReq1 + pfReq2 ++  ifuReq1 + ifuReq2 + ifuReq3 + commitUpdate2 + commitUpdate
635  val ftq_pc_mem = Module(new FtqPcMemWrapper(2))
636  // resp from uBTB
637  ftq_pc_mem.io.wen   := bpu_in_fire
638  ftq_pc_mem.io.waddr := bpu_in_resp_idx
639  ftq_pc_mem.io.wdata.fromBranchPrediction(bpu_in_resp)
640
641  //                                                            ifuRedirect + backendRedirect + commit
642  val ftq_redirect_mem = Module(new SyncDataModuleTemplate(
643    new Ftq_Redirect_SRAMEntry,
644    FtqSize,
645    IfuRedirectNum + FtqRedirectAheadNum + 1,
646    1,
647    hasRen = true
648  ))
649  // this info is intended to be enqueued at the last stage of bpu
650  ftq_redirect_mem.io.wen(0)   := io.fromBpu.resp.bits.lastStage.valid(3)
651  ftq_redirect_mem.io.waddr(0) := io.fromBpu.resp.bits.lastStage.ftq_idx.value
652  ftq_redirect_mem.io.wdata(0) := io.fromBpu.resp.bits.last_stage_spec_info
653  println(f"ftq redirect MEM: entry ${ftq_redirect_mem.io.wdata(0).getWidth} * ${FtqSize} * 3")
654
655  val ftq_meta_1r_sram = Module(new FtqNRSRAM(new Ftq_1R_SRAMEntry, 1))
656  // this info is intended to be enqueued at the last stage of bpu
657  ftq_meta_1r_sram.io.wen             := io.fromBpu.resp.bits.lastStage.valid(3)
658  ftq_meta_1r_sram.io.waddr           := io.fromBpu.resp.bits.lastStage.ftq_idx.value
659  ftq_meta_1r_sram.io.wdata.meta      := io.fromBpu.resp.bits.last_stage_meta
660  ftq_meta_1r_sram.io.wdata.ftb_entry := io.fromBpu.resp.bits.last_stage_ftb_entry
661  //                                                            ifuRedirect + backendRedirect (commit moved to ftq_meta_1r_sram)
662  val ftb_entry_mem = Module(new SyncDataModuleTemplate(
663    new FTBEntry_FtqMem,
664    FtqSize,
665    IfuRedirectNum + FtqRedirectAheadNum,
666    1,
667    hasRen = true
668  ))
669  ftb_entry_mem.io.wen(0)   := io.fromBpu.resp.bits.lastStage.valid(3)
670  ftb_entry_mem.io.waddr(0) := io.fromBpu.resp.bits.lastStage.ftq_idx.value
671  ftb_entry_mem.io.wdata(0) := io.fromBpu.resp.bits.last_stage_ftb_entry
672  private val mbistPl = MbistPipeline.PlaceMbistPipeline(1, "MbistPipeFtq", hasMbist)
673
674  // multi-write
675  val update_target = Reg(Vec(FtqSize, UInt(VAddrBits.W))) // could be taken target or fallThrough //TODO: remove this
676  val newest_entry_target          = Reg(UInt(VAddrBits.W))
677  val newest_entry_target_modified = RegInit(false.B)
678  val newest_entry_ptr             = Reg(new FtqPtr)
679  val newest_entry_ptr_modified    = RegInit(false.B)
680  val cfiIndex_vec                 = Reg(Vec(FtqSize, ValidUndirectioned(UInt(log2Ceil(PredictWidth).W))))
681  val mispredict_vec               = Reg(Vec(FtqSize, Vec(PredictWidth, Bool())))
682  val pred_stage                   = Reg(Vec(FtqSize, UInt(2.W)))
683  val pred_s1_cycle                = if (!env.FPGAPlatform) Some(Reg(Vec(FtqSize, UInt(64.W)))) else None
684
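  // Per-instruction commit state of each FTQ entry: c_empty (no valid instruction yet),
  // c_toCommit (written back by IFU, waiting to be committed), c_committed, and c_flushed
  // (presumably squashed before commit).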
685  val c_empty :: c_toCommit :: c_committed :: c_flushed :: Nil = Enum(4)
686  val commitStateQueueReg = RegInit(VecInit(Seq.fill(FtqSize) {
687    VecInit(Seq.fill(PredictWidth)(c_empty))
688  }))
689  val commitStateQueueEnable = WireInit(VecInit(Seq.fill(FtqSize)(false.B)))
690  val commitStateQueueNext   = WireInit(commitStateQueueReg)
691
692  for (f <- 0 until FtqSize) {
693    when(commitStateQueueEnable(f)) {
694      commitStateQueueReg(f) := commitStateQueueNext(f)
695    }
696  }
697
698  val f_to_send :: f_sent :: Nil = Enum(2)
699  val entry_fetch_status         = RegInit(VecInit(Seq.fill(FtqSize)(f_sent)))
700
701  val h_not_hit :: h_false_hit :: h_hit :: Nil = Enum(3)
702  val entry_hit_status                         = RegInit(VecInit(Seq.fill(FtqSize)(h_not_hit)))
703
704  // modify registers one cycle later to cut critical path
705  val last_cycle_bpu_in       = RegNext(bpu_in_fire)
706  val last_cycle_bpu_in_ptr   = RegEnable(bpu_in_resp_ptr, bpu_in_fire)
707  val last_cycle_bpu_in_idx   = last_cycle_bpu_in_ptr.value
708  val last_cycle_bpu_target   = RegEnable(bpu_in_resp.getTarget(3), bpu_in_fire)
709  val last_cycle_cfiIndex     = RegEnable(bpu_in_resp.cfiIndex(3), bpu_in_fire)
710  val last_cycle_bpu_in_stage = RegEnable(bpu_in_stage, bpu_in_fire)
711
712  def extra_copyNum_for_commitStateQueue = 2
713  val copied_last_cycle_bpu_in =
714    VecInit(Seq.fill(copyNum + extra_copyNum_for_commitStateQueue)(RegNext(bpu_in_fire)))
715  val copied_last_cycle_bpu_in_ptr_for_ftq =
716    VecInit(Seq.fill(extra_copyNum_for_commitStateQueue)(RegEnable(bpu_in_resp_ptr, bpu_in_fire)))
717
718  newest_entry_target_modified := false.B
719  newest_entry_ptr_modified    := false.B
720  when(last_cycle_bpu_in) {
721    entry_fetch_status(last_cycle_bpu_in_idx) := f_to_send
722    cfiIndex_vec(last_cycle_bpu_in_idx)       := last_cycle_cfiIndex
723    pred_stage(last_cycle_bpu_in_idx)         := last_cycle_bpu_in_stage
724
725    update_target(last_cycle_bpu_in_idx) := last_cycle_bpu_target // TODO: remove this
726    newest_entry_target_modified         := true.B
727    newest_entry_target                  := last_cycle_bpu_target
728    newest_entry_ptr_modified            := true.B
729    newest_entry_ptr                     := last_cycle_bpu_in_ptr
730  }
731
732  // reduce fanout by delaying the write for a cycle
733  when(RegNext(last_cycle_bpu_in)) {
734    mispredict_vec(RegEnable(last_cycle_bpu_in_idx, last_cycle_bpu_in)) :=
735      WireInit(VecInit(Seq.fill(PredictWidth)(false.B)))
736  }
737
738  // record s1 pred cycles
739  pred_s1_cycle.map { vec =>
740    when(bpu_in_fire && (bpu_in_stage === BP_S1)) {
741      vec(bpu_in_resp_ptr.value) := bpu_in_resp.full_pred(0).predCycle.getOrElse(0.U)
742    }
743  }
744
745  // reduce fanout using copied last_cycle_bpu_in and copied last_cycle_bpu_in_ptr
746  val copied_last_cycle_bpu_in_for_ftq = copied_last_cycle_bpu_in.takeRight(extra_copyNum_for_commitStateQueue)
747  copied_last_cycle_bpu_in_for_ftq.zip(copied_last_cycle_bpu_in_ptr_for_ftq).zipWithIndex.map {
748    case ((in, ptr), i) =>
749      when(in) {
750        val perSetEntries = FtqSize / extra_copyNum_for_commitStateQueue // 32
751        require(FtqSize % extra_copyNum_for_commitStateQueue == 0)
752        for (j <- 0 until perSetEntries) {
753          when(ptr.value === (i * perSetEntries + j).U) {
754            commitStateQueueNext(i * perSetEntries + j) := VecInit(Seq.fill(PredictWidth)(c_empty))
755            // Clock gating optimization, use 1 gate cell to control a row
756            commitStateQueueEnable(i * perSetEntries + j) := true.B
757          }
758        }
759      }
760  }
761
762  bpuPtr := bpuPtr + enq_fire
763  copied_bpu_ptr.map(_ := bpuPtr + enq_fire)
764  when(io.toIfu.req.fire && allowToIfu) {
765    ifuPtr_write      := ifuPtrPlus1
766    ifuPtrPlus1_write := ifuPtrPlus2
767    ifuPtrPlus2_write := ifuPtrPlus2 + 1.U
768  }
769  when(io.toPrefetch.req.fire && allowToIfu) {
770    pfPtr_write      := pfPtrPlus1
771    pfPtrPlus1_write := pfPtrPlus1 + 1.U
772  }
773
774  // only use ftb result to assign hit status
775  when(bpu_s2_resp.valid(3)) {
776    entry_hit_status(bpu_s2_resp.ftq_idx.value) := Mux(bpu_s2_resp.full_pred(3).hit, h_hit, h_not_hit)
777  }
778
779  io.toIfu.flushFromBpu.s2.valid      := bpu_s2_redirect
780  io.toIfu.flushFromBpu.s2.bits       := bpu_s2_resp.ftq_idx
781  io.toPrefetch.flushFromBpu.s2.valid := bpu_s2_redirect
782  io.toPrefetch.flushFromBpu.s2.bits  := bpu_s2_resp.ftq_idx
783  when(bpu_s2_redirect) {
784    bpuPtr := bpu_s2_resp.ftq_idx + 1.U
785    copied_bpu_ptr.map(_ := bpu_s2_resp.ftq_idx + 1.U)
786    // only when ifuPtr runs ahead of bpu s2 resp should we recover it
787    when(!isBefore(ifuPtr, bpu_s2_resp.ftq_idx)) {
788      ifuPtr_write      := bpu_s2_resp.ftq_idx
789      ifuPtrPlus1_write := bpu_s2_resp.ftq_idx + 1.U
790      ifuPtrPlus2_write := bpu_s2_resp.ftq_idx + 2.U
791    }
792    when(!isBefore(pfPtr, bpu_s2_resp.ftq_idx)) {
793      pfPtr_write      := bpu_s2_resp.ftq_idx
794      pfPtrPlus1_write := bpu_s2_resp.ftq_idx + 1.U
795    }
796  }
797
798  io.toIfu.flushFromBpu.s3.valid      := bpu_s3_redirect
799  io.toIfu.flushFromBpu.s3.bits       := bpu_s3_resp.ftq_idx
800  io.toPrefetch.flushFromBpu.s3.valid := bpu_s3_redirect
801  io.toPrefetch.flushFromBpu.s3.bits  := bpu_s3_resp.ftq_idx
802  when(bpu_s3_redirect) {
803    bpuPtr := bpu_s3_resp.ftq_idx + 1.U
804    copied_bpu_ptr.map(_ := bpu_s3_resp.ftq_idx + 1.U)
805    // only when ifuPtr runs ahead of bpu s3 resp should we recover it
806    when(!isBefore(ifuPtr, bpu_s3_resp.ftq_idx)) {
807      ifuPtr_write      := bpu_s3_resp.ftq_idx
808      ifuPtrPlus1_write := bpu_s3_resp.ftq_idx + 1.U
809      ifuPtrPlus2_write := bpu_s3_resp.ftq_idx + 2.U
810    }
811    when(!isBefore(pfPtr, bpu_s3_resp.ftq_idx)) {
812      pfPtr_write      := bpu_s3_resp.ftq_idx
813      pfPtrPlus1_write := bpu_s3_resp.ftq_idx + 1.U
814    }
815  }
816
817  XSError(isBefore(bpuPtr, ifuPtr) && !isFull(bpuPtr, ifuPtr), "\nifuPtr is before bpuPtr!\n")
818  XSError(isBefore(bpuPtr, pfPtr) && !isFull(bpuPtr, pfPtr), "\npfPtr is before bpuPtr!\n")
819  XSError(isBefore(ifuWbPtr, commPtr) && !isFull(ifuWbPtr, commPtr), "\ncommPtr is before ifuWbPtr!\n")
820
821  (0 until copyNum).map(i => XSError(copied_bpu_ptr(i) =/= bpuPtr, "\ncopiedBpuPtr is different from bpuPtr!\n"))
822
823  // ****************************************************************
824  // **************************** to ifu ****************************
825  // ****************************************************************
826  // 0  for ifu, and 1-4 for ICache
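  // bpu_in_bypass_buf holds the Ftq_RF_Components that BPU wrote into ftq_pc_mem last cycle, so a
  // just-enqueued entry can be forwarded to IFU / ICache / prefetch without waiting for a memory
  // read; the copied buffers and pointers only duplicate it to reduce fanout.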
827  val bpu_in_bypass_buf         = RegEnable(ftq_pc_mem.io.wdata, bpu_in_fire)
828  val copied_bpu_in_bypass_buf  = VecInit(Seq.fill(copyNum)(RegEnable(ftq_pc_mem.io.wdata, bpu_in_fire)))
829  val bpu_in_bypass_buf_for_ifu = bpu_in_bypass_buf
830  val bpu_in_bypass_ptr         = RegEnable(bpu_in_resp_ptr, bpu_in_fire)
831  val last_cycle_to_ifu_fire    = RegNext(io.toIfu.req.fire)
832  val last_cycle_to_pf_fire     = RegNext(io.toPrefetch.req.fire)
833
834  val copied_bpu_in_bypass_ptr      = VecInit(Seq.fill(copyNum)(RegEnable(bpu_in_resp_ptr, bpu_in_fire)))
835  val copied_last_cycle_to_ifu_fire = VecInit(Seq.fill(copyNum)(RegNext(io.toIfu.req.fire)))
836
837  // read pc and target
838  ftq_pc_mem.io.ifuPtr_w       := ifuPtr_write
839  ftq_pc_mem.io.ifuPtrPlus1_w  := ifuPtrPlus1_write
840  ftq_pc_mem.io.ifuPtrPlus2_w  := ifuPtrPlus2_write
841  ftq_pc_mem.io.pfPtr_w        := pfPtr_write
842  ftq_pc_mem.io.pfPtrPlus1_w   := pfPtrPlus1_write
843  ftq_pc_mem.io.commPtr_w      := commPtr_write
844  ftq_pc_mem.io.commPtrPlus1_w := commPtrPlus1_write
845
846  io.toIfu.req.bits.ftqIdx := ifuPtr
847
848  val toICachePcBundle               = Wire(Vec(copyNum, new Ftq_RF_Components))
849  val toICacheEntryToSend            = Wire(Vec(copyNum, Bool()))
850  val nextCycleToPrefetchPcBundle    = Wire(new Ftq_RF_Components)
851  val nextCycleToPrefetchEntryToSend = Wire(Bool())
852  val toPrefetchPcBundle             = RegNext(nextCycleToPrefetchPcBundle)
853  val toPrefetchEntryToSend          = RegNext(nextCycleToPrefetchEntryToSend)
854  val toIfuPcBundle                  = Wire(new Ftq_RF_Components)
855  val entry_is_to_send               = WireInit(entry_fetch_status(ifuPtr.value) === f_to_send)
856  val entry_ftq_offset               = WireInit(cfiIndex_vec(ifuPtr.value))
857  val entry_next_addr                = Wire(UInt(VAddrBits.W))
858
859  val pc_mem_ifu_ptr_rdata   = VecInit(Seq.fill(copyNum)(RegNext(ftq_pc_mem.io.ifuPtr_rdata)))
860  val pc_mem_ifu_plus1_rdata = VecInit(Seq.fill(copyNum)(RegNext(ftq_pc_mem.io.ifuPtrPlus1_rdata)))
861  val diff_entry_next_addr   = WireInit(update_target(ifuPtr.value)) // TODO: remove this
862
863  val copied_ifu_plus1_to_send = VecInit(Seq.fill(copyNum)(RegNext(
864    entry_fetch_status(ifuPtrPlus1.value) === f_to_send
865  ) || RegNext(last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtrPlus1)))
866  val copied_ifu_ptr_to_send = VecInit(Seq.fill(copyNum)(RegNext(
867    entry_fetch_status(ifuPtr.value) === f_to_send
868  ) || RegNext(last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr)))
869
870  for (i <- 0 until copyNum) {
871    when(copied_last_cycle_bpu_in(i) && copied_bpu_in_bypass_ptr(i) === copied_ifu_ptr(i)) {
872      toICachePcBundle(i)    := copied_bpu_in_bypass_buf(i)
873      toICacheEntryToSend(i) := true.B
874    }.elsewhen(copied_last_cycle_to_ifu_fire(i)) {
875      toICachePcBundle(i)    := pc_mem_ifu_plus1_rdata(i)
876      toICacheEntryToSend(i) := copied_ifu_plus1_to_send(i)
877    }.otherwise {
878      toICachePcBundle(i)    := pc_mem_ifu_ptr_rdata(i)
879      toICacheEntryToSend(i) := copied_ifu_ptr_to_send(i)
880    }
881  }
882
883  // Calculate requests sent to prefetcher one cycle in advance to cut critical path
884  when(bpu_in_fire && bpu_in_resp_ptr === pfPtr_write) {
885    nextCycleToPrefetchPcBundle    := ftq_pc_mem.io.wdata
886    nextCycleToPrefetchEntryToSend := true.B
887  }.elsewhen(io.toPrefetch.req.fire) {
888    nextCycleToPrefetchPcBundle := ftq_pc_mem.io.pfPtrPlus1_rdata
889    nextCycleToPrefetchEntryToSend := entry_fetch_status(pfPtrPlus1.value) === f_to_send ||
890      last_cycle_bpu_in && bpu_in_bypass_ptr === pfPtrPlus1
891  }.otherwise {
892    nextCycleToPrefetchPcBundle := ftq_pc_mem.io.pfPtr_rdata
893    nextCycleToPrefetchEntryToSend := entry_fetch_status(pfPtr.value) === f_to_send ||
894      last_cycle_bpu_in && bpu_in_bypass_ptr === pfPtr // reduce potential bubbles
895  }
896
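  // Select what is sent to IFU this cycle, in priority order:
  // 1) bypass the entry BPU enqueued last cycle when it is exactly ifuPtr;
  // 2) after a request fired last cycle, use the pre-read ifuPtrPlus1 data;
  // 3) otherwise replay the pre-read ifuPtr data.
  // entry_next_addr roughly prefers the bypassed target, then newest_entry_target, then the
  // startAddr of the following entry read from ftq_pc_mem.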
897  // TODO: reconsider target address bypass logic
898  when(last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr) {
899    toIfuPcBundle        := bpu_in_bypass_buf_for_ifu
900    entry_is_to_send     := true.B
901    entry_next_addr      := last_cycle_bpu_target
902    entry_ftq_offset     := last_cycle_cfiIndex
903    diff_entry_next_addr := last_cycle_bpu_target // TODO: remove this
904  }.elsewhen(last_cycle_to_ifu_fire) {
905    toIfuPcBundle := RegNext(ftq_pc_mem.io.ifuPtrPlus1_rdata)
906    entry_is_to_send := RegNext(entry_fetch_status(ifuPtrPlus1.value) === f_to_send) ||
907      RegNext(last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtrPlus1) // reduce potential bubbles
908    entry_next_addr := Mux(
909      last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtrPlus1,
910      bpu_in_bypass_buf_for_ifu.startAddr,
911      Mux(ifuPtr === newest_entry_ptr, newest_entry_target, RegNext(ftq_pc_mem.io.ifuPtrPlus2_rdata.startAddr))
912    ) // ifuPtr+2
913  }.otherwise {
914    toIfuPcBundle := RegNext(ftq_pc_mem.io.ifuPtr_rdata)
915    entry_is_to_send := RegNext(entry_fetch_status(ifuPtr.value) === f_to_send) ||
916      RegNext(last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr) // reduce potential bubbles
917    entry_next_addr := Mux(
918      last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtrPlus1,
919      bpu_in_bypass_buf_for_ifu.startAddr,
920      Mux(ifuPtr === newest_entry_ptr, newest_entry_target, RegNext(ftq_pc_mem.io.ifuPtrPlus1_rdata.startAddr))
921    ) // ifuPtr+1
922  }
923
924  io.toIfu.req.valid              := entry_is_to_send && ifuPtr =/= bpuPtr
925  io.toIfu.req.bits.nextStartAddr := entry_next_addr
926  io.toIfu.req.bits.ftqOffset     := entry_ftq_offset
927  io.toIfu.req.bits.fromFtqPcBundle(toIfuPcBundle)
928
929  io.toICache.req.valid := entry_is_to_send && ifuPtr =/= bpuPtr
930  io.toICache.req.bits.readValid.zipWithIndex.map { case (copy, i) =>
931    copy := toICacheEntryToSend(i) && copied_ifu_ptr(i) =/= copied_bpu_ptr(i)
932  }
933  io.toICache.req.bits.pcMemRead.zipWithIndex.foreach { case (copy, i) =>
934    copy.fromFtqPcBundle(toICachePcBundle(i))
935    copy.ftqIdx := ifuPtr
936  }
937  io.toICache.req.bits.backendException := ExceptionType.hasException(backendException) && backendPcFaultPtr === ifuPtr
938
939  io.toPrefetch.req.valid := toPrefetchEntryToSend && pfPtr =/= bpuPtr
940  io.toPrefetch.req.bits.fromFtqPcBundle(toPrefetchPcBundle)
941  io.toPrefetch.req.bits.ftqIdx  := pfPtr
942  io.toPrefetch.backendException := Mux(backendPcFaultPtr === pfPtr, backendException, ExceptionType.none)
943  // io.toICache.req.bits.bypassSelect := last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr
944  // io.toICache.req.bits.bpuBypassWrite.zipWithIndex.map{case(bypassWrite, i) =>
945  //   bypassWrite.startAddr := bpu_in_bypass_buf.tail(i).startAddr
946  //   bypassWrite.nextlineStart := bpu_in_bypass_buf.tail(i).nextLineAddr
947  // }
948
949  // TODO: remove this
950  XSError(
951    io.toIfu.req.valid && diff_entry_next_addr =/= entry_next_addr,
952    p"\nifu_req_target wrong! ifuPtr: ${ifuPtr}, entry_next_addr: ${Hexadecimal(entry_next_addr)} diff_entry_next_addr: ${Hexadecimal(diff_entry_next_addr)}\n"
953  )
954
955  // when fall through is smaller in value than start address, there must be a false hit
956  when(toIfuPcBundle.fallThruError && entry_hit_status(ifuPtr.value) === h_hit) {
957    when(io.toIfu.req.fire &&
958      !(bpu_s2_redirect && bpu_s2_resp.ftq_idx === ifuPtr) &&
959      !(bpu_s3_redirect && bpu_s3_resp.ftq_idx === ifuPtr)) {
960      entry_hit_status(ifuPtr.value) := h_false_hit
961      // XSError(true.B, "FTB false hit by fallThroughError, startAddr: %x, fallTHru: %x\n", io.toIfu.req.bits.startAddr, io.toIfu.req.bits.nextStartAddr)
962    }
963  }
964  XSDebug(
965    toIfuPcBundle.fallThruError && entry_hit_status(ifuPtr.value) === h_hit,
966    "fallThruError! start:%x, fallThru:%x\n",
967    io.toIfu.req.bits.startAddr,
968    io.toIfu.req.bits.nextStartAddr
969  )
970
971  XSPerfAccumulate(
972    f"fall_through_error_to_ifu",
973    toIfuPcBundle.fallThruError && entry_hit_status(ifuPtr.value) === h_hit &&
974      io.toIfu.req.fire && !(bpu_s2_redirect && bpu_s2_resp.ftq_idx === ifuPtr) && !(bpu_s3_redirect && bpu_s3_resp.ftq_idx === ifuPtr)
975  )
976
977  val ifu_req_should_be_flushed =
978    io.toIfu.flushFromBpu.shouldFlushByStage2(io.toIfu.req.bits.ftqIdx) ||
979      io.toIfu.flushFromBpu.shouldFlushByStage3(io.toIfu.req.bits.ftqIdx)
980
981  when(io.toIfu.req.fire && !ifu_req_should_be_flushed) {
982    entry_fetch_status(ifuPtr.value) := f_sent
983  }
984
985  // *********************************************************************
986  // **************************** wb from ifu ****************************
987  // *********************************************************************
988  val pdWb         = io.fromIfu.pdWb
989  val pds          = pdWb.bits.pd
990  val ifu_wb_valid = pdWb.valid
991  val ifu_wb_idx   = pdWb.bits.ftqIdx.value
992  // read ports:                                                         commit update
993  val ftq_pd_mem =
994    Module(new SyncDataModuleTemplate(new Ftq_pd_Entry, FtqSize, FtqRedirectAheadNum + 1, 1, hasRen = true))
995  ftq_pd_mem.io.wen(0)   := ifu_wb_valid
996  ftq_pd_mem.io.waddr(0) := pdWb.bits.ftqIdx.value
997  ftq_pd_mem.io.wdata(0).fromPdWb(pdWb.bits)
998
999  val hit_pd_valid       = entry_hit_status(ifu_wb_idx) === h_hit && ifu_wb_valid
1000  val hit_pd_mispred     = hit_pd_valid && pdWb.bits.misOffset.valid
1001  val hit_pd_mispred_reg = RegNext(hit_pd_mispred, init = false.B)
1002  val pd_reg             = RegEnable(pds, pdWb.valid)
1003  val start_pc_reg       = RegEnable(pdWb.bits.pc(0), pdWb.valid)
1004  val wb_idx_reg         = RegEnable(ifu_wb_idx, pdWb.valid)
1005
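  // On an IFU predecode writeback, mark the instructions that predecode reports as valid and
  // inside the fetch range as c_toCommit, and advance ifuWbPtr.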
1006  when(ifu_wb_valid) {
1007    val comm_stq_wen = VecInit(pds.map(_.valid).zip(pdWb.bits.instrRange).map {
1008      case (v, inRange) => v && inRange
1009    })
1010    commitStateQueueEnable(ifu_wb_idx) := true.B
1011    (commitStateQueueNext(ifu_wb_idx) zip comm_stq_wen).map {
1012      case (qe, v) => when(v) {
1013          qe := c_toCommit
1014        }
1015    }
1016  }
1017
1018  when(ifu_wb_valid) {
1019    ifuWbPtr_write := ifuWbPtr + 1.U
1020  }
1021
1022  XSError(ifu_wb_valid && isAfter(pdWb.bits.ftqIdx, ifuPtr), "IFU returned a predecode before its req, check IFU")
1023
1024  ftb_entry_mem.io.ren.get.head := ifu_wb_valid
1025  ftb_entry_mem.io.raddr.head   := ifu_wb_idx
1026  val has_false_hit = WireInit(false.B)
1027  when(RegNext(hit_pd_valid)) {
1028    // check for false hit
1029    val pred_ftb_entry = ftb_entry_mem.io.rdata.head
1030    val brSlots        = pred_ftb_entry.brSlots
1031    val tailSlot       = pred_ftb_entry.tailSlot
1032    // we check cfis that bpu predicted
1033
1034    // bpu predicted branches but denied by predecode
1035    val br_false_hit =
1036      brSlots.map {
1037        s => s.valid && !(pd_reg(s.offset).valid && pd_reg(s.offset).isBr)
1038      }.reduce(_ || _) ||
1039        (tailSlot.valid && pred_ftb_entry.tailSlot.sharing &&
1040          !(pd_reg(tailSlot.offset).valid && pd_reg(tailSlot.offset).isBr))
1041
1042    val jmpOffset = tailSlot.offset
1043    val jmp_pd    = pd_reg(jmpOffset)
1044    val jal_false_hit = pred_ftb_entry.jmpValid &&
1045      ((pred_ftb_entry.isJal && !(jmp_pd.valid && jmp_pd.isJal)) ||
1046        (pred_ftb_entry.isJalr && !(jmp_pd.valid && jmp_pd.isJalr)) ||
1047        (pred_ftb_entry.isCall && !(jmp_pd.valid && jmp_pd.isCall)) ||
1048        (pred_ftb_entry.isRet && !(jmp_pd.valid && jmp_pd.isRet)))
1049
1050    has_false_hit := br_false_hit || jal_false_hit || hit_pd_mispred_reg
1051    // assert(!has_false_hit)
1052  }
1053  XSDebug(
1054    RegNext(hit_pd_valid) && has_false_hit,
1055    "FTB false hit by br or jal or hit_pd, startAddr: %x\n",
1056    pdWb.bits.pc(0)
1057  )
1058
1059  when(has_false_hit) {
1060    entry_hit_status(wb_idx_reg) := h_false_hit
1061  }
1062
1063  // *******************************************************************************
1064  // **************************** redirect from backend ****************************
1065  // *******************************************************************************
1066
1067  // redirect read cfiInfo, couples to redirectGen s2
1068  // ftqIdxAhead(0-3) => ftq_redirect_mem(1-4), reuse ftq_redirect_mem(1)
1069  val ftq_redirect_rdata = Wire(Vec(FtqRedirectAheadNum, new Ftq_Redirect_SRAMEntry))
1070  val ftb_redirect_rdata = Wire(Vec(FtqRedirectAheadNum, new FTBEntry_FtqMem))
1071
1072  val ftq_pd_rdata = Wire(Vec(FtqRedirectAheadNum, new Ftq_pd_Entry))
1073  for (i <- 1 until FtqRedirectAheadNum) {
1074    ftq_redirect_mem.io.ren.get(i + IfuRedirectNum) := ftqIdxAhead(i).valid
1075    ftq_redirect_mem.io.raddr(i + IfuRedirectNum)   := ftqIdxAhead(i).bits.value
1076    ftb_entry_mem.io.ren.get(i + IfuRedirectNum)    := ftqIdxAhead(i).valid
1077    ftb_entry_mem.io.raddr(i + IfuRedirectNum)      := ftqIdxAhead(i).bits.value
1078
1079    ftq_pd_mem.io.ren.get(i) := ftqIdxAhead(i).valid
1080    ftq_pd_mem.io.raddr(i)   := ftqIdxAhead(i).bits.value
1081  }
1082  ftq_redirect_mem.io.ren.get(IfuRedirectNum) := Mux(aheadValid, ftqIdxAhead(0).valid, backendRedirect.valid)
1083  ftq_redirect_mem.io.raddr(IfuRedirectNum) := Mux(
1084    aheadValid,
1085    ftqIdxAhead(0).bits.value,
1086    backendRedirect.bits.ftqIdx.value
1087  )
1088  ftb_entry_mem.io.ren.get(IfuRedirectNum) := Mux(aheadValid, ftqIdxAhead(0).valid, backendRedirect.valid)
1089  ftb_entry_mem.io.raddr(IfuRedirectNum) := Mux(
1090    aheadValid,
1091    ftqIdxAhead(0).bits.value,
1092    backendRedirect.bits.ftqIdx.value
1093  )
1094
1095  ftq_pd_mem.io.ren.get(0) := Mux(aheadValid, ftqIdxAhead(0).valid, backendRedirect.valid)
1096  ftq_pd_mem.io.raddr(0)   := Mux(aheadValid, ftqIdxAhead(0).bits.value, backendRedirect.bits.ftqIdx.value)
1097
1098  for (i <- 0 until FtqRedirectAheadNum) {
1099    ftq_redirect_rdata(i) := ftq_redirect_mem.io.rdata(i + IfuRedirectNum)
1100    ftb_redirect_rdata(i) := ftb_entry_mem.io.rdata(i + IfuRedirectNum)
1101
1102    ftq_pd_rdata(i) := ftq_pd_mem.io.rdata(i)
1103  }
1104  val stage3CfiInfo =
1105    Mux(realAhdValid, Mux1H(ftqIdxSelOH, ftq_redirect_rdata), ftq_redirect_mem.io.rdata(IfuRedirectNum))
1106  val stage3PdInfo       = Mux(realAhdValid, Mux1H(ftqIdxSelOH, ftq_pd_rdata), ftq_pd_mem.io.rdata(0))
1107  val backendRedirectCfi = fromBackendRedirect.bits.cfiUpdate
1108  backendRedirectCfi.fromFtqRedirectSram(stage3CfiInfo)
1109  backendRedirectCfi.pd := stage3PdInfo.toPd(fromBackendRedirect.bits.ftqOffset)
1110
1111  val r_ftb_entry = Mux(realAhdValid, Mux1H(ftqIdxSelOH, ftb_redirect_rdata), ftb_entry_mem.io.rdata(IfuRedirectNum))
1112  val r_ftqOffset = fromBackendRedirect.bits.ftqOffset
1113
1114  backendRedirectCfi.br_hit := r_ftb_entry.brIsSaved(r_ftqOffset)
1115  backendRedirectCfi.jr_hit := r_ftb_entry.isJalr && r_ftb_entry.tailSlot.offset === r_ftqOffset
1116  // FIXME: not portable
1117  val sc_disagree = stage3CfiInfo.sc_disagree.getOrElse(VecInit(Seq.fill(numBr)(false.B)))
1118  backendRedirectCfi.sc_hit := backendRedirectCfi.br_hit && Mux(
1119    r_ftb_entry.brSlots(0).offset === r_ftqOffset,
1120    sc_disagree(0),
1121    sc_disagree(1)
1122  )
1123
1124  when(entry_hit_status(fromBackendRedirect.bits.ftqIdx.value) === h_hit) {
1125    backendRedirectCfi.shift := PopCount(r_ftb_entry.getBrMaskByOffset(r_ftqOffset)) +&
1126      (backendRedirectCfi.pd.isBr && !r_ftb_entry.brIsSaved(r_ftqOffset) &&
1127        !r_ftb_entry.newBrCanNotInsert(r_ftqOffset))
1128
1129    backendRedirectCfi.addIntoHist := backendRedirectCfi.pd.isBr && (r_ftb_entry.brIsSaved(r_ftqOffset) ||
1130      !r_ftb_entry.newBrCanNotInsert(r_ftqOffset))
1131  }.otherwise {
1132    backendRedirectCfi.shift       := (backendRedirectCfi.pd.isBr && backendRedirectCfi.taken).asUInt
1133    backendRedirectCfi.addIntoHist := backendRedirectCfi.pd.isBr.asUInt
1134  }
1135
1136  // ***************************************************************************
1137  // **************************** redirect from ifu ****************************
1138  // ***************************************************************************
1139  val fromIfuRedirect = WireInit(0.U.asTypeOf(Valid(new BranchPredictionRedirect)))
1140  fromIfuRedirect.valid              := pdWb.valid && pdWb.bits.misOffset.valid && !backendFlush
1141  fromIfuRedirect.bits.ftqIdx        := pdWb.bits.ftqIdx
1142  fromIfuRedirect.bits.ftqOffset     := pdWb.bits.misOffset.bits
1143  fromIfuRedirect.bits.level         := RedirectLevel.flushAfter
1144  fromIfuRedirect.bits.BTBMissBubble := true.B
1145  fromIfuRedirect.bits.debugIsMemVio := false.B
1146  fromIfuRedirect.bits.debugIsCtrl   := false.B
1147
1148  val ifuRedirectCfiUpdate = fromIfuRedirect.bits.cfiUpdate
1149  ifuRedirectCfiUpdate.pc        := pdWb.bits.pc(pdWb.bits.misOffset.bits)
1150  ifuRedirectCfiUpdate.pd        := pdWb.bits.pd(pdWb.bits.misOffset.bits)
1151  ifuRedirectCfiUpdate.predTaken := cfiIndex_vec(pdWb.bits.ftqIdx.value).valid
1152  ifuRedirectCfiUpdate.target    := pdWb.bits.target
1153  ifuRedirectCfiUpdate.taken     := pdWb.bits.cfiOffset.valid
1154  ifuRedirectCfiUpdate.isMisPred := pdWb.bits.misOffset.valid
1155
1156  val ifuRedirectReg   = RegNextWithEnable(fromIfuRedirect, hasInit = true)
1157  val ifuRedirectToBpu = WireInit(ifuRedirectReg)
1158  ifuFlush := fromIfuRedirect.valid || ifuRedirectToBpu.valid
1159
1160  ftq_redirect_mem.io.ren.get.head := fromIfuRedirect.valid
1161  ftq_redirect_mem.io.raddr.head   := fromIfuRedirect.bits.ftqIdx.value
1162
1163  val toBpuCfi = ifuRedirectToBpu.bits.cfiUpdate
1164  toBpuCfi.fromFtqRedirectSram(ftq_redirect_mem.io.rdata.head)
1165  when(ifuRedirectReg.bits.cfiUpdate.pd.isRet && ifuRedirectReg.bits.cfiUpdate.pd.valid) {
1166    toBpuCfi.target := toBpuCfi.topAddr
1167  }
1168
1169  when(ifuRedirectReg.valid) {
1170    ifuRedirected(ifuRedirectReg.bits.ftqIdx.value) := true.B
1171  }.elsewhen(RegNext(pdWb.valid)) {
1172    // if pdWb and no redirect, set to false
1173    ifuRedirected(last_cycle_bpu_in_ptr.value) := false.B
1174  }
1175
1176  // **********************************************************************
1177  // ***************************** to backend *****************************
1178  // **********************************************************************
1179  // to backend pc mem / target
1180  io.toBackend.pc_mem_wen   := RegNext(last_cycle_bpu_in)
1181  io.toBackend.pc_mem_waddr := RegEnable(last_cycle_bpu_in_idx, last_cycle_bpu_in)
1182  io.toBackend.pc_mem_wdata := RegEnable(bpu_in_bypass_buf_for_ifu, last_cycle_bpu_in)
1183
1184  // the number of delay cycles to the backend is fixed
1185  val newest_entry_en: Bool = RegNext(last_cycle_bpu_in || backendRedirect.valid || ifuRedirectToBpu.valid)
1186  io.toBackend.newest_entry_en     := RegNext(newest_entry_en)
1187  io.toBackend.newest_entry_ptr    := RegEnable(newest_entry_ptr, newest_entry_en)
1188  io.toBackend.newest_entry_target := RegEnable(newest_entry_target, newest_entry_en)
1189
1190  // *********************************************************************
1191  // **************************** wb from exu ****************************
1192  // *********************************************************************
1193
1194  backendRedirect.valid := io.fromBackend.redirect.valid
1195  backendRedirect.bits.connectRedirect(io.fromBackend.redirect.bits)
1196  backendRedirect.bits.BTBMissBubble := false.B
1197
1198  def extractRedirectInfo(wb: Valid[Redirect]) = {
1199    val ftqPtr    = wb.bits.ftqIdx
1200    val ftqOffset = wb.bits.ftqOffset
1201    val taken     = wb.bits.cfiUpdate.taken
1202    val mispred   = wb.bits.cfiUpdate.isMisPred
1203    (wb.valid, ftqPtr, ftqOffset, taken, mispred)
1204  }
1205
1206  // fix mispredict entry
1207  val lastIsMispredict = RegNext(
1208    backendRedirect.valid && backendRedirect.bits.level === RedirectLevel.flushAfter,
1209    init = false.B
1210  )
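      // Note: flushAfter-level redirects are mispredictions (the faulting instruction itself
      // retires), while flushItself-level redirects are replays; the perf counters further
      // down use the same distinction.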
1211
1212  def updateCfiInfo(redirect: Valid[Redirect], isBackend: Boolean = true) = {
1213    val (r_valid, r_ptr, r_offset, r_taken, r_mispred) = extractRedirectInfo(redirect)
1214    val r_idx                                          = r_ptr.value
1215    val cfiIndex_bits_wen                              = r_valid && r_taken && r_offset < cfiIndex_vec(r_idx).bits
1216    val cfiIndex_valid_wen                             = r_valid && r_offset === cfiIndex_vec(r_idx).bits
1217    when(cfiIndex_bits_wen || cfiIndex_valid_wen) {
1218      cfiIndex_vec(r_idx).valid := cfiIndex_bits_wen || cfiIndex_valid_wen && r_taken
1219    }.elsewhen(r_valid && !r_taken && r_offset =/= cfiIndex_vec(r_idx).bits) {
1220      cfiIndex_vec(r_idx).valid := false.B
1221    }
1222    when(cfiIndex_bits_wen) {
1223      cfiIndex_vec(r_idx).bits := r_offset
1224    }
1225    newest_entry_target_modified := true.B
1226    newest_entry_target          := redirect.bits.cfiUpdate.target
1227    newest_entry_ptr_modified    := true.B
1228    newest_entry_ptr             := r_ptr
1229
1230    update_target(r_idx) := redirect.bits.cfiUpdate.target // TODO: remove this
1231    if (isBackend) {
1232      mispredict_vec(r_idx)(r_offset) := r_mispred
1233    }
1234  }
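      // Note on the update policy above: a taken redirect at an earlier offset overrides the
      // recorded cfi index; a redirect at exactly the recorded offset re-resolves its valid
      // bit from r_taken; a not-taken redirect at any other offset invalidates the entry.
      // Mispredict bits are only recorded for backend redirects.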
1235
1236  when(fromBackendRedirect.valid) {
1237    updateCfiInfo(fromBackendRedirect)
1238  }.elsewhen(ifuRedirectToBpu.valid) {
1239    updateCfiInfo(ifuRedirectToBpu, isBackend = false)
1240  }
1241
1242  when(fromBackendRedirect.valid) {
1243    when(fromBackendRedirect.bits.ControlRedirectBubble) {
1244      when(fromBackendRedirect.bits.ControlBTBMissBubble) {
1245        topdown_stage.reasons(TopDownCounters.BTBMissBubble.id)                  := true.B
1246        io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.BTBMissBubble.id) := true.B
1247      }.elsewhen(fromBackendRedirect.bits.TAGEMissBubble) {
1248        topdown_stage.reasons(TopDownCounters.TAGEMissBubble.id)                  := true.B
1249        io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.TAGEMissBubble.id) := true.B
1250      }.elsewhen(fromBackendRedirect.bits.SCMissBubble) {
1251        topdown_stage.reasons(TopDownCounters.SCMissBubble.id)                  := true.B
1252        io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.SCMissBubble.id) := true.B
1253      }.elsewhen(fromBackendRedirect.bits.ITTAGEMissBubble) {
1254        topdown_stage.reasons(TopDownCounters.ITTAGEMissBubble.id)                  := true.B
1255        io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.ITTAGEMissBubble.id) := true.B
1256      }.elsewhen(fromBackendRedirect.bits.RASMissBubble) {
1257        topdown_stage.reasons(TopDownCounters.RASMissBubble.id)                  := true.B
1258        io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.RASMissBubble.id) := true.B
1259      }
1260
1261    }.elsewhen(backendRedirect.bits.MemVioRedirectBubble) {
1262      topdown_stage.reasons(TopDownCounters.MemVioRedirectBubble.id)                  := true.B
1263      io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.MemVioRedirectBubble.id) := true.B
1264    }.otherwise {
1265      topdown_stage.reasons(TopDownCounters.OtherRedirectBubble.id)                  := true.B
1266      io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.OtherRedirectBubble.id) := true.B
1267    }
1268  }.elsewhen(ifuRedirectReg.valid) {
1269    topdown_stage.reasons(TopDownCounters.BTBMissBubble.id)                  := true.B
1270    io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.BTBMissBubble.id) := true.B
1271  }
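      // Note: top-down bubble attribution is prioritized: control-flow related reasons
      // (BTB/TAGE/SC/ITTAGE/RAS miss) are checked first, then memory-violation replays,
      // then other redirects; a standalone IFU redirect is always attributed to a BTB miss.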
1272
1273  io.ControlBTBMissBubble := fromBackendRedirect.bits.ControlBTBMissBubble
1274  io.TAGEMissBubble       := fromBackendRedirect.bits.TAGEMissBubble
1275  io.SCMissBubble         := fromBackendRedirect.bits.SCMissBubble
1276  io.ITTAGEMissBubble     := fromBackendRedirect.bits.ITTAGEMissBubble
1277  io.RASMissBubble        := fromBackendRedirect.bits.RASMissBubble
1278
1279  // ***********************************************************************************
1280  // **************************** flush ptr and state queue ****************************
1281  // ***********************************************************************************
1282
1283  val redirectVec = VecInit(backendRedirect, fromIfuRedirect)
1284
1285  // on a redirect, reset the pointers and the status queues
1286  io.icacheFlush := redirectVec.map(r => r.valid).reduce(_ || _)
1287  XSPerfAccumulate("icacheFlushFromBackend", backendRedirect.valid)
1288  XSPerfAccumulate("icacheFlushFromIFU", fromIfuRedirect.valid)
1289  when(redirectVec.map(r => r.valid).reduce(_ || _)) {
1290    val r                          = PriorityMux(redirectVec.map(r => r.valid -> r.bits))
1291    val notIfu                     = redirectVec.dropRight(1).map(r => r.valid).reduce(_ || _)
1292    val (idx, offset, flushItSelf) = (r.ftqIdx, r.ftqOffset, RedirectLevel.flushItself(r.level))
1293    val next                       = idx + 1.U
1294    bpuPtr := next
1295    copied_bpu_ptr.map(_ := next)
1296    ifuPtr_write      := next
1297    ifuWbPtr_write    := next
1298    ifuPtrPlus1_write := idx + 2.U
1299    ifuPtrPlus2_write := idx + 3.U
1300    pfPtr_write       := next
1301    pfPtrPlus1_write  := idx + 2.U
1302  }
1303  when(RegNext(redirectVec.map(r => r.valid).reduce(_ || _))) {
1304    val r                          = PriorityMux(redirectVec.map(r => r.valid -> r.bits))
1305    val notIfu                     = redirectVec.dropRight(1).map(r => r.valid).reduce(_ || _)
1306    val (idx, offset, flushItSelf) = (r.ftqIdx, r.ftqOffset, RedirectLevel.flushItself(r.level))
1307    when(RegNext(notIfu)) {
1308      commitStateQueueEnable(RegNext(idx.value)) := true.B
1309      commitStateQueueNext(RegNext(idx.value)).zipWithIndex.foreach { case (s, i) =>
1310        when(i.U > RegNext(offset)) {
1311          s := c_empty
1312        }
1313        when(i.U === RegNext(offset) && RegNext(flushItSelf)) {
1314          s := c_flushed
1315        }
1316      }
1317    }
1318  }
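      // Note: redirect recovery is split across two cycles. The BPU/IFU/prefetch pointers
      // are rewound to the entry after the redirected one (idx + 1) right away; one cycle
      // later, for backend redirects, the commit state queue of the redirected entry is
      // cleaned: slots after the redirect offset become c_empty, and the offset itself is
      // marked c_flushed when the redirect also flushes the faulting instruction.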
1319
1320  // only the valid bit is actually needed
1321  io.toIfu.redirect.bits    := backendRedirect.bits
1322  io.toIfu.redirect.valid   := stage2Flush
1323  io.toIfu.topdown_redirect := fromBackendRedirect
1324
1325  // commit
1326  for (c <- io.fromBackend.rob_commits) {
1327    when(c.valid) {
1328      commitStateQueueEnable(c.bits.ftqIdx.value)                 := true.B
1329      commitStateQueueNext(c.bits.ftqIdx.value)(c.bits.ftqOffset) := c_committed
1330      // TODO: remove this
1331      // For instruction fusions, we also update the next instruction
1332      when(c.bits.commitType === 4.U) {
1333        commitStateQueueNext(c.bits.ftqIdx.value)(c.bits.ftqOffset + 1.U) := c_committed
1334      }.elsewhen(c.bits.commitType === 5.U) {
1335        commitStateQueueNext(c.bits.ftqIdx.value)(c.bits.ftqOffset + 2.U) := c_committed
1336      }.elsewhen(c.bits.commitType === 6.U) {
1337        val index = (c.bits.ftqIdx + 1.U).value
1338        commitStateQueueEnable(index)  := true.B
1339        commitStateQueueNext(index)(0) := c_committed
1340      }.elsewhen(c.bits.commitType === 7.U) {
1341        val index = (c.bits.ftqIdx + 1.U).value
1342        commitStateQueueEnable(index)  := true.B
1343        commitStateQueueNext(index)(1) := c_committed
1344      }
1345    }
1346  }
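      // Note: the commitType cases above appear to handle fused instruction pairs whose
      // second half lies one or two slots later in the same entry, or in slot 0 / slot 1 of
      // the next FTQ entry; the encoding of values 4 to 7 comes from the backend commit type.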
1347
1348  // ****************************************************************
1349  // **************************** to bpu ****************************
1350  // ****************************************************************
1351
1352  io.toBpu.redirctFromIFU := ifuRedirectToBpu.valid
1353  io.toBpu.redirect       := Mux(fromBackendRedirect.valid, fromBackendRedirect, ifuRedirectToBpu)
1354  val dummy_s1_pred_cycle_vec = VecInit(List.tabulate(FtqSize)(_ => 0.U(64.W)))
1355  val redirect_latency =
1356    GTimer() - pred_s1_cycle.getOrElse(dummy_s1_pred_cycle_vec)(io.toBpu.redirect.bits.ftqIdx.value) + 1.U
1357  XSPerfHistogram("backend_redirect_latency", redirect_latency, fromBackendRedirect.valid, 0, 60, 1)
1358  XSPerfHistogram(
1359    "ifu_redirect_latency",
1360    redirect_latency,
1361    !fromBackendRedirect.valid && ifuRedirectToBpu.valid,
1362    0,
1363    60,
1364    1
1365  )
1366
1367  XSError(
1368    io.toBpu.redirect.valid && isBefore(io.toBpu.redirect.bits.ftqIdx, commPtr),
1369    "Ftq received a redirect after its commit, check backend or replay"
1370  )
1371
1372  val may_have_stall_from_bpu = Wire(Bool())
1373  val bpu_ftb_update_stall    = RegInit(0.U(2.W)) // 2-cycle stall, so we need 3 states
1374  may_have_stall_from_bpu := bpu_ftb_update_stall =/= 0.U
1375
1376  val validInstructions     = commitStateQueueReg(commPtr.value).map(s => s === c_toCommit || s === c_committed)
1377  val lastInstructionStatus = PriorityMux(validInstructions.reverse.zip(commitStateQueueReg(commPtr.value).reverse))
1378  val firstInstructionFlushed = commitStateQueueReg(commPtr.value)(0) === c_flushed ||
1379    commitStateQueueReg(commPtr.value)(0) === c_empty && commitStateQueueReg(commPtr.value)(1) === c_flushed
1380  canCommit := commPtr =/= ifuWbPtr && !may_have_stall_from_bpu &&
1381    (isAfter(robCommPtr, commPtr) ||
1382      validInstructions.reduce(_ || _) && lastInstructionStatus === c_committed)
1383  val canMoveCommPtr = commPtr =/= ifuWbPtr && !may_have_stall_from_bpu &&
1384    (isAfter(robCommPtr, commPtr) ||
1385      validInstructions.reduce(_ || _) && lastInstructionStatus === c_committed ||
1386      firstInstructionFlushed)
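      // Note: an entry may commit once the IFU has written it back (commPtr =/= ifuWbPtr),
      // no FTB-update stall is pending, and either the ROB commit pointer has already passed
      // this entry or its youngest valid slot has reached c_committed; commPtr may also move
      // past an entry whose leading instruction was flushed.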
1387
1388  when(io.fromBackend.rob_commits.map(_.valid).reduce(_ | _)) {
1389    robCommPtr_write := ParallelPriorityMux(
1390      io.fromBackend.rob_commits.map(_.valid).reverse,
1391      io.fromBackend.rob_commits.map(_.bits.ftqIdx).reverse
1392    )
1393  }.elsewhen(isAfter(commPtr, robCommPtr)) {
1394    robCommPtr_write := commPtr
1395  }.otherwise {
1396    robCommPtr_write := robCommPtr
1397  }
1398
1399  /**
1400    *************************************************************************************
1401    * MMIO instruction fetch is allowed only if MMIO is the oldest instruction.
1402    *************************************************************************************
1403    */
1404  val mmioReadPtr = io.mmioCommitRead.mmioFtqPtr
1405  val mmioLastCommit = isAfter(commPtr, mmioReadPtr) ||
1406    commPtr === mmioReadPtr && validInstructions.reduce(_ || _) && lastInstructionStatus === c_committed
1407  io.mmioCommitRead.mmioLastCommit := RegNext(mmioLastCommit)
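      // Note: the MMIO fetch is released only once it is the oldest work in flight: commPtr
      // has moved past the MMIO entry, or points at it while the youngest valid slot of that
      // entry has already committed.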
1408
1409  // commit reads
1410  val commit_pc_bundle = RegNext(ftq_pc_mem.io.commPtr_rdata)
1411  val commit_target =
1412    Mux(
1413      RegNext(commPtr === newest_entry_ptr),
1414      RegEnable(newest_entry_target, newest_entry_target_modified),
1415      RegNext(ftq_pc_mem.io.commPtrPlus1_rdata.startAddr)
1416    )
1417  ftq_pd_mem.io.ren.get.last := canCommit
1418  ftq_pd_mem.io.raddr.last   := commPtr.value
1419  val commit_pd = ftq_pd_mem.io.rdata.last
1420  ftq_redirect_mem.io.ren.get.last := canCommit
1421  ftq_redirect_mem.io.raddr.last   := commPtr.value
1422  val commit_spec_meta = ftq_redirect_mem.io.rdata.last
1423  ftq_meta_1r_sram.io.ren(0)   := canCommit
1424  ftq_meta_1r_sram.io.raddr(0) := commPtr.value
1425  val commit_meta      = ftq_meta_1r_sram.io.rdata(0).meta
1426  val commit_ftb_entry = ftq_meta_1r_sram.io.rdata(0).ftb_entry
1427
1428  // reading the mems and SRAMs takes one cycle
1429  val do_commit_ptr = RegEnable(commPtr, canCommit)
1430  val do_commit     = RegNext(canCommit, init = false.B)
1431  when(canMoveCommPtr) {
1432    commPtr_write      := commPtrPlus1
1433    commPtrPlus1_write := commPtrPlus1 + 1.U
1434  }
1435  val commit_state   = RegEnable(commitStateQueueReg(commPtr.value), canCommit)
1436  val can_commit_cfi = WireInit(cfiIndex_vec(commPtr.value))
1437  val do_commit_cfi  = WireInit(cfiIndex_vec(do_commit_ptr.value))
1438  //
1439  // when (commitStateQueue(commPtr.value)(can_commit_cfi.bits) =/= c_commited) {
1440  //  can_commit_cfi.valid := false.B
1441  // }
1442  val commit_cfi = RegEnable(can_commit_cfi, canCommit)
1443  val debug_cfi  = commitStateQueueReg(do_commit_ptr.value)(do_commit_cfi.bits) =/= c_committed && do_commit_cfi.valid
1444
1445  val commit_mispredict: Vec[Bool] =
1446    VecInit((RegEnable(mispredict_vec(commPtr.value), canCommit) zip commit_state).map {
1447      case (mis, state) => mis && state === c_committed
1448    })
1449  val commit_instCommitted: Vec[Bool] = VecInit(commit_state.map(_ === c_committed)) // [PredictWidth]
1450  val can_commit_hit     = entry_hit_status(commPtr.value)
1451  val commit_hit         = RegEnable(can_commit_hit, canCommit)
1452  val diff_commit_target = RegEnable(update_target(commPtr.value), canCommit) // TODO: remove this
1453  val commit_stage       = RegEnable(pred_stage(commPtr.value), canCommit)
1454  val commit_valid       = commit_hit === h_hit || commit_cfi.valid           // hit or taken
1455
1456  val to_bpu_hit = can_commit_hit === h_hit || can_commit_hit === h_false_hit
1457  switch(bpu_ftb_update_stall) {
1458    is(0.U) {
1459      when(can_commit_cfi.valid && !to_bpu_hit && canCommit) {
1460        bpu_ftb_update_stall := 2.U // 2-cycle stall
1461      }
1462    }
1463    is(2.U) {
1464      bpu_ftb_update_stall := 1.U
1465    }
1466    is(1.U) {
1467      bpu_ftb_update_stall := 0.U
1468    }
1469    is(3.U) {
1470      // XSError below
1471    }
1472  }
1473  XSError(bpu_ftb_update_stall === 3.U, "bpu_ftb_update_stall should be 0, 1 or 2")
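      // Sketch of the stall counter: when a committed taken cfi missed in the FTB, the update
      // path is stalled for two cycles (presumably to cover regenerating the FTB entry), so
      // the counter is loaded with 2 and counts down 2 -> 1 -> 0; the value 3 is unreachable.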
1474
1475  // TODO: remove this
1476  XSError(do_commit && diff_commit_target =/= commit_target, "\ncommit target should be the same as update target\n")
1477
1478  // update latency stats
1479  val update_latency = GTimer() - pred_s1_cycle.getOrElse(dummy_s1_pred_cycle_vec)(do_commit_ptr.value) + 1.U
1480  XSPerfHistogram("bpu_update_latency", update_latency, io.toBpu.update.valid, 0, 64, 2)
1481
1482  io.toBpu.update       := DontCare
1483  io.toBpu.update.valid := commit_valid && do_commit
1484  val update = io.toBpu.update.bits
1485  update.false_hit   := commit_hit === h_false_hit
1486  update.pc          := commit_pc_bundle.startAddr
1487  update.meta        := commit_meta
1488  update.cfi_idx     := commit_cfi
1489  update.full_target := commit_target
1490  update.from_stage  := commit_stage
1491  update.spec_info   := commit_spec_meta
1492  XSError(commit_valid && do_commit && debug_cfi, "\ncommit cfi should not be in a non-committed state\n")
1493
1494  val commit_real_hit  = commit_hit === h_hit
1495  val update_ftb_entry = update.ftb_entry
1496
1497  val ftbEntryGen = Module(new FTBEntryGen).io
1498  ftbEntryGen.start_addr     := commit_pc_bundle.startAddr
1499  ftbEntryGen.old_entry      := commit_ftb_entry
1500  ftbEntryGen.pd             := commit_pd
1501  ftbEntryGen.cfiIndex       := commit_cfi
1502  ftbEntryGen.target         := commit_target
1503  ftbEntryGen.hit            := commit_real_hit
1504  ftbEntryGen.mispredict_vec := commit_mispredict
1505
1506  update_ftb_entry         := ftbEntryGen.new_entry
1507  update.new_br_insert_pos := ftbEntryGen.new_br_insert_pos
1508  update.mispred_mask      := ftbEntryGen.mispred_mask
1509  update.old_entry         := ftbEntryGen.is_old_entry
1510  update.pred_hit          := commit_hit === h_hit || commit_hit === h_false_hit
1511  update.br_taken_mask     := ftbEntryGen.taken_mask
1512  update.br_committed := (ftbEntryGen.new_entry.brValids zip ftbEntryGen.new_entry.brOffset) map {
1513    case (valid, offset) => valid && commit_instCommitted(offset)
1514  }
1515  update.jmp_taken := ftbEntryGen.jmp_taken
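      // Note: br_committed marks, per branch slot of the regenerated FTB entry, whether that
      // branch actually committed in this block, presumably so the trainers can skip slots
      // that were flushed before retiring.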
1516
1517  // update.full_pred.fromFtbEntry(ftbEntryGen.new_entry, update.pc)
1518  // update.full_pred.jalr_target := commit_target
1519  // update.full_pred.hit := true.B
1520  // when (update.full_pred.is_jalr) {
1521  //   update.full_pred.targets.last := commit_target
1522  // }
1523
1524  // ******************************************************************************
1525  // **************************** commit perf counters ****************************
1526  // ******************************************************************************
1527
1528  val commit_inst_mask        = VecInit(commit_state.map(c => c === c_committed && do_commit)).asUInt
1529  val commit_mispred_mask     = commit_mispredict.asUInt
1530  val commit_not_mispred_mask = ~commit_mispred_mask
1531
1532  val commit_br_mask  = commit_pd.brMask.asUInt
1533  val commit_jmp_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.jmpInfo.valid.asTypeOf(UInt(1.W)))
1534  val commit_cfi_mask = commit_br_mask | commit_jmp_mask
1535
1536  val mbpInstrs = commit_inst_mask & commit_cfi_mask
1537
1538  val mbpRights = mbpInstrs & commit_not_mispred_mask
1539  val mbpWrongs = mbpInstrs & commit_mispred_mask
1540
1541  io.bpuInfo.bpRight := PopCount(mbpRights)
1542  io.bpuInfo.bpWrong := PopCount(mbpWrongs)
1543
1544  val hartId           = p(XSCoreParamsKey).HartId
1545  val isWriteFTQTable  = Constantin.createRecord(s"isWriteFTQTable$hartId")
1546  val ftqBranchTraceDB = ChiselDB.createTable(s"FTQTable$hartId", new FtqDebugBundle)
1547  // Cfi Info
1548  for (i <- 0 until PredictWidth) {
1549    val pc      = commit_pc_bundle.startAddr + (i * instBytes).U
1550    val v       = commit_state(i) === c_committed
1551    val isBr    = commit_pd.brMask(i)
1552    val isJmp   = commit_pd.jmpInfo.valid && commit_pd.jmpOffset === i.U
1553    val isCfi   = isBr || isJmp
1554    val isTaken = commit_cfi.valid && commit_cfi.bits === i.U
1555    val misPred = commit_mispredict(i)
1556    // val ghist = commit_spec_meta.ghist.predHist
1557    val histPtr   = commit_spec_meta.histPtr
1558    val predCycle = commit_meta(63, 0)
1559    val target    = commit_target
1560
1561    val brIdx = OHToUInt(Reverse(Cat(update_ftb_entry.brValids.zip(update_ftb_entry.brOffset).map { case (v, offset) =>
1562      v && offset === i.U
1563    })))
1564    val inFtbEntry = update_ftb_entry.brValids.zip(update_ftb_entry.brOffset).map { case (v, offset) =>
1565      v && offset === i.U
1566    }.reduce(_ || _)
1567    val addIntoHist =
1568      ((commit_hit === h_hit) && inFtbEntry) || (!(commit_hit === h_hit) && i.U === commit_cfi.bits && isBr && commit_cfi.valid)
1569    XSDebug(
1570      v && do_commit && isCfi,
1571      p"cfi_update: isBr(${isBr}) pc(${Hexadecimal(pc)}) " +
1572        p"taken(${isTaken}) mispred(${misPred}) cycle($predCycle) hist(${histPtr.value}) " +
1573        p"startAddr(${Hexadecimal(commit_pc_bundle.startAddr)}) AddIntoHist(${addIntoHist}) " +
1574        p"brInEntry(${inFtbEntry}) brIdx(${brIdx}) target(${Hexadecimal(target)})\n"
1575    )
1576
1577    val logbundle = Wire(new FtqDebugBundle)
1578    logbundle.pc        := pc
1579    logbundle.target    := target
1580    logbundle.isBr      := isBr
1581    logbundle.isJmp     := isJmp
1582    logbundle.isCall    := isJmp && commit_pd.hasCall
1583    logbundle.isRet     := isJmp && commit_pd.hasRet
1584    logbundle.misPred   := misPred
1585    logbundle.isTaken   := isTaken
1586    logbundle.predStage := commit_stage
1587
1588    ftqBranchTraceDB.log(
1589      data = logbundle /* hardware of type T */,
1590      en = isWriteFTQTable.orR && v && do_commit && isCfi,
1591      site = s"FTQ$hartId",
1592      clock = clock,
1593      reset = reset
1594    )
1595  }
1596
1597  val enq           = io.fromBpu.resp
1598  val perf_redirect = backendRedirect
1599
1600  XSPerfAccumulate("entry", validEntries)
1601  XSPerfAccumulate("bpu_to_ftq_stall", enq.valid && !enq.ready)
1602  XSPerfAccumulate("mispredictRedirect", perf_redirect.valid && RedirectLevel.flushAfter === perf_redirect.bits.level)
1603  XSPerfAccumulate("replayRedirect", perf_redirect.valid && RedirectLevel.flushItself(perf_redirect.bits.level))
1604  XSPerfAccumulate("predecodeRedirect", fromIfuRedirect.valid)
1605
1606  XSPerfAccumulate("to_ifu_bubble", io.toIfu.req.ready && !io.toIfu.req.valid)
1607
1608  XSPerfAccumulate("to_ifu_stall", io.toIfu.req.valid && !io.toIfu.req.ready)
1609  XSPerfAccumulate("from_bpu_real_bubble", !enq.valid && enq.ready && allowBpuIn)
1610  XSPerfAccumulate("bpu_to_ifu_bubble", bpuPtr === ifuPtr)
1611  XSPerfAccumulate(
1612    "bpu_to_ifu_bubble_when_ftq_full",
1613    (bpuPtr === ifuPtr) && isFull(bpuPtr, commPtr) && io.toIfu.req.ready
1614  )
1615
1616  XSPerfAccumulate("redirectAhead_ValidNum", ftqIdxAhead.map(_.valid).reduce(_ | _))
1617  XSPerfAccumulate("fromBackendRedirect_ValidNum", io.fromBackend.redirect.valid)
1618  XSPerfAccumulate("toBpuRedirect_ValidNum", io.toBpu.redirect.valid)
1619
1620  val from_bpu = io.fromBpu.resp.bits
1621  val to_ifu   = io.toIfu.req.bits
1622
1623  XSPerfHistogram("commit_num_inst", PopCount(commit_inst_mask), do_commit, 0, PredictWidth + 1, 1)
1624
1625  val commit_jal_mask  = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasJal.asTypeOf(UInt(1.W)))
1626  val commit_jalr_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasJalr.asTypeOf(UInt(1.W)))
1627  val commit_call_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasCall.asTypeOf(UInt(1.W)))
1628  val commit_ret_mask  = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasRet.asTypeOf(UInt(1.W)))
1629
1630  val mbpBRights = mbpRights & commit_br_mask
1631  val mbpJRights = mbpRights & commit_jal_mask
1632  val mbpIRights = mbpRights & commit_jalr_mask
1633  val mbpCRights = mbpRights & commit_call_mask
1634  val mbpRRights = mbpRights & commit_ret_mask
1635
1636  val mbpBWrongs = mbpWrongs & commit_br_mask
1637  val mbpJWrongs = mbpWrongs & commit_jal_mask
1638  val mbpIWrongs = mbpWrongs & commit_jalr_mask
1639  val mbpCWrongs = mbpWrongs & commit_call_mask
1640  val mbpRWrongs = mbpWrongs & commit_ret_mask
1641
1642  val commit_pred_stage = RegNext(pred_stage(commPtr.value))
1643
1644  def pred_stage_map(src: UInt, name: String) =
1645    (0 until numBpStages).map(i =>
1646      f"${name}_stage_${i + 1}" -> PopCount(src.asBools.map(_ && commit_pred_stage === BP_STAGES(i)))
1647    ).foldLeft(Map[String, UInt]())(_ + _)
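      // Note: pred_stage_map splits a commit-time mask by the BPU stage that produced the
      // committed prediction, yielding one named counter per stage.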
1648
1649  val mispred_stage_map      = pred_stage_map(mbpWrongs, "mispredict")
1650  val br_mispred_stage_map   = pred_stage_map(mbpBWrongs, "br_mispredict")
1651  val jalr_mispred_stage_map = pred_stage_map(mbpIWrongs, "jalr_mispredict")
1652  val correct_stage_map      = pred_stage_map(mbpRights, "correct")
1653  val br_correct_stage_map   = pred_stage_map(mbpBRights, "br_correct")
1654  val jalr_correct_stage_map = pred_stage_map(mbpIRights, "jalr_correct")
1655
1656  val update_valid = io.toBpu.update.valid
1657  def u(cond: Bool) = update_valid && cond
1658  val ftb_false_hit = u(update.false_hit)
1659  // assert(!ftb_false_hit)
1660  val ftb_hit = u(commit_hit === h_hit)
1661
1662  val ftb_new_entry                = u(ftbEntryGen.is_init_entry)
1663  val ftb_new_entry_only_br        = ftb_new_entry && !update_ftb_entry.jmpValid
1664  val ftb_new_entry_only_jmp       = ftb_new_entry && !update_ftb_entry.brValids(0)
1665  val ftb_new_entry_has_br_and_jmp = ftb_new_entry && update_ftb_entry.brValids(0) && update_ftb_entry.jmpValid
1666
1667  val ftb_old_entry = u(ftbEntryGen.is_old_entry)
1668
1669  val ftb_modified_entry =
1670    u(ftbEntryGen.is_new_br || ftbEntryGen.is_jalr_target_modified || ftbEntryGen.is_strong_bias_modified)
1671  val ftb_modified_entry_new_br               = u(ftbEntryGen.is_new_br)
1672  val ftb_modified_entry_ifu_redirected       = u(ifuRedirected(do_commit_ptr.value))
1673  val ftb_modified_entry_jalr_target_modified = u(ftbEntryGen.is_jalr_target_modified)
1674  val ftb_modified_entry_br_full              = ftb_modified_entry && ftbEntryGen.is_br_full
1675  val ftb_modified_entry_strong_bias          = ftb_modified_entry && ftbEntryGen.is_strong_bias_modified
1676
1677  def getFtbEntryLen(pc: UInt, entry: FTBEntry) = (entry.getFallThrough(pc) - pc) >> instOffsetBits
1678  val gen_ftb_entry_len = getFtbEntryLen(update.pc, ftbEntryGen.new_entry)
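      // Note: the entry-length histograms below measure, in instructions, the distance from
      // an FTB entry's start address to its fall-through address, i.e. how much of the fetch
      // block the entry covers.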
1679  XSPerfHistogram("ftb_init_entry_len", gen_ftb_entry_len, ftb_new_entry, 0, PredictWidth + 1, 1)
1680  XSPerfHistogram("ftb_modified_entry_len", gen_ftb_entry_len, ftb_modified_entry, 0, PredictWidth + 1, 1)
1681  val s3_ftb_entry_len = getFtbEntryLen(from_bpu.s3.pc(0), from_bpu.last_stage_ftb_entry)
1682  XSPerfHistogram("s3_ftb_entry_len", s3_ftb_entry_len, from_bpu.s3.valid(0), 0, PredictWidth + 1, 1)
1683
1684  XSPerfHistogram("ftq_has_entry", validEntries, true.B, 0, FtqSize + 1, 1)
1685
1686  val perfCountsMap = Map(
1687    "BpInstr"                        -> PopCount(mbpInstrs),
1688    "BpBInstr"                       -> PopCount(mbpBRights | mbpBWrongs),
1689    "BpRight"                        -> PopCount(mbpRights),
1690    "BpWrong"                        -> PopCount(mbpWrongs),
1691    "BpBRight"                       -> PopCount(mbpBRights),
1692    "BpBWrong"                       -> PopCount(mbpBWrongs),
1693    "BpJRight"                       -> PopCount(mbpJRights),
1694    "BpJWrong"                       -> PopCount(mbpJWrongs),
1695    "BpIRight"                       -> PopCount(mbpIRights),
1696    "BpIWrong"                       -> PopCount(mbpIWrongs),
1697    "BpCRight"                       -> PopCount(mbpCRights),
1698    "BpCWrong"                       -> PopCount(mbpCWrongs),
1699    "BpRRight"                       -> PopCount(mbpRRights),
1700    "BpRWrong"                       -> PopCount(mbpRWrongs),
1701    "ftb_false_hit"                  -> PopCount(ftb_false_hit),
1702    "ftb_hit"                        -> PopCount(ftb_hit),
1703    "ftb_new_entry"                  -> PopCount(ftb_new_entry),
1704    "ftb_new_entry_only_br"          -> PopCount(ftb_new_entry_only_br),
1705    "ftb_new_entry_only_jmp"         -> PopCount(ftb_new_entry_only_jmp),
1706    "ftb_new_entry_has_br_and_jmp"   -> PopCount(ftb_new_entry_has_br_and_jmp),
1707    "ftb_old_entry"                  -> PopCount(ftb_old_entry),
1708    "ftb_modified_entry"             -> PopCount(ftb_modified_entry),
1709    "ftb_modified_entry_new_br"      -> PopCount(ftb_modified_entry_new_br),
1710    "ftb_jalr_target_modified"       -> PopCount(ftb_modified_entry_jalr_target_modified),
1711    "ftb_modified_entry_br_full"     -> PopCount(ftb_modified_entry_br_full),
1712    "ftb_modified_entry_strong_bias" -> PopCount(ftb_modified_entry_strong_bias)
1713  ) ++ mispred_stage_map ++ br_mispred_stage_map ++ jalr_mispred_stage_map ++
1714    correct_stage_map ++ br_correct_stage_map ++ jalr_correct_stage_map
1715
1716  for ((key, value) <- perfCountsMap) {
1717    XSPerfAccumulate(key, value)
1718  }
1719
1720  // --------------------------- Debug --------------------------------
1721  // XSDebug(enq_fire, p"enq! " + io.fromBpu.resp.bits.toPrintable)
1722  XSDebug(io.toIfu.req.fire, p"fire to ifu " + io.toIfu.req.bits.toPrintable)
1723  XSDebug(do_commit, p"deq! [ptr] $do_commit_ptr\n")
1724  XSDebug(true.B, p"[bpuPtr] $bpuPtr, [ifuPtr] $ifuPtr, [ifuWbPtr] $ifuWbPtr, [commPtr] $commPtr\n")
1725  XSDebug(
1726    true.B,
1727    p"[in] v:${io.fromBpu.resp.valid} r:${io.fromBpu.resp.ready} " +
1728      p"[out] v:${io.toIfu.req.valid} r:${io.toIfu.req.ready}\n"
1729  )
1730  XSDebug(do_commit, p"[deq info] cfiIndex: $commit_cfi, $commit_pc_bundle, target: ${Hexadecimal(commit_target)}\n")
1731
1732  //   def ubtbCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1733  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1734  //       case (((valid, pd), ans), taken) =>
1735  //       Mux(valid && pd.isBr,
1736  //         isWrong ^ Mux(ans.hit.asBool,
1737  //           Mux(ans.taken.asBool, taken && ans.target === commitEntry.target,
1738  //           !taken),
1739  //         !taken),
1740  //       false.B)
1741  //     }
1742  //   }
1743
1744  //   def btbCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1745  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1746  //       case (((valid, pd), ans), taken) =>
1747  //       Mux(valid && pd.isBr,
1748  //         isWrong ^ Mux(ans.hit.asBool,
1749  //           Mux(ans.taken.asBool, taken && ans.target === commitEntry.target,
1750  //           !taken),
1751  //         !taken),
1752  //       false.B)
1753  //     }
1754  //   }
1755
1756  //   def tageCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1757  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1758  //       case (((valid, pd), ans), taken) =>
1759  //       Mux(valid && pd.isBr,
1760  //         isWrong ^ (ans.taken.asBool === taken),
1761  //       false.B)
1762  //     }
1763  //   }
1764
1765  //   def loopCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1766  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1767  //       case (((valid, pd), ans), taken) =>
1768  //       Mux(valid && (pd.isBr) && ans.hit.asBool,
1769  //         isWrong ^ (!taken),
1770  //           false.B)
1771  //     }
1772  //   }
1773
1774  //   def rasCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1775  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1776  //       case (((valid, pd), ans), taken) =>
1777  //       Mux(valid && pd.isRet.asBool /*&& taken*/ && ans.hit.asBool,
1778  //         isWrong ^ (ans.target === commitEntry.target),
1779  //           false.B)
1780  //     }
1781  //   }
1782
1783  //   val ubtbRights = ubtbCheck(commitEntry, commitEntry.metas.map(_.ubtbAns), false.B)
1784  //   val ubtbWrongs = ubtbCheck(commitEntry, commitEntry.metas.map(_.ubtbAns), true.B)
1785  //   // btb and ubtb pred jal and jalr as well
1786  //   val btbRights = btbCheck(commitEntry, commitEntry.metas.map(_.btbAns), false.B)
1787  //   val btbWrongs = btbCheck(commitEntry, commitEntry.metas.map(_.btbAns), true.B)
1788  //   val tageRights = tageCheck(commitEntry, commitEntry.metas.map(_.tageAns), false.B)
1789  //   val tageWrongs = tageCheck(commitEntry, commitEntry.metas.map(_.tageAns), true.B)
1790
1791  //   val loopRights = loopCheck(commitEntry, commitEntry.metas.map(_.loopAns), false.B)
1792  //   val loopWrongs = loopCheck(commitEntry, commitEntry.metas.map(_.loopAns), true.B)
1793
1794  //   val rasRights = rasCheck(commitEntry, commitEntry.metas.map(_.rasAns), false.B)
1795  //   val rasWrongs = rasCheck(commitEntry, commitEntry.metas.map(_.rasAns), true.B)
1796
1797  val perfEvents = Seq(
1798    ("bpu_s2_redirect        ", bpu_s2_redirect),
1799    ("bpu_s3_redirect        ", bpu_s3_redirect),
1800    ("bpu_to_ftq_stall       ", enq.valid && ~enq.ready),
1801    ("mispredictRedirect     ", perf_redirect.valid && RedirectLevel.flushAfter === perf_redirect.bits.level),
1802    ("replayRedirect         ", perf_redirect.valid && RedirectLevel.flushItself(perf_redirect.bits.level)),
1803    ("predecodeRedirect      ", fromIfuRedirect.valid),
1804    ("to_ifu_bubble          ", io.toIfu.req.ready && !io.toIfu.req.valid),
1805    ("from_bpu_real_bubble   ", !enq.valid && enq.ready && allowBpuIn),
1806    ("BpInstr                ", PopCount(mbpInstrs)),
1807    ("BpBInstr               ", PopCount(mbpBRights | mbpBWrongs)),
1808    ("BpRight                ", PopCount(mbpRights)),
1809    ("BpWrong                ", PopCount(mbpWrongs)),
1810    ("BpBRight               ", PopCount(mbpBRights)),
1811    ("BpBWrong               ", PopCount(mbpBWrongs)),
1812    ("BpJRight               ", PopCount(mbpJRights)),
1813    ("BpJWrong               ", PopCount(mbpJWrongs)),
1814    ("BpIRight               ", PopCount(mbpIRights)),
1815    ("BpIWrong               ", PopCount(mbpIWrongs)),
1816    ("BpCRight               ", PopCount(mbpCRights)),
1817    ("BpCWrong               ", PopCount(mbpCWrongs)),
1818    ("BpRRight               ", PopCount(mbpRRights)),
1819    ("BpRWrong               ", PopCount(mbpRWrongs)),
1820    ("ftb_false_hit          ", PopCount(ftb_false_hit)),
1821    ("ftb_hit                ", PopCount(ftb_hit))
1822  )
1823  generatePerfEvent()
1824}
1825