xref: /XiangShan/src/main/scala/xiangshan/frontend/NewFtq.scala (revision 42b75a597e916f6a6887cb8bc626483d0d2645dd)
/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
*
*
* Acknowledgement
*
* This implementation is inspired by several key papers:
* [1] Glenn Reinman, Todd Austin, and Brad Calder. "[A scalable front-end architecture for fast instruction delivery.]
* (https://doi.org/10.1109/ISCA.1999.765954)" 26th International Symposium on Computer Architecture (ISCA). 1999.
*
***************************************************************************************/

package xiangshan.frontend

import chisel3._
import chisel3.util._
import org.chipsalliance.cde.config.Parameters
import utility._
import utility.ChiselDB
import utility.mbist.MbistPipeline
import utility.sram.SRAMTemplate
import utils._
import xiangshan._
import xiangshan.backend.CtrlToFtqIO
import xiangshan.frontend.icache._

class FtqDebugBundle extends Bundle {
  val pc        = UInt(39.W)
  val target    = UInt(39.W)
  val isBr      = Bool()
  val isJmp     = Bool()
  val isCall    = Bool()
  val isRet     = Bool()
  val misPred   = Bool()
  val isTaken   = Bool()
  val predStage = UInt(2.W)
}

class FtqPtr(entries: Int) extends CircularQueuePtr[FtqPtr](
      entries
    ) {
  def this()(implicit p: Parameters) = this(p(XSCoreParamsKey).FtqSize)
}

object FtqPtr {
  def apply(f: Bool, v: UInt)(implicit p: Parameters): FtqPtr = {
    val ptr = Wire(new FtqPtr)
    ptr.flag  := f
    ptr.value := v
    ptr
  }
  def inverse(ptr: FtqPtr)(implicit p: Parameters): FtqPtr =
    apply(!ptr.flag, ptr.value)
}
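// Note: FtqPtr is a wrap-around index into the FTQ. As with the other CircularQueuePtr
// users in this codebase, the extra flag bit distinguishes a full queue from an empty one
// when two pointers hold the same value; FtqPtr.inverse above flips only that flag.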

class FtqNRSRAM[T <: Data](gen: T, numRead: Int)(implicit p: Parameters) extends XSModule {

  val io = IO(new Bundle() {
    val raddr = Input(Vec(numRead, UInt(log2Up(FtqSize).W)))
    val ren   = Input(Vec(numRead, Bool()))
    val rdata = Output(Vec(numRead, gen))
    val waddr = Input(UInt(log2Up(FtqSize).W))
    val wen   = Input(Bool())
    val wdata = Input(gen)
  })

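  // Note: the loop below replicates the data array once per read port (numRead independent
  // single-read SRAMs). Each read port is served by its own copy, and every write is
  // broadcast to all copies so they stay consistent.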
  for (i <- 0 until numRead) {
    val sram = Module(new SRAMTemplate(gen, FtqSize, withClockGate = true, hasMbist = hasMbist))
    sram.io.r.req.valid       := io.ren(i)
    sram.io.r.req.bits.setIdx := io.raddr(i)
    io.rdata(i)               := sram.io.r.resp.data(0)
    sram.io.w.req.valid       := io.wen
    sram.io.w.req.bits.setIdx := io.waddr
    sram.io.w.req.bits.data   := VecInit(io.wdata)
  }

}

class Ftq_RF_Components(implicit p: Parameters) extends XSBundle with BPUUtils {
  val startAddr     = UInt(VAddrBits.W)
  val nextLineAddr  = UInt(VAddrBits.W)
  val isNextMask    = Vec(PredictWidth, Bool())
  val fallThruError = Bool()
  // val carry = Bool()
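  // getPc reconstructs a full PC from this compressed entry: the low bits are startAddr's
  // in-block offset plus the requested instruction offset, while the high bits are taken
  // from nextLineAddr instead of startAddr when isNextMask(offset) (qualified by the carry
  // bit of startAddr) indicates the offset has crossed into the next line.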
  def getPc(offset: UInt) = {
    def getHigher(pc: UInt) = pc(VAddrBits - 1, log2Ceil(PredictWidth) + instOffsetBits + 1)
    def getOffset(pc: UInt) = pc(log2Ceil(PredictWidth) + instOffsetBits, instOffsetBits)
    Cat(
      getHigher(Mux(isNextMask(offset) && startAddr(log2Ceil(PredictWidth) + instOffsetBits), nextLineAddr, startAddr)),
      getOffset(startAddr) + offset,
      0.U(instOffsetBits.W)
    )
  }
  def fromBranchPrediction(resp: BranchPredictionBundle) = {
    def carryPos(addr: UInt) = addr(instOffsetBits + log2Ceil(PredictWidth) + 1)
    this.startAddr    := resp.pc(3)
    this.nextLineAddr := resp.pc(3) + (FetchWidth * 4 * 2).U // may be broken on other configs
    this.isNextMask := VecInit((0 until PredictWidth).map(i =>
      (resp.pc(3)(log2Ceil(PredictWidth), 1) +& i.U)(log2Ceil(PredictWidth)).asBool
    ))
    this.fallThruError := resp.fallThruError(3)
    this
  }
  override def toPrintable: Printable =
    p"startAddr:${Hexadecimal(startAddr)}"
}

class Ftq_pd_Entry(implicit p: Parameters) extends XSBundle {
  val brMask    = Vec(PredictWidth, Bool())
  val jmpInfo   = ValidUndirectioned(Vec(3, Bool()))
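  // jmpInfo.valid means this fetch block contains a jal/jalr; bits(0)/bits(1)/bits(2)
  // record isJalr / isCall / isRet of that jump (see fromPdWb below)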
  val jmpOffset = UInt(log2Ceil(PredictWidth).W)
  val jalTarget = UInt(VAddrBits.W)
  val rvcMask   = Vec(PredictWidth, Bool())
  def hasJal    = jmpInfo.valid && !jmpInfo.bits(0)
  def hasJalr   = jmpInfo.valid && jmpInfo.bits(0)
  def hasCall   = jmpInfo.valid && jmpInfo.bits(1)
  def hasRet    = jmpInfo.valid && jmpInfo.bits(2)

  def fromPdWb(pdWb: PredecodeWritebackBundle) = {
    val pds = pdWb.pd
    this.brMask        := VecInit(pds.map(pd => pd.isBr && pd.valid))
    this.jmpInfo.valid := VecInit(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid)).asUInt.orR
    this.jmpInfo.bits := ParallelPriorityMux(
      pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid),
      pds.map(pd => VecInit(pd.isJalr, pd.isCall, pd.isRet))
    )
    this.jmpOffset := ParallelPriorityEncoder(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid))
    this.rvcMask   := VecInit(pds.map(pd => pd.isRVC))
    this.jalTarget := pdWb.jalTarget
  }

  def toPd(offset: UInt) = {
    require(offset.getWidth == log2Ceil(PredictWidth))
    val pd = Wire(new PreDecodeInfo)
    pd.valid := true.B
    pd.isRVC := rvcMask(offset)
    val isBr   = brMask(offset)
    val isJalr = offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(0)
    pd.brType := Cat(offset === jmpOffset && jmpInfo.valid, isJalr || isBr)
    pd.isCall := offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(1)
    pd.isRet  := offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(2)
    pd
  }
}

class PrefetchPtrDB(implicit p: Parameters) extends Bundle {
  val fromFtqPtr = UInt(log2Up(p(XSCoreParamsKey).FtqSize).W)
  val fromIfuPtr = UInt(log2Up(p(XSCoreParamsKey).FtqSize).W)
}

class Ftq_Redirect_SRAMEntry(implicit p: Parameters) extends SpeculativeInfo {
  val sc_disagree = if (!env.FPGAPlatform) Some(Vec(numBr, Bool())) else None
}

class Ftq_1R_SRAMEntry(implicit p: Parameters) extends XSBundle with HasBPUConst {
  val meta      = UInt(MaxMetaLength.W)
  val ftb_entry = new FTBEntry
}

class Ftq_Pred_Info(implicit p: Parameters) extends XSBundle {
  val target   = UInt(VAddrBits.W)
  val cfiIndex = ValidUndirectioned(UInt(log2Ceil(PredictWidth).W))
}

class FtqRead[T <: Data](private val gen: T)(implicit p: Parameters) extends XSBundle {
  val valid  = Output(Bool())
  val ptr    = Output(new FtqPtr)
  val offset = Output(UInt(log2Ceil(PredictWidth).W))
  val data   = Input(gen)
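  // apply() drives the read request fields and returns the data port, so a caller can do
  // something like `val rdata = port(someValid, somePtr, someOffset)` in a single
  // expression (the names in this example are illustrative only).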
  def apply(valid: Bool, ptr: FtqPtr, offset: UInt) = {
    this.valid  := valid
    this.ptr    := ptr
    this.offset := offset
    this.data
  }
}

class FtqToBpuIO(implicit p: Parameters) extends XSBundle {
  val redirect       = Valid(new BranchPredictionRedirect)
  val update         = Valid(new BranchPredictionUpdate)
  val enq_ptr        = Output(new FtqPtr)
  val redirctFromIFU = Output(Bool())
}

class BpuFlushInfo(implicit p: Parameters) extends XSBundle with HasCircularQueuePtrHelper {
  // when the ifu pipeline is not stalled,
  // a packet sent from bpu s3 can have reached f1 at most
  val s2 = Valid(new FtqPtr)
  val s3 = Valid(new FtqPtr)
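  // shouldFlushBy*: a valid s2/s3 redirect at FTQ pointer `src` flushes any in-flight
  // request whose index is at or after that pointer (i.e. `src` is not strictly after it).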
  def shouldFlushBy(src: Valid[FtqPtr], idx_to_flush: FtqPtr) =
    src.valid && !isAfter(src.bits, idx_to_flush)
  def shouldFlushByStage2(idx: FtqPtr) = shouldFlushBy(s2, idx)
  def shouldFlushByStage3(idx: FtqPtr) = shouldFlushBy(s3, idx)
}

class FtqToIfuIO(implicit p: Parameters) extends XSBundle {
  val req              = Decoupled(new FetchRequestBundle)
  val redirect         = Valid(new BranchPredictionRedirect)
  val topdown_redirect = Valid(new BranchPredictionRedirect)
  val flushFromBpu     = new BpuFlushInfo
}

class FtqToICacheIO(implicit p: Parameters) extends XSBundle {
  // NOTE: req.bits must be prepared in cycle T
  // while req.valid is set true in cycle T + 1
  val req = Decoupled(new FtqToICacheRequestBundle)
}

class FtqToPrefetchIO(implicit p: Parameters) extends XSBundle {
  val req              = Decoupled(new FtqICacheInfo)
  val flushFromBpu     = new BpuFlushInfo
  val backendException = UInt(ExceptionType.width.W)
}

trait HasBackendRedirectInfo extends HasXSParameter {
  def isLoadReplay(r: Valid[Redirect]) = r.bits.flushItself()
}

class FtqToCtrlIO(implicit p: Parameters) extends XSBundle with HasBackendRedirectInfo {
  // write to backend pc mem
  val pc_mem_wen   = Output(Bool())
  val pc_mem_waddr = Output(UInt(log2Ceil(FtqSize).W))
  val pc_mem_wdata = Output(new Ftq_RF_Components)
  // newest target
  val newest_entry_en     = Output(Bool())
  val newest_entry_target = Output(UInt(VAddrBits.W))
  val newest_entry_ptr    = Output(new FtqPtr)
}

class FTBEntryGen(implicit p: Parameters) extends XSModule with HasBackendRedirectInfo with HasBPUParameter {
  val io = IO(new Bundle {
    val start_addr     = Input(UInt(VAddrBits.W))
    val old_entry      = Input(new FTBEntry)
    val pd             = Input(new Ftq_pd_Entry)
    val cfiIndex       = Flipped(Valid(UInt(log2Ceil(PredictWidth).W)))
    val target         = Input(UInt(VAddrBits.W))
    val hit            = Input(Bool())
    val mispredict_vec = Input(Vec(PredictWidth, Bool()))

    val new_entry         = Output(new FTBEntry)
    val new_br_insert_pos = Output(Vec(numBr, Bool()))
    val taken_mask        = Output(Vec(numBr, Bool()))
    val jmp_taken         = Output(Bool())
    val mispred_mask      = Output(Vec(numBr + 1, Bool()))

    // for perf counters
    val is_init_entry           = Output(Bool())
    val is_old_entry            = Output(Bool())
    val is_new_br               = Output(Bool())
    val is_jalr_target_modified = Output(Bool())
    val is_strong_bias_modified = Output(Bool())
    val is_br_full              = Output(Bool())
  })

  // no mispredictions detected at predecode
  val hit = io.hit
  val pd  = io.pd

  val init_entry = WireInit(0.U.asTypeOf(new FTBEntry))

  val cfi_is_br       = pd.brMask(io.cfiIndex.bits) && io.cfiIndex.valid
  val entry_has_jmp   = pd.jmpInfo.valid
  val new_jmp_is_jal  = entry_has_jmp && !pd.jmpInfo.bits(0) && io.cfiIndex.valid
  val new_jmp_is_jalr = entry_has_jmp && pd.jmpInfo.bits(0) && io.cfiIndex.valid
  val new_jmp_is_call = entry_has_jmp && pd.jmpInfo.bits(1) && io.cfiIndex.valid
  val new_jmp_is_ret  = entry_has_jmp && pd.jmpInfo.bits(2) && io.cfiIndex.valid
  val last_jmp_rvi    = entry_has_jmp && pd.jmpOffset === (PredictWidth - 1).U && !pd.rvcMask.last
  // val last_br_rvi = cfi_is_br && io.cfiIndex.bits === (PredictWidth-1).U && !pd.rvcMask.last

  val cfi_is_jal  = io.cfiIndex.bits === pd.jmpOffset && new_jmp_is_jal
  val cfi_is_jalr = io.cfiIndex.bits === pd.jmpOffset && new_jmp_is_jalr

  def carryPos = log2Ceil(PredictWidth) + instOffsetBits
  def getLower(pc: UInt) = pc(carryPos - 1, instOffsetBits)
  // if not hit, establish a new entry
  init_entry.valid := true.B
  // tag is left for ftb to assign

  // case br
  val init_br_slot = init_entry.getSlotForBr(0)
  when(cfi_is_br) {
    init_br_slot.valid  := true.B
    init_br_slot.offset := io.cfiIndex.bits
    init_br_slot.setLowerStatByTarget(io.start_addr, io.target, numBr == 1)
    init_entry.strong_bias(0) := true.B // set to strong bias on init
  }

  // case jmp
  when(entry_has_jmp) {
    init_entry.tailSlot.offset := pd.jmpOffset
    init_entry.tailSlot.valid  := new_jmp_is_jal || new_jmp_is_jalr
    init_entry.tailSlot.setLowerStatByTarget(io.start_addr, Mux(cfi_is_jalr, io.target, pd.jalTarget), isShare = false)
    init_entry.strong_bias(numBr - 1) := new_jmp_is_jalr // set strong bias for the jalr on init
  }

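  // Roughly what the code below computes: the partial fall-through address (pftAddr) is the
  // low bits of the address right after the jump (jump offset plus 2 or 4 bytes depending on
  // RVC); if there is no jump, or the jump is an RVI instruction in the last slot, the entry
  // instead falls through at the next aligned fetch block (carry set, lower bits of start_addr).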
  val jmpPft = getLower(io.start_addr) +& pd.jmpOffset +& Mux(pd.rvcMask(pd.jmpOffset), 1.U, 2.U)
  init_entry.pftAddr := Mux(entry_has_jmp && !last_jmp_rvi, jmpPft, getLower(io.start_addr))
  init_entry.carry   := Mux(entry_has_jmp && !last_jmp_rvi, jmpPft(carryPos - instOffsetBits), true.B)

  require(
    isPow2(PredictWidth),
    "If PredictWidth is not a power of 2, " +
      "pftAddr := getLower(io.start_addr) and carry := true.B do not work!!"
  )

  init_entry.isJalr := new_jmp_is_jalr
  init_entry.isCall := new_jmp_is_call
  init_entry.isRet  := new_jmp_is_ret
  // that means the fall-through address points to the middle of an instruction
  init_entry.last_may_be_rvi_call := pd.jmpOffset === (PredictWidth - 1).U && !pd.rvcMask(pd.jmpOffset)

  // if hit, check whether a new cfi (only a br is possible) is detected
  val oe              = io.old_entry
  val br_recorded_vec = oe.getBrRecordedVec(io.cfiIndex.bits)
  val br_recorded     = br_recorded_vec.asUInt.orR
  val is_new_br       = cfi_is_br && !br_recorded
  val new_br_offset   = io.cfiIndex.bits
  // vec(i) means new br will be inserted BEFORE old br(i)
  val allBrSlotsVec = oe.allSlotsForBr
  val new_br_insert_onehot = VecInit((0 until numBr).map {
    i =>
      i match {
        case 0 =>
          !allBrSlotsVec(0).valid || new_br_offset < allBrSlotsVec(0).offset
        case idx =>
          allBrSlotsVec(idx - 1).valid && new_br_offset > allBrSlotsVec(idx - 1).offset &&
          (!allBrSlotsVec(idx).valid || new_br_offset < allBrSlotsVec(idx).offset)
      }
  })
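  // In other words: the new branch goes into the first slot whose recorded offset is larger
  // than the new offset (or into the first invalid slot), keeping branch slots ordered by
  // instruction offset within the fetch block.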

  val old_entry_modified = WireInit(io.old_entry)
  for (i <- 0 until numBr) {
    val slot = old_entry_modified.allSlotsForBr(i)
    when(new_br_insert_onehot(i)) {
      slot.valid  := true.B
      slot.offset := new_br_offset
      slot.setLowerStatByTarget(io.start_addr, io.target, i == numBr - 1)
      old_entry_modified.strong_bias(i) := true.B
    }.elsewhen(new_br_offset > oe.allSlotsForBr(i).offset) {
      old_entry_modified.strong_bias(i) := false.B
      // all other fields remain unchanged
    }.otherwise {
      // case i == 0, remain unchanged
      if (i != 0) {
        val noNeedToMoveFromFormerSlot = (i == numBr - 1).B && !oe.brSlots.last.valid
        when(!noNeedToMoveFromFormerSlot) {
          slot.fromAnotherSlot(oe.allSlotsForBr(i - 1))
          old_entry_modified.strong_bias(i) := oe.strong_bias(i)
        }
      }
    }
  }

  // two circumstances:
  // 1. oe: | br | j  |, new br should be in front of j, thus addr of j should be new pft
  // 2. oe: | br | br |, new br could be anywhere between, thus new pft is the addr of either
  //        the previous last br or the new br
  val may_have_to_replace = oe.noEmptySlotForNewBr
  val pft_need_to_change  = is_new_br && may_have_to_replace
  // it should either be the given last br or the new br
  when(pft_need_to_change) {
    val new_pft_offset =
      Mux(!new_br_insert_onehot.asUInt.orR, new_br_offset, oe.allSlotsForBr.last.offset)

    // set jmp to invalid
    old_entry_modified.pftAddr              := getLower(io.start_addr) + new_pft_offset
    old_entry_modified.carry                := (getLower(io.start_addr) +& new_pft_offset).head(1).asBool
    old_entry_modified.last_may_be_rvi_call := false.B
    old_entry_modified.isCall               := false.B
    old_entry_modified.isRet                := false.B
    old_entry_modified.isJalr               := false.B
  }

  val old_entry_jmp_target_modified = WireInit(oe)
  val old_target      = oe.tailSlot.getTarget(io.start_addr) // may be wrong because we store only 20 lowest bits
  val old_tail_is_jmp = !oe.tailSlot.sharing
  val jalr_target_modified = cfi_is_jalr && (old_target =/= io.target) && old_tail_is_jmp // TODO: pass full jalr target
  when(jalr_target_modified) {
    old_entry_jmp_target_modified.setByJmpTarget(io.start_addr, io.target)
    old_entry_jmp_target_modified.strong_bias := 0.U.asTypeOf(Vec(numBr, Bool()))
  }

  val old_entry_strong_bias    = WireInit(oe)
  val strong_bias_modified_vec = Wire(Vec(numBr, Bool())) // whether modified or not
  for (i <- 0 until numBr) {
    when(br_recorded_vec(0)) {
      old_entry_strong_bias.strong_bias(0) :=
        oe.strong_bias(0) && io.cfiIndex.valid && oe.brValids(0) && io.cfiIndex.bits === oe.brOffset(0)
    }.elsewhen(br_recorded_vec(numBr - 1)) {
      old_entry_strong_bias.strong_bias(0) := false.B
      old_entry_strong_bias.strong_bias(numBr - 1) :=
        oe.strong_bias(numBr - 1) && io.cfiIndex.valid && oe.brValids(numBr - 1) && io.cfiIndex.bits === oe.brOffset(
          numBr - 1
        )
    }
    strong_bias_modified_vec(i) := oe.strong_bias(i) && oe.brValids(i) && !old_entry_strong_bias.strong_bias(i)
  }
  val strong_bias_modified = strong_bias_modified_vec.reduce(_ || _)

  val derived_from_old_entry =
    Mux(is_new_br, old_entry_modified, Mux(jalr_target_modified, old_entry_jmp_target_modified, old_entry_strong_bias))

  io.new_entry := Mux(!hit, init_entry, derived_from_old_entry)

  io.new_br_insert_pos := new_br_insert_onehot
  io.taken_mask := VecInit((io.new_entry.brOffset zip io.new_entry.brValids).map {
    case (off, v) => io.cfiIndex.bits === off && io.cfiIndex.valid && v
  })
  io.jmp_taken := io.new_entry.jmpValid && io.new_entry.tailSlot.offset === io.cfiIndex.bits
  for (i <- 0 until numBr) {
    io.mispred_mask(i) := io.new_entry.brValids(i) && io.mispredict_vec(io.new_entry.brOffset(i))
  }
  io.mispred_mask.last := io.new_entry.jmpValid && io.mispredict_vec(pd.jmpOffset)

  // for perf counters
  io.is_init_entry           := !hit
  io.is_old_entry            := hit && !is_new_br && !jalr_target_modified && !strong_bias_modified
  io.is_new_br               := hit && is_new_br
  io.is_jalr_target_modified := hit && jalr_target_modified
  io.is_strong_bias_modified := hit && strong_bias_modified
  io.is_br_full              := hit && is_new_br && may_have_to_replace
}

class FtqPcMemWrapper(numOtherReads: Int)(implicit p: Parameters) extends XSModule with HasBackendRedirectInfo {
  val io = IO(new Bundle {
    val ifuPtr_w           = Input(new FtqPtr)
    val ifuPtrPlus1_w      = Input(new FtqPtr)
    val ifuPtrPlus2_w      = Input(new FtqPtr)
    val pfPtr_w            = Input(new FtqPtr)
    val pfPtrPlus1_w       = Input(new FtqPtr)
    val commPtr_w          = Input(new FtqPtr)
    val commPtrPlus1_w     = Input(new FtqPtr)
    val ifuPtr_rdata       = Output(new Ftq_RF_Components)
    val ifuPtrPlus1_rdata  = Output(new Ftq_RF_Components)
    val ifuPtrPlus2_rdata  = Output(new Ftq_RF_Components)
    val pfPtr_rdata        = Output(new Ftq_RF_Components)
    val pfPtrPlus1_rdata   = Output(new Ftq_RF_Components)
    val commPtr_rdata      = Output(new Ftq_RF_Components)
    val commPtrPlus1_rdata = Output(new Ftq_RF_Components)

    val wen   = Input(Bool())
    val waddr = Input(UInt(log2Ceil(FtqSize).W))
    val wdata = Input(new Ftq_RF_Components)
  })

  val num_pc_read = numOtherReads + 5
  val mem         = Module(new SyncDataModuleTemplate(new Ftq_RF_Components, FtqSize, num_pc_read, 1, "FtqPC"))
  mem.io.wen(0)   := io.wen
  mem.io.waddr(0) := io.waddr
  mem.io.wdata(0) := io.wdata

  // read one cycle ahead for ftq local reads
  val raddr_vec = VecInit(Seq(
    io.ifuPtr_w.value,
    io.ifuPtrPlus1_w.value,
    io.ifuPtrPlus2_w.value,
    io.pfPtr_w.value,
    io.pfPtrPlus1_w.value,
    io.commPtrPlus1_w.value,
    io.commPtr_w.value
  ))

  mem.io.raddr := raddr_vec

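  // The rdata mapping below mirrors the order of raddr_vec above: with the 7 read ports used
  // here (numOtherReads == 2), dropRight(6).last is rdata(0) (ifuPtr), dropRight(5).last is
  // rdata(1) (ifuPtrPlus1), and so on, down to .last for the final entry (commPtr).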
  io.ifuPtr_rdata       := mem.io.rdata.dropRight(6).last
  io.ifuPtrPlus1_rdata  := mem.io.rdata.dropRight(5).last
  io.ifuPtrPlus2_rdata  := mem.io.rdata.dropRight(4).last
  io.pfPtr_rdata        := mem.io.rdata.dropRight(3).last
  io.pfPtrPlus1_rdata   := mem.io.rdata.dropRight(2).last
  io.commPtrPlus1_rdata := mem.io.rdata.dropRight(1).last
  io.commPtr_rdata      := mem.io.rdata.last
}

class Ftq(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelper
    with HasBackendRedirectInfo with BPUUtils with HasBPUConst with HasPerfEvents
    with HasICacheParameters {
  val io = IO(new Bundle {
    val fromBpu     = Flipped(new BpuToFtqIO)
    val fromIfu     = Flipped(new IfuToFtqIO)
    val fromBackend = Flipped(new CtrlToFtqIO)

    val toBpu       = new FtqToBpuIO
    val toIfu       = new FtqToIfuIO
    val toICache    = new FtqToICacheIO
    val toBackend   = new FtqToCtrlIO
    val toPrefetch  = new FtqToPrefetchIO
    val icacheFlush = Output(Bool())

    val bpuInfo = new Bundle {
      val bpRight = Output(UInt(XLEN.W))
      val bpWrong = Output(UInt(XLEN.W))
    }

    val mmioCommitRead = Flipped(new mmioCommitRead)

    // for perf
    val ControlBTBMissBubble = Output(Bool())
    val TAGEMissBubble       = Output(Bool())
    val SCMissBubble         = Output(Bool())
    val ITTAGEMissBubble     = Output(Bool())
    val RASMissBubble        = Output(Bool())
  })
  io.bpuInfo := DontCare

  val topdown_stage = RegInit(0.U.asTypeOf(new FrontendTopDownBundle))
  // only driven by clock, not valid-ready
  topdown_stage                  := io.fromBpu.resp.bits.topdown_info
  io.toIfu.req.bits.topdown_info := topdown_stage

  val ifuRedirected = RegInit(VecInit(Seq.fill(FtqSize)(false.B)))

  // io.fromBackend.ftqIdxAhead: bju(BjuCnt) + ldReplay + exception
  val ftqIdxAhead = VecInit(Seq.tabulate(FtqRedirectAheadNum)(i => io.fromBackend.ftqIdxAhead(i))) // only bju
  val ftqIdxSelOH = io.fromBackend.ftqIdxSelOH.bits(FtqRedirectAheadNum - 1, 0)

  val aheadValid         = ftqIdxAhead.map(_.valid).reduce(_ | _) && !io.fromBackend.redirect.valid
  val realAhdValid       = io.fromBackend.redirect.valid && (ftqIdxSelOH > 0.U) && RegNext(aheadValid)
  val backendRedirect    = Wire(Valid(new BranchPredictionRedirect))
  val backendRedirectReg = Wire(Valid(new BranchPredictionRedirect))
  backendRedirectReg.valid := RegNext(Mux(realAhdValid, false.B, backendRedirect.valid))
  backendRedirectReg.bits  := RegEnable(backendRedirect.bits, backendRedirect.valid)
  val fromBackendRedirect = Wire(Valid(new BranchPredictionRedirect))
  fromBackendRedirect := Mux(realAhdValid, backendRedirect, backendRedirectReg)

  val stage2Flush  = backendRedirect.valid
  val backendFlush = stage2Flush || RegNext(stage2Flush)
  val ifuFlush     = Wire(Bool())

  val flush = stage2Flush || RegNext(stage2Flush)

  val allowBpuIn, allowToIfu = WireInit(false.B)
  val flushToIfu             = !allowToIfu
  allowBpuIn := !ifuFlush && !backendRedirect.valid && !backendRedirectReg.valid
  allowToIfu := !ifuFlush && !backendRedirect.valid && !backendRedirectReg.valid

  def copyNum                                              = 5
  val bpuPtr, ifuPtr, pfPtr, ifuWbPtr, commPtr, robCommPtr = RegInit(FtqPtr(false.B, 0.U))
  val ifuPtrPlus1                                          = RegInit(FtqPtr(false.B, 1.U))
  val ifuPtrPlus2                                          = RegInit(FtqPtr(false.B, 2.U))
  val pfPtrPlus1                                           = RegInit(FtqPtr(false.B, 1.U))
  val commPtrPlus1                                         = RegInit(FtqPtr(false.B, 1.U))
  val copied_ifu_ptr                                       = Seq.fill(copyNum)(RegInit(FtqPtr(false.B, 0.U)))
  val copied_bpu_ptr                                       = Seq.fill(copyNum)(RegInit(FtqPtr(false.B, 0.U)))
  require(FtqSize >= 4)
  val ifuPtr_write       = WireInit(ifuPtr)
  val ifuPtrPlus1_write  = WireInit(ifuPtrPlus1)
  val ifuPtrPlus2_write  = WireInit(ifuPtrPlus2)
  val pfPtr_write        = WireInit(pfPtr)
  val pfPtrPlus1_write   = WireInit(pfPtrPlus1)
  val ifuWbPtr_write     = WireInit(ifuWbPtr)
  val commPtr_write      = WireInit(commPtr)
  val commPtrPlus1_write = WireInit(commPtrPlus1)
  val robCommPtr_write   = WireInit(robCommPtr)
  ifuPtr       := ifuPtr_write
  ifuPtrPlus1  := ifuPtrPlus1_write
  ifuPtrPlus2  := ifuPtrPlus2_write
  pfPtr        := pfPtr_write
  pfPtrPlus1   := pfPtrPlus1_write
  ifuWbPtr     := ifuWbPtr_write
  commPtr      := commPtr_write
  commPtrPlus1 := commPtrPlus1_write
  copied_ifu_ptr.map { ptr =>
    ptr := ifuPtr_write
    dontTouch(ptr)
  }
  robCommPtr := robCommPtr_write
  val validEntries = distanceBetween(bpuPtr, commPtr)
  val canCommit    = Wire(Bool())

  // Instruction page faults and instruction access faults are sent from the backend with redirect requests.
  // When an IPF or IAF is sent, backendPcFaultPtr points to the FTQ entry whose first instruction
  // raises the IPF or IAF, which is ifuWbPtr_write or ifuPtr_write.
  // Only when IFU has written back that FTQ entry can the backend IPF/IAF be cleared, because this
  // makes sure that IAF and IPF are correctly raised instead of being flushed by redirect requests.
  val backendException  = RegInit(ExceptionType.none)
  val backendPcFaultPtr = RegInit(FtqPtr(false.B, 0.U))
  when(fromBackendRedirect.valid) {
    backendException := ExceptionType.fromOH(
      has_pf = fromBackendRedirect.bits.cfiUpdate.backendIPF,
      has_gpf = fromBackendRedirect.bits.cfiUpdate.backendIGPF,
      has_af = fromBackendRedirect.bits.cfiUpdate.backendIAF
    )
    when(
      fromBackendRedirect.bits.cfiUpdate.backendIPF || fromBackendRedirect.bits.cfiUpdate.backendIGPF ||
        fromBackendRedirect.bits.cfiUpdate.backendIAF
    ) {
      backendPcFaultPtr := ifuWbPtr_write
    }
  }.elsewhen(ifuWbPtr =/= backendPcFaultPtr) {
    backendException := ExceptionType.none
  }

  // **********************************************************************
  // **************************** enq from bpu ****************************
  // **********************************************************************
  val new_entry_ready = validEntries < FtqSize.U || canCommit
  io.fromBpu.resp.ready := new_entry_ready

  val bpu_s2_resp     = io.fromBpu.resp.bits.s2
  val bpu_s3_resp     = io.fromBpu.resp.bits.s3
  val bpu_s2_redirect = bpu_s2_resp.valid(3) && bpu_s2_resp.hasRedirect(3)
  val bpu_s3_redirect = bpu_s3_resp.valid(3) && bpu_s3_resp.hasRedirect(3)

  io.toBpu.enq_ptr := bpuPtr
  val enq_fire    = io.fromBpu.resp.fire && allowBpuIn // from bpu s1
  val bpu_in_fire = (io.fromBpu.resp.fire || bpu_s2_redirect || bpu_s3_redirect) && allowBpuIn

  val bpu_in_resp     = io.fromBpu.resp.bits.selectedResp
  val bpu_in_stage    = io.fromBpu.resp.bits.selectedRespIdxForFtq
  val bpu_in_resp_ptr = Mux(bpu_in_stage === BP_S1, bpuPtr, bpu_in_resp.ftq_idx)
  val bpu_in_resp_idx = bpu_in_resp_ptr.value

  // read ports:      pfReq1 + pfReq2 + ifuReq1 + ifuReq2 + ifuReq3 + commitUpdate2 + commitUpdate
  val ftq_pc_mem = Module(new FtqPcMemWrapper(2))
  // resp from uBTB
  ftq_pc_mem.io.wen   := bpu_in_fire
  ftq_pc_mem.io.waddr := bpu_in_resp_idx
  ftq_pc_mem.io.wdata.fromBranchPrediction(bpu_in_resp)

  //                                                            ifuRedirect + backendRedirect + commit
  val ftq_redirect_mem = Module(new SyncDataModuleTemplate(
    new Ftq_Redirect_SRAMEntry,
    FtqSize,
    IfuRedirectNum + FtqRedirectAheadNum + 1,
    1,
    hasRen = true
  ))
  // this info is intended to be enqueued at the last stage of bpu
  ftq_redirect_mem.io.wen(0)   := io.fromBpu.resp.bits.lastStage.valid(3)
  ftq_redirect_mem.io.waddr(0) := io.fromBpu.resp.bits.lastStage.ftq_idx.value
  ftq_redirect_mem.io.wdata(0) := io.fromBpu.resp.bits.last_stage_spec_info
  println(f"ftq redirect MEM: entry ${ftq_redirect_mem.io.wdata(0).getWidth} * ${FtqSize} * 3")

  val ftq_meta_1r_sram = Module(new FtqNRSRAM(new Ftq_1R_SRAMEntry, 1))
  // this info is intended to be enqueued at the last stage of bpu
  ftq_meta_1r_sram.io.wen             := io.fromBpu.resp.bits.lastStage.valid(3)
  ftq_meta_1r_sram.io.waddr           := io.fromBpu.resp.bits.lastStage.ftq_idx.value
  ftq_meta_1r_sram.io.wdata.meta      := io.fromBpu.resp.bits.last_stage_meta
  ftq_meta_1r_sram.io.wdata.ftb_entry := io.fromBpu.resp.bits.last_stage_ftb_entry
  //                                                            ifuRedirect + backendRedirect (commit moved to ftq_meta_1r_sram)
  val ftb_entry_mem = Module(new SyncDataModuleTemplate(
    new FTBEntry_FtqMem,
    FtqSize,
    IfuRedirectNum + FtqRedirectAheadNum,
    1,
    hasRen = true
  ))
  ftb_entry_mem.io.wen(0)   := io.fromBpu.resp.bits.lastStage.valid(3)
  ftb_entry_mem.io.waddr(0) := io.fromBpu.resp.bits.lastStage.ftq_idx.value
  ftb_entry_mem.io.wdata(0) := io.fromBpu.resp.bits.last_stage_ftb_entry
  private val mbistPl = MbistPipeline.PlaceMbistPipeline(1, "MbistPipeFtq", hasMbist)

  // multi-write
  val update_target = Reg(Vec(FtqSize, UInt(VAddrBits.W))) // could be taken target or fallThrough //TODO: remove this
  val newest_entry_target          = Reg(UInt(VAddrBits.W))
  val newest_entry_target_modified = RegInit(false.B)
  val newest_entry_ptr             = Reg(new FtqPtr)
  val newest_entry_ptr_modified    = RegInit(false.B)
  val cfiIndex_vec                 = Reg(Vec(FtqSize, ValidUndirectioned(UInt(log2Ceil(PredictWidth).W))))
  val mispredict_vec               = Reg(Vec(FtqSize, Vec(PredictWidth, Bool())))
  val pred_stage                   = Reg(Vec(FtqSize, UInt(2.W)))
  val pred_s1_cycle                = if (!env.FPGAPlatform) Some(Reg(Vec(FtqSize, UInt(64.W)))) else None

  val c_empty :: c_toCommit :: c_committed :: c_flushed :: Nil = Enum(4)
  val commitStateQueueReg = RegInit(VecInit(Seq.fill(FtqSize) {
    VecInit(Seq.fill(PredictWidth)(c_empty))
  }))
  val commitStateQueueEnable = WireInit(VecInit(Seq.fill(FtqSize)(false.B)))
  val commitStateQueueNext   = WireInit(commitStateQueueReg)

  for (f <- 0 until FtqSize) {
    when(commitStateQueueEnable(f)) {
      commitStateQueueReg(f) := commitStateQueueNext(f)
    }
  }

  val f_to_send :: f_sent :: Nil = Enum(2)
  val entry_fetch_status         = RegInit(VecInit(Seq.fill(FtqSize)(f_sent)))

  val h_not_hit :: h_false_hit :: h_hit :: Nil = Enum(3)
  val entry_hit_status                         = RegInit(VecInit(Seq.fill(FtqSize)(h_not_hit)))

  // modify registers one cycle later to cut critical path
  val last_cycle_bpu_in       = RegNext(bpu_in_fire)
  val last_cycle_bpu_in_ptr   = RegEnable(bpu_in_resp_ptr, bpu_in_fire)
  val last_cycle_bpu_in_idx   = last_cycle_bpu_in_ptr.value
  val last_cycle_bpu_target   = RegEnable(bpu_in_resp.getTarget(3), bpu_in_fire)
  val last_cycle_cfiIndex     = RegEnable(bpu_in_resp.cfiIndex(3), bpu_in_fire)
  val last_cycle_bpu_in_stage = RegEnable(bpu_in_stage, bpu_in_fire)

  def extra_copyNum_for_commitStateQueue = 2
  val copied_last_cycle_bpu_in =
    VecInit(Seq.fill(copyNum + extra_copyNum_for_commitStateQueue)(RegNext(bpu_in_fire)))
  val copied_last_cycle_bpu_in_ptr_for_ftq =
    VecInit(Seq.fill(extra_copyNum_for_commitStateQueue)(RegEnable(bpu_in_resp_ptr, bpu_in_fire)))

  newest_entry_target_modified := false.B
  newest_entry_ptr_modified    := false.B
  when(last_cycle_bpu_in) {
    entry_fetch_status(last_cycle_bpu_in_idx) := f_to_send
    cfiIndex_vec(last_cycle_bpu_in_idx)       := last_cycle_cfiIndex
    pred_stage(last_cycle_bpu_in_idx)         := last_cycle_bpu_in_stage

    update_target(last_cycle_bpu_in_idx) := last_cycle_bpu_target // TODO: remove this
    newest_entry_target_modified         := true.B
    newest_entry_target                  := last_cycle_bpu_target
    newest_entry_ptr_modified            := true.B
    newest_entry_ptr                     := last_cycle_bpu_in_ptr
  }

  // reduce fanout by delaying the write for a cycle
  when(RegNext(last_cycle_bpu_in)) {
    mispredict_vec(RegEnable(last_cycle_bpu_in_idx, last_cycle_bpu_in)) :=
      WireInit(VecInit(Seq.fill(PredictWidth)(false.B)))
  }

  // record s1 pred cycles
  pred_s1_cycle.map { vec =>
    when(bpu_in_fire && (bpu_in_stage === BP_S1)) {
      vec(bpu_in_resp_ptr.value) := bpu_in_resp.full_pred(0).predCycle.getOrElse(0.U)
    }
  }

  // reduce fanout using copied last_cycle_bpu_in and copied last_cycle_bpu_in_ptr
  val copied_last_cycle_bpu_in_for_ftq = copied_last_cycle_bpu_in.takeRight(extra_copyNum_for_commitStateQueue)
  copied_last_cycle_bpu_in_for_ftq.zip(copied_last_cycle_bpu_in_ptr_for_ftq).zipWithIndex.map {
    case ((in, ptr), i) =>
      when(in) {
        val perSetEntries = FtqSize / extra_copyNum_for_commitStateQueue // 32
        require(FtqSize % extra_copyNum_for_commitStateQueue == 0)
        for (j <- 0 until perSetEntries) {
          when(ptr.value === (i * perSetEntries + j).U) {
            commitStateQueueNext(i * perSetEntries + j) := VecInit(Seq.fill(PredictWidth)(c_empty))
            // Clock gating optimization, use 1 gate cell to control a row
            commitStateQueueEnable(i * perSetEntries + j) := true.B
          }
        }
      }
  }

  bpuPtr := bpuPtr + enq_fire
  copied_bpu_ptr.map(_ := bpuPtr + enq_fire)
  when(io.toIfu.req.fire && allowToIfu) {
    ifuPtr_write      := ifuPtrPlus1
    ifuPtrPlus1_write := ifuPtrPlus2
    ifuPtrPlus2_write := ifuPtrPlus2 + 1.U
  }
  when(io.toPrefetch.req.fire && allowToIfu) {
    pfPtr_write      := pfPtrPlus1
    pfPtrPlus1_write := pfPtrPlus1 + 1.U
  }

  // only use ftb result to assign hit status
  when(bpu_s2_resp.valid(3)) {
    entry_hit_status(bpu_s2_resp.ftq_idx.value) := Mux(bpu_s2_resp.full_pred(3).hit, h_hit, h_not_hit)
  }

  io.toIfu.flushFromBpu.s2.valid      := bpu_s2_redirect
  io.toIfu.flushFromBpu.s2.bits       := bpu_s2_resp.ftq_idx
  io.toPrefetch.flushFromBpu.s2.valid := bpu_s2_redirect
  io.toPrefetch.flushFromBpu.s2.bits  := bpu_s2_resp.ftq_idx
  when(bpu_s2_redirect) {
    bpuPtr := bpu_s2_resp.ftq_idx + 1.U
    copied_bpu_ptr.map(_ := bpu_s2_resp.ftq_idx + 1.U)
    // only when ifuPtr runs ahead of bpu s2 resp should we recover it
    when(!isBefore(ifuPtr, bpu_s2_resp.ftq_idx)) {
      ifuPtr_write      := bpu_s2_resp.ftq_idx
      ifuPtrPlus1_write := bpu_s2_resp.ftq_idx + 1.U
      ifuPtrPlus2_write := bpu_s2_resp.ftq_idx + 2.U
    }
    when(!isBefore(pfPtr, bpu_s2_resp.ftq_idx)) {
      pfPtr_write      := bpu_s2_resp.ftq_idx
      pfPtrPlus1_write := bpu_s2_resp.ftq_idx + 1.U
    }
  }

  io.toIfu.flushFromBpu.s3.valid      := bpu_s3_redirect
  io.toIfu.flushFromBpu.s3.bits       := bpu_s3_resp.ftq_idx
  io.toPrefetch.flushFromBpu.s3.valid := bpu_s3_redirect
  io.toPrefetch.flushFromBpu.s3.bits  := bpu_s3_resp.ftq_idx
  when(bpu_s3_redirect) {
    bpuPtr := bpu_s3_resp.ftq_idx + 1.U
    copied_bpu_ptr.map(_ := bpu_s3_resp.ftq_idx + 1.U)
    // only when ifuPtr runs ahead of bpu s3 resp should we recover it
    when(!isBefore(ifuPtr, bpu_s3_resp.ftq_idx)) {
      ifuPtr_write      := bpu_s3_resp.ftq_idx
      ifuPtrPlus1_write := bpu_s3_resp.ftq_idx + 1.U
      ifuPtrPlus2_write := bpu_s3_resp.ftq_idx + 2.U
    }
    when(!isBefore(pfPtr, bpu_s3_resp.ftq_idx)) {
      pfPtr_write      := bpu_s3_resp.ftq_idx
      pfPtrPlus1_write := bpu_s3_resp.ftq_idx + 1.U
    }
  }

  XSError(isBefore(bpuPtr, ifuPtr) && !isFull(bpuPtr, ifuPtr), "\nifuPtr is before bpuPtr!\n")
  XSError(isBefore(bpuPtr, pfPtr) && !isFull(bpuPtr, pfPtr), "\npfPtr is before bpuPtr!\n")
  XSError(isBefore(ifuWbPtr, commPtr) && !isFull(ifuWbPtr, commPtr), "\ncommPtr is before ifuWbPtr!\n")

  (0 until copyNum).map(i => XSError(copied_bpu_ptr(i) =/= bpuPtr, "\ncopiedBpuPtr is different from bpuPtr!\n"))

  // ****************************************************************
  // **************************** to ifu ****************************
  // ****************************************************************
  // 0  for ifu, and 1-4 for ICache
  val bpu_in_bypass_buf         = RegEnable(ftq_pc_mem.io.wdata, bpu_in_fire)
  val copied_bpu_in_bypass_buf  = VecInit(Seq.fill(copyNum)(RegEnable(ftq_pc_mem.io.wdata, bpu_in_fire)))
  val bpu_in_bypass_buf_for_ifu = bpu_in_bypass_buf
  val bpu_in_bypass_ptr         = RegEnable(bpu_in_resp_ptr, bpu_in_fire)
  val last_cycle_to_ifu_fire    = RegNext(io.toIfu.req.fire)
  val last_cycle_to_pf_fire     = RegNext(io.toPrefetch.req.fire)

  val copied_bpu_in_bypass_ptr      = VecInit(Seq.fill(copyNum)(RegEnable(bpu_in_resp_ptr, bpu_in_fire)))
  val copied_last_cycle_to_ifu_fire = VecInit(Seq.fill(copyNum)(RegNext(io.toIfu.req.fire)))

  // read pc and target
  ftq_pc_mem.io.ifuPtr_w       := ifuPtr_write
  ftq_pc_mem.io.ifuPtrPlus1_w  := ifuPtrPlus1_write
  ftq_pc_mem.io.ifuPtrPlus2_w  := ifuPtrPlus2_write
  ftq_pc_mem.io.pfPtr_w        := pfPtr_write
  ftq_pc_mem.io.pfPtrPlus1_w   := pfPtrPlus1_write
  ftq_pc_mem.io.commPtr_w      := commPtr_write
  ftq_pc_mem.io.commPtrPlus1_w := commPtrPlus1_write

  io.toIfu.req.bits.ftqIdx := ifuPtr

  val toICachePcBundle               = Wire(Vec(copyNum, new Ftq_RF_Components))
  val toICacheEntryToSend            = Wire(Vec(copyNum, Bool()))
  val nextCycleToPrefetchPcBundle    = Wire(new Ftq_RF_Components)
  val nextCycleToPrefetchEntryToSend = Wire(Bool())
  val toPrefetchPcBundle             = RegNext(nextCycleToPrefetchPcBundle)
  val toPrefetchEntryToSend          = RegNext(nextCycleToPrefetchEntryToSend)
  val toIfuPcBundle                  = Wire(new Ftq_RF_Components)
  val entry_is_to_send               = WireInit(entry_fetch_status(ifuPtr.value) === f_to_send)
  val entry_ftq_offset               = WireInit(cfiIndex_vec(ifuPtr.value))
  val entry_next_addr                = Wire(UInt(VAddrBits.W))

  val pc_mem_ifu_ptr_rdata   = VecInit(Seq.fill(copyNum)(RegNext(ftq_pc_mem.io.ifuPtr_rdata)))
  val pc_mem_ifu_plus1_rdata = VecInit(Seq.fill(copyNum)(RegNext(ftq_pc_mem.io.ifuPtrPlus1_rdata)))
  val diff_entry_next_addr   = WireInit(update_target(ifuPtr.value)) // TODO: remove this

  val copied_ifu_plus1_to_send = VecInit(Seq.fill(copyNum)(RegNext(
    entry_fetch_status(ifuPtrPlus1.value) === f_to_send
  ) || RegNext(last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtrPlus1)))
  val copied_ifu_ptr_to_send = VecInit(Seq.fill(copyNum)(RegNext(
    entry_fetch_status(ifuPtr.value) === f_to_send
  ) || RegNext(last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr)))

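  // Per-copy selection for the ICache request (same priority as the IFU path below):
  // 1) if BPU wrote this very entry last cycle, bypass the freshly written data;
  // 2) else if a request was sent to IFU last cycle, the pointer has advanced, so use the
  //    pre-read ifuPtrPlus1 data;
  // 3) otherwise use the pre-read ifuPtr data.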
  for (i <- 0 until copyNum) {
    when(copied_last_cycle_bpu_in(i) && copied_bpu_in_bypass_ptr(i) === copied_ifu_ptr(i)) {
      toICachePcBundle(i)    := copied_bpu_in_bypass_buf(i)
      toICacheEntryToSend(i) := true.B
    }.elsewhen(copied_last_cycle_to_ifu_fire(i)) {
      toICachePcBundle(i)    := pc_mem_ifu_plus1_rdata(i)
      toICacheEntryToSend(i) := copied_ifu_plus1_to_send(i)
    }.otherwise {
      toICachePcBundle(i)    := pc_mem_ifu_ptr_rdata(i)
      toICacheEntryToSend(i) := copied_ifu_ptr_to_send(i)
    }
  }

  // Calculate requests sent to prefetcher one cycle in advance to cut critical path
  when(bpu_in_fire && bpu_in_resp_ptr === pfPtr_write) {
    nextCycleToPrefetchPcBundle    := ftq_pc_mem.io.wdata
    nextCycleToPrefetchEntryToSend := true.B
  }.elsewhen(io.toPrefetch.req.fire) {
    nextCycleToPrefetchPcBundle := ftq_pc_mem.io.pfPtrPlus1_rdata
    nextCycleToPrefetchEntryToSend := entry_fetch_status(pfPtrPlus1.value) === f_to_send ||
      last_cycle_bpu_in && bpu_in_bypass_ptr === pfPtrPlus1
  }.otherwise {
    nextCycleToPrefetchPcBundle := ftq_pc_mem.io.pfPtr_rdata
    nextCycleToPrefetchEntryToSend := entry_fetch_status(pfPtr.value) === f_to_send ||
      last_cycle_bpu_in && bpu_in_bypass_ptr === pfPtr // reduce potential bubbles
  }

  // TODO: reconsider target address bypass logic
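  // Roughly the same three-way selection as the ICache copies above, but also choosing
  // entry_next_addr: prefer the bypassed BPU target, otherwise fall back to
  // newest_entry_target when ifuPtr is the newest entry, or to the start address pre-read
  // for the following entry.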
  when(last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr) {
    toIfuPcBundle        := bpu_in_bypass_buf_for_ifu
    entry_is_to_send     := true.B
    entry_next_addr      := last_cycle_bpu_target
    entry_ftq_offset     := last_cycle_cfiIndex
    diff_entry_next_addr := last_cycle_bpu_target // TODO: remove this
  }.elsewhen(last_cycle_to_ifu_fire) {
    toIfuPcBundle := RegNext(ftq_pc_mem.io.ifuPtrPlus1_rdata)
    entry_is_to_send := RegNext(entry_fetch_status(ifuPtrPlus1.value) === f_to_send) ||
      RegNext(last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtrPlus1) // reduce potential bubbles
    entry_next_addr := Mux(
      last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtrPlus1,
      bpu_in_bypass_buf_for_ifu.startAddr,
      Mux(ifuPtr === newest_entry_ptr, newest_entry_target, RegNext(ftq_pc_mem.io.ifuPtrPlus2_rdata.startAddr))
    ) // ifuPtr+2
  }.otherwise {
    toIfuPcBundle := RegNext(ftq_pc_mem.io.ifuPtr_rdata)
    entry_is_to_send := RegNext(entry_fetch_status(ifuPtr.value) === f_to_send) ||
      RegNext(last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr) // reduce potential bubbles
    entry_next_addr := Mux(
      last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtrPlus1,
      bpu_in_bypass_buf_for_ifu.startAddr,
      Mux(ifuPtr === newest_entry_ptr, newest_entry_target, RegNext(ftq_pc_mem.io.ifuPtrPlus1_rdata.startAddr))
    ) // ifuPtr+1
  }

  io.toIfu.req.valid              := entry_is_to_send && ifuPtr =/= bpuPtr
  io.toIfu.req.bits.nextStartAddr := entry_next_addr
  io.toIfu.req.bits.ftqOffset     := entry_ftq_offset
  io.toIfu.req.bits.fromFtqPcBundle(toIfuPcBundle)

  io.toICache.req.valid := entry_is_to_send && ifuPtr =/= bpuPtr
  io.toICache.req.bits.readValid.zipWithIndex.map { case (copy, i) =>
    copy := toICacheEntryToSend(i) && copied_ifu_ptr(i) =/= copied_bpu_ptr(i)
  }
  io.toICache.req.bits.pcMemRead.zipWithIndex.foreach { case (copy, i) =>
    copy.fromFtqPcBundle(toICachePcBundle(i))
    copy.ftqIdx := ifuPtr
  }
  io.toICache.req.bits.backendException := ExceptionType.hasException(backendException) && backendPcFaultPtr === ifuPtr

  io.toPrefetch.req.valid := toPrefetchEntryToSend && pfPtr =/= bpuPtr
  io.toPrefetch.req.bits.fromFtqPcBundle(toPrefetchPcBundle)
  io.toPrefetch.req.bits.ftqIdx  := pfPtr
  io.toPrefetch.backendException := Mux(backendPcFaultPtr === pfPtr, backendException, ExceptionType.none)
  // io.toICache.req.bits.bypassSelect := last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr
  // io.toICache.req.bits.bpuBypassWrite.zipWithIndex.map{case(bypassWrite, i) =>
  //   bypassWrite.startAddr := bpu_in_bypass_buf.tail(i).startAddr
  //   bypassWrite.nextlineStart := bpu_in_bypass_buf.tail(i).nextLineAddr
  // }

  // TODO: remove this
  XSError(
    io.toIfu.req.valid && diff_entry_next_addr =/= entry_next_addr,
    p"\nifu_req_target wrong! ifuPtr: ${ifuPtr}, entry_next_addr: ${Hexadecimal(entry_next_addr)} diff_entry_next_addr: ${Hexadecimal(diff_entry_next_addr)}\n"
  )

  // when fall through is smaller in value than start address, there must be a false hit
  when(toIfuPcBundle.fallThruError && entry_hit_status(ifuPtr.value) === h_hit) {
    when(io.toIfu.req.fire &&
      !(bpu_s2_redirect && bpu_s2_resp.ftq_idx === ifuPtr) &&
      !(bpu_s3_redirect && bpu_s3_resp.ftq_idx === ifuPtr)) {
      entry_hit_status(ifuPtr.value) := h_false_hit
      // XSError(true.B, "FTB false hit by fallThroughError, startAddr: %x, fallTHru: %x\n", io.toIfu.req.bits.startAddr, io.toIfu.req.bits.nextStartAddr)
    }
  }
  XSDebug(
    toIfuPcBundle.fallThruError && entry_hit_status(ifuPtr.value) === h_hit,
    "fallThruError! start:%x, fallThru:%x\n",
    io.toIfu.req.bits.startAddr,
    io.toIfu.req.bits.nextStartAddr
  )

  XSPerfAccumulate(
    f"fall_through_error_to_ifu",
    toIfuPcBundle.fallThruError && entry_hit_status(ifuPtr.value) === h_hit &&
      io.toIfu.req.fire && !(bpu_s2_redirect && bpu_s2_resp.ftq_idx === ifuPtr) && !(bpu_s3_redirect && bpu_s3_resp.ftq_idx === ifuPtr)
  )

  val ifu_req_should_be_flushed =
    io.toIfu.flushFromBpu.shouldFlushByStage2(io.toIfu.req.bits.ftqIdx) ||
      io.toIfu.flushFromBpu.shouldFlushByStage3(io.toIfu.req.bits.ftqIdx)

  when(io.toIfu.req.fire && !ifu_req_should_be_flushed) {
    entry_fetch_status(ifuPtr.value) := f_sent
  }

  // *********************************************************************
  // **************************** wb from ifu ****************************
  // *********************************************************************
  val pdWb         = io.fromIfu.pdWb
  val pds          = pdWb.bits.pd
  val ifu_wb_valid = pdWb.valid
  val ifu_wb_idx   = pdWb.bits.ftqIdx.value
  // read ports:                                                         commit update
  val ftq_pd_mem =
    Module(new SyncDataModuleTemplate(new Ftq_pd_Entry, FtqSize, FtqRedirectAheadNum + 1, 1, hasRen = true))
  ftq_pd_mem.io.wen(0)   := ifu_wb_valid
  ftq_pd_mem.io.waddr(0) := pdWb.bits.ftqIdx.value
  ftq_pd_mem.io.wdata(0).fromPdWb(pdWb.bits)

  val hit_pd_valid       = entry_hit_status(ifu_wb_idx) === h_hit && ifu_wb_valid
  val hit_pd_mispred     = hit_pd_valid && pdWb.bits.misOffset.valid
  val hit_pd_mispred_reg = RegNext(hit_pd_mispred, init = false.B)
  val pd_reg             = RegEnable(pds, pdWb.valid)
  val start_pc_reg       = RegEnable(pdWb.bits.pc(0), pdWb.valid)
  val wb_idx_reg         = RegEnable(ifu_wb_idx, pdWb.valid)

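  // On predecode writeback, every valid instruction inside the reported instrRange of this
  // FTQ entry is marked c_toCommit; they are flipped to c_committed later, when the
  // corresponding instructions actually commit (not shown in this excerpt).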
  when(ifu_wb_valid) {
    val comm_stq_wen = VecInit(pds.map(_.valid).zip(pdWb.bits.instrRange).map {
      case (v, inRange) => v && inRange
    })
    commitStateQueueEnable(ifu_wb_idx) := true.B
    (commitStateQueueNext(ifu_wb_idx) zip comm_stq_wen).map {
      case (qe, v) => when(v) {
          qe := c_toCommit
        }
    }
  }

  when(ifu_wb_valid) {
    ifuWbPtr_write := ifuWbPtr + 1.U
  }

  XSError(ifu_wb_valid && isAfter(pdWb.bits.ftqIdx, ifuPtr), "IFU returned a predecode before its req, check IFU")

  ftb_entry_mem.io.ren.get.head := ifu_wb_valid
  ftb_entry_mem.io.raddr.head   := ifu_wb_idx
  val has_false_hit = WireInit(false.B)
  when(RegNext(hit_pd_valid)) {
    // check for false hit
    val pred_ftb_entry = ftb_entry_mem.io.rdata.head
    val brSlots        = pred_ftb_entry.brSlots
    val tailSlot       = pred_ftb_entry.tailSlot
    // we check cfis that bpu predicted

    // bpu predicted branches but denied by predecode
    val br_false_hit =
      brSlots.map {
        s => s.valid && !(pd_reg(s.offset).valid && pd_reg(s.offset).isBr)
      }.reduce(_ || _) ||
        (tailSlot.valid && pred_ftb_entry.tailSlot.sharing &&
          !(pd_reg(tailSlot.offset).valid && pd_reg(tailSlot.offset).isBr))

    val jmpOffset = tailSlot.offset
    val jmp_pd    = pd_reg(jmpOffset)
    val jal_false_hit = pred_ftb_entry.jmpValid &&
      ((pred_ftb_entry.isJal && !(jmp_pd.valid && jmp_pd.isJal)) ||
        (pred_ftb_entry.isJalr && !(jmp_pd.valid && jmp_pd.isJalr)) ||
        (pred_ftb_entry.isCall && !(jmp_pd.valid && jmp_pd.isCall)) ||
        (pred_ftb_entry.isRet && !(jmp_pd.valid && jmp_pd.isRet)))

    has_false_hit := br_false_hit || jal_false_hit || hit_pd_mispred_reg
    // assert(!has_false_hit)
  }
  XSDebug(
    RegNext(hit_pd_valid) && has_false_hit,
    "FTB false hit by br or jal or hit_pd, startAddr: %x\n",
    pdWb.bits.pc(0)
  )

  when(has_false_hit) {
    entry_hit_status(wb_idx_reg) := h_false_hit
  }

  // *******************************************************************************
  // **************************** redirect from backend ****************************
  // *******************************************************************************

  // redirect read cfiInfo, couples to redirectGen s2
  // ftqIdxAhead(0-3) => ftq_redirect_mem(1-4), reuse ftq_redirect_mem(1)
  val ftq_redirect_rdata = Wire(Vec(FtqRedirectAheadNum, new Ftq_Redirect_SRAMEntry))
  val ftb_redirect_rdata = Wire(Vec(FtqRedirectAheadNum, new FTBEntry_FtqMem))

  val ftq_pd_rdata = Wire(Vec(FtqRedirectAheadNum, new Ftq_pd_Entry))
  for (i <- 1 until FtqRedirectAheadNum) {
    ftq_redirect_mem.io.ren.get(i + IfuRedirectNum) := ftqIdxAhead(i).valid
    ftq_redirect_mem.io.raddr(i + IfuRedirectNum)   := ftqIdxAhead(i).bits.value
    ftb_entry_mem.io.ren.get(i + IfuRedirectNum)    := ftqIdxAhead(i).valid
    ftb_entry_mem.io.raddr(i + IfuRedirectNum)      := ftqIdxAhead(i).bits.value

    ftq_pd_mem.io.ren.get(i) := ftqIdxAhead(i).valid
    ftq_pd_mem.io.raddr(i)   := ftqIdxAhead(i).bits.value
  }
  ftq_redirect_mem.io.ren.get(IfuRedirectNum) := Mux(aheadValid, ftqIdxAhead(0).valid, backendRedirect.valid)
  ftq_redirect_mem.io.raddr(IfuRedirectNum) := Mux(
    aheadValid,
    ftqIdxAhead(0).bits.value,
    backendRedirect.bits.ftqIdx.value
  )
  ftb_entry_mem.io.ren.get(IfuRedirectNum) := Mux(aheadValid, ftqIdxAhead(0).valid, backendRedirect.valid)
  ftb_entry_mem.io.raddr(IfuRedirectNum) := Mux(
    aheadValid,
    ftqIdxAhead(0).bits.value,
    backendRedirect.bits.ftqIdx.value
  )

  ftq_pd_mem.io.ren.get(0) := Mux(aheadValid, ftqIdxAhead(0).valid, backendRedirect.valid)
  ftq_pd_mem.io.raddr(0)   := Mux(aheadValid, ftqIdxAhead(0).bits.value, backendRedirect.bits.ftqIdx.value)

  for (i <- 0 until FtqRedirectAheadNum) {
    ftq_redirect_rdata(i) := ftq_redirect_mem.io.rdata(i + IfuRedirectNum)
    ftb_redirect_rdata(i) := ftb_entry_mem.io.rdata(i + IfuRedirectNum)

    ftq_pd_rdata(i) := ftq_pd_mem.io.rdata(i)
  }
  val stage3CfiInfo =
    Mux(realAhdValid, Mux1H(ftqIdxSelOH, ftq_redirect_rdata), ftq_redirect_mem.io.rdata(IfuRedirectNum))
  val stage3PdInfo       = Mux(realAhdValid, Mux1H(ftqIdxSelOH, ftq_pd_rdata), ftq_pd_mem.io.rdata(0))
  val backendRedirectCfi = fromBackendRedirect.bits.cfiUpdate
  backendRedirectCfi.fromFtqRedirectSram(stage3CfiInfo)
  backendRedirectCfi.pd := stage3PdInfo.toPd(fromBackendRedirect.bits.ftqOffset)

  val r_ftb_entry = Mux(realAhdValid, Mux1H(ftqIdxSelOH, ftb_redirect_rdata), ftb_entry_mem.io.rdata(IfuRedirectNum))
  val r_ftqOffset = fromBackendRedirect.bits.ftqOffset

  backendRedirectCfi.br_hit := r_ftb_entry.brIsSaved(r_ftqOffset)
  backendRedirectCfi.jr_hit := r_ftb_entry.isJalr && r_ftb_entry.tailSlot.offset === r_ftqOffset
  // FIXME: not portable
  val sc_disagree = stage3CfiInfo.sc_disagree.getOrElse(VecInit(Seq.fill(numBr)(false.B)))
  backendRedirectCfi.sc_hit := backendRedirectCfi.br_hit && Mux(
    r_ftb_entry.brSlots(0).offset === r_ftqOffset,
    sc_disagree(0),
    sc_disagree(1)
  )

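  // Branch-history recovery at a backend redirect, roughly as computed below: if the
  // redirected entry hit in the FTB, `shift` counts the branches that entry records up to
  // the redirected offset (plus the mispredicted branch itself if it is new and can still
  // be inserted), and addIntoHist tells the BPU whether this branch's direction should
  // enter the history; on an FTB miss only the redirected instruction itself is considered.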
  when(entry_hit_status(fromBackendRedirect.bits.ftqIdx.value) === h_hit) {
    backendRedirectCfi.shift := PopCount(r_ftb_entry.getBrMaskByOffset(r_ftqOffset)) +&
      (backendRedirectCfi.pd.isBr && !r_ftb_entry.brIsSaved(r_ftqOffset) &&
        !r_ftb_entry.newBrCanNotInsert(r_ftqOffset))

    backendRedirectCfi.addIntoHist := backendRedirectCfi.pd.isBr && (r_ftb_entry.brIsSaved(r_ftqOffset) ||
      !r_ftb_entry.newBrCanNotInsert(r_ftqOffset))
  }.otherwise {
    backendRedirectCfi.shift       := (backendRedirectCfi.pd.isBr && backendRedirectCfi.taken).asUInt
    backendRedirectCfi.addIntoHist := backendRedirectCfi.pd.isBr.asUInt
  }

  // ***************************************************************************
  // **************************** redirect from ifu ****************************
  // ***************************************************************************
  val fromIfuRedirect = WireInit(0.U.asTypeOf(Valid(new BranchPredictionRedirect)))
  fromIfuRedirect.valid              := pdWb.valid && pdWb.bits.misOffset.valid && !backendFlush
  fromIfuRedirect.bits.ftqIdx        := pdWb.bits.ftqIdx
  fromIfuRedirect.bits.ftqOffset     := pdWb.bits.misOffset.bits
  fromIfuRedirect.bits.level         := RedirectLevel.flushAfter
  fromIfuRedirect.bits.BTBMissBubble := true.B
  fromIfuRedirect.bits.debugIsMemVio := false.B
  fromIfuRedirect.bits.debugIsCtrl   := false.B

  val ifuRedirectCfiUpdate = fromIfuRedirect.bits.cfiUpdate
  ifuRedirectCfiUpdate.pc        := pdWb.bits.pc(pdWb.bits.misOffset.bits)
  ifuRedirectCfiUpdate.pd        := pdWb.bits.pd(pdWb.bits.misOffset.bits)
  ifuRedirectCfiUpdate.predTaken := cfiIndex_vec(pdWb.bits.ftqIdx.value).valid
  ifuRedirectCfiUpdate.target    := pdWb.bits.target
  ifuRedirectCfiUpdate.taken     := pdWb.bits.cfiOffset.valid
  ifuRedirectCfiUpdate.isMisPred := pdWb.bits.misOffset.valid

  val ifuRedirectReg   = RegNextWithEnable(fromIfuRedirect, hasInit = true)
  val ifuRedirectToBpu = WireInit(ifuRedirectReg)
  ifuFlush := fromIfuRedirect.valid || ifuRedirectToBpu.valid

  ftq_redirect_mem.io.ren.get.head := fromIfuRedirect.valid
  ftq_redirect_mem.io.raddr.head   := fromIfuRedirect.bits.ftqIdx.value

  val toBpuCfi = ifuRedirectToBpu.bits.cfiUpdate
  toBpuCfi.fromFtqRedirectSram(ftq_redirect_mem.io.rdata.head)
  when(ifuRedirectReg.bits.cfiUpdate.pd.isRet && ifuRedirectReg.bits.cfiUpdate.pd.valid) {
    toBpuCfi.target := toBpuCfi.topAddr
  }

  when(ifuRedirectReg.valid) {
    ifuRedirected(ifuRedirectReg.bits.ftqIdx.value) := true.B
  }.elsewhen(RegNext(pdWb.valid)) {
    // if there is a pdWb but no redirect, clear the flag
    ifuRedirected(last_cycle_bpu_in_ptr.value) := false.B
  }

  // **********************************************************************
  // ***************************** to backend *****************************
  // **********************************************************************
  // to backend pc mem / target
  io.toBackend.pc_mem_wen   := RegNext(last_cycle_bpu_in)
  io.toBackend.pc_mem_waddr := RegEnable(last_cycle_bpu_in_idx, last_cycle_bpu_in)
  io.toBackend.pc_mem_wdata := RegEnable(bpu_in_bypass_buf_for_ifu, last_cycle_bpu_in)

  // the number of delay cycles is fixed
  val newest_entry_en: Bool = RegNext(last_cycle_bpu_in || backendRedirect.valid || ifuRedirectToBpu.valid)
  io.toBackend.newest_entry_en     := RegNext(newest_entry_en)
  io.toBackend.newest_entry_ptr    := RegEnable(newest_entry_ptr, newest_entry_en)
  io.toBackend.newest_entry_target := RegEnable(newest_entry_target, newest_entry_en)

  // *********************************************************************
  // **************************** wb from exu ****************************
  // *********************************************************************

  backendRedirect.valid := io.fromBackend.redirect.valid
  backendRedirect.bits.connectRedirect(io.fromBackend.redirect.bits)
  backendRedirect.bits.BTBMissBubble := false.B

  def extractRedirectInfo(wb: Valid[Redirect]) = {
    val ftqPtr    = wb.bits.ftqIdx
    val ftqOffset = wb.bits.ftqOffset
    val taken     = wb.bits.cfiUpdate.taken
    val mispred   = wb.bits.cfiUpdate.isMisPred
    (wb.valid, ftqPtr, ftqOffset, taken, mispred)
  }

  // fix mispredict entry
  val lastIsMispredict = RegNext(
    backendRedirect.valid && backendRedirect.bits.level === RedirectLevel.flushAfter,
    init = false.B
  )

1203  def updateCfiInfo(redirect: Valid[Redirect], isBackend: Boolean = true) = {
1204    val (r_valid, r_ptr, r_offset, r_taken, r_mispred) = extractRedirectInfo(redirect)
1205    val r_idx                                          = r_ptr.value
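    // a redirect touches the recorded cfi of its FTQ entry in three cases:
    //   - taken redirect at an earlier offset than the recorded cfi: move cfiIndex to that offset
    //   - redirect at exactly the recorded offset: keep the index, update valid with the taken bit
    //   - not-taken redirect at any other offset: invalidate the recorded cfi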
1206    val cfiIndex_bits_wen                              = r_valid && r_taken && r_offset < cfiIndex_vec(r_idx).bits
1207    val cfiIndex_valid_wen                             = r_valid && r_offset === cfiIndex_vec(r_idx).bits
1208    when(cfiIndex_bits_wen || cfiIndex_valid_wen) {
1209      cfiIndex_vec(r_idx).valid := cfiIndex_bits_wen || cfiIndex_valid_wen && r_taken
1210    }.elsewhen(r_valid && !r_taken && r_offset =/= cfiIndex_vec(r_idx).bits) {
1211      cfiIndex_vec(r_idx).valid := false.B
1212    }
1213    when(cfiIndex_bits_wen) {
1214      cfiIndex_vec(r_idx).bits := r_offset
1215    }
1216    newest_entry_target_modified := true.B
1217    newest_entry_target          := redirect.bits.cfiUpdate.target
1218    newest_entry_ptr_modified    := true.B
1219    newest_entry_ptr             := r_ptr
1220
1221    update_target(r_idx) := redirect.bits.cfiUpdate.target // TODO: remove this
1222    if (isBackend) {
1223      mispredict_vec(r_idx)(r_offset) := r_mispred
1224    }
1225  }
1226
1227  when(fromBackendRedirect.valid) {
1228    updateCfiInfo(fromBackendRedirect)
1229  }.elsewhen(ifuRedirectToBpu.valid) {
1230    updateCfiInfo(ifuRedirectToBpu, isBackend = false)
1231  }
1232
1233  when(fromBackendRedirect.valid) {
1234    when(fromBackendRedirect.bits.ControlRedirectBubble) {
1235      when(fromBackendRedirect.bits.ControlBTBMissBubble) {
1236        topdown_stage.reasons(TopDownCounters.BTBMissBubble.id)                  := true.B
1237        io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.BTBMissBubble.id) := true.B
1238      }.elsewhen(fromBackendRedirect.bits.TAGEMissBubble) {
1239        topdown_stage.reasons(TopDownCounters.TAGEMissBubble.id)                  := true.B
1240        io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.TAGEMissBubble.id) := true.B
1241      }.elsewhen(fromBackendRedirect.bits.SCMissBubble) {
1242        topdown_stage.reasons(TopDownCounters.SCMissBubble.id)                  := true.B
1243        io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.SCMissBubble.id) := true.B
1244      }.elsewhen(fromBackendRedirect.bits.ITTAGEMissBubble) {
1245        topdown_stage.reasons(TopDownCounters.ITTAGEMissBubble.id)                  := true.B
1246        io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.ITTAGEMissBubble.id) := true.B
1247      }.elsewhen(fromBackendRedirect.bits.RASMissBubble) {
1248        topdown_stage.reasons(TopDownCounters.RASMissBubble.id)                  := true.B
1249        io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.RASMissBubble.id) := true.B
1250      }
1251
1252    }.elsewhen(backendRedirect.bits.MemVioRedirectBubble) {
1253      topdown_stage.reasons(TopDownCounters.MemVioRedirectBubble.id)                  := true.B
1254      io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.MemVioRedirectBubble.id) := true.B
1255    }.otherwise {
1256      topdown_stage.reasons(TopDownCounters.OtherRedirectBubble.id)                  := true.B
1257      io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.OtherRedirectBubble.id) := true.B
1258    }
1259  }.elsewhen(ifuRedirectReg.valid) {
1260    topdown_stage.reasons(TopDownCounters.BTBMissBubble.id)                  := true.B
1261    io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.BTBMissBubble.id) := true.B
1262  }
1263
1264  io.ControlBTBMissBubble := fromBackendRedirect.bits.ControlBTBMissBubble
1265  io.TAGEMissBubble       := fromBackendRedirect.bits.TAGEMissBubble
1266  io.SCMissBubble         := fromBackendRedirect.bits.SCMissBubble
1267  io.ITTAGEMissBubble     := fromBackendRedirect.bits.ITTAGEMissBubble
1268  io.RASMissBubble        := fromBackendRedirect.bits.RASMissBubble
1269
1270  // ***********************************************************************************
1271  // **************************** flush ptr and state queue ****************************
1272  // ***********************************************************************************
1273
1274  val redirectVec = VecInit(backendRedirect, fromIfuRedirect)
1275
  // on a redirect, reset the pointers and status queues
1277  io.icacheFlush := redirectVec.map(r => r.valid).reduce(_ || _)
1278  XSPerfAccumulate("icacheFlushFromBackend", backendRedirect.valid)
1279  XSPerfAccumulate("icacheFlushFromIFU", fromIfuRedirect.valid)
1280  when(redirectVec.map(r => r.valid).reduce(_ || _)) {
1281    val r                          = PriorityMux(redirectVec.map(r => r.valid -> r.bits))
1282    val notIfu                     = redirectVec.dropRight(1).map(r => r.valid).reduce(_ || _)
1283    val (idx, offset, flushItSelf) = (r.ftqIdx, r.ftqOffset, RedirectLevel.flushItself(r.level))
1284    val next                       = idx + 1.U
1285    bpuPtr := next
1286    copied_bpu_ptr.map(_ := next)
1287    ifuPtr_write      := next
1288    ifuWbPtr_write    := next
1289    ifuPtrPlus1_write := idx + 2.U
1290    ifuPtrPlus2_write := idx + 3.U
1291    pfPtr_write       := next
1292    pfPtrPlus1_write  := idx + 2.U
1293  }
1294  when(RegNext(redirectVec.map(r => r.valid).reduce(_ || _))) {
1295    val r                          = PriorityMux(redirectVec.map(r => r.valid -> r.bits))
1296    val notIfu                     = redirectVec.dropRight(1).map(r => r.valid).reduce(_ || _)
1297    val (idx, offset, flushItSelf) = (r.ftqIdx, r.ftqOffset, RedirectLevel.flushItself(r.level))
1298    when(RegNext(notIfu)) {
1299      commitStateQueueEnable(RegNext(idx.value)) := true.B
1300      commitStateQueueNext(RegNext(idx.value)).zipWithIndex.foreach { case (s, i) =>
1301        when(i.U > RegNext(offset)) {
1302          s := c_empty
1303        }
1304        when(i.U === RegNext(offset) && RegNext(flushItSelf)) {
1305          s := c_flushed
1306        }
1307      }
1308    }
1309  }
1310
1311  // only the valid bit is actually needed
1312  io.toIfu.redirect.bits    := backendRedirect.bits
1313  io.toIfu.redirect.valid   := stage2Flush
1314  io.toIfu.topdown_redirect := fromBackendRedirect
1315
1316  // commit
1317  for (c <- io.fromBackend.rob_commits) {
1318    when(c.valid) {
1319      commitStateQueueEnable(c.bits.ftqIdx.value)                 := true.B
1320      commitStateQueueNext(c.bits.ftqIdx.value)(c.bits.ftqOffset) := c_committed
1321      // TODO: remove this
1322      // For instruction fusions, we also update the next instruction
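      // as implemented below: commitType 4/5 mark offset+1/offset+2 of the same entry as
      // committed, while 6/7 mark slot 0/1 of the next FTQ entry, presumably because the
      // fused pair straddles an entry boundary in those cases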
1323      when(c.bits.commitType === 4.U) {
1324        commitStateQueueNext(c.bits.ftqIdx.value)(c.bits.ftqOffset + 1.U) := c_committed
1325      }.elsewhen(c.bits.commitType === 5.U) {
1326        commitStateQueueNext(c.bits.ftqIdx.value)(c.bits.ftqOffset + 2.U) := c_committed
1327      }.elsewhen(c.bits.commitType === 6.U) {
1328        val index = (c.bits.ftqIdx + 1.U).value
1329        commitStateQueueEnable(index)  := true.B
1330        commitStateQueueNext(index)(0) := c_committed
1331      }.elsewhen(c.bits.commitType === 7.U) {
1332        val index = (c.bits.ftqIdx + 1.U).value
1333        commitStateQueueEnable(index)  := true.B
1334        commitStateQueueNext(index)(1) := c_committed
1335      }
1336    }
1337  }
1338
1339  // ****************************************************************
1340  // **************************** to bpu ****************************
1341  // ****************************************************************
1342
1343  io.toBpu.redirctFromIFU := ifuRedirectToBpu.valid
1344  io.toBpu.redirect       := Mux(fromBackendRedirect.valid, fromBackendRedirect, ifuRedirectToBpu)
1345  val dummy_s1_pred_cycle_vec = VecInit(List.tabulate(FtqSize)(_ => 0.U(64.W)))
1346  val redirect_latency =
1347    GTimer() - pred_s1_cycle.getOrElse(dummy_s1_pred_cycle_vec)(io.toBpu.redirect.bits.ftqIdx.value) + 1.U
1348  XSPerfHistogram("backend_redirect_latency", redirect_latency, fromBackendRedirect.valid, 0, 60, 1)
1349  XSPerfHistogram(
1350    "ifu_redirect_latency",
1351    redirect_latency,
1352    !fromBackendRedirect.valid && ifuRedirectToBpu.valid,
1353    0,
1354    60,
1355    1
1356  )
1357
1358  XSError(
1359    io.toBpu.redirect.valid && isBefore(io.toBpu.redirect.bits.ftqIdx, commPtr),
1360    "Ftq received a redirect after its commit, check backend or replay"
1361  )
1362
1363  val may_have_stall_from_bpu = Wire(Bool())
1364  val bpu_ftb_update_stall    = RegInit(0.U(2.W)) // 2-cycle stall, so we need 3 states
1365  may_have_stall_from_bpu := bpu_ftb_update_stall =/= 0.U
1366
1367  val validInstructions     = commitStateQueueReg(commPtr.value).map(s => s === c_toCommit || s === c_committed)
1368  val lastInstructionStatus = PriorityMux(validInstructions.reverse.zip(commitStateQueueReg(commPtr.value).reverse))
1369  val firstInstructionFlushed = commitStateQueueReg(commPtr.value)(0) === c_flushed ||
1370    commitStateQueueReg(commPtr.value)(0) === c_empty && commitStateQueueReg(commPtr.value)(1) === c_flushed
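  // an entry can commit once IFU has written it back and the FTB-update stall is over, provided
  // either the ROB commit pointer has already moved past it, or it has at least one valid
  // instruction and the last of them has reached c_committed; canMoveCommPtr additionally
  // allows skipping an entry whose first instruction was flushed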
1371  canCommit := commPtr =/= ifuWbPtr && !may_have_stall_from_bpu &&
1372    (isAfter(robCommPtr, commPtr) ||
1373      validInstructions.reduce(_ || _) && lastInstructionStatus === c_committed)
1374  val canMoveCommPtr = commPtr =/= ifuWbPtr && !may_have_stall_from_bpu &&
1375    (isAfter(robCommPtr, commPtr) ||
1376      validInstructions.reduce(_ || _) && lastInstructionStatus === c_committed ||
1377      firstInstructionFlushed)
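  // robCommPtr tracks the FTQ entry of the newest instruction committed by the ROB and is kept
  // from falling behind commPtr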
1378
1379  when(io.fromBackend.rob_commits.map(_.valid).reduce(_ | _)) {
1380    robCommPtr_write := ParallelPriorityMux(
1381      io.fromBackend.rob_commits.map(_.valid).reverse,
1382      io.fromBackend.rob_commits.map(_.bits.ftqIdx).reverse
1383    )
1384  }.elsewhen(isAfter(commPtr, robCommPtr)) {
1385    robCommPtr_write := commPtr
1386  }.otherwise {
1387    robCommPtr_write := robCommPtr
1388  }
1389
1390  /**
1391    *************************************************************************************
1392    * MMIO instruction fetch is allowed only if MMIO is the oldest instruction.
1393    *************************************************************************************
1394    */
1395  val mmioReadPtr = io.mmioCommitRead.mmioFtqPtr
1396  val mmioLastCommit = isAfter(commPtr, mmioReadPtr) ||
1397    commPtr === mmioReadPtr && validInstructions.reduce(_ || _) && lastInstructionStatus === c_committed
1398  io.mmioCommitRead.mmioLastCommit := RegNext(mmioLastCommit)
1399
1400  // commit reads
1401  val commit_pc_bundle = RegNext(ftq_pc_mem.io.commPtr_rdata)
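  // target of the committing entry: if it is the newest entry, use the recorded
  // newest_entry_target; otherwise the next entry's startAddr is where fetch actually continued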
1402  val commit_target =
1403    Mux(
1404      RegNext(commPtr === newest_entry_ptr),
1405      RegEnable(newest_entry_target, newest_entry_target_modified),
1406      RegNext(ftq_pc_mem.io.commPtrPlus1_rdata.startAddr)
1407    )
1408  ftq_pd_mem.io.ren.get.last := canCommit
1409  ftq_pd_mem.io.raddr.last   := commPtr.value
1410  val commit_pd = ftq_pd_mem.io.rdata.last
1411  ftq_redirect_mem.io.ren.get.last := canCommit
1412  ftq_redirect_mem.io.raddr.last   := commPtr.value
1413  val commit_spec_meta = ftq_redirect_mem.io.rdata.last
1414  ftq_meta_1r_sram.io.ren(0)   := canCommit
1415  ftq_meta_1r_sram.io.raddr(0) := commPtr.value
1416  val commit_meta      = ftq_meta_1r_sram.io.rdata(0).meta
1417  val commit_ftb_entry = ftq_meta_1r_sram.io.rdata(0).ftb_entry
1418
  // one cycle is needed to read the mem and SRAMs
1420  val do_commit_ptr = RegEnable(commPtr, canCommit)
1421  val do_commit     = RegNext(canCommit, init = false.B)
1422  when(canMoveCommPtr) {
1423    commPtr_write      := commPtrPlus1
1424    commPtrPlus1_write := commPtrPlus1 + 1.U
1425  }
1426  val commit_state   = RegEnable(commitStateQueueReg(commPtr.value), canCommit)
1427  val can_commit_cfi = WireInit(cfiIndex_vec(commPtr.value))
1428  val do_commit_cfi  = WireInit(cfiIndex_vec(do_commit_ptr.value))
1429  //
1430  // when (commitStateQueue(commPtr.value)(can_commit_cfi.bits) =/= c_commited) {
1431  //  can_commit_cfi.valid := false.B
1432  // }
1433  val commit_cfi = RegEnable(can_commit_cfi, canCommit)
1434  val debug_cfi  = commitStateQueueReg(do_commit_ptr.value)(do_commit_cfi.bits) =/= c_committed && do_commit_cfi.valid
1435
1436  val commit_mispredict: Vec[Bool] =
1437    VecInit((RegEnable(mispredict_vec(commPtr.value), canCommit) zip commit_state).map {
1438      case (mis, state) => mis && state === c_committed
1439    })
1440  val commit_instCommited: Vec[Bool] = VecInit(commit_state.map(_ === c_committed)) // [PredictWidth]
1441  val can_commit_hit     = entry_hit_status(commPtr.value)
1442  val commit_hit         = RegEnable(can_commit_hit, canCommit)
1443  val diff_commit_target = RegEnable(update_target(commPtr.value), canCommit) // TODO: remove this
1444  val commit_stage       = RegEnable(pred_stage(commPtr.value), canCommit)
1445  val commit_valid       = commit_hit === h_hit || commit_cfi.valid           // hit or taken
1446
1447  val to_bpu_hit = can_commit_hit === h_hit || can_commit_hit === h_false_hit
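  // committing a taken cfi from an entry that did not hit in the FTB triggers a new/updated FTB
  // entry, so further commits are stalled for two cycles (2 -> 1 -> 0), presumably to let that
  // update settle; state 3 should never be reached (see the XSError below)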
1448  switch(bpu_ftb_update_stall) {
1449    is(0.U) {
1450      when(can_commit_cfi.valid && !to_bpu_hit && canCommit) {
1451        bpu_ftb_update_stall := 2.U // 2-cycle stall
1452      }
1453    }
1454    is(2.U) {
1455      bpu_ftb_update_stall := 1.U
1456    }
1457    is(1.U) {
1458      bpu_ftb_update_stall := 0.U
1459    }
1460    is(3.U) {
1461      // XSError below
1462    }
1463  }
1464  XSError(bpu_ftb_update_stall === 3.U, "bpu_ftb_update_stall should be 0, 1 or 2")
1465
1466  // TODO: remove this
1467  XSError(do_commit && diff_commit_target =/= commit_target, "\ncommit target should be the same as update target\n")
1468
1469  // update latency stats
1470  val update_latency = GTimer() - pred_s1_cycle.getOrElse(dummy_s1_pred_cycle_vec)(do_commit_ptr.value) + 1.U
1471  XSPerfHistogram("bpu_update_latency", update_latency, io.toBpu.update.valid, 0, 64, 2)
1472
1473  io.toBpu.update       := DontCare
1474  io.toBpu.update.valid := commit_valid && do_commit
1475  val update = io.toBpu.update.bits
1476  update.false_hit   := commit_hit === h_false_hit
1477  update.pc          := commit_pc_bundle.startAddr
1478  update.meta        := commit_meta
1479  update.cfi_idx     := commit_cfi
1480  update.full_target := commit_target
1481  update.from_stage  := commit_stage
1482  update.spec_info   := commit_spec_meta
  XSError(commit_valid && do_commit && debug_cfi, "\ncommitted cfi should be in c_committed state\n")
1484
1485  val commit_real_hit  = commit_hit === h_hit
1486  val update_ftb_entry = update.ftb_entry
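  // FTBEntryGen merges the committed information (pd, cfi, target, mispredictions) into the old
  // FTB entry to produce the entry and masks used for the BPU update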
1487
1488  val ftbEntryGen = Module(new FTBEntryGen).io
1489  ftbEntryGen.start_addr     := commit_pc_bundle.startAddr
1490  ftbEntryGen.old_entry      := commit_ftb_entry
1491  ftbEntryGen.pd             := commit_pd
1492  ftbEntryGen.cfiIndex       := commit_cfi
1493  ftbEntryGen.target         := commit_target
1494  ftbEntryGen.hit            := commit_real_hit
1495  ftbEntryGen.mispredict_vec := commit_mispredict
1496
1497  update_ftb_entry         := ftbEntryGen.new_entry
1498  update.new_br_insert_pos := ftbEntryGen.new_br_insert_pos
1499  update.mispred_mask      := ftbEntryGen.mispred_mask
1500  update.old_entry         := ftbEntryGen.is_old_entry
1501  update.pred_hit          := commit_hit === h_hit || commit_hit === h_false_hit
1502  update.br_taken_mask     := ftbEntryGen.taken_mask
1503  update.br_committed := (ftbEntryGen.new_entry.brValids zip ftbEntryGen.new_entry.brOffset) map {
1504    case (valid, offset) => valid && commit_instCommited(offset)
1505  }
1506  update.jmp_taken := ftbEntryGen.jmp_taken
1507
1508  // update.full_pred.fromFtbEntry(ftbEntryGen.new_entry, update.pc)
1509  // update.full_pred.jalr_target := commit_target
1510  // update.full_pred.hit := true.B
1511  // when (update.full_pred.is_jalr) {
1512  //   update.full_pred.targets.last := commit_target
1513  // }
1514
1515  // ******************************************************************************
1516  // **************************** commit perf counters ****************************
1517  // ******************************************************************************
1518
1519  val commit_inst_mask        = VecInit(commit_state.map(c => c === c_committed && do_commit)).asUInt
1520  val commit_mispred_mask     = commit_mispredict.asUInt
1521  val commit_not_mispred_mask = ~commit_mispred_mask
1522
1523  val commit_br_mask  = commit_pd.brMask.asUInt
1524  val commit_jmp_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.jmpInfo.valid.asTypeOf(UInt(1.W)))
1525  val commit_cfi_mask = commit_br_mask | commit_jmp_mask
1526
1527  val mbpInstrs = commit_inst_mask & commit_cfi_mask
1528
1529  val mbpRights = mbpInstrs & commit_not_mispred_mask
1530  val mbpWrongs = mbpInstrs & commit_mispred_mask
1531
1532  io.bpuInfo.bpRight := PopCount(mbpRights)
1533  io.bpuInfo.bpWrong := PopCount(mbpWrongs)
1534
1535  val hartId           = p(XSCoreParamsKey).HartId
1536  val isWriteFTQTable  = Constantin.createRecord(s"isWriteFTQTable$hartId")
1537  val ftqBranchTraceDB = ChiselDB.createTable(s"FTQTable$hartId", new FtqDebugBundle)
1538  // Cfi Info
1539  for (i <- 0 until PredictWidth) {
1540    val pc      = commit_pc_bundle.startAddr + (i * instBytes).U
1541    val v       = commit_state(i) === c_committed
1542    val isBr    = commit_pd.brMask(i)
1543    val isJmp   = commit_pd.jmpInfo.valid && commit_pd.jmpOffset === i.U
1544    val isCfi   = isBr || isJmp
1545    val isTaken = commit_cfi.valid && commit_cfi.bits === i.U
1546    val misPred = commit_mispredict(i)
1547    // val ghist = commit_spec_meta.ghist.predHist
1548    val histPtr   = commit_spec_meta.histPtr
1549    val predCycle = commit_meta(63, 0)
1550    val target    = commit_target
1551
1552    val brIdx = OHToUInt(Reverse(Cat(update_ftb_entry.brValids.zip(update_ftb_entry.brOffset).map { case (v, offset) =>
1553      v && offset === i.U
1554    })))
1555    val inFtbEntry = update_ftb_entry.brValids.zip(update_ftb_entry.brOffset).map { case (v, offset) =>
1556      v && offset === i.U
1557    }.reduce(_ || _)
1558    val addIntoHist =
1559      ((commit_hit === h_hit) && inFtbEntry) || (!(commit_hit === h_hit) && i.U === commit_cfi.bits && isBr && commit_cfi.valid)
1560    XSDebug(
1561      v && do_commit && isCfi,
1562      p"cfi_update: isBr(${isBr}) pc(${Hexadecimal(pc)}) " +
1563        p"taken(${isTaken}) mispred(${misPred}) cycle($predCycle) hist(${histPtr.value}) " +
1564        p"startAddr(${Hexadecimal(commit_pc_bundle.startAddr)}) AddIntoHist(${addIntoHist}) " +
1565        p"brInEntry(${inFtbEntry}) brIdx(${brIdx}) target(${Hexadecimal(target)})\n"
1566    )
1567
1568    val logbundle = Wire(new FtqDebugBundle)
1569    logbundle.pc        := pc
1570    logbundle.target    := target
1571    logbundle.isBr      := isBr
1572    logbundle.isJmp     := isJmp
1573    logbundle.isCall    := isJmp && commit_pd.hasCall
1574    logbundle.isRet     := isJmp && commit_pd.hasRet
1575    logbundle.misPred   := misPred
1576    logbundle.isTaken   := isTaken
1577    logbundle.predStage := commit_stage
1578
1579    ftqBranchTraceDB.log(
1580      data = logbundle /* hardware of type T */,
1581      en = isWriteFTQTable.orR && v && do_commit && isCfi,
1582      site = "FTQ" + p(XSCoreParamsKey).HartId.toString,
1583      clock = clock,
1584      reset = reset
1585    )
1586  }
1587
1588  val enq           = io.fromBpu.resp
1589  val perf_redirect = backendRedirect
1590
1591  XSPerfAccumulate("entry", validEntries)
1592  XSPerfAccumulate("bpu_to_ftq_stall", enq.valid && !enq.ready)
1593  XSPerfAccumulate("mispredictRedirect", perf_redirect.valid && RedirectLevel.flushAfter === perf_redirect.bits.level)
1594  XSPerfAccumulate("replayRedirect", perf_redirect.valid && RedirectLevel.flushItself(perf_redirect.bits.level))
1595  XSPerfAccumulate("predecodeRedirect", fromIfuRedirect.valid)
1596
1597  XSPerfAccumulate("to_ifu_bubble", io.toIfu.req.ready && !io.toIfu.req.valid)
1598
1599  XSPerfAccumulate("to_ifu_stall", io.toIfu.req.valid && !io.toIfu.req.ready)
1600  XSPerfAccumulate("from_bpu_real_bubble", !enq.valid && enq.ready && allowBpuIn)
1601  XSPerfAccumulate("bpu_to_ifu_bubble", bpuPtr === ifuPtr)
1602  XSPerfAccumulate(
1603    "bpu_to_ifu_bubble_when_ftq_full",
1604    (bpuPtr === ifuPtr) && isFull(bpuPtr, commPtr) && io.toIfu.req.ready
1605  )
1606
1607  XSPerfAccumulate("redirectAhead_ValidNum", ftqIdxAhead.map(_.valid).reduce(_ | _))
1608  XSPerfAccumulate("fromBackendRedirect_ValidNum", io.fromBackend.redirect.valid)
1609  XSPerfAccumulate("toBpuRedirect_ValidNum", io.toBpu.redirect.valid)
1610
1611  val from_bpu = io.fromBpu.resp.bits
1612  val to_ifu   = io.toIfu.req.bits
1613
1614  XSPerfHistogram("commit_num_inst", PopCount(commit_inst_mask), do_commit, 0, PredictWidth + 1, 1)
1615
1616  val commit_jal_mask  = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasJal.asTypeOf(UInt(1.W)))
1617  val commit_jalr_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasJalr.asTypeOf(UInt(1.W)))
1618  val commit_call_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasCall.asTypeOf(UInt(1.W)))
1619  val commit_ret_mask  = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasRet.asTypeOf(UInt(1.W)))
1620
1621  val mbpBRights = mbpRights & commit_br_mask
1622  val mbpJRights = mbpRights & commit_jal_mask
1623  val mbpIRights = mbpRights & commit_jalr_mask
1624  val mbpCRights = mbpRights & commit_call_mask
1625  val mbpRRights = mbpRights & commit_ret_mask
1626
1627  val mbpBWrongs = mbpWrongs & commit_br_mask
1628  val mbpJWrongs = mbpWrongs & commit_jal_mask
1629  val mbpIWrongs = mbpWrongs & commit_jalr_mask
1630  val mbpCWrongs = mbpWrongs & commit_call_mask
1631  val mbpRWrongs = mbpWrongs & commit_ret_mask
1632
1633  val commit_pred_stage = RegNext(pred_stage(commPtr.value))
1634
1635  def pred_stage_map(src: UInt, name: String) =
1636    (0 until numBpStages).map(i =>
1637      f"${name}_stage_${i + 1}" -> PopCount(src.asBools.map(_ && commit_pred_stage === BP_STAGES(i)))
1638    ).foldLeft(Map[String, UInt]())(_ + _)
1639
1640  val mispred_stage_map      = pred_stage_map(mbpWrongs, "mispredict")
1641  val br_mispred_stage_map   = pred_stage_map(mbpBWrongs, "br_mispredict")
1642  val jalr_mispred_stage_map = pred_stage_map(mbpIWrongs, "jalr_mispredict")
1643  val correct_stage_map      = pred_stage_map(mbpRights, "correct")
1644  val br_correct_stage_map   = pred_stage_map(mbpBRights, "br_correct")
1645  val jalr_correct_stage_map = pred_stage_map(mbpIRights, "jalr_correct")
1646
1647  val update_valid = io.toBpu.update.valid
1648  def u(cond: Bool) = update_valid && cond
1649  val ftb_false_hit = u(update.false_hit)
1650  // assert(!ftb_false_hit)
1651  val ftb_hit = u(commit_hit === h_hit)
1652
1653  val ftb_new_entry                = u(ftbEntryGen.is_init_entry)
1654  val ftb_new_entry_only_br        = ftb_new_entry && !update_ftb_entry.jmpValid
1655  val ftb_new_entry_only_jmp       = ftb_new_entry && !update_ftb_entry.brValids(0)
1656  val ftb_new_entry_has_br_and_jmp = ftb_new_entry && update_ftb_entry.brValids(0) && update_ftb_entry.jmpValid
1657
1658  val ftb_old_entry = u(ftbEntryGen.is_old_entry)
1659
1660  val ftb_modified_entry =
1661    u(ftbEntryGen.is_new_br || ftbEntryGen.is_jalr_target_modified || ftbEntryGen.is_strong_bias_modified)
1662  val ftb_modified_entry_new_br               = u(ftbEntryGen.is_new_br)
1663  val ftb_modified_entry_ifu_redirected       = u(ifuRedirected(do_commit_ptr.value))
1664  val ftb_modified_entry_jalr_target_modified = u(ftbEntryGen.is_jalr_target_modified)
1665  val ftb_modified_entry_br_full              = ftb_modified_entry && ftbEntryGen.is_br_full
1666  val ftb_modified_entry_strong_bias          = ftb_modified_entry && ftbEntryGen.is_strong_bias_modified
1667
1668  def getFtbEntryLen(pc: UInt, entry: FTBEntry) = (entry.getFallThrough(pc) - pc) >> instOffsetBits
1669  val gen_ftb_entry_len = getFtbEntryLen(update.pc, ftbEntryGen.new_entry)
1670  XSPerfHistogram("ftb_init_entry_len", gen_ftb_entry_len, ftb_new_entry, 0, PredictWidth + 1, 1)
1671  XSPerfHistogram("ftb_modified_entry_len", gen_ftb_entry_len, ftb_modified_entry, 0, PredictWidth + 1, 1)
1672  val s3_ftb_entry_len = getFtbEntryLen(from_bpu.s3.pc(0), from_bpu.last_stage_ftb_entry)
1673  XSPerfHistogram("s3_ftb_entry_len", s3_ftb_entry_len, from_bpu.s3.valid(0), 0, PredictWidth + 1, 1)
1674
1675  XSPerfHistogram("ftq_has_entry", validEntries, true.B, 0, FtqSize + 1, 1)
1676
1677  val perfCountsMap = Map(
1678    "BpInstr"                        -> PopCount(mbpInstrs),
1679    "BpBInstr"                       -> PopCount(mbpBRights | mbpBWrongs),
1680    "BpRight"                        -> PopCount(mbpRights),
1681    "BpWrong"                        -> PopCount(mbpWrongs),
1682    "BpBRight"                       -> PopCount(mbpBRights),
1683    "BpBWrong"                       -> PopCount(mbpBWrongs),
1684    "BpJRight"                       -> PopCount(mbpJRights),
1685    "BpJWrong"                       -> PopCount(mbpJWrongs),
1686    "BpIRight"                       -> PopCount(mbpIRights),
1687    "BpIWrong"                       -> PopCount(mbpIWrongs),
1688    "BpCRight"                       -> PopCount(mbpCRights),
1689    "BpCWrong"                       -> PopCount(mbpCWrongs),
1690    "BpRRight"                       -> PopCount(mbpRRights),
1691    "BpRWrong"                       -> PopCount(mbpRWrongs),
1692    "ftb_false_hit"                  -> PopCount(ftb_false_hit),
1693    "ftb_hit"                        -> PopCount(ftb_hit),
1694    "ftb_new_entry"                  -> PopCount(ftb_new_entry),
1695    "ftb_new_entry_only_br"          -> PopCount(ftb_new_entry_only_br),
1696    "ftb_new_entry_only_jmp"         -> PopCount(ftb_new_entry_only_jmp),
1697    "ftb_new_entry_has_br_and_jmp"   -> PopCount(ftb_new_entry_has_br_and_jmp),
1698    "ftb_old_entry"                  -> PopCount(ftb_old_entry),
1699    "ftb_modified_entry"             -> PopCount(ftb_modified_entry),
1700    "ftb_modified_entry_new_br"      -> PopCount(ftb_modified_entry_new_br),
1701    "ftb_jalr_target_modified"       -> PopCount(ftb_modified_entry_jalr_target_modified),
1702    "ftb_modified_entry_br_full"     -> PopCount(ftb_modified_entry_br_full),
1703    "ftb_modified_entry_strong_bias" -> PopCount(ftb_modified_entry_strong_bias)
1704  ) ++ mispred_stage_map ++ br_mispred_stage_map ++ jalr_mispred_stage_map ++
1705    correct_stage_map ++ br_correct_stage_map ++ jalr_correct_stage_map
1706
1707  for ((key, value) <- perfCountsMap) {
1708    XSPerfAccumulate(key, value)
1709  }
1710
1711  // --------------------------- Debug --------------------------------
1712  // XSDebug(enq_fire, p"enq! " + io.fromBpu.resp.bits.toPrintable)
1713  XSDebug(io.toIfu.req.fire, p"fire to ifu " + io.toIfu.req.bits.toPrintable)
1714  XSDebug(do_commit, p"deq! [ptr] $do_commit_ptr\n")
1715  XSDebug(true.B, p"[bpuPtr] $bpuPtr, [ifuPtr] $ifuPtr, [ifuWbPtr] $ifuWbPtr [commPtr] $commPtr\n")
1716  XSDebug(
1717    true.B,
1718    p"[in] v:${io.fromBpu.resp.valid} r:${io.fromBpu.resp.ready} " +
1719      p"[out] v:${io.toIfu.req.valid} r:${io.toIfu.req.ready}\n"
1720  )
1721  XSDebug(do_commit, p"[deq info] cfiIndex: $commit_cfi, $commit_pc_bundle, target: ${Hexadecimal(commit_target)}\n")
1722
1723  //   def ubtbCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1724  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1725  //       case (((valid, pd), ans), taken) =>
1726  //       Mux(valid && pd.isBr,
1727  //         isWrong ^ Mux(ans.hit.asBool,
1728  //           Mux(ans.taken.asBool, taken && ans.target === commitEntry.target,
1729  //           !taken),
1730  //         !taken),
1731  //       false.B)
1732  //     }
1733  //   }
1734
1735  //   def btbCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1736  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1737  //       case (((valid, pd), ans), taken) =>
1738  //       Mux(valid && pd.isBr,
1739  //         isWrong ^ Mux(ans.hit.asBool,
1740  //           Mux(ans.taken.asBool, taken && ans.target === commitEntry.target,
1741  //           !taken),
1742  //         !taken),
1743  //       false.B)
1744  //     }
1745  //   }
1746
1747  //   def tageCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1748  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1749  //       case (((valid, pd), ans), taken) =>
1750  //       Mux(valid && pd.isBr,
1751  //         isWrong ^ (ans.taken.asBool === taken),
1752  //       false.B)
1753  //     }
1754  //   }
1755
1756  //   def loopCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1757  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1758  //       case (((valid, pd), ans), taken) =>
1759  //       Mux(valid && (pd.isBr) && ans.hit.asBool,
1760  //         isWrong ^ (!taken),
1761  //           false.B)
1762  //     }
1763  //   }
1764
1765  //   def rasCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1766  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1767  //       case (((valid, pd), ans), taken) =>
1768  //       Mux(valid && pd.isRet.asBool /*&& taken*/ && ans.hit.asBool,
1769  //         isWrong ^ (ans.target === commitEntry.target),
1770  //           false.B)
1771  //     }
1772  //   }
1773
1774  //   val ubtbRights = ubtbCheck(commitEntry, commitEntry.metas.map(_.ubtbAns), false.B)
1775  //   val ubtbWrongs = ubtbCheck(commitEntry, commitEntry.metas.map(_.ubtbAns), true.B)
1776  //   // btb and ubtb pred jal and jalr as well
1777  //   val btbRights = btbCheck(commitEntry, commitEntry.metas.map(_.btbAns), false.B)
1778  //   val btbWrongs = btbCheck(commitEntry, commitEntry.metas.map(_.btbAns), true.B)
1779  //   val tageRights = tageCheck(commitEntry, commitEntry.metas.map(_.tageAns), false.B)
1780  //   val tageWrongs = tageCheck(commitEntry, commitEntry.metas.map(_.tageAns), true.B)
1781
1782  //   val loopRights = loopCheck(commitEntry, commitEntry.metas.map(_.loopAns), false.B)
1783  //   val loopWrongs = loopCheck(commitEntry, commitEntry.metas.map(_.loopAns), true.B)
1784
1785  //   val rasRights = rasCheck(commitEntry, commitEntry.metas.map(_.rasAns), false.B)
1786  //   val rasWrongs = rasCheck(commitEntry, commitEntry.metas.map(_.rasAns), true.B)
1787
1788  val perfEvents = Seq(
1789    ("bpu_s2_redirect        ", bpu_s2_redirect),
1790    ("bpu_s3_redirect        ", bpu_s3_redirect),
1791    ("bpu_to_ftq_stall       ", enq.valid && ~enq.ready),
1792    ("mispredictRedirect     ", perf_redirect.valid && RedirectLevel.flushAfter === perf_redirect.bits.level),
1793    ("replayRedirect         ", perf_redirect.valid && RedirectLevel.flushItself(perf_redirect.bits.level)),
1794    ("predecodeRedirect      ", fromIfuRedirect.valid),
1795    ("to_ifu_bubble          ", io.toIfu.req.ready && !io.toIfu.req.valid),
1796    ("from_bpu_real_bubble   ", !enq.valid && enq.ready && allowBpuIn),
1797    ("BpInstr                ", PopCount(mbpInstrs)),
1798    ("BpBInstr               ", PopCount(mbpBRights | mbpBWrongs)),
1799    ("BpRight                ", PopCount(mbpRights)),
1800    ("BpWrong                ", PopCount(mbpWrongs)),
1801    ("BpBRight               ", PopCount(mbpBRights)),
1802    ("BpBWrong               ", PopCount(mbpBWrongs)),
1803    ("BpJRight               ", PopCount(mbpJRights)),
1804    ("BpJWrong               ", PopCount(mbpJWrongs)),
1805    ("BpIRight               ", PopCount(mbpIRights)),
1806    ("BpIWrong               ", PopCount(mbpIWrongs)),
1807    ("BpCRight               ", PopCount(mbpCRights)),
1808    ("BpCWrong               ", PopCount(mbpCWrongs)),
1809    ("BpRRight               ", PopCount(mbpRRights)),
1810    ("BpRWrong               ", PopCount(mbpRWrongs)),
1811    ("ftb_false_hit          ", PopCount(ftb_false_hit)),
1812    ("ftb_hit                ", PopCount(ftb_hit))
1813  )
1814  generatePerfEvent()
1815}
1816