xref: /XiangShan/src/main/scala/xiangshan/frontend/NewFtq.scala (revision 92b88f30156d46e844042eea94f7121557fd09a1)
1/***************************************************************************************
2* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
3* Copyright (c) 2020-2021 Peng Cheng Laboratory
4*
5* XiangShan is licensed under Mulan PSL v2.
6* You can use this software according to the terms and conditions of the Mulan PSL v2.
7* You may obtain a copy of Mulan PSL v2 at:
8*          http://license.coscl.org.cn/MulanPSL2
9*
10* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13*
14* See the Mulan PSL v2 for more details.
15***************************************************************************************/
16
17package xiangshan.frontend
18
19import chipsalliance.rocketchip.config.Parameters
20import chisel3._
21import chisel3.util._
22import utils._
23import utility._
24import xiangshan._
25import xiangshan.frontend.icache._
26import xiangshan.backend.decode.ImmUnion
27import utility.ChiselDB
28import xiangshan.backend.ctrlblock.CtrlToFtqIO
29
30class FtqDebugBundle extends Bundle {
31  val pc = UInt(39.W)
32  val target = UInt(39.W)
33  val isBr = Bool()
34  val isJmp = Bool()
35  val isCall = Bool()
36  val isRet = Bool()
37  val misPred = Bool()
38  val isTaken = Bool()
39  val predStage = UInt(2.W)
40}
41
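// FTQ entries are addressed with a circular-queue pointer: `value` indexes the
// entry and `flag` flips on every wrap-around, so two pointers with equal
// values can still be distinguished as full vs. empty when compared.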
42class FtqPtr(entries: Int) extends CircularQueuePtr[FtqPtr](
43  entries
44){
45  def this()(implicit p: Parameters) = this(p(XSCoreParamsKey).FtqSize)
46}
47
48object FtqPtr {
49  def apply(f: Bool, v: UInt)(implicit p: Parameters): FtqPtr = {
50    val ptr = Wire(new FtqPtr)
51    ptr.flag := f
52    ptr.value := v
53    ptr
54  }
55  def inverse(ptr: FtqPtr)(implicit p: Parameters): FtqPtr = {
56    apply(!ptr.flag, ptr.value)
57  }
58}
59
60class FtqNRSRAM[T <: Data](gen: T, numRead: Int)(implicit p: Parameters) extends XSModule {
61
62  val io = IO(new Bundle() {
63    val raddr = Input(Vec(numRead, UInt(log2Up(FtqSize).W)))
64    val ren = Input(Vec(numRead, Bool()))
65    val rdata = Output(Vec(numRead, gen))
66    val waddr = Input(UInt(log2Up(FtqSize).W))
67    val wen = Input(Bool())
68    val wdata = Input(gen)
69  })
70
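  // one 1R1W SRAM per read port: every write is broadcast to all copies, so
  // each copy holds the full contents and the read ports never conflict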
71  for(i <- 0 until numRead){
72    val sram = Module(new SRAMTemplate(gen, FtqSize))
73    sram.io.r.req.valid := io.ren(i)
74    sram.io.r.req.bits.setIdx := io.raddr(i)
75    io.rdata(i) := sram.io.r.resp.data(0)
76    sram.io.w.req.valid := io.wen
77    sram.io.w.req.bits.setIdx := io.waddr
78    sram.io.w.req.bits.data := VecInit(io.wdata)
79  }
80
81}
82
83class Ftq_RF_Components(implicit p: Parameters) extends XSBundle with BPUUtils {
84  val startAddr = UInt(VAddrBits.W)
85  val nextLineAddr = UInt(VAddrBits.W)
86  val isNextMask = Vec(PredictWidth, Bool())
87  val fallThruError = Bool()
88  // val carry = Bool()
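  // getPc reconstructs the PC of the slot at `offset` within this fetch block:
  // the low bits are startAddr's offset field plus `offset`; the high bits come
  // from nextLineAddr instead of startAddr exactly when that addition overflows
  // the offset field (isNextMask(offset) together with startAddr's carry bit).
  // Worked example (illustrative only, assuming the default RVC config with
  // PredictWidth = 16 and 2-byte slots): startAddr = 0x8000001c gives
  // getPc(3.U) = 0x80000022, i.e. startAddr + 3 * 2 bytes.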
89  def getPc(offset: UInt) = {
90    def getHigher(pc: UInt) = pc(VAddrBits-1, log2Ceil(PredictWidth)+instOffsetBits+1)
91    def getOffset(pc: UInt) = pc(log2Ceil(PredictWidth)+instOffsetBits, instOffsetBits)
92    Cat(getHigher(Mux(isNextMask(offset) && startAddr(log2Ceil(PredictWidth)+instOffsetBits), nextLineAddr, startAddr)),
93        getOffset(startAddr)+offset, 0.U(instOffsetBits.W))
94  }
95  def fromBranchPrediction(resp: BranchPredictionBundle) = {
96    def carryPos(addr: UInt) = addr(instOffsetBits+log2Ceil(PredictWidth)+1)
97    this.startAddr := resp.pc
98    this.nextLineAddr := resp.pc + (FetchWidth * 4 * 2).U // may be broken on other configs
99    this.isNextMask := VecInit((0 until PredictWidth).map(i =>
100      (resp.pc(log2Ceil(PredictWidth), 1) +& i.U)(log2Ceil(PredictWidth)).asBool()
101    ))
102    this.fallThruError := resp.fallThruError
103    this
104  }
105  override def toPrintable: Printable = {
106    p"startAddr:${Hexadecimal(startAddr)}"
107  }
108}
109
110class Ftq_pd_Entry(implicit p: Parameters) extends XSBundle {
111  val brMask = Vec(PredictWidth, Bool())
112  val jmpInfo = ValidUndirectioned(Vec(3, Bool()))
113  val jmpOffset = UInt(log2Ceil(PredictWidth).W)
114  val jalTarget = UInt(VAddrBits.W)
115  val rvcMask = Vec(PredictWidth, Bool())
116  def hasJal  = jmpInfo.valid && !jmpInfo.bits(0)
117  def hasJalr = jmpInfo.valid && jmpInfo.bits(0)
118  def hasCall = jmpInfo.valid && jmpInfo.bits(1)
119  def hasRet  = jmpInfo.valid && jmpInfo.bits(2)
120
121  def fromPdWb(pdWb: PredecodeWritebackBundle) = {
122    val pds = pdWb.pd
123    this.brMask := VecInit(pds.map(pd => pd.isBr && pd.valid))
124    this.jmpInfo.valid := VecInit(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid)).asUInt.orR
125    this.jmpInfo.bits := ParallelPriorityMux(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid),
126                                             pds.map(pd => VecInit(pd.isJalr, pd.isCall, pd.isRet)))
127    this.jmpOffset := ParallelPriorityEncoder(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid))
128    this.rvcMask := VecInit(pds.map(pd => pd.isRVC))
129    this.jalTarget := pdWb.jalTarget
130  }
131
132  def toPd(offset: UInt) = {
133    require(offset.getWidth == log2Ceil(PredictWidth))
134    val pd = Wire(new PreDecodeInfo)
135    pd.valid := true.B
136    pd.isRVC := rvcMask(offset)
137    val isBr = brMask(offset)
138    val isJalr = offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(0)
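    // 2-bit brType = Cat(isJmp, isJalr || isBr): 0 = not a CFI, 1 = branch,
    // 2 = jal, 3 = jalr (consistent with the predecode BrType encoding)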
139    pd.brType := Cat(offset === jmpOffset && jmpInfo.valid, isJalr || isBr)
140    pd.isCall := offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(1)
141    pd.isRet  := offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(2)
142    pd
143  }
144}
145
146
147
148class Ftq_Redirect_SRAMEntry(implicit p: Parameters) extends SpeculativeInfo {}
149
150class Ftq_1R_SRAMEntry(implicit p: Parameters) extends XSBundle with HasBPUConst {
151  val meta = UInt(MaxMetaLength.W)
152}
153
154class Ftq_Pred_Info(implicit p: Parameters) extends XSBundle {
155  val target = UInt(VAddrBits.W)
156  val cfiIndex = ValidUndirectioned(UInt(log2Ceil(PredictWidth).W))
157}
158
159
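// Read-port bundle for consumers of FTQ-resident data: the requester drives
// ptr and offset, and samples data with whatever latency the backing storage
// has. Hypothetical usage sketch from a consumer module (illustrative only):
//   val pcRead = IO(new FtqRead(UInt(VAddrBits.W)))
//   val pc     = pcRead(somePtr, someOffset) // wires the request, returns data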
160class FtqRead[T <: Data](private val gen: T)(implicit p: Parameters) extends XSBundle {
161  val ptr = Output(new FtqPtr)
162  val offset = Output(UInt(log2Ceil(PredictWidth).W))
163  val data = Input(gen)
164  def apply(ptr: FtqPtr, offset: UInt) = {
165    this.ptr := ptr
166    this.offset := offset
167    this.data
168  }
169}
170
171
172class FtqToBpuIO(implicit p: Parameters) extends XSBundle {
173  val redirect = Valid(new BranchPredictionRedirect)
174  val update = Valid(new BranchPredictionUpdate)
175  val enq_ptr = Output(new FtqPtr)
176}
177
178class FtqToIfuIO(implicit p: Parameters) extends XSBundle with HasCircularQueuePtrHelper {
179  val req = Decoupled(new FetchRequestBundle)
180  val redirect = Valid(new Redirect)
181  val flushFromBpu = new Bundle {
182    // when the ifu pipeline is not stalled,
183    // a request that bpu s3 needs to flush can have reached at most ifu f1
184    val s2 = Valid(new FtqPtr)
185    val s3 = Valid(new FtqPtr)
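    // an in-flight request must be dropped if its ftqIdx is not before the
    // entry being re-predicted by the later BPU stage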
186    def shouldFlushBy(src: Valid[FtqPtr], idx_to_flush: FtqPtr) = {
187      src.valid && !isAfter(src.bits, idx_to_flush)
188    }
189    def shouldFlushByStage2(idx: FtqPtr) = shouldFlushBy(s2, idx)
190    def shouldFlushByStage3(idx: FtqPtr) = shouldFlushBy(s3, idx)
191  }
192}
193
194class FtqToICacheIO(implicit p: Parameters) extends XSBundle with HasCircularQueuePtrHelper {
195  // NOTE: req.bits must be prepared in cycle T,
196  // while req.valid is asserted in cycle T + 1
197  val req = Decoupled(new FtqToICacheRequestBundle)
198}
199
200trait HasBackendRedirectInfo extends HasXSParameter {
201  def isLoadReplay(r: Valid[Redirect]) = r.bits.flushItself()
202}
203
204class FtqToCtrlIO(implicit p: Parameters) extends XSBundle with HasBackendRedirectInfo {
205  // write to backend pc mem
206  val pc_mem_wen = Output(Bool())
207  val pc_mem_waddr = Output(UInt(log2Ceil(FtqSize).W))
208  val pc_mem_wdata = Output(new Ftq_RF_Components)
209  // newest target
210  val newest_entry_target = Output(UInt(VAddrBits.W))
211  val newest_entry_ptr = Output(new FtqPtr)
212}
213
214
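// Builds the FTB entry carried by the BPU update generated at commit time. It
// either constructs a fresh entry (when the original prediction missed in the
// FTB), inserts a newly discovered branch into the old entry, updates a changed
// jalr target, or merely clears always_taken bits; the perf outputs indicate
// which of these cases applied.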
215class FTBEntryGen(implicit p: Parameters) extends XSModule with HasBackendRedirectInfo with HasBPUParameter {
216  val io = IO(new Bundle {
217    val start_addr = Input(UInt(VAddrBits.W))
218    val old_entry = Input(new FTBEntry)
219    val pd = Input(new Ftq_pd_Entry)
220    val cfiIndex = Flipped(Valid(UInt(log2Ceil(PredictWidth).W)))
221    val target = Input(UInt(VAddrBits.W))
222    val hit = Input(Bool())
223    val mispredict_vec = Input(Vec(PredictWidth, Bool()))
224
225    val new_entry = Output(new FTBEntry)
226    val new_br_insert_pos = Output(Vec(numBr, Bool()))
227    val taken_mask = Output(Vec(numBr, Bool()))
228    val jmp_taken = Output(Bool())
229    val mispred_mask = Output(Vec(numBr+1, Bool()))
230
231    // for perf counters
232    val is_init_entry = Output(Bool())
233    val is_old_entry = Output(Bool())
234    val is_new_br = Output(Bool())
235    val is_jalr_target_modified = Output(Bool())
236    val is_always_taken_modified = Output(Bool())
237    val is_br_full = Output(Bool())
238  })
239
240  // no mispredictions detected at predecode
241  val hit = io.hit
242  val pd = io.pd
243
244  val init_entry = WireInit(0.U.asTypeOf(new FTBEntry))
245
246
247  val cfi_is_br = pd.brMask(io.cfiIndex.bits) && io.cfiIndex.valid
248  val entry_has_jmp = pd.jmpInfo.valid
249  val new_jmp_is_jal  = entry_has_jmp && !pd.jmpInfo.bits(0) && io.cfiIndex.valid
250  val new_jmp_is_jalr = entry_has_jmp &&  pd.jmpInfo.bits(0) && io.cfiIndex.valid
251  val new_jmp_is_call = entry_has_jmp &&  pd.jmpInfo.bits(1) && io.cfiIndex.valid
252  val new_jmp_is_ret  = entry_has_jmp &&  pd.jmpInfo.bits(2) && io.cfiIndex.valid
253  val last_jmp_rvi = entry_has_jmp && pd.jmpOffset === (PredictWidth-1).U && !pd.rvcMask.last
254  // val last_br_rvi = cfi_is_br && io.cfiIndex.bits === (PredictWidth-1).U && !pd.rvcMask.last
255
256  val cfi_is_jal = io.cfiIndex.bits === pd.jmpOffset && new_jmp_is_jal
257  val cfi_is_jalr = io.cfiIndex.bits === pd.jmpOffset && new_jmp_is_jalr
258
259  def carryPos = log2Ceil(PredictWidth)+instOffsetBits
260  def getLower(pc: UInt) = pc(carryPos-1, instOffsetBits)
261  // if not hit, establish a new entry
262  init_entry.valid := true.B
263  // tag is left for ftb to assign
264
265  // case br
266  val init_br_slot = init_entry.getSlotForBr(0)
267  when (cfi_is_br) {
268    init_br_slot.valid := true.B
269    init_br_slot.offset := io.cfiIndex.bits
270    init_br_slot.setLowerStatByTarget(io.start_addr, io.target, numBr == 1)
271    init_entry.always_taken(0) := true.B // set to always taken on init
272  }
273
274  // case jmp
275  when (entry_has_jmp) {
276    init_entry.tailSlot.offset := pd.jmpOffset
277    init_entry.tailSlot.valid := new_jmp_is_jal || new_jmp_is_jalr
278    init_entry.tailSlot.setLowerStatByTarget(io.start_addr, Mux(cfi_is_jalr, io.target, pd.jalTarget), isShare=false)
279  }
280
281  val jmpPft = getLower(io.start_addr) +& pd.jmpOffset +& Mux(pd.rvcMask(pd.jmpOffset), 1.U, 2.U)
282  init_entry.pftAddr := Mux(entry_has_jmp && !last_jmp_rvi, jmpPft, getLower(io.start_addr))
283  init_entry.carry   := Mux(entry_has_jmp && !last_jmp_rvi, jmpPft(carryPos-instOffsetBits), true.B)
284  init_entry.isJalr := new_jmp_is_jalr
285  init_entry.isCall := new_jmp_is_call
286  init_entry.isRet  := new_jmp_is_ret
287  // if the last slot holds an RVI instruction, the fall-through address points into the middle of it
288  init_entry.last_may_be_rvi_call := pd.jmpOffset === (PredictWidth-1).U && !pd.rvcMask(pd.jmpOffset)
289
290  // if hit, check whether a new cfi(only br is possible) is detected
291  val oe = io.old_entry
292  val br_recorded_vec = oe.getBrRecordedVec(io.cfiIndex.bits)
293  val br_recorded = br_recorded_vec.asUInt.orR
294  val is_new_br = cfi_is_br && !br_recorded
295  val new_br_offset = io.cfiIndex.bits
296  // vec(i) means new br will be inserted BEFORE old br(i)
297  val allBrSlotsVec = oe.allSlotsForBr
298  val new_br_insert_onehot = VecInit((0 until numBr).map{
299    i => i match {
300      case 0 =>
301        !allBrSlotsVec(0).valid || new_br_offset < allBrSlotsVec(0).offset
302      case idx =>
303        allBrSlotsVec(idx-1).valid && new_br_offset > allBrSlotsVec(idx-1).offset &&
304        (!allBrSlotsVec(idx).valid || new_br_offset < allBrSlotsVec(idx).offset)
305    }
306  })
307
308  val old_entry_modified = WireInit(io.old_entry)
309  for (i <- 0 until numBr) {
310    val slot = old_entry_modified.allSlotsForBr(i)
311    when (new_br_insert_onehot(i)) {
312      slot.valid := true.B
313      slot.offset := new_br_offset
314      slot.setLowerStatByTarget(io.start_addr, io.target, i == numBr-1)
315      old_entry_modified.always_taken(i) := true.B
316    }.elsewhen (new_br_offset > oe.allSlotsForBr(i).offset) {
317      old_entry_modified.always_taken(i) := false.B
318      // all other fields remain unchanged
319    }.otherwise {
320      // case i == 0, remain unchanged
321      if (i != 0) {
322        val noNeedToMoveFromFormerSlot = (i == numBr-1).B && !oe.brSlots.last.valid
323        when (!noNeedToMoveFromFormerSlot) {
324          slot.fromAnotherSlot(oe.allSlotsForBr(i-1))
325          old_entry_modified.always_taken(i) := oe.always_taken(i)
326        }
327      }
328    }
329  }
330
331  // two circumstances:
332  // 1. oe: | br | j  |, new br should be in front of j, thus addr of j should be new pft
333  // 2. oe: | br | br |, new br could be anywhere between, thus new pft is the addr of either
334  //        the previous last br or the new br
335  val may_have_to_replace = oe.noEmptySlotForNewBr
336  val pft_need_to_change = is_new_br && may_have_to_replace
337  // it should either be the given last br or the new br
338  when (pft_need_to_change) {
339    val new_pft_offset =
340      Mux(!new_br_insert_onehot.asUInt.orR,
341        new_br_offset, oe.allSlotsForBr.last.offset)
342
343    // set jmp to invalid
344    old_entry_modified.pftAddr := getLower(io.start_addr) + new_pft_offset
345    old_entry_modified.carry := (getLower(io.start_addr) +& new_pft_offset).head(1).asBool
346    old_entry_modified.last_may_be_rvi_call := false.B
347    old_entry_modified.isCall := false.B
348    old_entry_modified.isRet := false.B
349    old_entry_modified.isJalr := false.B
350  }
351
352  val old_entry_jmp_target_modified = WireInit(oe)
353  val old_target = oe.tailSlot.getTarget(io.start_addr) // may be wrong because we store only 20 lowest bits
354  val old_tail_is_jmp = !oe.tailSlot.sharing
355  val jalr_target_modified = cfi_is_jalr && (old_target =/= io.target) && old_tail_is_jmp // TODO: pass full jalr target
356  when (jalr_target_modified) {
357    old_entry_jmp_target_modified.setByJmpTarget(io.start_addr, io.target)
358    old_entry_jmp_target_modified.always_taken := 0.U.asTypeOf(Vec(numBr, Bool()))
359  }
360
361  val old_entry_always_taken = WireInit(oe)
362  val always_taken_modified_vec = Wire(Vec(numBr, Bool())) // whether modified or not
363  for (i <- 0 until numBr) {
364    old_entry_always_taken.always_taken(i) :=
365      oe.always_taken(i) && io.cfiIndex.valid && oe.brValids(i) && io.cfiIndex.bits === oe.brOffset(i)
366    always_taken_modified_vec(i) := oe.always_taken(i) && !old_entry_always_taken.always_taken(i)
367  }
368  val always_taken_modified = always_taken_modified_vec.reduce(_||_)
369
370
371
372  val derived_from_old_entry =
373    Mux(is_new_br, old_entry_modified,
374      Mux(jalr_target_modified, old_entry_jmp_target_modified, old_entry_always_taken))
375
376
377  io.new_entry := Mux(!hit, init_entry, derived_from_old_entry)
378
379  io.new_br_insert_pos := new_br_insert_onehot
380  io.taken_mask := VecInit((io.new_entry.brOffset zip io.new_entry.brValids).map{
381    case (off, v) => io.cfiIndex.bits === off && io.cfiIndex.valid && v
382  })
383  io.jmp_taken := io.new_entry.jmpValid && io.new_entry.tailSlot.offset === io.cfiIndex.bits
384  for (i <- 0 until numBr) {
385    io.mispred_mask(i) := io.new_entry.brValids(i) && io.mispredict_vec(io.new_entry.brOffset(i))
386  }
387  io.mispred_mask.last := io.new_entry.jmpValid && io.mispredict_vec(pd.jmpOffset)
388
389  // for perf counters
390  io.is_init_entry := !hit
391  io.is_old_entry := hit && !is_new_br && !jalr_target_modified && !always_taken_modified
392  io.is_new_br := hit && is_new_br
393  io.is_jalr_target_modified := hit && jalr_target_modified
394  io.is_always_taken_modified := hit && always_taken_modified
395  io.is_br_full := hit && is_new_br && may_have_to_replace
396}
397
398class FtqPcMemWrapper(numOtherReads: Int)(implicit p: Parameters) extends XSModule with HasBackendRedirectInfo {
399  val io = IO(new Bundle {
400    val ifuPtr_w       = Input(new FtqPtr)
401    val ifuPtrPlus1_w  = Input(new FtqPtr)
402    val ifuPtrPlus2_w  = Input(new FtqPtr)
403    val commPtr_w      = Input(new FtqPtr)
404    val commPtrPlus1_w = Input(new FtqPtr)
405    val ifuPtr_rdata       = Output(new Ftq_RF_Components)
406    val ifuPtrPlus1_rdata  = Output(new Ftq_RF_Components)
407    val ifuPtrPlus2_rdata  = Output(new Ftq_RF_Components)
408    val commPtr_rdata      = Output(new Ftq_RF_Components)
409    val commPtrPlus1_rdata = Output(new Ftq_RF_Components)
410
411    val other_raddrs = Input(Vec(numOtherReads, UInt(log2Ceil(FtqSize).W)))
412    val other_rdatas = Output(Vec(numOtherReads, new Ftq_RF_Components))
413
414    val wen = Input(Bool())
415    val waddr = Input(UInt(log2Ceil(FtqSize).W))
416    val wdata = Input(new Ftq_RF_Components)
417  })
418
419  val num_pc_read = numOtherReads + 5
420  val mem = Module(new SyncDataModuleTemplate(new Ftq_RF_Components, FtqSize,
421    num_pc_read, 1, "FtqPC"))
422  mem.io.wen(0)   := io.wen
423  mem.io.waddr(0) := io.waddr
424  mem.io.wdata(0) := io.wdata
425
426  // read one cycle ahead for ftq local reads
427  val raddr_vec = VecInit(io.other_raddrs ++
428    Seq(io.ifuPtr_w.value, io.ifuPtrPlus1_w.value, io.ifuPtrPlus2_w.value, io.commPtrPlus1_w.value, io.commPtr_w.value))
429
430  mem.io.raddr := raddr_vec
431
432  io.other_rdatas       := mem.io.rdata.dropRight(5)
433  io.ifuPtr_rdata       := mem.io.rdata.dropRight(4).last
434  io.ifuPtrPlus1_rdata  := mem.io.rdata.dropRight(3).last
435  io.ifuPtrPlus2_rdata  := mem.io.rdata.dropRight(2).last
436  io.commPtrPlus1_rdata := mem.io.rdata.dropRight(1).last
437  io.commPtr_rdata      := mem.io.rdata.last
438}
439
440class Ftq(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelper
441  with HasBackendRedirectInfo with BPUUtils with HasBPUConst with HasPerfEvents
442  with HasICacheParameters{
443  val io = IO(new Bundle {
444    val fromBpu = Flipped(new BpuToFtqIO)
445    val fromIfu = Flipped(new IfuToFtqIO)
446    val fromBackend = Flipped(new CtrlToFtqIO)
447
448    val toBpu = new FtqToBpuIO
449    val toIfu = new FtqToIfuIO
450    val toICache = new FtqToICacheIO
451    val toBackend = new FtqToCtrlIO
452
453    val toPrefetch = new FtqPrefechBundle
454
455    val bpuInfo = new Bundle {
456      val bpRight = Output(UInt(XLEN.W))
457      val bpWrong = Output(UInt(XLEN.W))
458    }
459
460    val mmioCommitRead = Flipped(new mmioCommitRead)
461  })
462  io.bpuInfo := DontCare
463
464  val backendRedirect = Wire(Valid(new Redirect))
465  val backendRedirectReg = RegNext(backendRedirect)
466
467  val stage2Flush = backendRedirect.valid
468  val backendFlush = stage2Flush || RegNext(stage2Flush)
469  val ifuFlush = Wire(Bool())
470
471  val flush = stage2Flush || RegNext(stage2Flush)
472
473  val allowBpuIn, allowToIfu = WireInit(false.B)
474  val flushToIfu = !allowToIfu
475  allowBpuIn := !ifuFlush && !backendRedirect.valid && !backendRedirectReg.valid
476  allowToIfu := !ifuFlush && !backendRedirect.valid && !backendRedirectReg.valid
477
478  def copyNum = 5
479  val bpuPtr, ifuPtr, ifuWbPtr, commPtr = RegInit(FtqPtr(false.B, 0.U))
480  val ifuPtrPlus1 = RegInit(FtqPtr(false.B, 1.U))
481  val ifuPtrPlus2 = RegInit(FtqPtr(false.B, 2.U))
482  val commPtrPlus1 = RegInit(FtqPtr(false.B, 1.U))
483  val copied_ifu_ptr = Seq.fill(copyNum)(RegInit(FtqPtr(false.B, 0.U)))
484  val copied_bpu_ptr = Seq.fill(copyNum)(RegInit(FtqPtr(false.B, 0.U)))
485  require(FtqSize >= 4)
486  val ifuPtr_write       = WireInit(ifuPtr)
487  val ifuPtrPlus1_write  = WireInit(ifuPtrPlus1)
488  val ifuPtrPlus2_write  = WireInit(ifuPtrPlus2)
489  val ifuWbPtr_write     = WireInit(ifuWbPtr)
490  val commPtr_write      = WireInit(commPtr)
491  val commPtrPlus1_write = WireInit(commPtrPlus1)
492  ifuPtr       := ifuPtr_write
493  ifuPtrPlus1  := ifuPtrPlus1_write
494  ifuPtrPlus2  := ifuPtrPlus2_write
495  ifuWbPtr     := ifuWbPtr_write
496  commPtr      := commPtr_write
497  commPtrPlus1 := commPtrPlus1_write
498  copied_ifu_ptr.map{ptr =>
499    ptr := ifuPtr_write
500    dontTouch(ptr)
501  }
502  val validEntries = distanceBetween(bpuPtr, commPtr)
503  val canCommit = Wire(Bool())
504
505  // **********************************************************************
506  // **************************** enq from bpu ****************************
507  // **********************************************************************
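  // accept a new prediction when the queue has a free entry, or when commit
  // will free one in the same cycle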
508  val new_entry_ready = validEntries < FtqSize.U || canCommit
509  io.fromBpu.resp.ready := new_entry_ready
510
511  val bpu_s2_resp = io.fromBpu.resp.bits.s2
512  val bpu_s3_resp = io.fromBpu.resp.bits.s3
513  val bpu_s2_redirect = bpu_s2_resp.valid && bpu_s2_resp.hasRedirect
514  val bpu_s3_redirect = bpu_s3_resp.valid && bpu_s3_resp.hasRedirect
515
516  io.toBpu.enq_ptr := bpuPtr
517  val enq_fire = io.fromBpu.resp.fire() && allowBpuIn // from bpu s1
518  val bpu_in_fire = (io.fromBpu.resp.fire() || bpu_s2_redirect || bpu_s3_redirect) && allowBpuIn
519
520  val bpu_in_resp = io.fromBpu.resp.bits.selectedResp
521  val bpu_in_stage = io.fromBpu.resp.bits.selectedRespIdx
522  val bpu_in_resp_ptr = Mux(bpu_in_stage === BP_S1, bpuPtr, bpu_in_resp.ftq_idx)
523  val bpu_in_resp_idx = bpu_in_resp_ptr.value
524
525  // read ports:      prefetchReq ++  ifuReq1 + ifuReq2 + ifuReq3 + commitUpdate2 + commitUpdate
526  val ftq_pc_mem = Module(new FtqPcMemWrapper(1))
527  // resp from uBTB
528  ftq_pc_mem.io.wen := bpu_in_fire
529  ftq_pc_mem.io.waddr := bpu_in_resp_idx
530  ftq_pc_mem.io.wdata.fromBranchPrediction(bpu_in_resp)
531
532  //                                                            ifuRedirect + backendRedirect + commit
533  val ftq_redirect_sram = Module(new FtqNRSRAM(new Ftq_Redirect_SRAMEntry, 1+1+1))
534  // this info is intended to be enqueued at the last stage of bpu
535  ftq_redirect_sram.io.wen := io.fromBpu.resp.bits.lastStage.valid
536  ftq_redirect_sram.io.waddr := io.fromBpu.resp.bits.lastStage.ftq_idx.value
537  ftq_redirect_sram.io.wdata := io.fromBpu.resp.bits.last_stage_spec_info
538  println(f"ftq redirect SRAM: entry ${ftq_redirect_sram.io.wdata.getWidth} * ${FtqSize} * 3")
539  println(f"ftq redirect SRAM: ahead fh ${ftq_redirect_sram.io.wdata.afhob.getWidth} * ${FtqSize} * 3")
540
541  val ftq_meta_1r_sram = Module(new FtqNRSRAM(new Ftq_1R_SRAMEntry, 1))
542  // this info is intended to be enqueued at the last stage of bpu
543  ftq_meta_1r_sram.io.wen := io.fromBpu.resp.bits.lastStage.valid
544  ftq_meta_1r_sram.io.waddr := io.fromBpu.resp.bits.lastStage.ftq_idx.value
545  ftq_meta_1r_sram.io.wdata.meta := io.fromBpu.resp.bits.last_stage_meta
546  //                                                            ifuRedirect + backendRedirect + commit
547  val ftb_entry_mem = Module(new SyncDataModuleTemplate(new FTBEntry, FtqSize, 1+1+1, 1))
548  ftb_entry_mem.io.wen(0) := io.fromBpu.resp.bits.lastStage.valid
549  ftb_entry_mem.io.waddr(0) := io.fromBpu.resp.bits.lastStage.ftq_idx.value
550  ftb_entry_mem.io.wdata(0) := io.fromBpu.resp.bits.last_stage_ftb_entry
551
552
553  // multi-write
554  val update_target = Reg(Vec(FtqSize, UInt(VAddrBits.W))) // could be taken target or fallThrough //TODO: remove this
555  val newest_entry_target = Reg(UInt(VAddrBits.W))
556  val newest_entry_ptr = Reg(new FtqPtr)
557  val cfiIndex_vec = Reg(Vec(FtqSize, ValidUndirectioned(UInt(log2Ceil(PredictWidth).W))))
558  val mispredict_vec = Reg(Vec(FtqSize, Vec(PredictWidth, Bool())))
559  val pred_stage = Reg(Vec(FtqSize, UInt(2.W)))
560
561  val c_invalid :: c_valid :: c_commited :: Nil = Enum(3)
562  val commitStateQueue = RegInit(VecInit(Seq.fill(FtqSize) {
563    VecInit(Seq.fill(PredictWidth)(c_invalid))
564  }))
565
566  val f_to_send :: f_sent :: Nil = Enum(2)
567  val entry_fetch_status = RegInit(VecInit(Seq.fill(FtqSize)(f_sent)))
568
569  val h_not_hit :: h_false_hit :: h_hit :: Nil = Enum(3)
570  val entry_hit_status = RegInit(VecInit(Seq.fill(FtqSize)(h_not_hit)))
571
572  // modify registers one cycle later to cut critical path
573  val last_cycle_bpu_in = RegNext(bpu_in_fire)
574  val last_cycle_bpu_in_ptr = RegNext(bpu_in_resp_ptr)
575  val last_cycle_bpu_in_idx = last_cycle_bpu_in_ptr.value
576  val last_cycle_bpu_target = RegNext(bpu_in_resp.getTarget)
577  val last_cycle_cfiIndex = RegNext(bpu_in_resp.cfiIndex)
578  val last_cycle_bpu_in_stage = RegNext(bpu_in_stage)
579
580  def extra_copyNum_for_commitStateQueue = 2
581  val copied_last_cycle_bpu_in = VecInit(Seq.fill(copyNum+extra_copyNum_for_commitStateQueue)(RegNext(bpu_in_fire)))
582  val copied_last_cycle_bpu_in_ptr_for_ftq = VecInit(Seq.fill(extra_copyNum_for_commitStateQueue)(RegNext(bpu_in_resp_ptr)))
583
584  when (last_cycle_bpu_in) {
585    entry_fetch_status(last_cycle_bpu_in_idx) := f_to_send
586    cfiIndex_vec(last_cycle_bpu_in_idx) := last_cycle_cfiIndex
587    pred_stage(last_cycle_bpu_in_idx) := last_cycle_bpu_in_stage
588
589    update_target(last_cycle_bpu_in_idx) := last_cycle_bpu_target // TODO: remove this
590    newest_entry_target := last_cycle_bpu_target
591    newest_entry_ptr := last_cycle_bpu_in_ptr
592  }
593
594  // reduce fanout by delaying the write for a cycle
595  when (RegNext(last_cycle_bpu_in)) {
596    mispredict_vec(RegNext(last_cycle_bpu_in_idx)) := WireInit(VecInit(Seq.fill(PredictWidth)(false.B)))
597  }
598
599  // reduce fanout using copied last_cycle_bpu_in and copied last_cycle_bpu_in_ptr
600  val copied_last_cycle_bpu_in_for_ftq = copied_last_cycle_bpu_in.takeRight(extra_copyNum_for_commitStateQueue)
601  copied_last_cycle_bpu_in_for_ftq.zip(copied_last_cycle_bpu_in_ptr_for_ftq).zipWithIndex.map {
602    case ((in, ptr), i) =>
603      when (in) {
604        val perSetEntries = FtqSize / extra_copyNum_for_commitStateQueue // 32
605        require(FtqSize % extra_copyNum_for_commitStateQueue == 0)
606        for (j <- 0 until perSetEntries) {
607          when (ptr.value === (i*perSetEntries+j).U) {
608            commitStateQueue(i*perSetEntries+j) := VecInit(Seq.fill(PredictWidth)(c_invalid))
609          }
610        }
611      }
612  }
613
614  // latency to the backend is a fixed number of cycles
615  io.toBackend.newest_entry_ptr := RegNext(newest_entry_ptr)
616  io.toBackend.newest_entry_target := RegNext(newest_entry_target)
617
618
619  bpuPtr := bpuPtr + enq_fire
620  copied_bpu_ptr.map(_ := bpuPtr + enq_fire)
621  when (io.toIfu.req.fire && allowToIfu) {
622    ifuPtr_write := ifuPtrPlus1
623    ifuPtrPlus1_write := ifuPtrPlus2
624    ifuPtrPlus2_write := ifuPtrPlus2 + 1.U
625  }
626
627  // only use ftb result to assign hit status
628  when (bpu_s2_resp.valid) {
629    entry_hit_status(bpu_s2_resp.ftq_idx.value) := Mux(bpu_s2_resp.full_pred.hit, h_hit, h_not_hit)
630  }
631
632
633  io.toIfu.flushFromBpu.s2.valid := bpu_s2_redirect
634  io.toIfu.flushFromBpu.s2.bits := bpu_s2_resp.ftq_idx
635  when (bpu_s2_resp.valid && bpu_s2_resp.hasRedirect) {
636    bpuPtr := bpu_s2_resp.ftq_idx + 1.U
637    copied_bpu_ptr.map(_ := bpu_s2_resp.ftq_idx + 1.U)
638    // only when ifuPtr runs ahead of bpu s2 resp should we recover it
639    when (!isBefore(ifuPtr, bpu_s2_resp.ftq_idx)) {
640      ifuPtr_write := bpu_s2_resp.ftq_idx
641      ifuPtrPlus1_write := bpu_s2_resp.ftq_idx + 1.U
642      ifuPtrPlus2_write := bpu_s2_resp.ftq_idx + 2.U
643    }
644  }
645
646  io.toIfu.flushFromBpu.s3.valid := bpu_s3_redirect
647  io.toIfu.flushFromBpu.s3.bits := bpu_s3_resp.ftq_idx
648  when (bpu_s3_resp.valid && bpu_s3_resp.hasRedirect) {
649    bpuPtr := bpu_s3_resp.ftq_idx + 1.U
650    copied_bpu_ptr.map(_ := bpu_s3_resp.ftq_idx + 1.U)
651    // only when ifuPtr runs ahead of bpu s3 resp should we recover it
652    when (!isBefore(ifuPtr, bpu_s3_resp.ftq_idx)) {
653      ifuPtr_write := bpu_s3_resp.ftq_idx
654      ifuPtrPlus1_write := bpu_s3_resp.ftq_idx + 1.U
655      ifuPtrPlus2_write := bpu_s3_resp.ftq_idx + 2.U
656    }
657  }
658
659  XSError(isBefore(bpuPtr, ifuPtr) && !isFull(bpuPtr, ifuPtr), "\nifuPtr is before bpuPtr!\n")
660  XSError(isBefore(ifuWbPtr, commPtr) && !isFull(ifuWbPtr, commPtr), "\ncommPtr is before ifuWbPtr!\n")
661
662  (0 until copyNum).map{i =>
663    XSError(copied_bpu_ptr(i) =/= bpuPtr, "\ncopiedBpuPtr is different from bpuPtr!\n")
664  }
665
666  // ****************************************************************
667  // **************************** to ifu ****************************
668  // ****************************************************************
669  // 0  for ifu, and 1-4 for ICache
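  // bypass path: when BPU wrote a new entry last cycle and that very entry is
  // the one IFU is about to fetch, the synchronous pc-mem read would be one
  // cycle too late, so the freshly written data is captured and forwarded here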
670  val bpu_in_bypass_buf = RegEnable(ftq_pc_mem.io.wdata, enable=bpu_in_fire)
671  val copied_bpu_in_bypass_buf = VecInit(Seq.fill(copyNum)(RegEnable(ftq_pc_mem.io.wdata, enable=bpu_in_fire)))
672  val bpu_in_bypass_buf_for_ifu = bpu_in_bypass_buf
673  val bpu_in_bypass_ptr = RegNext(bpu_in_resp_ptr)
674  val last_cycle_to_ifu_fire = RegNext(io.toIfu.req.fire)
675
676  val copied_bpu_in_bypass_ptr = VecInit(Seq.fill(copyNum)(RegNext(bpu_in_resp_ptr)))
677  val copied_last_cycle_to_ifu_fire = VecInit(Seq.fill(copyNum)(RegNext(io.toIfu.req.fire)))
678
679  // read pc and target
680  ftq_pc_mem.io.ifuPtr_w       := ifuPtr_write
681  ftq_pc_mem.io.ifuPtrPlus1_w  := ifuPtrPlus1_write
682  ftq_pc_mem.io.ifuPtrPlus2_w  := ifuPtrPlus2_write
683  ftq_pc_mem.io.commPtr_w      := commPtr_write
684  ftq_pc_mem.io.commPtrPlus1_w := commPtrPlus1_write
685
686
687  io.toIfu.req.bits.ftqIdx := ifuPtr
688
689  val toICachePcBundle = Wire(Vec(copyNum,new Ftq_RF_Components))
690  val toICacheEntryToSend = Wire(Vec(copyNum,Bool()))
691  val toIfuPcBundle = Wire(new Ftq_RF_Components)
692  val entry_is_to_send = WireInit(entry_fetch_status(ifuPtr.value) === f_to_send)
693  val entry_ftq_offset = WireInit(cfiIndex_vec(ifuPtr.value))
694  val entry_next_addr  = Wire(UInt(VAddrBits.W))
695
696  val pc_mem_ifu_ptr_rdata   = VecInit(Seq.fill(copyNum)(RegNext(ftq_pc_mem.io.ifuPtr_rdata)))
697  val pc_mem_ifu_plus1_rdata = VecInit(Seq.fill(copyNum)(RegNext(ftq_pc_mem.io.ifuPtrPlus1_rdata)))
698  val diff_entry_next_addr = WireInit(update_target(ifuPtr.value)) //TODO: remove this
699
700  val copied_ifu_plus1_to_send = VecInit(Seq.fill(copyNum)(RegNext(entry_fetch_status(ifuPtrPlus1.value) === f_to_send) || RegNext(last_cycle_bpu_in && bpu_in_bypass_ptr === (ifuPtrPlus1))))
701  val copied_ifu_ptr_to_send   = VecInit(Seq.fill(copyNum)(RegNext(entry_fetch_status(ifuPtr.value) === f_to_send) || RegNext(last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr)))
702
703  for(i <- 0 until copyNum){
704    when(copied_last_cycle_bpu_in(i) && copied_bpu_in_bypass_ptr(i) === copied_ifu_ptr(i)){
705      toICachePcBundle(i) := copied_bpu_in_bypass_buf(i)
706      toICacheEntryToSend(i)   := true.B
707    }.elsewhen(copied_last_cycle_to_ifu_fire(i)){
708      toICachePcBundle(i) := pc_mem_ifu_plus1_rdata(i)
709      toICacheEntryToSend(i)   := copied_ifu_plus1_to_send(i)
710    }.otherwise{
711      toICachePcBundle(i) := pc_mem_ifu_ptr_rdata(i)
712      toICacheEntryToSend(i)   := copied_ifu_ptr_to_send(i)
713    }
714  }
715
716  // TODO: reconsider target address bypass logic
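  // three cases for the bundle sent to IFU this cycle:
  //   1) the entry at ifuPtr was written by BPU last cycle -> use the bypass buffer
  //   2) a request fired to IFU last cycle (ifuPtr advanced) -> use the registered ifuPtrPlus1 read
  //   3) otherwise                                           -> use the registered ifuPtr read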
717  when (last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr) {
718    toIfuPcBundle := bpu_in_bypass_buf_for_ifu
719    entry_is_to_send := true.B
720    entry_next_addr := last_cycle_bpu_target
721    entry_ftq_offset := last_cycle_cfiIndex
722    diff_entry_next_addr := last_cycle_bpu_target // TODO: remove this
723  }.elsewhen (last_cycle_to_ifu_fire) {
724    toIfuPcBundle := RegNext(ftq_pc_mem.io.ifuPtrPlus1_rdata)
725    entry_is_to_send := RegNext(entry_fetch_status(ifuPtrPlus1.value) === f_to_send) ||
726                        RegNext(last_cycle_bpu_in && bpu_in_bypass_ptr === (ifuPtrPlus1)) // reduce potential bubbles
727    entry_next_addr := Mux(last_cycle_bpu_in && bpu_in_bypass_ptr === (ifuPtrPlus1),
728                          bpu_in_bypass_buf_for_ifu.startAddr,
729                          Mux(ifuPtr === newest_entry_ptr,
730                            newest_entry_target,
731                            RegNext(ftq_pc_mem.io.ifuPtrPlus2_rdata.startAddr))) // ifuPtr+2
732  }.otherwise {
733    toIfuPcBundle := RegNext(ftq_pc_mem.io.ifuPtr_rdata)
734    entry_is_to_send := RegNext(entry_fetch_status(ifuPtr.value) === f_to_send) ||
735                        RegNext(last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr) // reduce potential bubbles
736    entry_next_addr := Mux(last_cycle_bpu_in && bpu_in_bypass_ptr === (ifuPtrPlus1),
737                          bpu_in_bypass_buf_for_ifu.startAddr,
738                          Mux(ifuPtr === newest_entry_ptr,
739                            newest_entry_target,
740                            RegNext(ftq_pc_mem.io.ifuPtrPlus1_rdata.startAddr))) // ifuPtr+1
741  }
742
743  io.toIfu.req.valid := entry_is_to_send && ifuPtr =/= bpuPtr
744  io.toIfu.req.bits.nextStartAddr := entry_next_addr
745  io.toIfu.req.bits.ftqOffset := entry_ftq_offset
746  io.toIfu.req.bits.fromFtqPcBundle(toIfuPcBundle)
747
748  io.toICache.req.valid := entry_is_to_send && ifuPtr =/= bpuPtr
749  io.toICache.req.bits.readValid.zipWithIndex.map{case(copy, i) => copy := toICacheEntryToSend(i) && copied_ifu_ptr(i) =/= copied_bpu_ptr(i)}
750  io.toICache.req.bits.pcMemRead.zipWithIndex.map{case(copy,i) => copy.fromFtqPcBundle(toICachePcBundle(i))}
751  // io.toICache.req.bits.bypassSelect := last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr
752  // io.toICache.req.bits.bpuBypassWrite.zipWithIndex.map{case(bypassWrtie, i) =>
753  //   bypassWrtie.startAddr := bpu_in_bypass_buf.tail(i).startAddr
754  //   bypassWrtie.nextlineStart := bpu_in_bypass_buf.tail(i).nextLineAddr
755  // }
756
757  // TODO: remove this
758  XSError(io.toIfu.req.valid && diff_entry_next_addr =/= entry_next_addr,
759          p"\nifu_req_target wrong! ifuPtr: ${ifuPtr}, entry_next_addr: ${Hexadecimal(entry_next_addr)} diff_entry_next_addr: ${Hexadecimal(diff_entry_next_addr)}\n")
760
761  // when fall through is smaller in value than start address, there must be a false hit
762  when (toIfuPcBundle.fallThruError && entry_hit_status(ifuPtr.value) === h_hit) {
763    when (io.toIfu.req.fire &&
764      !(bpu_s2_redirect && bpu_s2_resp.ftq_idx === ifuPtr) &&
765      !(bpu_s3_redirect && bpu_s3_resp.ftq_idx === ifuPtr)
766    ) {
767      entry_hit_status(ifuPtr.value) := h_false_hit
768      // XSError(true.B, "FTB false hit by fallThroughError, startAddr: %x, fallTHru: %x\n", io.toIfu.req.bits.startAddr, io.toIfu.req.bits.nextStartAddr)
769    }
770    XSDebug(true.B, "fallThruError! start:%x, fallThru:%x\n", io.toIfu.req.bits.startAddr, io.toIfu.req.bits.nextStartAddr)
771  }
772
773  XSPerfAccumulate(f"fall_through_error_to_ifu", toIfuPcBundle.fallThruError && entry_hit_status(ifuPtr.value) === h_hit &&
774    io.toIfu.req.fire && !(bpu_s2_redirect && bpu_s2_resp.ftq_idx === ifuPtr) && !(bpu_s3_redirect && bpu_s3_resp.ftq_idx === ifuPtr))
775
776  val ifu_req_should_be_flushed =
777    io.toIfu.flushFromBpu.shouldFlushByStage2(io.toIfu.req.bits.ftqIdx) ||
778    io.toIfu.flushFromBpu.shouldFlushByStage3(io.toIfu.req.bits.ftqIdx)
779
780  when (io.toIfu.req.fire && !ifu_req_should_be_flushed) {
781    entry_fetch_status(ifuPtr.value) := f_sent
782  }
783
784  // *********************************************************************
785  // **************************** wb from ifu ****************************
786  // *********************************************************************
787  val pdWb = io.fromIfu.pdWb
788  val pds = pdWb.bits.pd
789  val ifu_wb_valid = pdWb.valid
790  val ifu_wb_idx = pdWb.bits.ftqIdx.value
791  // read ports:                                                         commit update
792  val ftq_pd_mem = Module(new SyncDataModuleTemplate(new Ftq_pd_Entry, FtqSize, 1, 1))
793  ftq_pd_mem.io.wen(0) := ifu_wb_valid
794  ftq_pd_mem.io.waddr(0) := pdWb.bits.ftqIdx.value
795  ftq_pd_mem.io.wdata(0).fromPdWb(pdWb.bits)
796
797  val hit_pd_valid = entry_hit_status(ifu_wb_idx) === h_hit && ifu_wb_valid
798  val hit_pd_mispred = hit_pd_valid && pdWb.bits.misOffset.valid
799  val hit_pd_mispred_reg = RegNext(hit_pd_mispred, init=false.B)
800  val pd_reg       = RegEnable(pds,             pdWb.valid)
801  val start_pc_reg = RegEnable(pdWb.bits.pc(0), pdWb.valid)
802  val wb_idx_reg   = RegEnable(ifu_wb_idx,      pdWb.valid)
803
804  when (ifu_wb_valid) {
805    val comm_stq_wen = VecInit(pds.map(_.valid).zip(pdWb.bits.instrRange).map{
806      case (v, inRange) => v && inRange
807    })
808    (commitStateQueue(ifu_wb_idx) zip comm_stq_wen).map{
809      case (qe, v) => when (v) { qe := c_valid }
810    }
811  }
812
813  when (ifu_wb_valid) {
814    ifuWbPtr_write := ifuWbPtr + 1.U
815  }
816
817  XSError(ifu_wb_valid && isAfter(pdWb.bits.ftqIdx, ifuPtr), "IFU returned a predecode before its req, check IFU")
818
819  ftb_entry_mem.io.raddr.head := ifu_wb_idx
820  val has_false_hit = WireInit(false.B)
821  when (RegNext(hit_pd_valid)) {
822    // check for false hit
823    val pred_ftb_entry = ftb_entry_mem.io.rdata.head
824    val brSlots = pred_ftb_entry.brSlots
825    val tailSlot = pred_ftb_entry.tailSlot
826    // we check cfis that bpu predicted
827
828    // bpu predicted branches but denied by predecode
829    val br_false_hit =
830      brSlots.map{
831        s => s.valid && !(pd_reg(s.offset).valid && pd_reg(s.offset).isBr)
832      }.reduce(_||_) ||
833      (tailSlot.valid && pred_ftb_entry.tailSlot.sharing &&
834        !(pd_reg(tailSlot.offset).valid && pd_reg(tailSlot.offset).isBr))
835
836    val jmpOffset = tailSlot.offset
837    val jmp_pd = pd_reg(jmpOffset)
838    val jal_false_hit = pred_ftb_entry.jmpValid &&
839      ((pred_ftb_entry.isJal  && !(jmp_pd.valid && jmp_pd.isJal)) ||
840       (pred_ftb_entry.isJalr && !(jmp_pd.valid && jmp_pd.isJalr)) ||
841       (pred_ftb_entry.isCall && !(jmp_pd.valid && jmp_pd.isCall)) ||
842       (pred_ftb_entry.isRet  && !(jmp_pd.valid && jmp_pd.isRet))
843      )
844
845    has_false_hit := br_false_hit || jal_false_hit || hit_pd_mispred_reg
846    XSDebug(has_false_hit, "FTB false hit by br or jal or hit_pd, startAddr: %x\n", pdWb.bits.pc(0))
847
848    // assert(!has_false_hit)
849  }
850
851  when (has_false_hit) {
852    entry_hit_status(wb_idx_reg) := h_false_hit
853  }
854
855
856  // **********************************************************************
857  // ***************************** to backend *****************************
858  // **********************************************************************
859  // to backend pc mem / target
860  io.toBackend.pc_mem_wen   := RegNext(last_cycle_bpu_in)
861  io.toBackend.pc_mem_waddr := RegNext(last_cycle_bpu_in_idx)
862  io.toBackend.pc_mem_wdata := RegNext(bpu_in_bypass_buf_for_ifu)
863
864  // *******************************************************************************
865  // **************************** redirect from backend ****************************
866  // *******************************************************************************
867
868  // on redirect, read cfiInfo; this read couples to redirectGen s2
869  ftq_redirect_sram.io.ren.init.last := backendRedirect.valid
870  ftq_redirect_sram.io.raddr.init.last := backendRedirect.bits.ftqIdx.value
871
872  ftb_entry_mem.io.raddr.init.last := backendRedirect.bits.ftqIdx.value
873
874  val stage3CfiInfo = ftq_redirect_sram.io.rdata.init.last
875  val fromBackendRedirect = WireInit(backendRedirectReg)
876  val backendRedirectCfi = fromBackendRedirect.bits.cfiUpdate
877  backendRedirectCfi.fromFtqRedirectSram(stage3CfiInfo)
878
879  val r_ftb_entry = ftb_entry_mem.io.rdata.init.last
880  val r_ftqOffset = fromBackendRedirect.bits.ftqOffset
881
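  // recover how the speculative global history was shifted for this entry:
  // `shift` counts the conditional branches recorded in the FTB entry up to the
  // redirected slot (plus the redirected branch itself if it is a new one that
  // can still be inserted), and `addIntoHist` tells the BPU whether the
  // redirected branch contributes a history bit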
882  when (entry_hit_status(fromBackendRedirect.bits.ftqIdx.value) === h_hit) {
883    backendRedirectCfi.shift := PopCount(r_ftb_entry.getBrMaskByOffset(r_ftqOffset)) +&
884      (backendRedirectCfi.pd.isBr && !r_ftb_entry.brIsSaved(r_ftqOffset) &&
885      !r_ftb_entry.newBrCanNotInsert(r_ftqOffset))
886
887    backendRedirectCfi.addIntoHist := backendRedirectCfi.pd.isBr && (r_ftb_entry.brIsSaved(r_ftqOffset) ||
888        !r_ftb_entry.newBrCanNotInsert(r_ftqOffset))
889  }.otherwise {
890    backendRedirectCfi.shift := (backendRedirectCfi.pd.isBr && backendRedirectCfi.taken).asUInt
891    backendRedirectCfi.addIntoHist := backendRedirectCfi.pd.isBr.asUInt
892  }
893
894
895  // ***************************************************************************
896  // **************************** redirect from ifu ****************************
897  // ***************************************************************************
898  val fromIfuRedirect = WireInit(0.U.asTypeOf(Valid(new Redirect)))
899  fromIfuRedirect.valid := pdWb.valid && pdWb.bits.misOffset.valid && !backendFlush
900  fromIfuRedirect.bits.ftqIdx := pdWb.bits.ftqIdx
901  fromIfuRedirect.bits.ftqOffset := pdWb.bits.misOffset.bits
902  fromIfuRedirect.bits.level := RedirectLevel.flushAfter
903
904  val ifuRedirectCfiUpdate = fromIfuRedirect.bits.cfiUpdate
905  ifuRedirectCfiUpdate.pc := pdWb.bits.pc(pdWb.bits.misOffset.bits)
906  ifuRedirectCfiUpdate.pd := pdWb.bits.pd(pdWb.bits.misOffset.bits)
907  ifuRedirectCfiUpdate.predTaken := cfiIndex_vec(pdWb.bits.ftqIdx.value).valid
908  ifuRedirectCfiUpdate.target := pdWb.bits.target
909  ifuRedirectCfiUpdate.taken := pdWb.bits.cfiOffset.valid
910  ifuRedirectCfiUpdate.isMisPred := pdWb.bits.misOffset.valid
911
912  val ifuRedirectReg = RegNext(fromIfuRedirect, init=0.U.asTypeOf(Valid(new Redirect)))
913  val ifuRedirectToBpu = WireInit(ifuRedirectReg)
914  ifuFlush := fromIfuRedirect.valid || ifuRedirectToBpu.valid
915
916  ftq_redirect_sram.io.ren.head := fromIfuRedirect.valid
917  ftq_redirect_sram.io.raddr.head := fromIfuRedirect.bits.ftqIdx.value
918
919  ftb_entry_mem.io.raddr.head := fromIfuRedirect.bits.ftqIdx.value
920
921  val toBpuCfi = ifuRedirectToBpu.bits.cfiUpdate
922  toBpuCfi.fromFtqRedirectSram(ftq_redirect_sram.io.rdata.head)
923  when (ifuRedirectReg.bits.cfiUpdate.pd.isRet) {
924    toBpuCfi.target := toBpuCfi.rasEntry.retAddr
925  }
926
927  // *********************************************************************
928  // **************************** wb from exu ****************************
929  // *********************************************************************
930
931  backendRedirect := io.fromBackend.redirect
932
933  def extractRedirectInfo(wb: Valid[Redirect]) = {
934    val ftqPtr = wb.bits.ftqIdx
935    val ftqOffset = wb.bits.ftqOffset
936    val taken = wb.bits.cfiUpdate.taken
937    val mispred = wb.bits.cfiUpdate.isMisPred
938    (wb.valid, ftqPtr, ftqOffset, taken, mispred)
939  }
940
941  // fix mispredict entry
942  val lastIsMispredict = RegNext(
943    backendRedirect.valid && backendRedirect.bits.level === RedirectLevel.flushAfter, init = false.B
944  )
945
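  // adjust the recorded taken-cfi of the redirected entry: move it to an
  // earlier taken offset, clear the valid bit when the redirect shows control
  // flow was not taken there, and (for backend redirects) mark the
  // mispredicted slot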
946  def updateCfiInfo(redirect: Valid[Redirect], isBackend: Boolean = true) = {
947    val (r_valid, r_ptr, r_offset, r_taken, r_mispred) = extractRedirectInfo(redirect)
948    val r_idx = r_ptr.value
949    val cfiIndex_bits_wen = r_valid && r_taken && r_offset < cfiIndex_vec(r_idx).bits
950    val cfiIndex_valid_wen = r_valid && r_offset === cfiIndex_vec(r_idx).bits
951    when (cfiIndex_bits_wen || cfiIndex_valid_wen) {
952      cfiIndex_vec(r_idx).valid := cfiIndex_bits_wen || cfiIndex_valid_wen && r_taken
953    } .elsewhen (r_valid && !r_taken && r_offset =/= cfiIndex_vec(r_idx).bits) {
954      cfiIndex_vec(r_idx).valid :=false.B
955    }
956    when (cfiIndex_bits_wen) {
957      cfiIndex_vec(r_idx).bits := r_offset
958    }
959    newest_entry_target := redirect.bits.cfiUpdate.target
960    newest_entry_ptr := r_ptr
961    update_target(r_idx) := redirect.bits.cfiUpdate.target // TODO: remove this
962    if (isBackend) {
963      mispredict_vec(r_idx)(r_offset) := r_mispred
964    }
965  }
966
967  when(backendRedirectReg.valid) {
968    updateCfiInfo(backendRedirectReg)
969  }.elsewhen (ifuRedirectToBpu.valid) {
970    updateCfiInfo(ifuRedirectToBpu, isBackend=false)
971  }
972
973  // ***********************************************************************************
974  // **************************** flush ptr and state queue ****************************
975  // ***********************************************************************************
976
977  val redirectVec = VecInit(backendRedirect, fromIfuRedirect)
978
979  // when redirect, we should reset ptrs and status queues
980  when(redirectVec.map(r => r.valid).reduce(_||_)){
981    val r = PriorityMux(redirectVec.map(r => (r.valid -> r.bits)))
982    val notIfu = redirectVec.dropRight(1).map(r => r.valid).reduce(_||_)
983    val (idx, offset, flushItSelf) = (r.ftqIdx, r.ftqOffset, RedirectLevel.flushItself(r.level))
984    val next = idx + 1.U
985    bpuPtr := next
986    copied_bpu_ptr.map(_ := next)
987    ifuPtr_write := next
988    ifuWbPtr_write := next
989    ifuPtrPlus1_write := idx + 2.U
990    ifuPtrPlus2_write := idx + 3.U
991
992  }
993  when(RegNext(redirectVec.map(r => r.valid).reduce(_||_))){
994    val r = PriorityMux(redirectVec.map(r => (r.valid -> r.bits)))
995    val notIfu = redirectVec.dropRight(1).map(r => r.valid).reduce(_||_)
996    val (idx, offset, flushItSelf) = (r.ftqIdx, r.ftqOffset, RedirectLevel.flushItself(r.level))
997    when (RegNext(notIfu)) {
998      commitStateQueue(RegNext(idx.value)).zipWithIndex.foreach({ case (s, i) =>
999        when(i.U > RegNext(offset) || i.U === RegNext(offset) && RegNext(flushItSelf)){
1000          s := c_invalid
1001        }
1002      })
1003    }
1004  }
1005
1006
1007  // only the valid bit is actually needed
1008  io.toIfu.redirect.bits    := backendRedirect.bits
1009  io.toIfu.redirect.valid   := stage2Flush
1010
1011  // commit
1012  for (c <- io.fromBackend.rob_commits) {
1013    when(c.valid) {
1014      commitStateQueue(c.bits.ftqIdx.value)(c.bits.ftqOffset) := c_commited
1015      // TODO: remove this
1016      // For instruction fusions, we also update the next instruction
1017      when (c.bits.commitType === 4.U) {
1018        commitStateQueue(c.bits.ftqIdx.value)(c.bits.ftqOffset + 1.U) := c_commited
1019      }.elsewhen(c.bits.commitType === 5.U) {
1020        commitStateQueue(c.bits.ftqIdx.value)(c.bits.ftqOffset + 2.U) := c_commited
1021      }.elsewhen(c.bits.commitType === 6.U) {
1022        val index = (c.bits.ftqIdx + 1.U).value
1023        commitStateQueue(index)(0) := c_commited
1024      }.elsewhen(c.bits.commitType === 7.U) {
1025        val index = (c.bits.ftqIdx + 1.U).value
1026        commitStateQueue(index)(1) := c_commited
1027      }
1028    }
1029  }
1030
1031  // ****************************************************************
1032  // **************************** to bpu ****************************
1033  // ****************************************************************
1034
1035  io.toBpu.redirect := Mux(fromBackendRedirect.valid, fromBackendRedirect, ifuRedirectToBpu)
1036
1037  XSError(io.toBpu.redirect.valid && isBefore(io.toBpu.redirect.bits.ftqIdx, commPtr), "Ftq received a redirect after its commit, check backend or replay")
1038
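  // a commit whose entry missed in the FTB makes the BPU write a brand-new FTB
  // entry; commit stalls for two cycles afterwards (presumably to give that
  // multi-cycle update time to finish before the next update is issued)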
1039  val may_have_stall_from_bpu = Wire(Bool())
1040  val bpu_ftb_update_stall = RegInit(0.U(2.W)) // 2-cycle stall, so we need 3 states
1041  may_have_stall_from_bpu := bpu_ftb_update_stall =/= 0.U
1042  canCommit := commPtr =/= ifuWbPtr && !may_have_stall_from_bpu &&
1043    Cat(commitStateQueue(commPtr.value).map(s => {
1044      s === c_invalid || s === c_commited
1045    })).andR()
1046
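  // mmio fetches must be non-speculative: the IFU polls this interface and only
  // proceeds once the queried entry lies between commPtr and ifuPtr and every
  // slot of it is either committed or invalid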
1047  val mmioReadPtr = io.mmioCommitRead.mmioFtqPtr
1048  val mmioLastCommit = isBefore(commPtr, mmioReadPtr) && (isAfter(ifuPtr, mmioReadPtr) || mmioReadPtr === ifuPtr) &&
1049                       Cat(commitStateQueue(mmioReadPtr.value).map(s => { s === c_invalid || s === c_commited })).andR()
1050  io.mmioCommitRead.mmioLastCommit := RegNext(mmioLastCommit)
1051
1052  // commit reads
1053  val commit_pc_bundle = RegNext(ftq_pc_mem.io.commPtr_rdata)
1054  val commit_target =
1055    Mux(RegNext(commPtr === newest_entry_ptr),
1056      RegNext(newest_entry_target),
1057      RegNext(ftq_pc_mem.io.commPtrPlus1_rdata.startAddr))
1058  ftq_pd_mem.io.raddr.last := commPtr.value
1059  val commit_pd = ftq_pd_mem.io.rdata.last
1060  ftq_redirect_sram.io.ren.last := canCommit
1061  ftq_redirect_sram.io.raddr.last := commPtr.value
1062  val commit_spec_meta = ftq_redirect_sram.io.rdata.last
1063  ftq_meta_1r_sram.io.ren(0) := canCommit
1064  ftq_meta_1r_sram.io.raddr(0) := commPtr.value
1065  val commit_meta = ftq_meta_1r_sram.io.rdata(0)
1066  ftb_entry_mem.io.raddr.last := commPtr.value
1067  val commit_ftb_entry = ftb_entry_mem.io.rdata.last
1068
1069  // need one cycle to read mem and srams
1070  val do_commit_ptr = RegNext(commPtr)
1071  val do_commit = RegNext(canCommit, init=false.B)
1072  when (canCommit) {
1073    commPtr_write := commPtrPlus1
1074    commPtrPlus1_write := commPtrPlus1 + 1.U
1075  }
1076  val commit_state = RegNext(commitStateQueue(commPtr.value))
1077  val can_commit_cfi = WireInit(cfiIndex_vec(commPtr.value))
1078  //
1079  //when (commitStateQueue(commPtr.value)(can_commit_cfi.bits) =/= c_commited) {
1080  //  can_commit_cfi.valid := false.B
1081  //}
1082  val commit_cfi = RegNext(can_commit_cfi)
1083  val debug_cfi = RegNext(commitStateQueue(commPtr.value)(can_commit_cfi.bits) =/= c_commited && can_commit_cfi.valid)
1084
1085  val commit_mispredict  : Vec[Bool] = VecInit((RegNext(mispredict_vec(commPtr.value)) zip commit_state).map {
1086    case (mis, state) => mis && state === c_commited
1087  })
1088  val commit_instCommited: Vec[Bool] = VecInit(commit_state.map(_ === c_commited)) // [PredictWidth]
1089  val can_commit_hit                 = entry_hit_status(commPtr.value)
1090  val commit_hit                     = RegNext(can_commit_hit)
1091  val diff_commit_target             = RegNext(update_target(commPtr.value)) // TODO: remove this
1092  val commit_stage                   = RegNext(pred_stage(commPtr.value))
1093  val commit_valid                   = commit_hit === h_hit || commit_cfi.valid // hit or taken
1094
1095  val to_bpu_hit = can_commit_hit === h_hit || can_commit_hit === h_false_hit
1096  switch (bpu_ftb_update_stall) {
1097    is (0.U) {
1098      when (can_commit_cfi.valid && !to_bpu_hit && canCommit) {
1099        bpu_ftb_update_stall := 2.U // 2-cycle stall
1100      }
1101    }
1102    is (2.U) {
1103      bpu_ftb_update_stall := 1.U
1104    }
1105    is (1.U) {
1106      bpu_ftb_update_stall := 0.U
1107    }
1108    is (3.U) {
1109      XSError(true.B, "bpu_ftb_update_stall should be 0, 1 or 2")
1110    }
1111  }
1112
1113  // TODO: remove this
1114  XSError(do_commit && diff_commit_target =/= commit_target, "\ncommit target should be the same as update target\n")
1115
1116  io.toBpu.update := DontCare
1117  io.toBpu.update.valid := commit_valid && do_commit
1118  val update = io.toBpu.update.bits
1119  update.false_hit   := commit_hit === h_false_hit
1120  update.pc          := commit_pc_bundle.startAddr
1121  update.meta        := commit_meta.meta
1122  update.cfi_idx     := commit_cfi
1123  update.full_target := commit_target
1124  update.from_stage  := commit_stage
1125  update.spec_info   := commit_spec_meta
1126  XSError(commit_valid && do_commit && debug_cfi, "\ncommit cfi can be non c_commited\n")
1127
1128  val commit_real_hit = commit_hit === h_hit
1129  val update_ftb_entry = update.ftb_entry
1130
1131  val ftbEntryGen = Module(new FTBEntryGen).io
1132  ftbEntryGen.start_addr     := commit_pc_bundle.startAddr
1133  ftbEntryGen.old_entry      := commit_ftb_entry
1134  ftbEntryGen.pd             := commit_pd
1135  ftbEntryGen.cfiIndex       := commit_cfi
1136  ftbEntryGen.target         := commit_target
1137  ftbEntryGen.hit            := commit_real_hit
1138  ftbEntryGen.mispredict_vec := commit_mispredict
1139
1140  update_ftb_entry         := ftbEntryGen.new_entry
1141  update.new_br_insert_pos := ftbEntryGen.new_br_insert_pos
1142  update.mispred_mask      := ftbEntryGen.mispred_mask
1143  update.old_entry         := ftbEntryGen.is_old_entry
1144  update.pred_hit          := commit_hit === h_hit || commit_hit === h_false_hit
1145  update.br_taken_mask     := ftbEntryGen.taken_mask
1146  update.br_committed      := (ftbEntryGen.new_entry.brValids zip ftbEntryGen.new_entry.brOffset) map {
1147    case (valid, offset) => valid && commit_instCommited(offset)
1148  }
1149  update.jmp_taken         := ftbEntryGen.jmp_taken
1150
1151  // update.full_pred.fromFtbEntry(ftbEntryGen.new_entry, update.pc)
1152  // update.full_pred.jalr_target := commit_target
1153  // update.full_pred.hit := true.B
1154  // when (update.full_pred.is_jalr) {
1155  //   update.full_pred.targets.last := commit_target
1156  // }
1157
1158  // ****************************************************************
1159  // *********************** to prefetch ****************************
1160  // ****************************************************************
1161
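  // the instruction prefetcher follows its own pointer that stays between
  // ifuPtr and bpuPtr: it is pushed forward when it falls behind ifuPtr, and
  // pulled back together with the other pointers on BPU overrides and redirects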
1162  ftq_pc_mem.io.other_raddrs(0) := DontCare
1163  if(cacheParams.hasPrefetch){
1164    val prefetchPtr = RegInit(FtqPtr(false.B, 0.U))
1165    val diff_prefetch_addr = WireInit(update_target(prefetchPtr.value)) //TODO: remove this
1166    // TODO : MUST WIDER
1167    prefetchPtr := prefetchPtr + io.toPrefetch.req.fire()
1168
1169    val prefetch_too_late = (isBefore(prefetchPtr, ifuPtr) && !isFull(ifuPtr, prefetchPtr)) || (prefetchPtr === ifuPtr)
1170    when(prefetch_too_late){
1171      when(prefetchPtr =/= bpuPtr){
1172        prefetchPtr := bpuPtr - 1.U
1173      }.otherwise{
1174        prefetchPtr := ifuPtr
1175      }
1176    }
1177
1178    ftq_pc_mem.io.other_raddrs(0) := prefetchPtr.value
1179
1180    when (bpu_s2_resp.valid && bpu_s2_resp.hasRedirect && !isBefore(prefetchPtr, bpu_s2_resp.ftq_idx)) {
1181      prefetchPtr := bpu_s2_resp.ftq_idx
1182    }
1183
1184    when (bpu_s3_resp.valid && bpu_s3_resp.hasRedirect && !isBefore(prefetchPtr, bpu_s3_resp.ftq_idx)) {
1185      prefetchPtr := bpu_s3_resp.ftq_idx
1186      // XSError(true.B, "\ns3_redirect mechanism not implemented!\n")
1187    }
1188
1189
1190    val prefetch_is_to_send = WireInit(entry_fetch_status(prefetchPtr.value) === f_to_send)
1191    val prefetch_addr = Wire(UInt(VAddrBits.W))
1192
1193    when (last_cycle_bpu_in && bpu_in_bypass_ptr === prefetchPtr) {
1194      prefetch_is_to_send := true.B
1195      prefetch_addr := last_cycle_bpu_target
1196      diff_prefetch_addr := last_cycle_bpu_target // TODO: remove this
1197    }.otherwise{
1198      prefetch_addr := RegNext( ftq_pc_mem.io.other_rdatas(0).startAddr)
1199    }
1200    io.toPrefetch.req.valid := prefetchPtr =/= bpuPtr && prefetch_is_to_send
1201    io.toPrefetch.req.bits.target := prefetch_addr
1202
1203    when(redirectVec.map(r => r.valid).reduce(_||_)){
1204      val r = PriorityMux(redirectVec.map(r => (r.valid -> r.bits)))
1205      val next = r.ftqIdx + 1.U
1206      prefetchPtr := next
1207    }
1208
1209    // TODO: remove this
1210    // XSError(io.toPrefetch.req.valid && diff_prefetch_addr =/= prefetch_addr,
1211    //         f"\nprefetch_req_target wrong! prefetchPtr: ${prefetchPtr}, prefetch_addr: ${Hexadecimal(prefetch_addr)} diff_prefetch_addr: ${Hexadecimal(diff_prefetch_addr)}\n")
1212
1213
1214    XSError(isBefore(bpuPtr, prefetchPtr) && !isFull(bpuPtr, prefetchPtr), "\nprefetchPtr is before bpuPtr!\n")
1215//    XSError(isBefore(prefetchPtr, ifuPtr) && !isFull(ifuPtr, prefetchPtr), "\nifuPtr is before prefetchPtr!\n")
1216  }
1217  else {
1218    io.toPrefetch.req <> DontCare
1219  }
1220
1221  // ******************************************************************************
1222  // **************************** commit perf counters ****************************
1223  // ******************************************************************************
1224
1225  val commit_inst_mask    = VecInit(commit_state.map(c => c === c_commited && do_commit)).asUInt
1226  val commit_mispred_mask = commit_mispredict.asUInt
1227  val commit_not_mispred_mask = ~commit_mispred_mask
1228
1229  val commit_br_mask = commit_pd.brMask.asUInt
1230  val commit_jmp_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.jmpInfo.valid.asTypeOf(UInt(1.W)))
1231  val commit_cfi_mask = (commit_br_mask | commit_jmp_mask)
1232
1233  val mbpInstrs = commit_inst_mask & commit_cfi_mask
1234
1235  val mbpRights = mbpInstrs & commit_not_mispred_mask
1236  val mbpWrongs = mbpInstrs & commit_mispred_mask
1237
1238  io.bpuInfo.bpRight := PopCount(mbpRights)
1239  io.bpuInfo.bpWrong := PopCount(mbpWrongs)
1240
1241  val isWriteFTQTable = WireInit(Constantin.createRecord("isWriteFTQTable" + p(XSCoreParamsKey).HartId.toString))
1242  val ftqBranchTraceDB = ChiselDB.createTable("FTQTable" + p(XSCoreParamsKey).HartId.toString, new FtqDebugBundle)
1243  // Cfi Info
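  // For every slot of the committing entry, print an XSDebug line for committed
  // CFIs and, when the "isWriteFTQTable" Constantin knob is set, log the same
  // information into the ChiselDB "FTQTable" for offline branch-trace analysis.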
1244  for (i <- 0 until PredictWidth) {
1245    val pc = commit_pc_bundle.startAddr + (i * instBytes).U
1246    val v = commit_state(i) === c_commited
1247    val isBr = commit_pd.brMask(i)
1248    val isJmp = commit_pd.jmpInfo.valid && commit_pd.jmpOffset === i.U
1249    val isCfi = isBr || isJmp
1250    val isTaken = commit_cfi.valid && commit_cfi.bits === i.U
1251    val misPred = commit_mispredict(i)
1252    // val ghist = commit_spec_meta.ghist.predHist
1253    val histPtr = commit_spec_meta.histPtr
1254    val predCycle = commit_meta.meta(63, 0)
1255    val target = commit_target
1256
1257    val brIdx = OHToUInt(Reverse(Cat(update_ftb_entry.brValids.zip(update_ftb_entry.brOffset).map{case(v, offset) => v && offset === i.U})))
1258    val inFtbEntry = update_ftb_entry.brValids.zip(update_ftb_entry.brOffset).map{case(v, offset) => v && offset === i.U}.reduce(_||_)
1259    val addIntoHist = ((commit_hit === h_hit) && inFtbEntry) || (commit_hit =/= h_hit && i.U === commit_cfi.bits && isBr && commit_cfi.valid)
1260    XSDebug(v && do_commit && isCfi, p"cfi_update: isBr(${isBr}) pc(${Hexadecimal(pc)}) " +
1261    p"taken(${isTaken}) mispred(${misPred}) cycle($predCycle) hist(${histPtr.value}) " +
1262    p"startAddr(${Hexadecimal(commit_pc_bundle.startAddr)}) AddIntoHist(${addIntoHist}) " +
1263    p"brInEntry(${inFtbEntry}) brIdx(${brIdx}) target(${Hexadecimal(target)})\n")
1264
1265    val logbundle = Wire(new FtqDebugBundle)
1266    logbundle.pc := pc
1267    logbundle.target := target
1268    logbundle.isBr := isBr
1269    logbundle.isJmp := isJmp
1270    logbundle.isCall := isJmp && commit_pd.hasCall
1271    logbundle.isRet := isJmp && commit_pd.hasRet
1272    logbundle.misPred := misPred
1273    logbundle.isTaken := isTaken
1274    logbundle.predStage := commit_stage
1275
1276    ftqBranchTraceDB.log(
1277      data = logbundle /* hardware of type T */,
1278      en = isWriteFTQTable.orR && v && do_commit && isCfi,
1279      site = "FTQ" + p(XSCoreParamsKey).HartId.toString,
1280      clock = clock,
1281      reset = reset
1282    )
1283  }
1284
1285  val enq = io.fromBpu.resp
1286  val perf_redirect = backendRedirect
1287
1288  XSPerfAccumulate("entry", validEntries)
1289  XSPerfAccumulate("bpu_to_ftq_stall", enq.valid && !enq.ready)
1290  XSPerfAccumulate("mispredictRedirect", perf_redirect.valid && RedirectLevel.flushAfter === perf_redirect.bits.level)
1291  XSPerfAccumulate("replayRedirect", perf_redirect.valid && RedirectLevel.flushItself(perf_redirect.bits.level))
1292  XSPerfAccumulate("predecodeRedirect", fromIfuRedirect.valid)
1293
1294  XSPerfAccumulate("to_ifu_bubble", io.toIfu.req.ready && !io.toIfu.req.valid)
1295
1296  XSPerfAccumulate("to_ifu_stall", io.toIfu.req.valid && !io.toIfu.req.ready)
1297  XSPerfAccumulate("from_bpu_real_bubble", !enq.valid && enq.ready && allowBpuIn)
1298  XSPerfAccumulate("bpu_to_ifu_bubble", bpuPtr === ifuPtr)
1299
1300  val from_bpu = io.fromBpu.resp.bits
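  // Histogram of the s3 FTB entry length in instructions
  // (fall-through address minus start address, shifted by instOffsetBits).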
1301  def in_entry_len_map_gen(resp: BpuToFtqBundle)(stage: String) = {
1302    val entry_len = (resp.last_stage_ftb_entry.getFallThrough(resp.s3.pc) - resp.s3.pc) >> instOffsetBits
1303    val entry_len_recording_vec = (1 to PredictWidth+1).map(i => entry_len === i.U)
1304    val entry_len_map = (1 to PredictWidth+1).map(i =>
1305      f"${stage}_ftb_entry_len_$i" -> (entry_len_recording_vec(i-1) && resp.s3.valid)
1306    ).foldLeft(Map[String, UInt]())(_+_)
1307    entry_len_map
1308  }
1309  val s3_entry_len_map = in_entry_len_map_gen(from_bpu)("s3")
1310
1311  val to_ifu = io.toIfu.req.bits
1312
1313
1314
1315  val commit_num_inst_recording_vec = (1 to PredictWidth).map(i => PopCount(commit_inst_mask) === i.U)
1316  val commit_num_inst_map = (1 to PredictWidth).map(i =>
1317    f"commit_num_inst_$i" -> (commit_num_inst_recording_vec(i-1) && do_commit)
1318  ).foldLeft(Map[String, UInt]())(_+_)
1319
1320
1321
1322  val commit_jal_mask  = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasJal.asTypeOf(UInt(1.W)))
1323  val commit_jalr_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasJalr.asTypeOf(UInt(1.W)))
1324  val commit_call_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasCall.asTypeOf(UInt(1.W)))
1325  val commit_ret_mask  = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasRet.asTypeOf(UInt(1.W)))
1326
1327
1328  val mbpBRights = mbpRights & commit_br_mask
1329  val mbpJRights = mbpRights & commit_jal_mask
1330  val mbpIRights = mbpRights & commit_jalr_mask
1331  val mbpCRights = mbpRights & commit_call_mask
1332  val mbpRRights = mbpRights & commit_ret_mask
1333
1334  val mbpBWrongs = mbpWrongs & commit_br_mask
1335  val mbpJWrongs = mbpWrongs & commit_jal_mask
1336  val mbpIWrongs = mbpWrongs & commit_jalr_mask
1337  val mbpCWrongs = mbpWrongs & commit_call_mask
1338  val mbpRWrongs = mbpWrongs & commit_ret_mask
1339
1340  val commit_pred_stage = RegNext(pred_stage(commPtr.value))
1341
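  // Break a per-slot commit mask down by the BPU stage (BP_STAGES(i)) that
  // provided the committed prediction, as recorded in pred_stage at enqueue time.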
1342  def pred_stage_map(src: UInt, name: String) = {
1343    (0 until numBpStages).map(i =>
1344      f"${name}_stage_${i+1}" -> PopCount(src.asBools.map(_ && commit_pred_stage === BP_STAGES(i)))
1345    ).foldLeft(Map[String, UInt]())(_+_)
1346  }
1347
1348  val mispred_stage_map      = pred_stage_map(mbpWrongs,  "mispredict")
1349  val br_mispred_stage_map   = pred_stage_map(mbpBWrongs, "br_mispredict")
1350  val jalr_mispred_stage_map = pred_stage_map(mbpIWrongs, "jalr_mispredict")
1351  val correct_stage_map      = pred_stage_map(mbpRights,  "correct")
1352  val br_correct_stage_map   = pred_stage_map(mbpBRights, "br_correct")
1353  val jalr_correct_stage_map = pred_stage_map(mbpIRights, "jalr_correct")
1354
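  // FTB training statistics: everything below is qualified by a valid update to
  // the BPU, then classified by how ftbEntryGen produced the updated FTB entry
  // (brand-new entry, reused old entry, or a modified entry).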
1355  val update_valid = io.toBpu.update.valid
1356  def u(cond: Bool) = update_valid && cond
1357  val ftb_false_hit = u(update.false_hit)
1358  // assert(!ftb_false_hit)
1359  val ftb_hit = u(commit_hit === h_hit)
1360
1361  val ftb_new_entry = u(ftbEntryGen.is_init_entry)
1362  val ftb_new_entry_only_br = ftb_new_entry && !update_ftb_entry.jmpValid
1363  val ftb_new_entry_only_jmp = ftb_new_entry && !update_ftb_entry.brValids(0)
1364  val ftb_new_entry_has_br_and_jmp = ftb_new_entry && update_ftb_entry.brValids(0) && update_ftb_entry.jmpValid
1365
1366  val ftb_old_entry = u(ftbEntryGen.is_old_entry)
1367
1368  val ftb_modified_entry = u(ftbEntryGen.is_new_br || ftbEntryGen.is_jalr_target_modified || ftbEntryGen.is_always_taken_modified)
1369  val ftb_modified_entry_new_br = u(ftbEntryGen.is_new_br)
1370  val ftb_modified_entry_jalr_target_modified = u(ftbEntryGen.is_jalr_target_modified)
1371  val ftb_modified_entry_br_full = ftb_modified_entry && ftbEntryGen.is_br_full
1372  val ftb_modified_entry_always_taken = ftb_modified_entry && ftbEntryGen.is_always_taken_modified
1373
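  // Length histograms (in instructions) for newly initialized and modified FTB entries.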
1374  val ftb_entry_len = (ftbEntryGen.new_entry.getFallThrough(update.pc) - update.pc) >> instOffsetBits
1375  val ftb_entry_len_recording_vec = (1 to PredictWidth+1).map(i => ftb_entry_len === i.U)
1376  val ftb_init_entry_len_map = (1 to PredictWidth+1).map(i =>
1377    f"ftb_init_entry_len_$i" -> (ftb_entry_len_recording_vec(i-1) && ftb_new_entry)
1378  ).foldLeft(Map[String, UInt]())(_+_)
1379  val ftb_modified_entry_len_map = (1 to PredictWidth+1).map(i =>
1380    f"ftb_modified_entry_len_$i" -> (ftb_entry_len_recording_vec(i-1) && ftb_modified_entry)
1381  ).foldLeft(Map[String, UInt]())(_+_)
1382
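  // FTQ occupancy histogram: one counter per possible number of valid entries.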
1383  val ftq_occupancy_map = (0 to FtqSize).map(i =>
1384    f"ftq_has_entry_$i" ->( validEntries === i.U)
1385  ).foldLeft(Map[String, UInt]())(_+_)
1386
1387  val perfCountsMap = Map(
1388    "BpInstr" -> PopCount(mbpInstrs),
1389    "BpBInstr" -> PopCount(mbpBRights | mbpBWrongs),
1390    "BpRight"  -> PopCount(mbpRights),
1391    "BpWrong"  -> PopCount(mbpWrongs),
1392    "BpBRight" -> PopCount(mbpBRights),
1393    "BpBWrong" -> PopCount(mbpBWrongs),
1394    "BpJRight" -> PopCount(mbpJRights),
1395    "BpJWrong" -> PopCount(mbpJWrongs),
1396    "BpIRight" -> PopCount(mbpIRights),
1397    "BpIWrong" -> PopCount(mbpIWrongs),
1398    "BpCRight" -> PopCount(mbpCRights),
1399    "BpCWrong" -> PopCount(mbpCWrongs),
1400    "BpRRight" -> PopCount(mbpRRights),
1401    "BpRWrong" -> PopCount(mbpRWrongs),
1402
1403    "ftb_false_hit"                -> PopCount(ftb_false_hit),
1404    "ftb_hit"                      -> PopCount(ftb_hit),
1405    "ftb_new_entry"                -> PopCount(ftb_new_entry),
1406    "ftb_new_entry_only_br"        -> PopCount(ftb_new_entry_only_br),
1407    "ftb_new_entry_only_jmp"       -> PopCount(ftb_new_entry_only_jmp),
1408    "ftb_new_entry_has_br_and_jmp" -> PopCount(ftb_new_entry_has_br_and_jmp),
1409    "ftb_old_entry"                -> PopCount(ftb_old_entry),
1410    "ftb_modified_entry"           -> PopCount(ftb_modified_entry),
1411    "ftb_modified_entry_new_br"    -> PopCount(ftb_modified_entry_new_br),
1412    "ftb_jalr_target_modified"     -> PopCount(ftb_modified_entry_jalr_target_modified),
1413    "ftb_modified_entry_br_full"   -> PopCount(ftb_modified_entry_br_full),
1414    "ftb_modified_entry_always_taken" -> PopCount(ftb_modified_entry_always_taken)
1415  ) ++ ftb_init_entry_len_map ++ ftb_modified_entry_len_map ++
1416  s3_entry_len_map ++ commit_num_inst_map ++ ftq_occupancy_map ++
1417  mispred_stage_map ++ br_mispred_stage_map ++ jalr_mispred_stage_map ++
1418  correct_stage_map ++ br_correct_stage_map ++ jalr_correct_stage_map
1419
1420  for((key, value) <- perfCountsMap) {
1421    XSPerfAccumulate(key, value)
1422  }
1423
1424  // --------------------------- Debug --------------------------------
1425  // XSDebug(enq_fire, p"enq! " + io.fromBpu.resp.bits.toPrintable)
1426  XSDebug(io.toIfu.req.fire, p"fire to ifu " + io.toIfu.req.bits.toPrintable)
1427  XSDebug(do_commit, p"deq! [ptr] $do_commit_ptr\n")
1428  XSDebug(true.B, p"[bpuPtr] $bpuPtr, [ifuPtr] $ifuPtr, [ifuWbPtr] $ifuWbPtr [commPtr] $commPtr\n")
1429  XSDebug(true.B, p"[in] v:${io.fromBpu.resp.valid} r:${io.fromBpu.resp.ready} " +
1430    p"[out] v:${io.toIfu.req.valid} r:${io.toIfu.req.ready}\n")
1431  XSDebug(do_commit, p"[deq info] cfiIndex: $commit_cfi, $commit_pc_bundle, target: ${Hexadecimal(commit_target)}\n")
1432
1433  //   def ubtbCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1434  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1435  //       case (((valid, pd), ans), taken) =>
1436  //       Mux(valid && pd.isBr,
1437  //         isWrong ^ Mux(ans.hit.asBool,
1438  //           Mux(ans.taken.asBool, taken && ans.target === commitEntry.target,
1439  //           !taken),
1440  //         !taken),
1441  //       false.B)
1442  //     }
1443  //   }
1444
1445  //   def btbCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1446  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1447  //       case (((valid, pd), ans), taken) =>
1448  //       Mux(valid && pd.isBr,
1449  //         isWrong ^ Mux(ans.hit.asBool,
1450  //           Mux(ans.taken.asBool, taken && ans.target === commitEntry.target,
1451  //           !taken),
1452  //         !taken),
1453  //       false.B)
1454  //     }
1455  //   }
1456
1457  //   def tageCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1458  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1459  //       case (((valid, pd), ans), taken) =>
1460  //       Mux(valid && pd.isBr,
1461  //         isWrong ^ (ans.taken.asBool === taken),
1462  //       false.B)
1463  //     }
1464  //   }
1465
1466  //   def loopCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1467  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1468  //       case (((valid, pd), ans), taken) =>
1469  //       Mux(valid && (pd.isBr) && ans.hit.asBool,
1470  //         isWrong ^ (!taken),
1471  //           false.B)
1472  //     }
1473  //   }
1474
1475  //   def rasCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1476  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1477  //       case (((valid, pd), ans), taken) =>
1478  //       Mux(valid && pd.isRet.asBool /*&& taken*/ && ans.hit.asBool,
1479  //         isWrong ^ (ans.target === commitEntry.target),
1480  //           false.B)
1481  //     }
1482  //   }
1483
1484  //   val ubtbRights = ubtbCheck(commitEntry, commitEntry.metas.map(_.ubtbAns), false.B)
1485  //   val ubtbWrongs = ubtbCheck(commitEntry, commitEntry.metas.map(_.ubtbAns), true.B)
1486  //   // btb and ubtb pred jal and jalr as well
1487  //   val btbRights = btbCheck(commitEntry, commitEntry.metas.map(_.btbAns), false.B)
1488  //   val btbWrongs = btbCheck(commitEntry, commitEntry.metas.map(_.btbAns), true.B)
1489  //   val tageRights = tageCheck(commitEntry, commitEntry.metas.map(_.tageAns), false.B)
1490  //   val tageWrongs = tageCheck(commitEntry, commitEntry.metas.map(_.tageAns), true.B)
1491
1492  //   val loopRights = loopCheck(commitEntry, commitEntry.metas.map(_.loopAns), false.B)
1493  //   val loopWrongs = loopCheck(commitEntry, commitEntry.metas.map(_.loopAns), true.B)
1494
1495  //   val rasRights = rasCheck(commitEntry, commitEntry.metas.map(_.rasAns), false.B)
1496  //   val rasWrongs = rasCheck(commitEntry, commitEntry.metas.map(_.rasAns), true.B)
1497
1498  val perfEvents = Seq(
1499    ("bpu_s2_redirect        ", bpu_s2_redirect                                                             ),
1500    ("bpu_s3_redirect        ", bpu_s3_redirect                                                             ),
1501    ("bpu_to_ftq_stall       ", enq.valid && ~enq.ready                                                     ),
1502    ("mispredictRedirect     ", perf_redirect.valid && RedirectLevel.flushAfter === perf_redirect.bits.level),
1503    ("replayRedirect         ", perf_redirect.valid && RedirectLevel.flushItself(perf_redirect.bits.level)  ),
1504    ("predecodeRedirect      ", fromIfuRedirect.valid                                                       ),
1505    ("to_ifu_bubble          ", io.toIfu.req.ready && !io.toIfu.req.valid                                   ),
1506    ("from_bpu_real_bubble   ", !enq.valid && enq.ready && allowBpuIn                                       ),
1507    ("BpInstr                ", PopCount(mbpInstrs)                                                         ),
1508    ("BpBInstr               ", PopCount(mbpBRights | mbpBWrongs)                                           ),
1509    ("BpRight                ", PopCount(mbpRights)                                                         ),
1510    ("BpWrong                ", PopCount(mbpWrongs)                                                         ),
1511    ("BpBRight               ", PopCount(mbpBRights)                                                        ),
1512    ("BpBWrong               ", PopCount(mbpBWrongs)                                                        ),
1513    ("BpJRight               ", PopCount(mbpJRights)                                                        ),
1514    ("BpJWrong               ", PopCount(mbpJWrongs)                                                        ),
1515    ("BpIRight               ", PopCount(mbpIRights)                                                        ),
1516    ("BpIWrong               ", PopCount(mbpIWrongs)                                                        ),
1517    ("BpCRight               ", PopCount(mbpCRights)                                                        ),
1518    ("BpCWrong               ", PopCount(mbpCWrongs)                                                        ),
1519    ("BpRRight               ", PopCount(mbpRRights)                                                        ),
1520    ("BpRWrong               ", PopCount(mbpRWrongs)                                                        ),
1521    ("ftb_false_hit          ", PopCount(ftb_false_hit)                                                     ),
1522    ("ftb_hit                ", PopCount(ftb_hit)                                                           ),
1523  )
1524  generatePerfEvent()
1525}