xref: /XiangShan/src/main/scala/xiangshan/frontend/NewFtq.scala (revision eb163ef08fc5ac1da1f32d948699bd6de053e444)
1/***************************************************************************************
2* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
3* Copyright (c) 2020-2021 Peng Cheng Laboratory
4*
5* XiangShan is licensed under Mulan PSL v2.
6* You can use this software according to the terms and conditions of the Mulan PSL v2.
7* You may obtain a copy of Mulan PSL v2 at:
8*          http://license.coscl.org.cn/MulanPSL2
9*
10* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13*
14* See the Mulan PSL v2 for more details.
15***************************************************************************************/
16
17package xiangshan.frontend
18
19import chipsalliance.rocketchip.config.Parameters
20import chisel3._
21import chisel3.util._
22import utils._
23import xiangshan._
24import xiangshan.frontend.icache._
25import xiangshan.backend.CtrlToFtqIO
26import xiangshan.backend.decode.ImmUnion
27import huancun.utils.ChiselDB
28
29class FtqDebugBundle extends Bundle {
30  val pc = UInt(39.W)
31  val target = UInt(39.W)
32  val isBr = Bool()
33  val isJmp = Bool()
34  val isCall = Bool()
35  val isRet = Bool()
36  val misPred = Bool()
37  val isTaken = Bool()
38  val predStage = UInt(2.W)
39}
40
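// FtqPtr is the index type for FTQ entries: a CircularQueuePtr sized by FtqSize,
// i.e. a wrap 'flag' plus a 'value' index, so full and empty can be told apart
// when head and tail indices are equal.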
41class FtqPtr(implicit p: Parameters) extends CircularQueuePtr[FtqPtr](
42  p => p(XSCoreParamsKey).FtqSize
43){
44}
45
46object FtqPtr {
47  def apply(f: Bool, v: UInt)(implicit p: Parameters): FtqPtr = {
48    val ptr = Wire(new FtqPtr)
49    ptr.flag := f
50    ptr.value := v
51    ptr
52  }
53  def inverse(ptr: FtqPtr)(implicit p: Parameters): FtqPtr = {
54    apply(!ptr.flag, ptr.value)
55  }
56}
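// A minimal usage sketch (illustrative only, relying on the CircularQueuePtr
// arithmetic already used elsewhere in this file):
//   val p     = FtqPtr(false.B, 0.U)   // wire-typed pointer at entry 0
//   val pNext = p + 1.U                // CircularQueuePtr handles wrap and flag flip
//   val pInv  = FtqPtr.inverse(p)      // same value, opposite flag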
57
58class FtqNRSRAM[T <: Data](gen: T, numRead: Int)(implicit p: Parameters) extends XSModule {
59
60  val io = IO(new Bundle() {
61    val raddr = Input(Vec(numRead, UInt(log2Up(FtqSize).W)))
62    val ren = Input(Vec(numRead, Bool()))
63    val rdata = Output(Vec(numRead, gen))
64    val waddr = Input(UInt(log2Up(FtqSize).W))
65    val wen = Input(Bool())
66    val wdata = Input(gen)
67  })
68
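  // One single-read-port SRAM copy is instantiated per read port; every write
  // updates all copies, so each read port reads from its own dedicated array.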
69  for(i <- 0 until numRead){
70    val sram = Module(new SRAMTemplate(gen, FtqSize))
71    sram.io.r.req.valid := io.ren(i)
72    sram.io.r.req.bits.setIdx := io.raddr(i)
73    io.rdata(i) := sram.io.r.resp.data(0)
74    sram.io.w.req.valid := io.wen
75    sram.io.w.req.bits.setIdx := io.waddr
76    sram.io.w.req.bits.data := VecInit(io.wdata)
77  }
78
79}
80
81class Ftq_RF_Components(implicit p: Parameters) extends XSBundle with BPUUtils {
82  val startAddr = UInt(VAddrBits.W)
83  val nextLineAddr = UInt(VAddrBits.W)
84  val isNextMask = Vec(PredictWidth, Bool())
85  val fallThruError = Bool()
86  // val carry = Bool()
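  // getPc rebuilds a full PC from the compressed entry: the low bits are
  // startAddr's slot index plus 'offset' (padded with instOffsetBits zeros),
  // and the high bits come from startAddr, or from nextLineAddr when the
  // indexed slot has wrapped into the next line (isNextMask together with
  // startAddr's boundary bit).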
87  def getPc(offset: UInt) = {
88    def getHigher(pc: UInt) = pc(VAddrBits-1, log2Ceil(PredictWidth)+instOffsetBits+1)
89    def getOffset(pc: UInt) = pc(log2Ceil(PredictWidth)+instOffsetBits, instOffsetBits)
90    Cat(getHigher(Mux(isNextMask(offset) && startAddr(log2Ceil(PredictWidth)+instOffsetBits), nextLineAddr, startAddr)),
91        getOffset(startAddr)+offset, 0.U(instOffsetBits.W))
92  }
93  def fromBranchPrediction(resp: BranchPredictionBundle) = {
94    def carryPos(addr: UInt) = addr(instOffsetBits+log2Ceil(PredictWidth)+1)
95    this.startAddr := resp.pc
96    this.nextLineAddr := resp.pc + (FetchWidth * 4 * 2).U // may be broken on other configs
97    this.isNextMask := VecInit((0 until PredictWidth).map(i =>
98      (resp.pc(log2Ceil(PredictWidth), 1) +& i.U)(log2Ceil(PredictWidth)).asBool()
99    ))
100    this.fallThruError := resp.fallThruError
101    this
102  }
103  override def toPrintable: Printable = {
104    p"startAddr:${Hexadecimal(startAddr)}"
105  }
106}
107
108class Ftq_pd_Entry(implicit p: Parameters) extends XSBundle {
109  val brMask = Vec(PredictWidth, Bool())
110  val jmpInfo = ValidUndirectioned(Vec(3, Bool()))
111  val jmpOffset = UInt(log2Ceil(PredictWidth).W)
112  val jalTarget = UInt(VAddrBits.W)
113  val rvcMask = Vec(PredictWidth, Bool())
114  def hasJal  = jmpInfo.valid && !jmpInfo.bits(0)
115  def hasJalr = jmpInfo.valid && jmpInfo.bits(0)
116  def hasCall = jmpInfo.valid && jmpInfo.bits(1)
117  def hasRet  = jmpInfo.valid && jmpInfo.bits(2)
118
119  def fromPdWb(pdWb: PredecodeWritebackBundle) = {
120    val pds = pdWb.pd
121    this.brMask := VecInit(pds.map(pd => pd.isBr && pd.valid))
122    this.jmpInfo.valid := VecInit(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid)).asUInt.orR
123    this.jmpInfo.bits := ParallelPriorityMux(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid),
124                                             pds.map(pd => VecInit(pd.isJalr, pd.isCall, pd.isRet)))
125    this.jmpOffset := ParallelPriorityEncoder(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid))
126    this.rvcMask := VecInit(pds.map(pd => pd.isRVC))
127    this.jalTarget := pdWb.jalTarget
128  }
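  // The predecode result is stored in compressed form: per-slot brMask/rvcMask,
  // plus a single jump record where jmpInfo.bits packs (isJalr, isCall, isRet)
  // of the first valid jal/jalr and jmpOffset/jalTarget locate it. toPd below
  // expands this back into a PreDecodeInfo for one slot.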
129
130  def toPd(offset: UInt) = {
131    require(offset.getWidth == log2Ceil(PredictWidth))
132    val pd = Wire(new PreDecodeInfo)
133    pd.valid := true.B
134    pd.isRVC := rvcMask(offset)
135    val isBr = brMask(offset)
136    val isJalr = offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(0)
137    pd.brType := Cat(offset === jmpOffset && jmpInfo.valid, isJalr || isBr)
138    pd.isCall := offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(1)
139    pd.isRet  := offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(2)
140    pd
141  }
142}
143
144
145
146class Ftq_Redirect_SRAMEntry(implicit p: Parameters) extends SpeculativeInfo {}
147
148class Ftq_1R_SRAMEntry(implicit p: Parameters) extends XSBundle with HasBPUConst {
149  val meta = UInt(MaxMetaLength.W)
150}
151
152class Ftq_Pred_Info(implicit p: Parameters) extends XSBundle {
153  val target = UInt(VAddrBits.W)
154  val cfiIndex = ValidUndirectioned(UInt(log2Ceil(PredictWidth).W))
155}
156
157
158class FtqRead[T <: Data](private val gen: T)(implicit p: Parameters) extends XSBundle {
159  val ptr = Output(new FtqPtr)
160  val offset = Output(UInt(log2Ceil(PredictWidth).W))
161  val data = Input(gen)
162  def apply(ptr: FtqPtr, offset: UInt) = {
163    this.ptr := ptr
164    this.offset := offset
165    this.data
166  }
167}
168
169
170class FtqToBpuIO(implicit p: Parameters) extends XSBundle {
171  val redirect = Valid(new BranchPredictionRedirect)
172  val update = Valid(new BranchPredictionUpdate)
173  val enq_ptr = Output(new FtqPtr)
174}
175
176class FtqToIfuIO(implicit p: Parameters) extends XSBundle with HasCircularQueuePtrHelper {
177  val req = Decoupled(new FetchRequestBundle)
178  val redirect = Valid(new Redirect)
179  val flushFromBpu = new Bundle {
180    // when the ifu pipeline is not stalled,
181    // a packet from bpu s3 can have reached f1 at most
182    val s2 = Valid(new FtqPtr)
183    val s3 = Valid(new FtqPtr)
184    def shouldFlushBy(src: Valid[FtqPtr], idx_to_flush: FtqPtr) = {
185      src.valid && !isAfter(src.bits, idx_to_flush)
186    }
187    def shouldFlushByStage2(idx: FtqPtr) = shouldFlushBy(s2, idx)
188    def shouldFlushByStage3(idx: FtqPtr) = shouldFlushBy(s3, idx)
189  }
190}
191
192class FtqToICacheIO(implicit p: Parameters) extends XSBundle with HasCircularQueuePtrHelper {
193  //NOTE: req.bits must be prepared in cycle T,
194  // while req.valid is set true in cycle T + 1
195  val req = Decoupled(new FtqToICacheRequestBundle)
196}
197
198trait HasBackendRedirectInfo extends HasXSParameter {
199  def numRedirectPcRead = exuParameters.JmpCnt + exuParameters.AluCnt + 1
200  def isLoadReplay(r: Valid[Redirect]) = r.bits.flushItself()
201}
202
203class FtqToCtrlIO(implicit p: Parameters) extends XSBundle with HasBackendRedirectInfo {
204  // write to backend pc mem
205  val pc_mem_wen = Output(Bool())
206  val pc_mem_waddr = Output(UInt(log2Ceil(FtqSize).W))
207  val pc_mem_wdata = Output(new Ftq_RF_Components)
208  // newest target
209  val newest_entry_target = Output(UInt(VAddrBits.W))
210  val newest_entry_ptr = Output(new FtqPtr)
211}
212
213
214class FTBEntryGen(implicit p: Parameters) extends XSModule with HasBackendRedirectInfo with HasBPUParameter {
215  val io = IO(new Bundle {
216    val start_addr = Input(UInt(VAddrBits.W))
217    val old_entry = Input(new FTBEntry)
218    val pd = Input(new Ftq_pd_Entry)
219    val cfiIndex = Flipped(Valid(UInt(log2Ceil(PredictWidth).W)))
220    val target = Input(UInt(VAddrBits.W))
221    val hit = Input(Bool())
222    val mispredict_vec = Input(Vec(PredictWidth, Bool()))
223
224    val new_entry = Output(new FTBEntry)
225    val new_br_insert_pos = Output(Vec(numBr, Bool()))
226    val taken_mask = Output(Vec(numBr, Bool()))
227    val jmp_taken = Output(Bool())
228    val mispred_mask = Output(Vec(numBr+1, Bool()))
229
230    // for perf counters
231    val is_init_entry = Output(Bool())
232    val is_old_entry = Output(Bool())
233    val is_new_br = Output(Bool())
234    val is_jalr_target_modified = Output(Bool())
235    val is_always_taken_modified = Output(Bool())
236    val is_br_full = Output(Bool())
237  })
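  // FTBEntryGen derives the FTB entry to be written back at commit time.
  // Roughly three cases, selected by the mux near the end of this module:
  //   * no FTB hit           -> build init_entry from scratch out of predecode info
  //   * hit, new br detected -> old_entry_modified with the new br inserted in order
  //   * hit otherwise        -> keep the old entry, possibly fixing the jalr target
  //                             and/or clearing always_taken bits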
238
239  // no mispredictions detected at predecode
240  val hit = io.hit
241  val pd = io.pd
242
243  val init_entry = WireInit(0.U.asTypeOf(new FTBEntry))
244
245
246  val cfi_is_br = pd.brMask(io.cfiIndex.bits) && io.cfiIndex.valid
247  val entry_has_jmp = pd.jmpInfo.valid
248  val new_jmp_is_jal  = entry_has_jmp && !pd.jmpInfo.bits(0) && io.cfiIndex.valid
249  val new_jmp_is_jalr = entry_has_jmp &&  pd.jmpInfo.bits(0) && io.cfiIndex.valid
250  val new_jmp_is_call = entry_has_jmp &&  pd.jmpInfo.bits(1) && io.cfiIndex.valid
251  val new_jmp_is_ret  = entry_has_jmp &&  pd.jmpInfo.bits(2) && io.cfiIndex.valid
252  val last_jmp_rvi = entry_has_jmp && pd.jmpOffset === (PredictWidth-1).U && !pd.rvcMask.last
253  // val last_br_rvi = cfi_is_br && io.cfiIndex.bits === (PredictWidth-1).U && !pd.rvcMask.last
254
255  val cfi_is_jal = io.cfiIndex.bits === pd.jmpOffset && new_jmp_is_jal
256  val cfi_is_jalr = io.cfiIndex.bits === pd.jmpOffset && new_jmp_is_jalr
257
258  def carryPos = log2Ceil(PredictWidth)+instOffsetBits
259  def getLower(pc: UInt) = pc(carryPos-1, instOffsetBits)
260  // if not hit, establish a new entry
261  init_entry.valid := true.B
262  // tag is left for ftb to assign
263
264  // case br
265  val init_br_slot = init_entry.getSlotForBr(0)
266  when (cfi_is_br) {
267    init_br_slot.valid := true.B
268    init_br_slot.offset := io.cfiIndex.bits
269    init_br_slot.setLowerStatByTarget(io.start_addr, io.target, numBr == 1)
270    init_entry.always_taken(0) := true.B // set to always taken on init
271  }
272
273  // case jmp
274  when (entry_has_jmp) {
275    init_entry.tailSlot.offset := pd.jmpOffset
276    init_entry.tailSlot.valid := new_jmp_is_jal || new_jmp_is_jalr
277    init_entry.tailSlot.setLowerStatByTarget(io.start_addr, Mux(cfi_is_jalr, io.target, pd.jalTarget), isShare=false)
278  }
279
280  val jmpPft = getLower(io.start_addr) +& pd.jmpOffset +& Mux(pd.rvcMask(pd.jmpOffset), 1.U, 2.U)
281  init_entry.pftAddr := Mux(entry_has_jmp && !last_jmp_rvi, jmpPft, getLower(io.start_addr))
282  init_entry.carry   := Mux(entry_has_jmp && !last_jmp_rvi, jmpPft(carryPos-instOffsetBits), true.B)
283  init_entry.isJalr := new_jmp_is_jalr
284  init_entry.isCall := new_jmp_is_call
285  init_entry.isRet  := new_jmp_is_ret
286  // the jump is a non-RVC instruction in the last slot, so the nominal fall-through would point into the middle of it
287  init_entry.last_may_be_rvi_call := pd.jmpOffset === (PredictWidth-1).U && !pd.rvcMask(pd.jmpOffset)
288
289  // if hit, check whether a new cfi(only br is possible) is detected
290  val oe = io.old_entry
291  val br_recorded_vec = oe.getBrRecordedVec(io.cfiIndex.bits)
292  val br_recorded = br_recorded_vec.asUInt.orR
293  val is_new_br = cfi_is_br && !br_recorded
294  val new_br_offset = io.cfiIndex.bits
295  // vec(i) means new br will be inserted BEFORE old br(i)
296  val allBrSlotsVec = oe.allSlotsForBr
297  val new_br_insert_onehot = VecInit((0 until numBr).map{
298    i => i match {
299      case 0 =>
300        !allBrSlotsVec(0).valid || new_br_offset < allBrSlotsVec(0).offset
301      case idx =>
302        allBrSlotsVec(idx-1).valid && new_br_offset > allBrSlotsVec(idx-1).offset &&
303        (!allBrSlotsVec(idx).valid || new_br_offset < allBrSlotsVec(idx).offset)
304    }
305  })
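  // e.g. with numBr == 2: bit 0 is set when slot 0 is empty or the new br comes
  // before slot 0's br; bit 1 is set when the new br falls between slot 0's br
  // and slot 1's br (or slot 1 is empty).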
306
307  val old_entry_modified = WireInit(io.old_entry)
308  for (i <- 0 until numBr) {
309    val slot = old_entry_modified.allSlotsForBr(i)
310    when (new_br_insert_onehot(i)) {
311      slot.valid := true.B
312      slot.offset := new_br_offset
313      slot.setLowerStatByTarget(io.start_addr, io.target, i == numBr-1)
314      old_entry_modified.always_taken(i) := true.B
315    }.elsewhen (new_br_offset > oe.allSlotsForBr(i).offset) {
316      old_entry_modified.always_taken(i) := false.B
317      // all other fields remain unchanged
318    }.otherwise {
319      // case i == 0, remain unchanged
320      if (i != 0) {
321        val noNeedToMoveFromFormerSlot = (i == numBr-1).B && !oe.brSlots.last.valid
322        when (!noNeedToMoveFromFormerSlot) {
323          slot.fromAnotherSlot(oe.allSlotsForBr(i-1))
324          old_entry_modified.always_taken(i) := oe.always_taken(i)
325        }
326      }
327    }
328  }
329
330  // two circumstances:
331  // 1. oe: | br | j  |, new br should be in front of j, thus addr of j should be new pft
332  // 2. oe: | br | br |, new br could be anywhere between, thus new pft is the addr of either
333  //        the previous last br or the new br
334  val may_have_to_replace = oe.noEmptySlotForNewBr
335  val pft_need_to_change = is_new_br && may_have_to_replace
336  // it should either be the given last br or the new br
337  when (pft_need_to_change) {
338    val new_pft_offset =
339      Mux(!new_br_insert_onehot.asUInt.orR,
340        new_br_offset, oe.allSlotsForBr.last.offset)
341
342    // set jmp to invalid
343    old_entry_modified.pftAddr := getLower(io.start_addr) + new_pft_offset
344    old_entry_modified.carry := (getLower(io.start_addr) +& new_pft_offset).head(1).asBool
345    old_entry_modified.last_may_be_rvi_call := false.B
346    old_entry_modified.isCall := false.B
347    old_entry_modified.isRet := false.B
348    old_entry_modified.isJalr := false.B
349  }
350
351  val old_entry_jmp_target_modified = WireInit(oe)
352  val old_target = oe.tailSlot.getTarget(io.start_addr) // may be wrong because we store only 20 lowest bits
353  val old_tail_is_jmp = !oe.tailSlot.sharing
354  val jalr_target_modified = cfi_is_jalr && (old_target =/= io.target) && old_tail_is_jmp // TODO: pass full jalr target
355  when (jalr_target_modified) {
356    old_entry_jmp_target_modified.setByJmpTarget(io.start_addr, io.target)
357    old_entry_jmp_target_modified.always_taken := 0.U.asTypeOf(Vec(numBr, Bool()))
358  }
359
360  val old_entry_always_taken = WireInit(oe)
361  val always_taken_modified_vec = Wire(Vec(numBr, Bool())) // whether modified or not
362  for (i <- 0 until numBr) {
363    old_entry_always_taken.always_taken(i) :=
364      oe.always_taken(i) && io.cfiIndex.valid && oe.brValids(i) && io.cfiIndex.bits === oe.brOffset(i)
365    always_taken_modified_vec(i) := oe.always_taken(i) && !old_entry_always_taken.always_taken(i)
366  }
367  val always_taken_modified = always_taken_modified_vec.reduce(_||_)
368
369
370
371  val derived_from_old_entry =
372    Mux(is_new_br, old_entry_modified,
373      Mux(jalr_target_modified, old_entry_jmp_target_modified, old_entry_always_taken))
374
375
376  io.new_entry := Mux(!hit, init_entry, derived_from_old_entry)
377
378  io.new_br_insert_pos := new_br_insert_onehot
379  io.taken_mask := VecInit((io.new_entry.brOffset zip io.new_entry.brValids).map{
380    case (off, v) => io.cfiIndex.bits === off && io.cfiIndex.valid && v
381  })
382  io.jmp_taken := io.new_entry.jmpValid && io.new_entry.tailSlot.offset === io.cfiIndex.bits
383  for (i <- 0 until numBr) {
384    io.mispred_mask(i) := io.new_entry.brValids(i) && io.mispredict_vec(io.new_entry.brOffset(i))
385  }
386  io.mispred_mask.last := io.new_entry.jmpValid && io.mispredict_vec(pd.jmpOffset)
387
388  // for perf counters
389  io.is_init_entry := !hit
390  io.is_old_entry := hit && !is_new_br && !jalr_target_modified && !always_taken_modified
391  io.is_new_br := hit && is_new_br
392  io.is_jalr_target_modified := hit && jalr_target_modified
393  io.is_always_taken_modified := hit && always_taken_modified
394  io.is_br_full := hit && is_new_br && may_have_to_replace
395}
396
397class FtqPcMemWrapper(numOtherReads: Int)(implicit p: Parameters) extends XSModule with HasBackendRedirectInfo {
398  val io = IO(new Bundle {
399    val ifuPtr_w       = Input(new FtqPtr)
400    val ifuPtrPlus1_w  = Input(new FtqPtr)
401    val ifuPtrPlus2_w  = Input(new FtqPtr)
402    val commPtr_w      = Input(new FtqPtr)
403    val commPtrPlus1_w = Input(new FtqPtr)
404    val ifuPtr_rdata       = Output(new Ftq_RF_Components)
405    val ifuPtrPlus1_rdata  = Output(new Ftq_RF_Components)
406    val ifuPtrPlus2_rdata  = Output(new Ftq_RF_Components)
407    val commPtr_rdata      = Output(new Ftq_RF_Components)
408    val commPtrPlus1_rdata = Output(new Ftq_RF_Components)
409
410    val other_raddrs = Input(Vec(numOtherReads, UInt(log2Ceil(FtqSize).W)))
411    val other_rdatas = Output(Vec(numOtherReads, new Ftq_RF_Components))
412
413    val wen = Input(Bool())
414    val waddr = Input(UInt(log2Ceil(FtqSize).W))
415    val wdata = Input(new Ftq_RF_Components)
416  })
417
418  val num_pc_read = numOtherReads + 5
419  val mem = Module(new SyncDataModuleTemplate(new Ftq_RF_Components, FtqSize,
420    num_pc_read, 1, "FtqPC"))
421  mem.io.wen(0)   := io.wen
422  mem.io.waddr(0) := io.waddr
423  mem.io.wdata(0) := io.wdata
424
425  // read one cycle ahead for ftq local reads
426  val raddr_vec = VecInit(io.other_raddrs ++
427    Seq(io.ifuPtr_w.value, io.ifuPtrPlus1_w.value, io.ifuPtrPlus2_w.value, io.commPtrPlus1_w.value, io.commPtr_w.value))
428
429  mem.io.raddr := raddr_vec
430
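  // mem.io.rdata follows raddr_vec's layout:
  //   other reads ++ Seq(ifuPtr, ifuPtrPlus1, ifuPtrPlus2, commPtrPlus1, commPtr)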
431  io.other_rdatas       := mem.io.rdata.dropRight(5)
432  io.ifuPtr_rdata       := mem.io.rdata.dropRight(4).last
433  io.ifuPtrPlus1_rdata  := mem.io.rdata.dropRight(3).last
434  io.ifuPtrPlus2_rdata  := mem.io.rdata.dropRight(2).last
435  io.commPtrPlus1_rdata := mem.io.rdata.dropRight(1).last
436  io.commPtr_rdata      := mem.io.rdata.last
437}
438
439class Ftq(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelper
440  with HasBackendRedirectInfo with BPUUtils with HasBPUConst with HasPerfEvents
441  with HasICacheParameters{
442  val io = IO(new Bundle {
443    val fromBpu = Flipped(new BpuToFtqIO)
444    val fromIfu = Flipped(new IfuToFtqIO)
445    val fromBackend = Flipped(new CtrlToFtqIO)
446
447    val toBpu = new FtqToBpuIO
448    val toIfu = new FtqToIfuIO
449    val toICache = new FtqToICacheIO
450    val toBackend = new FtqToCtrlIO
451
452    val toPrefetch = new FtqPrefechBundle
453
454    val bpuInfo = new Bundle {
455      val bpRight = Output(UInt(XLEN.W))
456      val bpWrong = Output(UInt(XLEN.W))
457    }
458
459    val mmioCommitRead = Flipped(new mmioCommitRead)
460  })
461  io.bpuInfo := DontCare
462
463  val backendRedirect = Wire(Valid(new Redirect))
464  val backendRedirectReg = RegNext(backendRedirect)
465
466  val stage2Flush = backendRedirect.valid
467  val backendFlush = stage2Flush || RegNext(stage2Flush)
468  val ifuFlush = Wire(Bool())
469
470  val flush = stage2Flush || RegNext(stage2Flush)
471
472  val allowBpuIn, allowToIfu = WireInit(false.B)
473  val flushToIfu = !allowToIfu
474  allowBpuIn := !ifuFlush && !backendRedirect.valid && !backendRedirectReg.valid
475  allowToIfu := !ifuFlush && !backendRedirect.valid && !backendRedirectReg.valid
476
477  def copyNum = 5
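  // FTQ pointers (all circular):
  //   bpuPtr   - next entry to be allocated by a BPU prediction
  //   ifuPtr   - next entry to be sent to the IFU (Plus1/Plus2 are pre-computed copies)
  //   ifuWbPtr - next entry expected to be written back by IFU predecode
  //   commPtr  - next entry to be committed (with a pre-computed commPtrPlus1)
  //   copied_* - per-consumer duplicates kept only to reduce fanout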
478  val bpuPtr, ifuPtr, ifuWbPtr, commPtr = RegInit(FtqPtr(false.B, 0.U))
479  val ifuPtrPlus1 = RegInit(FtqPtr(false.B, 1.U))
480  val ifuPtrPlus2 = RegInit(FtqPtr(false.B, 2.U))
481  val commPtrPlus1 = RegInit(FtqPtr(false.B, 1.U))
482  val copied_ifu_ptr = Seq.fill(copyNum)(RegInit(FtqPtr(false.B, 0.U)))
483  val copied_bpu_ptr = Seq.fill(copyNum)(RegInit(FtqPtr(false.B, 0.U)))
484  require(FtqSize >= 4)
485  val ifuPtr_write       = WireInit(ifuPtr)
486  val ifuPtrPlus1_write  = WireInit(ifuPtrPlus1)
487  val ifuPtrPlus2_write  = WireInit(ifuPtrPlus2)
488  val ifuWbPtr_write     = WireInit(ifuWbPtr)
489  val commPtr_write      = WireInit(commPtr)
490  val commPtrPlus1_write = WireInit(commPtrPlus1)
491  ifuPtr       := ifuPtr_write
492  ifuPtrPlus1  := ifuPtrPlus1_write
493  ifuPtrPlus2  := ifuPtrPlus2_write
494  ifuWbPtr     := ifuWbPtr_write
495  commPtr      := commPtr_write
496  commPtrPlus1 := commPtrPlus1_write
497  copied_ifu_ptr.map{ptr =>
498    ptr := ifuPtr_write
499    dontTouch(ptr)
500  }
501  val validEntries = distanceBetween(bpuPtr, commPtr)
502
503  // **********************************************************************
504  // **************************** enq from bpu ****************************
505  // **********************************************************************
506  val new_entry_ready = validEntries < FtqSize.U
507  io.fromBpu.resp.ready := new_entry_ready
508
509  val bpu_s2_resp = io.fromBpu.resp.bits.s2
510  val bpu_s3_resp = io.fromBpu.resp.bits.s3
511  val bpu_s2_redirect = bpu_s2_resp.valid && bpu_s2_resp.hasRedirect
512  val bpu_s3_redirect = bpu_s3_resp.valid && bpu_s3_resp.hasRedirect
513
514  io.toBpu.enq_ptr := bpuPtr
515  val enq_fire = io.fromBpu.resp.fire() && allowBpuIn // from bpu s1
516  val bpu_in_fire = (io.fromBpu.resp.fire() || bpu_s2_redirect || bpu_s3_redirect) && allowBpuIn
517
518  val bpu_in_resp = io.fromBpu.resp.bits.selectedResp
519  val bpu_in_stage = io.fromBpu.resp.bits.selectedRespIdx
520  val bpu_in_resp_ptr = Mux(bpu_in_stage === BP_S1, bpuPtr, bpu_in_resp.ftq_idx)
521  val bpu_in_resp_idx = bpu_in_resp_ptr.value
522
523  // read ports:      prefetchReq ++  ifuReq1 + ifuReq2 + ifuReq3 + commitUpdate2 + commitUpdate
524  val ftq_pc_mem = Module(new FtqPcMemWrapper(1))
525  // resp from uBTB
526  ftq_pc_mem.io.wen := bpu_in_fire
527  ftq_pc_mem.io.waddr := bpu_in_resp_idx
528  ftq_pc_mem.io.wdata.fromBranchPrediction(bpu_in_resp)
529
530  //                                                            ifuRedirect + backendRedirect + commit
531  val ftq_redirect_sram = Module(new FtqNRSRAM(new Ftq_Redirect_SRAMEntry, 1+1+1))
532  // this info is intended to be enqueued at the last stage of bpu
533  ftq_redirect_sram.io.wen := io.fromBpu.resp.bits.lastStage.valid
534  ftq_redirect_sram.io.waddr := io.fromBpu.resp.bits.lastStage.ftq_idx.value
535  ftq_redirect_sram.io.wdata := io.fromBpu.resp.bits.last_stage_spec_info
536  println(f"ftq redirect SRAM: entry ${ftq_redirect_sram.io.wdata.getWidth} * ${FtqSize} * 3")
537  println(f"ftq redirect SRAM: ahead fh ${ftq_redirect_sram.io.wdata.afhob.getWidth} * ${FtqSize} * 3")
538
539  val ftq_meta_1r_sram = Module(new FtqNRSRAM(new Ftq_1R_SRAMEntry, 1))
540  // this info is intended to be enqueued at the last stage of bpu
541  ftq_meta_1r_sram.io.wen := io.fromBpu.resp.bits.lastStage.valid
542  ftq_meta_1r_sram.io.waddr := io.fromBpu.resp.bits.lastStage.ftq_idx.value
543  ftq_meta_1r_sram.io.wdata.meta := io.fromBpu.resp.bits.last_stage_meta
544  //                                                            ifuRedirect + backendRedirect + commit
545  val ftb_entry_mem = Module(new SyncDataModuleTemplate(new FTBEntry, FtqSize, 1+1+1, 1))
546  ftb_entry_mem.io.wen(0) := io.fromBpu.resp.bits.lastStage.valid
547  ftb_entry_mem.io.waddr(0) := io.fromBpu.resp.bits.lastStage.ftq_idx.value
548  ftb_entry_mem.io.wdata(0) := io.fromBpu.resp.bits.last_stage_ftb_entry
549
550
551  // multi-write
552  val update_target = Reg(Vec(FtqSize, UInt(VAddrBits.W))) // could be taken target or fallThrough //TODO: remove this
553  val newest_entry_target = Reg(UInt(VAddrBits.W))
554  val newest_entry_ptr = Reg(new FtqPtr)
555  val cfiIndex_vec = Reg(Vec(FtqSize, ValidUndirectioned(UInt(log2Ceil(PredictWidth).W))))
556  val mispredict_vec = Reg(Vec(FtqSize, Vec(PredictWidth, Bool())))
557  val pred_stage = Reg(Vec(FtqSize, UInt(2.W)))
558
559  val c_invalid :: c_valid :: c_commited :: Nil = Enum(3)
560  val commitStateQueue = RegInit(VecInit(Seq.fill(FtqSize) {
561    VecInit(Seq.fill(PredictWidth)(c_invalid))
562  }))
563
564  val f_to_send :: f_sent :: Nil = Enum(2)
565  val entry_fetch_status = RegInit(VecInit(Seq.fill(FtqSize)(f_sent)))
566
567  val h_not_hit :: h_false_hit :: h_hit :: Nil = Enum(3)
568  val entry_hit_status = RegInit(VecInit(Seq.fill(FtqSize)(h_not_hit)))
569
570  // modify registers one cycle later to cut critical path
571  val last_cycle_bpu_in = RegNext(bpu_in_fire)
572  val last_cycle_bpu_in_ptr = RegNext(bpu_in_resp_ptr)
573  val last_cycle_bpu_in_idx = last_cycle_bpu_in_ptr.value
574  val last_cycle_bpu_target = RegNext(bpu_in_resp.getTarget)
575  val last_cycle_cfiIndex = RegNext(bpu_in_resp.cfiIndex)
576  val last_cycle_bpu_in_stage = RegNext(bpu_in_stage)
577
578  def extra_copyNum_for_commitStateQueue = 2
579  val copied_last_cycle_bpu_in = VecInit(Seq.fill(copyNum+extra_copyNum_for_commitStateQueue)(RegNext(bpu_in_fire)))
580  val copied_last_cycle_bpu_in_ptr_for_ftq = VecInit(Seq.fill(extra_copyNum_for_commitStateQueue)(RegNext(bpu_in_resp_ptr)))
581
582  when (last_cycle_bpu_in) {
583    entry_fetch_status(last_cycle_bpu_in_idx) := f_to_send
584    cfiIndex_vec(last_cycle_bpu_in_idx) := last_cycle_cfiIndex
585    pred_stage(last_cycle_bpu_in_idx) := last_cycle_bpu_in_stage
586
587    update_target(last_cycle_bpu_in_idx) := last_cycle_bpu_target // TODO: remove this
588    newest_entry_target := last_cycle_bpu_target
589    newest_entry_ptr := last_cycle_bpu_in_ptr
590  }
591
592  // reduce fanout by delaying the write for one cycle
593  when (RegNext(last_cycle_bpu_in)) {
594    mispredict_vec(RegNext(last_cycle_bpu_in_idx)) := WireInit(VecInit(Seq.fill(PredictWidth)(false.B)))
595  }
596
597  // reduce fanout using copied last_cycle_bpu_in and copied last_cycle_bpu_in_ptr
598  val copied_last_cycle_bpu_in_for_ftq = copied_last_cycle_bpu_in.takeRight(extra_copyNum_for_commitStateQueue)
599  copied_last_cycle_bpu_in_for_ftq.zip(copied_last_cycle_bpu_in_ptr_for_ftq).zipWithIndex.map {
600    case ((in, ptr), i) =>
601      when (in) {
602        val perSetEntries = FtqSize / extra_copyNum_for_commitStateQueue // 32
603        require(FtqSize % extra_copyNum_for_commitStateQueue == 0)
604        for (j <- 0 until perSetEntries) {
605          when (ptr.value === (i*perSetEntries+j).U) {
606            commitStateQueue(i*perSetEntries+j) := VecInit(Seq.fill(PredictWidth)(c_invalid))
607          }
608        }
609      }
610  }
611
612  // the number of cycles to the backend is fixed
613  io.toBackend.newest_entry_ptr := RegNext(newest_entry_ptr)
614  io.toBackend.newest_entry_target := RegNext(newest_entry_target)
615
616
617  bpuPtr := bpuPtr + enq_fire
618  copied_bpu_ptr.map(_ := bpuPtr + enq_fire)
619  when (io.toIfu.req.fire && allowToIfu) {
620    ifuPtr_write := ifuPtrPlus1
621    ifuPtrPlus1_write := ifuPtrPlus2
622    ifuPtrPlus2_write := ifuPtrPlus2 + 1.U
623  }
624
625  // only use ftb result to assign hit status
626  when (bpu_s2_resp.valid) {
627    entry_hit_status(bpu_s2_resp.ftq_idx.value) := Mux(bpu_s2_resp.full_pred.hit, h_hit, h_not_hit)
628  }
629
630
631  io.toIfu.flushFromBpu.s2.valid := bpu_s2_redirect
632  io.toIfu.flushFromBpu.s2.bits := bpu_s2_resp.ftq_idx
633  when (bpu_s2_resp.valid && bpu_s2_resp.hasRedirect) {
634    bpuPtr := bpu_s2_resp.ftq_idx + 1.U
635    copied_bpu_ptr.map(_ := bpu_s2_resp.ftq_idx + 1.U)
636    // only when ifuPtr runs ahead of bpu s2 resp should we recover it
637    when (!isBefore(ifuPtr, bpu_s2_resp.ftq_idx)) {
638      ifuPtr_write := bpu_s2_resp.ftq_idx
639      ifuPtrPlus1_write := bpu_s2_resp.ftq_idx + 1.U
640      ifuPtrPlus2_write := bpu_s2_resp.ftq_idx + 2.U
641    }
642  }
643
644  io.toIfu.flushFromBpu.s3.valid := bpu_s3_redirect
645  io.toIfu.flushFromBpu.s3.bits := bpu_s3_resp.ftq_idx
646  when (bpu_s3_resp.valid && bpu_s3_resp.hasRedirect) {
647    bpuPtr := bpu_s3_resp.ftq_idx + 1.U
648    copied_bpu_ptr.map(_ := bpu_s3_resp.ftq_idx + 1.U)
649    // only when ifuPtr runs ahead of bpu s3 resp should we recover it
650    when (!isBefore(ifuPtr, bpu_s3_resp.ftq_idx)) {
651      ifuPtr_write := bpu_s3_resp.ftq_idx
652      ifuPtrPlus1_write := bpu_s3_resp.ftq_idx + 1.U
653      ifuPtrPlus2_write := bpu_s3_resp.ftq_idx + 2.U
654    }
655  }
656
657  XSError(isBefore(bpuPtr, ifuPtr) && !isFull(bpuPtr, ifuPtr), "\nifuPtr is before bpuPtr!\n")
658
659  (0 until copyNum).map{i =>
660    XSError(copied_bpu_ptr(i) =/= bpuPtr, "\ncopiedBpuPtr is different from bpuPtr!\n")
661  }
662
663  // ****************************************************************
664  // **************************** to ifu ****************************
665  // ****************************************************************
666  // 0  for ifu, and 1-4 for ICache
667  val bpu_in_bypass_buf = RegEnable(ftq_pc_mem.io.wdata, enable=bpu_in_fire)
668  val copied_bpu_in_bypass_buf = VecInit(Seq.fill(copyNum)(RegEnable(ftq_pc_mem.io.wdata, enable=bpu_in_fire)))
669  val bpu_in_bypass_buf_for_ifu = bpu_in_bypass_buf
670  val bpu_in_bypass_ptr = RegNext(bpu_in_resp_ptr)
671  val last_cycle_to_ifu_fire = RegNext(io.toIfu.req.fire)
672
673  val copied_bpu_in_bypass_ptr = VecInit(Seq.fill(copyNum)(RegNext(bpu_in_resp_ptr)))
674  val copied_last_cycle_to_ifu_fire = VecInit(Seq.fill(copyNum)(RegNext(io.toIfu.req.fire)))
675
676  // read pc and target
677  ftq_pc_mem.io.ifuPtr_w       := ifuPtr_write
678  ftq_pc_mem.io.ifuPtrPlus1_w  := ifuPtrPlus1_write
679  ftq_pc_mem.io.ifuPtrPlus2_w  := ifuPtrPlus2_write
680  ftq_pc_mem.io.commPtr_w      := commPtr_write
681  ftq_pc_mem.io.commPtrPlus1_w := commPtrPlus1_write
682
683
684  io.toIfu.req.bits.ftqIdx := ifuPtr
685
686  val toICachePcBundle = Wire(Vec(copyNum,new Ftq_RF_Components))
687  val toICacheEntryToSend = Wire(Vec(copyNum,Bool()))
688  val toIfuPcBundle = Wire(new Ftq_RF_Components)
689  val entry_is_to_send = WireInit(entry_fetch_status(ifuPtr.value) === f_to_send)
690  val entry_ftq_offset = WireInit(cfiIndex_vec(ifuPtr.value))
691  val entry_next_addr  = Wire(UInt(VAddrBits.W))
692
693  val pc_mem_ifu_ptr_rdata   = VecInit(Seq.fill(copyNum)(RegNext(ftq_pc_mem.io.ifuPtr_rdata)))
694  val pc_mem_ifu_plus1_rdata = VecInit(Seq.fill(copyNum)(RegNext(ftq_pc_mem.io.ifuPtrPlus1_rdata)))
695  val diff_entry_next_addr = WireInit(update_target(ifuPtr.value)) //TODO: remove this
696
697  val copied_ifu_plus1_to_send = VecInit(Seq.fill(copyNum)(RegNext(entry_fetch_status(ifuPtrPlus1.value) === f_to_send) || RegNext(last_cycle_bpu_in && bpu_in_bypass_ptr === (ifuPtrPlus1))))
698  val copied_ifu_ptr_to_send   = VecInit(Seq.fill(copyNum)(RegNext(entry_fetch_status(ifuPtr.value) === f_to_send) || RegNext(last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr)))
699
700  for(i <- 0 until copyNum){
701    when(copied_last_cycle_bpu_in(i) && copied_bpu_in_bypass_ptr(i) === copied_ifu_ptr(i)){
702      toICachePcBundle(i) := copied_bpu_in_bypass_buf(i)
703      toICacheEntryToSend(i)   := true.B
704    }.elsewhen(copied_last_cycle_to_ifu_fire(i)){
705      toICachePcBundle(i) := pc_mem_ifu_plus1_rdata(i)
706      toICacheEntryToSend(i)   := copied_ifu_plus1_to_send(i)
707    }.otherwise{
708      toICachePcBundle(i) := pc_mem_ifu_ptr_rdata(i)
709      toICacheEntryToSend(i)   := copied_ifu_ptr_to_send(i)
710    }
711  }
712
713  // TODO: reconsider target address bypass logic
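  // Select what to send to the IFU next:
  //   1) the entry the BPU wrote last cycle, bypassed from the write buffer when
  //      it is exactly the entry at ifuPtr;
  //   2) otherwise, if a request fired last cycle, the pre-read ifuPtrPlus1 data;
  //   3) otherwise, the pre-read ifuPtr data.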
714  when (last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr) {
715    toIfuPcBundle := bpu_in_bypass_buf_for_ifu
716    entry_is_to_send := true.B
717    entry_next_addr := last_cycle_bpu_target
718    entry_ftq_offset := last_cycle_cfiIndex
719    diff_entry_next_addr := last_cycle_bpu_target // TODO: remove this
720  }.elsewhen (last_cycle_to_ifu_fire) {
721    toIfuPcBundle := RegNext(ftq_pc_mem.io.ifuPtrPlus1_rdata)
722    entry_is_to_send := RegNext(entry_fetch_status(ifuPtrPlus1.value) === f_to_send) ||
723                        RegNext(last_cycle_bpu_in && bpu_in_bypass_ptr === (ifuPtrPlus1)) // reduce potential bubbles
724    entry_next_addr := Mux(last_cycle_bpu_in && bpu_in_bypass_ptr === (ifuPtrPlus1),
725                          bpu_in_bypass_buf_for_ifu.startAddr,
726                          Mux(ifuPtr === newest_entry_ptr,
727                            newest_entry_target,
728                            RegNext(ftq_pc_mem.io.ifuPtrPlus2_rdata.startAddr))) // ifuPtr+2
729  }.otherwise {
730    toIfuPcBundle := RegNext(ftq_pc_mem.io.ifuPtr_rdata)
731    entry_is_to_send := RegNext(entry_fetch_status(ifuPtr.value) === f_to_send) ||
732                        RegNext(last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr) // reduce potential bubbles
733    entry_next_addr := Mux(last_cycle_bpu_in && bpu_in_bypass_ptr === (ifuPtrPlus1),
734                          bpu_in_bypass_buf_for_ifu.startAddr,
735                          Mux(ifuPtr === newest_entry_ptr,
736                            newest_entry_target,
737                            RegNext(ftq_pc_mem.io.ifuPtrPlus1_rdata.startAddr))) // ifuPtr+1
738  }
739
740  io.toIfu.req.valid := entry_is_to_send && ifuPtr =/= bpuPtr
741  io.toIfu.req.bits.nextStartAddr := entry_next_addr
742  io.toIfu.req.bits.ftqOffset := entry_ftq_offset
743  io.toIfu.req.bits.fromFtqPcBundle(toIfuPcBundle)
744
745  io.toICache.req.valid := entry_is_to_send && ifuPtr =/= bpuPtr
746  io.toICache.req.bits.readValid.zipWithIndex.map{case(copy, i) => copy := toICacheEntryToSend(i) && copied_ifu_ptr(i) =/= copied_bpu_ptr(i)}
747  io.toICache.req.bits.pcMemRead.zipWithIndex.map{case(copy,i) => copy.fromFtqPcBundle(toICachePcBundle(i))}
748  // io.toICache.req.bits.bypassSelect := last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr
749  // io.toICache.req.bits.bpuBypassWrite.zipWithIndex.map{case(bypassWrtie, i) =>
750  //   bypassWrtie.startAddr := bpu_in_bypass_buf.tail(i).startAddr
751  //   bypassWrtie.nextlineStart := bpu_in_bypass_buf.tail(i).nextLineAddr
752  // }
753
754  // TODO: remove this
755  XSError(io.toIfu.req.valid && diff_entry_next_addr =/= entry_next_addr,
756          p"\nifu_req_target wrong! ifuPtr: ${ifuPtr}, entry_next_addr: ${Hexadecimal(entry_next_addr)} diff_entry_next_addr: ${Hexadecimal(diff_entry_next_addr)}\n")
757
758  // when fall through is smaller in value than start address, there must be a false hit
759  when (toIfuPcBundle.fallThruError && entry_hit_status(ifuPtr.value) === h_hit) {
760    when (io.toIfu.req.fire &&
761      !(bpu_s2_redirect && bpu_s2_resp.ftq_idx === ifuPtr) &&
762      !(bpu_s3_redirect && bpu_s3_resp.ftq_idx === ifuPtr)
763    ) {
764      entry_hit_status(ifuPtr.value) := h_false_hit
765      // XSError(true.B, "FTB false hit by fallThroughError, startAddr: %x, fallTHru: %x\n", io.toIfu.req.bits.startAddr, io.toIfu.req.bits.nextStartAddr)
766    }
767    XSDebug(true.B, "fallThruError! start:%x, fallThru:%x\n", io.toIfu.req.bits.startAddr, io.toIfu.req.bits.nextStartAddr)
768  }
769
770  XSPerfAccumulate(f"fall_through_error_to_ifu", toIfuPcBundle.fallThruError && entry_hit_status(ifuPtr.value) === h_hit &&
771    io.toIfu.req.fire && !(bpu_s2_redirect && bpu_s2_resp.ftq_idx === ifuPtr) && !(bpu_s3_redirect && bpu_s3_resp.ftq_idx === ifuPtr))
772
773  val ifu_req_should_be_flushed =
774    io.toIfu.flushFromBpu.shouldFlushByStage2(io.toIfu.req.bits.ftqIdx) ||
775    io.toIfu.flushFromBpu.shouldFlushByStage3(io.toIfu.req.bits.ftqIdx)
776
777  when (io.toIfu.req.fire && !ifu_req_should_be_flushed) {
778    entry_fetch_status(ifuPtr.value) := f_sent
779  }
780
781  // *********************************************************************
782  // **************************** wb from ifu ****************************
783  // *********************************************************************
784  val pdWb = io.fromIfu.pdWb
785  val pds = pdWb.bits.pd
786  val ifu_wb_valid = pdWb.valid
787  val ifu_wb_idx = pdWb.bits.ftqIdx.value
788  // read ports:                                                         commit update
789  val ftq_pd_mem = Module(new SyncDataModuleTemplate(new Ftq_pd_Entry, FtqSize, 1, 1))
790  ftq_pd_mem.io.wen(0) := ifu_wb_valid
791  ftq_pd_mem.io.waddr(0) := pdWb.bits.ftqIdx.value
792  ftq_pd_mem.io.wdata(0).fromPdWb(pdWb.bits)
793
794  val hit_pd_valid = entry_hit_status(ifu_wb_idx) === h_hit && ifu_wb_valid
795  val hit_pd_mispred = hit_pd_valid && pdWb.bits.misOffset.valid
796  val hit_pd_mispred_reg = RegNext(hit_pd_mispred, init=false.B)
797  val pd_reg       = RegEnable(pds,             pdWb.valid)
798  val start_pc_reg = RegEnable(pdWb.bits.pc(0), pdWb.valid)
799  val wb_idx_reg   = RegEnable(ifu_wb_idx,      pdWb.valid)
800
801  when (ifu_wb_valid) {
802    val comm_stq_wen = VecInit(pds.map(_.valid).zip(pdWb.bits.instrRange).map{
803      case (v, inRange) => v && inRange
804    })
805    (commitStateQueue(ifu_wb_idx) zip comm_stq_wen).map{
806      case (qe, v) => when (v) { qe := c_valid }
807    }
808  }
809
810  when (ifu_wb_valid) {
811    ifuWbPtr_write := ifuWbPtr + 1.U
812  }
813
814  ftb_entry_mem.io.raddr.head := ifu_wb_idx
815  val has_false_hit = WireInit(false.B)
816  when (RegNext(hit_pd_valid)) {
817    // check for false hit
818    val pred_ftb_entry = ftb_entry_mem.io.rdata.head
819    val brSlots = pred_ftb_entry.brSlots
820    val tailSlot = pred_ftb_entry.tailSlot
821    // we check cfis that bpu predicted
822
823    // bpu predicted branches but denied by predecode
824    val br_false_hit =
825      brSlots.map{
826        s => s.valid && !(pd_reg(s.offset).valid && pd_reg(s.offset).isBr)
827      }.reduce(_||_) ||
828      (tailSlot.valid && pred_ftb_entry.tailSlot.sharing &&
829        !(pd_reg(tailSlot.offset).valid && pd_reg(tailSlot.offset).isBr))
830
831    val jmpOffset = tailSlot.offset
832    val jmp_pd = pd_reg(jmpOffset)
833    val jal_false_hit = pred_ftb_entry.jmpValid &&
834      ((pred_ftb_entry.isJal  && !(jmp_pd.valid && jmp_pd.isJal)) ||
835       (pred_ftb_entry.isJalr && !(jmp_pd.valid && jmp_pd.isJalr)) ||
836       (pred_ftb_entry.isCall && !(jmp_pd.valid && jmp_pd.isCall)) ||
837       (pred_ftb_entry.isRet  && !(jmp_pd.valid && jmp_pd.isRet))
838      )
839
840    has_false_hit := br_false_hit || jal_false_hit || hit_pd_mispred_reg
841    XSDebug(has_false_hit, "FTB false hit by br or jal or hit_pd, startAddr: %x\n", pdWb.bits.pc(0))
842
843    // assert(!has_false_hit)
844  }
845
846  when (has_false_hit) {
847    entry_hit_status(wb_idx_reg) := h_false_hit
848  }
849
850
851  // **********************************************************************
852  // ***************************** to backend *****************************
853  // **********************************************************************
854  // to backend pc mem / target
855  io.toBackend.pc_mem_wen   := RegNext(last_cycle_bpu_in)
856  io.toBackend.pc_mem_waddr := RegNext(last_cycle_bpu_in_idx)
857  io.toBackend.pc_mem_wdata := RegNext(bpu_in_bypass_buf_for_ifu)
858
859  // *******************************************************************************
860  // **************************** redirect from backend ****************************
861  // *******************************************************************************
862
863  // redirect reads cfiInfo, coupled to redirectGen s2
864  ftq_redirect_sram.io.ren.init.last := backendRedirect.valid
865  ftq_redirect_sram.io.raddr.init.last := backendRedirect.bits.ftqIdx.value
866
867  ftb_entry_mem.io.raddr.init.last := backendRedirect.bits.ftqIdx.value
868
869  val stage3CfiInfo = ftq_redirect_sram.io.rdata.init.last
870  val fromBackendRedirect = WireInit(backendRedirectReg)
871  val backendRedirectCfi = fromBackendRedirect.bits.cfiUpdate
872  backendRedirectCfi.fromFtqRedirectSram(stage3CfiInfo)
873
874  val r_ftb_entry = ftb_entry_mem.io.rdata.init.last
875  val r_ftqOffset = fromBackendRedirect.bits.ftqOffset
876
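  // Recover the history bookkeeping attached to this redirect. On an FTB hit,
  // 'shift' counts the branches this entry would have pushed into the history up
  // to the redirected offset, plus one if the redirected instruction is a br not
  // yet recorded but still insertable; 'addIntoHist' marks whether that br's own
  // outcome belongs in the recovered history. On a miss, only the redirected br
  // itself contributes (to 'shift' when taken).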
877  when (entry_hit_status(fromBackendRedirect.bits.ftqIdx.value) === h_hit) {
878    backendRedirectCfi.shift := PopCount(r_ftb_entry.getBrMaskByOffset(r_ftqOffset)) +&
879      (backendRedirectCfi.pd.isBr && !r_ftb_entry.brIsSaved(r_ftqOffset) &&
880      !r_ftb_entry.newBrCanNotInsert(r_ftqOffset))
881
882    backendRedirectCfi.addIntoHist := backendRedirectCfi.pd.isBr && (r_ftb_entry.brIsSaved(r_ftqOffset) ||
883        !r_ftb_entry.newBrCanNotInsert(r_ftqOffset))
884  }.otherwise {
885    backendRedirectCfi.shift := (backendRedirectCfi.pd.isBr && backendRedirectCfi.taken).asUInt
886    backendRedirectCfi.addIntoHist := backendRedirectCfi.pd.isBr.asUInt
887  }
888
889
890  // ***************************************************************************
891  // **************************** redirect from ifu ****************************
892  // ***************************************************************************
893  val fromIfuRedirect = WireInit(0.U.asTypeOf(Valid(new Redirect)))
894  fromIfuRedirect.valid := pdWb.valid && pdWb.bits.misOffset.valid && !backendFlush
895  fromIfuRedirect.bits.ftqIdx := pdWb.bits.ftqIdx
896  fromIfuRedirect.bits.ftqOffset := pdWb.bits.misOffset.bits
897  fromIfuRedirect.bits.level := RedirectLevel.flushAfter
898
899  val ifuRedirectCfiUpdate = fromIfuRedirect.bits.cfiUpdate
900  ifuRedirectCfiUpdate.pc := pdWb.bits.pc(pdWb.bits.misOffset.bits)
901  ifuRedirectCfiUpdate.pd := pdWb.bits.pd(pdWb.bits.misOffset.bits)
902  ifuRedirectCfiUpdate.predTaken := cfiIndex_vec(pdWb.bits.ftqIdx.value).valid
903  ifuRedirectCfiUpdate.target := pdWb.bits.target
904  ifuRedirectCfiUpdate.taken := pdWb.bits.cfiOffset.valid
905  ifuRedirectCfiUpdate.isMisPred := pdWb.bits.misOffset.valid
906
907  val ifuRedirectReg = RegNext(fromIfuRedirect, init=0.U.asTypeOf(Valid(new Redirect)))
908  val ifuRedirectToBpu = WireInit(ifuRedirectReg)
909  ifuFlush := fromIfuRedirect.valid || ifuRedirectToBpu.valid
910
911  ftq_redirect_sram.io.ren.head := fromIfuRedirect.valid
912  ftq_redirect_sram.io.raddr.head := fromIfuRedirect.bits.ftqIdx.value
913
914  ftb_entry_mem.io.raddr.head := fromIfuRedirect.bits.ftqIdx.value
915
916  val toBpuCfi = ifuRedirectToBpu.bits.cfiUpdate
917  toBpuCfi.fromFtqRedirectSram(ftq_redirect_sram.io.rdata.head)
918  when (ifuRedirectReg.bits.cfiUpdate.pd.isRet) {
919    toBpuCfi.target := toBpuCfi.rasEntry.retAddr
920  }
921
922  // *********************************************************************
923  // **************************** wb from exu ****************************
924  // *********************************************************************
925
926  backendRedirect := io.fromBackend.redirect
927
928  def extractRedirectInfo(wb: Valid[Redirect]) = {
929    val ftqPtr = wb.bits.ftqIdx
930    val ftqOffset = wb.bits.ftqOffset
931    val taken = wb.bits.cfiUpdate.taken
932    val mispred = wb.bits.cfiUpdate.isMisPred
933    (wb.valid, ftqPtr, ftqOffset, taken, mispred)
934  }
935
936  // fix mispredict entry
937  val lastIsMispredict = RegNext(
938    backendRedirect.valid && backendRedirect.bits.level === RedirectLevel.flushAfter, init = false.B
939  )
940
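  // Update the recorded cfi of the redirected entry: a taken redirect at an
  // earlier offset overwrites cfiIndex with that offset, while a redirect at the
  // currently recorded offset re-evaluates its valid bit from 'taken'. Backend
  // redirects additionally record the misprediction bit.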
941  def updateCfiInfo(redirect: Valid[Redirect], isBackend: Boolean = true) = {
942    val (r_valid, r_ptr, r_offset, r_taken, r_mispred) = extractRedirectInfo(redirect)
943    val r_idx = r_ptr.value
944    val cfiIndex_bits_wen = r_valid && r_taken && r_offset < cfiIndex_vec(r_idx).bits
945    val cfiIndex_valid_wen = r_valid && r_offset === cfiIndex_vec(r_idx).bits
946    when (cfiIndex_bits_wen || cfiIndex_valid_wen) {
947      cfiIndex_vec(r_idx).valid := cfiIndex_bits_wen || cfiIndex_valid_wen && r_taken
948    }
949    when (cfiIndex_bits_wen) {
950      cfiIndex_vec(r_idx).bits := r_offset
951    }
952    newest_entry_target := redirect.bits.cfiUpdate.target
953    newest_entry_ptr := r_ptr
954    update_target(r_idx) := redirect.bits.cfiUpdate.target // TODO: remove this
955    if (isBackend) {
956      mispredict_vec(r_idx)(r_offset) := r_mispred
957    }
958  }
959
960  when(backendRedirectReg.valid) {
961    updateCfiInfo(backendRedirectReg)
962  }.elsewhen (ifuRedirectToBpu.valid) {
963    updateCfiInfo(ifuRedirectToBpu, isBackend=false)
964  }
965
966  // ***********************************************************************************
967  // **************************** flush ptr and state queue ****************************
968  // ***********************************************************************************
969
970  val redirectVec = VecInit(backendRedirect, fromIfuRedirect)
971
972  // when redirect, we should reset ptrs and status queues
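  // All pointers snap back to the entry after the redirected one; for backend
  // (non-IFU) redirects, the commit states of the squashed instructions inside
  // the redirected entry are cleared as well.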
973  when(redirectVec.map(r => r.valid).reduce(_||_)){
974    val r = PriorityMux(redirectVec.map(r => (r.valid -> r.bits)))
975    val notIfu = redirectVec.dropRight(1).map(r => r.valid).reduce(_||_)
976    val (idx, offset, flushItSelf) = (r.ftqIdx, r.ftqOffset, RedirectLevel.flushItself(r.level))
977    val next = idx + 1.U
978    bpuPtr := next
979    copied_bpu_ptr.map(_ := next)
980    ifuPtr_write := next
981    ifuWbPtr_write := next
982    ifuPtrPlus1_write := idx + 2.U
983    ifuPtrPlus2_write := idx + 3.U
984    when (notIfu) {
985      commitStateQueue(idx.value).zipWithIndex.foreach({ case (s, i) =>
986        when(i.U > offset || i.U === offset && flushItSelf){
987          s := c_invalid
988        }
989      })
990    }
991  }
992
993  // only the valid bit is actually needed
994  io.toIfu.redirect.bits    := backendRedirect.bits
995  io.toIfu.redirect.valid   := stage2Flush
996
997  // commit
998  for (c <- io.fromBackend.rob_commits) {
999    when(c.valid) {
1000      commitStateQueue(c.bits.ftqIdx.value)(c.bits.ftqOffset) := c_commited
1001      // TODO: remove this
1002      // For instruction fusions, we also update the next instruction
1003      when (c.bits.commitType === 4.U) {
1004        commitStateQueue(c.bits.ftqIdx.value)(c.bits.ftqOffset + 1.U) := c_commited
1005      }.elsewhen(c.bits.commitType === 5.U) {
1006        commitStateQueue(c.bits.ftqIdx.value)(c.bits.ftqOffset + 2.U) := c_commited
1007      }.elsewhen(c.bits.commitType === 6.U) {
1008        val index = (c.bits.ftqIdx + 1.U).value
1009        commitStateQueue(index)(0) := c_commited
1010      }.elsewhen(c.bits.commitType === 7.U) {
1011        val index = (c.bits.ftqIdx + 1.U).value
1012        commitStateQueue(index)(1) := c_commited
1013      }
1014    }
1015  }
1016
1017  // ****************************************************************
1018  // **************************** to bpu ****************************
1019  // ****************************************************************
1020
1021  io.toBpu.redirect <> Mux(fromBackendRedirect.valid, fromBackendRedirect, ifuRedirectToBpu)
1022
1023  val may_have_stall_from_bpu = Wire(Bool())
1024  val bpu_ftb_update_stall = RegInit(0.U(2.W)) // 2-cycle stall, so we need 3 states
1025  may_have_stall_from_bpu := bpu_ftb_update_stall =/= 0.U
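  // An entry can commit when at least one entry has been written back by
  // predecode (commPtr =/= ifuWbPtr), the BPU is not stalled for a pending FTB
  // update, and every slot in the entry is either committed or was never valid.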
1026  val canCommit = commPtr =/= ifuWbPtr && !may_have_stall_from_bpu &&
1027    Cat(commitStateQueue(commPtr.value).map(s => {
1028      s === c_invalid || s === c_commited
1029    })).andR()
1030
1031  val mmioReadPtr = io.mmioCommitRead.mmioFtqPtr
1032  val mmioLastCommit = isBefore(commPtr, mmioReadPtr) && (isAfter(ifuPtr, mmioReadPtr) || mmioReadPtr === ifuPtr) &&
1033                       Cat(commitStateQueue(mmioReadPtr.value).map(s => { s === c_invalid || s === c_commited })).andR()
1034  io.mmioCommitRead.mmioLastCommit := RegNext(mmioLastCommit)
1035
1036  // commit reads
1037  val commit_pc_bundle = RegNext(ftq_pc_mem.io.commPtr_rdata)
1038  val commit_target =
1039    Mux(RegNext(commPtr === newest_entry_ptr),
1040      RegNext(newest_entry_target),
1041      RegNext(ftq_pc_mem.io.commPtrPlus1_rdata.startAddr))
1042  ftq_pd_mem.io.raddr.last := commPtr.value
1043  val commit_pd = ftq_pd_mem.io.rdata.last
1044  ftq_redirect_sram.io.ren.last := canCommit
1045  ftq_redirect_sram.io.raddr.last := commPtr.value
1046  val commit_spec_meta = ftq_redirect_sram.io.rdata.last
1047  ftq_meta_1r_sram.io.ren(0) := canCommit
1048  ftq_meta_1r_sram.io.raddr(0) := commPtr.value
1049  val commit_meta = ftq_meta_1r_sram.io.rdata(0)
1050  ftb_entry_mem.io.raddr.last := commPtr.value
1051  val commit_ftb_entry = ftb_entry_mem.io.rdata.last
1052
1053  // need one cycle to read mem and srams
1054  val do_commit_ptr = RegNext(commPtr)
1055  val do_commit = RegNext(canCommit, init=false.B)
1056  when (canCommit) {
1057    commPtr_write := commPtrPlus1
1058    commPtrPlus1_write := commPtrPlus1 + 1.U
1059  }
1060  val commit_state = RegNext(commitStateQueue(commPtr.value))
1061  val can_commit_cfi = WireInit(cfiIndex_vec(commPtr.value))
1062  when (commitStateQueue(commPtr.value)(can_commit_cfi.bits) =/= c_commited) {
1063    can_commit_cfi.valid := false.B
1064  }
1065  val commit_cfi = RegNext(can_commit_cfi)
1066
1067  val commit_mispredict = VecInit((RegNext(mispredict_vec(commPtr.value)) zip commit_state).map {
1068    case (mis, state) => mis && state === c_commited
1069  })
1070  val can_commit_hit = entry_hit_status(commPtr.value)
1071  val commit_hit = RegNext(can_commit_hit)
1072  val diff_commit_target = RegNext(update_target(commPtr.value)) // TODO: remove this
1073  val commit_stage = RegNext(pred_stage(commPtr.value))
1074  val commit_valid = commit_hit === h_hit || commit_cfi.valid // hit or taken
1075
1076  val to_bpu_hit = can_commit_hit === h_hit || can_commit_hit === h_false_hit
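  // When a to-be-committed entry missed in the FTB (so a new entry must be
  // generated and written back), commits pause for two extra cycles, presumably
  // to give the BPU time to absorb the update; the counter walks 2 -> 1 -> 0.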
1077  switch (bpu_ftb_update_stall) {
1078    is (0.U) {
1079      when (can_commit_cfi.valid && !to_bpu_hit && canCommit) {
1080        bpu_ftb_update_stall := 2.U // 2-cycle stall
1081      }
1082    }
1083    is (2.U) {
1084      bpu_ftb_update_stall := 1.U
1085    }
1086    is (1.U) {
1087      bpu_ftb_update_stall := 0.U
1088    }
1089    is (3.U) {
1090      XSError(true.B, "bpu_ftb_update_stall should be 0, 1 or 2")
1091    }
1092  }
1093
1094  // TODO: remove this
1095  XSError(do_commit && diff_commit_target =/= commit_target, "\ncommit target should be the same as update target\n")
1096
1097  io.toBpu.update := DontCare
1098  io.toBpu.update.valid := commit_valid && do_commit
1099  val update = io.toBpu.update.bits
1100  update.false_hit   := commit_hit === h_false_hit
1101  update.pc          := commit_pc_bundle.startAddr
1102  update.meta        := commit_meta.meta
1103  update.cfi_idx     := commit_cfi
1104  update.full_target := commit_target
1105  update.from_stage  := commit_stage
1106  update.spec_info   := commit_spec_meta
1107
1108  val commit_real_hit = commit_hit === h_hit
1109  val update_ftb_entry = update.ftb_entry
1110
1111  val ftbEntryGen = Module(new FTBEntryGen).io
1112  ftbEntryGen.start_addr     := commit_pc_bundle.startAddr
1113  ftbEntryGen.old_entry      := commit_ftb_entry
1114  ftbEntryGen.pd             := commit_pd
1115  ftbEntryGen.cfiIndex       := commit_cfi
1116  ftbEntryGen.target         := commit_target
1117  ftbEntryGen.hit            := commit_real_hit
1118  ftbEntryGen.mispredict_vec := commit_mispredict
1119
1120  update_ftb_entry         := ftbEntryGen.new_entry
1121  update.new_br_insert_pos := ftbEntryGen.new_br_insert_pos
1122  update.mispred_mask      := ftbEntryGen.mispred_mask
1123  update.old_entry         := ftbEntryGen.is_old_entry
1124  update.pred_hit          := commit_hit === h_hit || commit_hit === h_false_hit
1125  update.br_taken_mask     := ftbEntryGen.taken_mask
1126  update.jmp_taken         := ftbEntryGen.jmp_taken
1127
1128  // update.full_pred.fromFtbEntry(ftbEntryGen.new_entry, update.pc)
1129  // update.full_pred.jalr_target := commit_target
1130  // update.full_pred.hit := true.B
1131  // when (update.full_pred.is_jalr) {
1132  //   update.full_pred.targets.last := commit_target
1133  // }
1134
1135  // ****************************************************************
1136  // *********************** to prefetch ****************************
1137  // ****************************************************************
1138
1139  ftq_pc_mem.io.other_raddrs(0) := DontCare
1140  if(cacheParams.hasPrefetch){
1141    val prefetchPtr = RegInit(FtqPtr(false.B, 0.U))
1142    val diff_prefetch_addr = WireInit(update_target(prefetchPtr.value)) //TODO: remove this
1143
1144    prefetchPtr := prefetchPtr + io.toPrefetch.req.fire()
1145
1146    ftq_pc_mem.io.other_raddrs(0) := prefetchPtr.value
1147
1148    when (bpu_s2_resp.valid && bpu_s2_resp.hasRedirect && !isBefore(prefetchPtr, bpu_s2_resp.ftq_idx)) {
1149      prefetchPtr := bpu_s2_resp.ftq_idx
1150    }
1151
1152    when (bpu_s3_resp.valid && bpu_s3_resp.hasRedirect && !isBefore(prefetchPtr, bpu_s3_resp.ftq_idx)) {
1153      prefetchPtr := bpu_s3_resp.ftq_idx
1154      // XSError(true.B, "\ns3_redirect mechanism not implemented!\n")
1155    }
1156
1157
1158    val prefetch_is_to_send = WireInit(entry_fetch_status(prefetchPtr.value) === f_to_send)
1159    val prefetch_addr = Wire(UInt(VAddrBits.W))
1160
1161    when (last_cycle_bpu_in && bpu_in_bypass_ptr === prefetchPtr) {
1162      prefetch_is_to_send := true.B
1163      prefetch_addr := last_cycle_bpu_target
1164      diff_prefetch_addr := last_cycle_bpu_target // TODO: remove this
1165    }.otherwise{
1166      prefetch_addr := RegNext( ftq_pc_mem.io.other_rdatas(0).startAddr)
1167    }
1168    io.toPrefetch.req.valid := prefetchPtr =/= bpuPtr && prefetch_is_to_send
1169    io.toPrefetch.req.bits.target := prefetch_addr
1170
1171    when(redirectVec.map(r => r.valid).reduce(_||_)){
1172      val r = PriorityMux(redirectVec.map(r => (r.valid -> r.bits)))
1173      val next = r.ftqIdx + 1.U
1174      prefetchPtr := next
1175    }
1176
1177    // TODO: remove this
1178    // XSError(io.toPrefetch.req.valid && diff_prefetch_addr =/= prefetch_addr,
1179    //         f"\nprefetch_req_target wrong! prefetchPtr: ${prefetchPtr}, prefetch_addr: ${Hexadecimal(prefetch_addr)} diff_prefetch_addr: ${Hexadecimal(diff_prefetch_addr)}\n")
1180
1181
1182    XSError(isBefore(bpuPtr, prefetchPtr) && !isFull(bpuPtr, prefetchPtr), "\nprefetchPtr is before bpuPtr!\n")
1183    XSError(isBefore(prefetchPtr, ifuPtr) && !isFull(ifuPtr, prefetchPtr), "\nifuPtr is before prefetchPtr!\n")
1184  }
1185  else {
1186    io.toPrefetch.req <> DontCare
1187  }
1188
1189  // ******************************************************************************
1190  // **************************** commit perf counters ****************************
1191  // ******************************************************************************
1192
1193  val commit_inst_mask    = VecInit(commit_state.map(c => c === c_commited && do_commit)).asUInt
1194  val commit_mispred_mask = commit_mispredict.asUInt
1195  val commit_not_mispred_mask = ~commit_mispred_mask
1196
1197  val commit_br_mask = commit_pd.brMask.asUInt
1198  val commit_jmp_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.jmpInfo.valid.asTypeOf(UInt(1.W)))
1199  val commit_cfi_mask = (commit_br_mask | commit_jmp_mask)
1200
1201  val mbpInstrs = commit_inst_mask & commit_cfi_mask
1202
1203  val mbpRights = mbpInstrs & commit_not_mispred_mask
1204  val mbpWrongs = mbpInstrs & commit_mispred_mask
1205
1206  io.bpuInfo.bpRight := PopCount(mbpRights)
1207  io.bpuInfo.bpWrong := PopCount(mbpWrongs)
1208
1209  val ftqBranchTraceDB = ChiselDB.createTable("FTQTable" + p(XSCoreParamsKey).HartId.toString, new FtqDebugBundle)
1210  // Cfi Info
1211  for (i <- 0 until PredictWidth) {
1212    val pc = commit_pc_bundle.startAddr + (i * instBytes).U
1213    val v = commit_state(i) === c_commited
1214    val isBr = commit_pd.brMask(i)
1215    val isJmp = commit_pd.jmpInfo.valid && commit_pd.jmpOffset === i.U
1216    val isCfi = isBr || isJmp
1217    val isTaken = commit_cfi.valid && commit_cfi.bits === i.U
1218    val misPred = commit_mispredict(i)
1219    // val ghist = commit_spec_meta.ghist.predHist
1220    val histPtr = commit_spec_meta.histPtr
1221    val predCycle = commit_meta.meta(63, 0)
1222    val target = commit_target
1223
1224    val brIdx = OHToUInt(Reverse(Cat(update_ftb_entry.brValids.zip(update_ftb_entry.brOffset).map{case(v, offset) => v && offset === i.U})))
1225    val inFtbEntry = update_ftb_entry.brValids.zip(update_ftb_entry.brOffset).map{case(v, offset) => v && offset === i.U}.reduce(_||_)
1226    val addIntoHist = ((commit_hit === h_hit) && inFtbEntry) || ((!(commit_hit === h_hit) && i.U === commit_cfi.bits && isBr && commit_cfi.valid))
1227    XSDebug(v && do_commit && isCfi, p"cfi_update: isBr(${isBr}) pc(${Hexadecimal(pc)}) " +
1228    p"taken(${isTaken}) mispred(${misPred}) cycle($predCycle) hist(${histPtr.value}) " +
1229    p"startAddr(${Hexadecimal(commit_pc_bundle.startAddr)}) AddIntoHist(${addIntoHist}) " +
1230    p"brInEntry(${inFtbEntry}) brIdx(${brIdx}) target(${Hexadecimal(target)})\n")
1231
1232    val logbundle = Wire(new FtqDebugBundle)
1233    logbundle.pc := pc
1234    logbundle.target := target
1235    logbundle.isBr := isBr
1236    logbundle.isJmp := isJmp
1237    logbundle.isCall := isJmp && commit_pd.hasCall
1238    logbundle.isRet := isJmp && commit_pd.hasRet
1239    logbundle.misPred := misPred
1240    logbundle.isTaken := isTaken
1241    logbundle.predStage := commit_stage
1242
1243    ftqBranchTraceDB.log(
1244      data = logbundle /* hardware of type T */,
1245      en = v && do_commit && isCfi,
1246      site = "FTQ" + p(XSCoreParamsKey).HartId.toString,
1247      clock = clock,
1248      reset = reset
1249    )
1250  }
1251
1252  val enq = io.fromBpu.resp
1253  val perf_redirect = backendRedirect
1254
1255  XSPerfAccumulate("entry", validEntries)
1256  XSPerfAccumulate("bpu_to_ftq_stall", enq.valid && !enq.ready)
1257  XSPerfAccumulate("mispredictRedirect", perf_redirect.valid && RedirectLevel.flushAfter === perf_redirect.bits.level)
1258  XSPerfAccumulate("replayRedirect", perf_redirect.valid && RedirectLevel.flushItself(perf_redirect.bits.level))
1259  XSPerfAccumulate("predecodeRedirect", fromIfuRedirect.valid)
1260
1261  XSPerfAccumulate("to_ifu_bubble", io.toIfu.req.ready && !io.toIfu.req.valid)
1262
1263  XSPerfAccumulate("to_ifu_stall", io.toIfu.req.valid && !io.toIfu.req.ready)
1264  XSPerfAccumulate("from_bpu_real_bubble", !enq.valid && enq.ready && allowBpuIn)
1265  XSPerfAccumulate("bpu_to_ifu_bubble", bpuPtr === ifuPtr)
1266
1267  val from_bpu = io.fromBpu.resp.bits
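  // Histogram of FTB entry lengths (fall-through minus start address, in instructions)
  // as seen at the last BPU stage: one counter per length bucket, counted on valid s3 output.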
1268  def in_entry_len_map_gen(resp: BpuToFtqBundle)(stage: String) = {
1269    val entry_len = (resp.last_stage_ftb_entry.getFallThrough(resp.s3.pc) - resp.s3.pc) >> instOffsetBits
1270    val entry_len_recording_vec = (1 to PredictWidth+1).map(i => entry_len === i.U)
1271    val entry_len_map = (1 to PredictWidth+1).map(i =>
1272      f"${stage}_ftb_entry_len_$i" -> (entry_len_recording_vec(i-1) && resp.s3.valid)
1273    ).foldLeft(Map[String, UInt]())(_+_)
1274    entry_len_map
1275  }
1276  val s3_entry_len_map = in_entry_len_map_gen(from_bpu)("s3")
1277
1278  val to_ifu = io.toIfu.req.bits
1279
1280
1281
1282  val commit_num_inst_recording_vec = (1 to PredictWidth).map(i => PopCount(commit_inst_mask) === i.U)
1283  val commit_num_inst_map = (1 to PredictWidth).map(i =>
1284    f"commit_num_inst_$i" -> (commit_num_inst_recording_vec(i-1) && do_commit)
1285  ).foldLeft(Map[String, UInt]())(_+_)
1286
1287
1288
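  // One-hot masks selecting the jump slot, qualified by the jump type reported by
  // predecode; used to break rights/wrongs down into jal / jalr / call / ret.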
1289  val commit_jal_mask  = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasJal.asTypeOf(UInt(1.W)))
1290  val commit_jalr_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasJalr.asTypeOf(UInt(1.W)))
1291  val commit_call_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasCall.asTypeOf(UInt(1.W)))
1292  val commit_ret_mask  = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasRet.asTypeOf(UInt(1.W)))
1293
1294
1295  val mbpBRights = mbpRights & commit_br_mask
1296  val mbpJRights = mbpRights & commit_jal_mask
1297  val mbpIRights = mbpRights & commit_jalr_mask
1298  val mbpCRights = mbpRights & commit_call_mask
1299  val mbpRRights = mbpRights & commit_ret_mask
1300
1301  val mbpBWrongs = mbpWrongs & commit_br_mask
1302  val mbpJWrongs = mbpWrongs & commit_jal_mask
1303  val mbpIWrongs = mbpWrongs & commit_jalr_mask
1304  val mbpCWrongs = mbpWrongs & commit_call_mask
1305  val mbpRWrongs = mbpWrongs & commit_ret_mask
1306
1307  val commit_pred_stage = RegNext(pred_stage(commPtr.value))
1308
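  // Attribute each committed prediction to the BPU stage that produced it, giving one
  // counter per stage for both the mispredicted and the correctly predicted cases.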
1309  def pred_stage_map(src: UInt, name: String) = {
1310    (0 until numBpStages).map(i =>
1311      f"${name}_stage_${i+1}" -> PopCount(src.asBools.map(_ && commit_pred_stage === BP_STAGES(i)))
1312    ).foldLeft(Map[String, UInt]())(_+_)
1313  }
1314
1315  val mispred_stage_map      = pred_stage_map(mbpWrongs,  "mispredict")
1316  val br_mispred_stage_map   = pred_stage_map(mbpBWrongs, "br_mispredict")
1317  val jalr_mispred_stage_map = pred_stage_map(mbpIWrongs, "jalr_mispredict")
1318  val correct_stage_map      = pred_stage_map(mbpRights,  "correct")
1319  val br_correct_stage_map   = pred_stage_map(mbpBRights, "br_correct")
1320  val jalr_correct_stage_map = pred_stage_map(mbpIRights, "jalr_correct")
1321
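  // Classify each update sent back to the BPU: FTB false hit or hit, a brand-new entry
  // being initialized, an old entry reused as-is, or an existing entry modified (new
  // branch inserted, jalr target corrected, or always-taken bits changed).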
1322  val update_valid = io.toBpu.update.valid
1323  def u(cond: Bool) = update_valid && cond
1324  val ftb_false_hit = u(update.false_hit)
1325  // assert(!ftb_false_hit)
1326  val ftb_hit = u(commit_hit === h_hit)
1327
1328  val ftb_new_entry = u(ftbEntryGen.is_init_entry)
1329  val ftb_new_entry_only_br = ftb_new_entry && !update_ftb_entry.jmpValid
1330  val ftb_new_entry_only_jmp = ftb_new_entry && !update_ftb_entry.brValids(0)
1331  val ftb_new_entry_has_br_and_jmp = ftb_new_entry && update_ftb_entry.brValids(0) && update_ftb_entry.jmpValid
1332
1333  val ftb_old_entry = u(ftbEntryGen.is_old_entry)
1334
1335  val ftb_modified_entry = u(ftbEntryGen.is_new_br || ftbEntryGen.is_jalr_target_modified || ftbEntryGen.is_always_taken_modified)
1336  val ftb_modified_entry_new_br = u(ftbEntryGen.is_new_br)
1337  val ftb_modified_entry_jalr_target_modified = u(ftbEntryGen.is_jalr_target_modified)
1338  val ftb_modified_entry_br_full = ftb_modified_entry && ftbEntryGen.is_br_full
1339  val ftb_modified_entry_always_taken = ftb_modified_entry && ftbEntryGen.is_always_taken_modified
1340
1341  val ftb_entry_len = (ftbEntryGen.new_entry.getFallThrough(update.pc) - update.pc) >> instOffsetBits
1342  val ftb_entry_len_recording_vec = (1 to PredictWidth+1).map(i => ftb_entry_len === i.U)
1343  val ftb_init_entry_len_map = (1 to PredictWidth+1).map(i =>
1344    f"ftb_init_entry_len_$i" -> (ftb_entry_len_recording_vec(i-1) && ftb_new_entry)
1345  ).foldLeft(Map[String, UInt]())(_+_)
1346  val ftb_modified_entry_len_map = (1 to PredictWidth+1).map(i =>
1347    f"ftb_modified_entry_len_$i" -> (ftb_entry_len_recording_vec(i-1) && ftb_modified_entry)
1348  ).foldLeft(Map[String, UInt]())(_+_)
1349
1350  val ftq_occupancy_map = (0 to FtqSize).map(i =>
1351    f"ftq_has_entry_$i" ->( validEntries === i.U)
1352  ).foldLeft(Map[String, UInt]())(_+_)
1353
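  // Gather all counters into one map and register an XSPerfAccumulate counter for each.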
1354  val perfCountsMap = Map(
1355    "BpInstr" -> PopCount(mbpInstrs),
1356    "BpBInstr" -> PopCount(mbpBRights | mbpBWrongs),
1357    "BpRight"  -> PopCount(mbpRights),
1358    "BpWrong"  -> PopCount(mbpWrongs),
1359    "BpBRight" -> PopCount(mbpBRights),
1360    "BpBWrong" -> PopCount(mbpBWrongs),
1361    "BpJRight" -> PopCount(mbpJRights),
1362    "BpJWrong" -> PopCount(mbpJWrongs),
1363    "BpIRight" -> PopCount(mbpIRights),
1364    "BpIWrong" -> PopCount(mbpIWrongs),
1365    "BpCRight" -> PopCount(mbpCRights),
1366    "BpCWrong" -> PopCount(mbpCWrongs),
1367    "BpRRight" -> PopCount(mbpRRights),
1368    "BpRWrong" -> PopCount(mbpRWrongs),
1369
1370    "ftb_false_hit"                -> PopCount(ftb_false_hit),
1371    "ftb_hit"                      -> PopCount(ftb_hit),
1372    "ftb_new_entry"                -> PopCount(ftb_new_entry),
1373    "ftb_new_entry_only_br"        -> PopCount(ftb_new_entry_only_br),
1374    "ftb_new_entry_only_jmp"       -> PopCount(ftb_new_entry_only_jmp),
1375    "ftb_new_entry_has_br_and_jmp" -> PopCount(ftb_new_entry_has_br_and_jmp),
1376    "ftb_old_entry"                -> PopCount(ftb_old_entry),
1377    "ftb_modified_entry"           -> PopCount(ftb_modified_entry),
1378    "ftb_modified_entry_new_br"    -> PopCount(ftb_modified_entry_new_br),
1379    "ftb_jalr_target_modified"     -> PopCount(ftb_modified_entry_jalr_target_modified),
1380    "ftb_modified_entry_br_full"   -> PopCount(ftb_modified_entry_br_full),
1381    "ftb_modified_entry_always_taken" -> PopCount(ftb_modified_entry_always_taken)
1382  ) ++ ftb_init_entry_len_map ++ ftb_modified_entry_len_map ++
1383  s3_entry_len_map ++ commit_num_inst_map ++ ftq_occupancy_map ++
1384  mispred_stage_map ++ br_mispred_stage_map ++ jalr_mispred_stage_map ++
1385  correct_stage_map ++ br_correct_stage_map ++ jalr_correct_stage_map
1386
1387  for((key, value) <- perfCountsMap) {
1388    XSPerfAccumulate(key, value)
1389  }
1390
1391  // --------------------------- Debug --------------------------------
1392  // XSDebug(enq_fire, p"enq! " + io.fromBpu.resp.bits.toPrintable)
1393  XSDebug(io.toIfu.req.fire, p"fire to ifu " + io.toIfu.req.bits.toPrintable)
1394  XSDebug(do_commit, p"deq! [ptr] $do_commit_ptr\n")
1395  XSDebug(true.B, p"[bpuPtr] $bpuPtr, [ifuPtr] $ifuPtr, [ifuWbPtr] $ifuWbPtr [commPtr] $commPtr\n")
1396  XSDebug(true.B, p"[in] v:${io.fromBpu.resp.valid} r:${io.fromBpu.resp.ready} " +
1397    p"[out] v:${io.toIfu.req.valid} r:${io.toIfu.req.ready}\n")
1398  XSDebug(do_commit, p"[deq info] cfiIndex: $commit_cfi, $commit_pc_bundle, target: ${Hexadecimal(commit_target)}\n")
1399
1400  //   def ubtbCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1401  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1402  //       case (((valid, pd), ans), taken) =>
1403  //       Mux(valid && pd.isBr,
1404  //         isWrong ^ Mux(ans.hit.asBool,
1405  //           Mux(ans.taken.asBool, taken && ans.target === commitEntry.target,
1406  //           !taken),
1407  //         !taken),
1408  //       false.B)
1409  //     }
1410  //   }
1411
1412  //   def btbCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1413  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1414  //       case (((valid, pd), ans), taken) =>
1415  //       Mux(valid && pd.isBr,
1416  //         isWrong ^ Mux(ans.hit.asBool,
1417  //           Mux(ans.taken.asBool, taken && ans.target === commitEntry.target,
1418  //           !taken),
1419  //         !taken),
1420  //       false.B)
1421  //     }
1422  //   }
1423
1424  //   def tageCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1425  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1426  //       case (((valid, pd), ans), taken) =>
1427  //       Mux(valid && pd.isBr,
1428  //         isWrong ^ (ans.taken.asBool === taken),
1429  //       false.B)
1430  //     }
1431  //   }
1432
1433  //   def loopCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1434  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1435  //       case (((valid, pd), ans), taken) =>
1436  //       Mux(valid && (pd.isBr) && ans.hit.asBool,
1437  //         isWrong ^ (!taken),
1438  //           false.B)
1439  //     }
1440  //   }
1441
1442  //   def rasCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1443  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1444  //       case (((valid, pd), ans), taken) =>
1445  //       Mux(valid && pd.isRet.asBool /*&& taken*/ && ans.hit.asBool,
1446  //         isWrong ^ (ans.target === commitEntry.target),
1447  //           false.B)
1448  //     }
1449  //   }
1450
1451  //   val ubtbRights = ubtbCheck(commitEntry, commitEntry.metas.map(_.ubtbAns), false.B)
1452  //   val ubtbWrongs = ubtbCheck(commitEntry, commitEntry.metas.map(_.ubtbAns), true.B)
1453  //   // btb and ubtb pred jal and jalr as well
1454  //   val btbRights = btbCheck(commitEntry, commitEntry.metas.map(_.btbAns), false.B)
1455  //   val btbWrongs = btbCheck(commitEntry, commitEntry.metas.map(_.btbAns), true.B)
1456  //   val tageRights = tageCheck(commitEntry, commitEntry.metas.map(_.tageAns), false.B)
1457  //   val tageWrongs = tageCheck(commitEntry, commitEntry.metas.map(_.tageAns), true.B)
1458
1459  //   val loopRights = loopCheck(commitEntry, commitEntry.metas.map(_.loopAns), false.B)
1460  //   val loopWrongs = loopCheck(commitEntry, commitEntry.metas.map(_.loopAns), true.B)
1461
1462  //   val rasRights = rasCheck(commitEntry, commitEntry.metas.map(_.rasAns), false.B)
1463  //   val rasWrongs = rasCheck(commitEntry, commitEntry.metas.map(_.rasAns), true.B)
1464
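  // Events exported to the hardware performance monitor via generatePerfEvent(); most
  // of them mirror XSPerfAccumulate counters defined above.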
1465  val perfEvents = Seq(
1466    ("bpu_s2_redirect        ", bpu_s2_redirect                                                             ),
1467    ("bpu_s3_redirect        ", bpu_s3_redirect                                                             ),
1468    ("bpu_to_ftq_stall       ", enq.valid && ~enq.ready                                                     ),
1469    ("mispredictRedirect     ", perf_redirect.valid && RedirectLevel.flushAfter === perf_redirect.bits.level),
1470    ("replayRedirect         ", perf_redirect.valid && RedirectLevel.flushItself(perf_redirect.bits.level)  ),
1471    ("predecodeRedirect      ", fromIfuRedirect.valid                                                       ),
1472    ("to_ifu_bubble          ", io.toIfu.req.ready && !io.toIfu.req.valid                                   ),
1473    ("from_bpu_real_bubble   ", !enq.valid && enq.ready && allowBpuIn                                       ),
1474    ("BpInstr                ", PopCount(mbpInstrs)                                                         ),
1475    ("BpBInstr               ", PopCount(mbpBRights | mbpBWrongs)                                           ),
1476    ("BpRight                ", PopCount(mbpRights)                                                         ),
1477    ("BpWrong                ", PopCount(mbpWrongs)                                                         ),
1478    ("BpBRight               ", PopCount(mbpBRights)                                                        ),
1479    ("BpBWrong               ", PopCount(mbpBWrongs)                                                        ),
1480    ("BpJRight               ", PopCount(mbpJRights)                                                        ),
1481    ("BpJWrong               ", PopCount(mbpJWrongs)                                                        ),
1482    ("BpIRight               ", PopCount(mbpIRights)                                                        ),
1483    ("BpIWrong               ", PopCount(mbpIWrongs)                                                        ),
1484    ("BpCRight               ", PopCount(mbpCRights)                                                        ),
1485    ("BpCWrong               ", PopCount(mbpCWrongs)                                                        ),
1486    ("BpRRight               ", PopCount(mbpRRights)                                                        ),
1487    ("BpRWrong               ", PopCount(mbpRWrongs)                                                        ),
1488    ("ftb_false_hit          ", PopCount(ftb_false_hit)                                                     ),
1489    ("ftb_hit                ", PopCount(ftb_hit)                                                           ),
1490  )
1491  generatePerfEvent()
1492}