xref: /XiangShan/src/main/scala/xiangshan/frontend/NewFtq.scala (revision abdc3a32eaa34df50f2b425ee3b9e027fbe16faa)
1/***************************************************************************************
2* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
3* Copyright (c) 2020-2021 Peng Cheng Laboratory
4*
5* XiangShan is licensed under Mulan PSL v2.
6* You can use this software according to the terms and conditions of the Mulan PSL v2.
7* You may obtain a copy of Mulan PSL v2 at:
8*          http://license.coscl.org.cn/MulanPSL2
9*
10* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13*
14* See the Mulan PSL v2 for more details.
15***************************************************************************************/
16
17package xiangshan.frontend
18
19import org.chipsalliance.cde.config.Parameters
20import chisel3._
21import chisel3.util._
22import utils._
23import utility._
24import xiangshan._
25import xiangshan.frontend.icache._
26import xiangshan.backend.CtrlToFtqIO
27import xiangshan.backend.decode.ImmUnion
28import utility.ChiselDB
29
30class FtqDebugBundle extends Bundle {
31  val pc = UInt(39.W)
32  val target = UInt(39.W)
33  val isBr = Bool()
34  val isJmp = Bool()
35  val isCall = Bool()
36  val isRet = Bool()
37  val misPred = Bool()
38  val isTaken = Bool()
39  val predStage = UInt(2.W)
40}
41
42class FtqPtr(implicit p: Parameters) extends CircularQueuePtr[FtqPtr](
43  p => p(XSCoreParamsKey).FtqSize
44){
45}
46
47object FtqPtr {
48  def apply(f: Bool, v: UInt)(implicit p: Parameters): FtqPtr = {
49    val ptr = Wire(new FtqPtr)
50    ptr.flag := f
51    ptr.value := v
52    ptr
53  }
54  def inverse(ptr: FtqPtr)(implicit p: Parameters): FtqPtr = {
55    apply(!ptr.flag, ptr.value)
56  }
57}
58
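// FtqNRSRAM provides numRead independent read ports by instantiating one
// SRAMTemplate copy per read port; every copy is written in parallel, so all
// copies hold identical contents.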
59class FtqNRSRAM[T <: Data](gen: T, numRead: Int)(implicit p: Parameters) extends XSModule {
60
61  val io = IO(new Bundle() {
62    val raddr = Input(Vec(numRead, UInt(log2Up(FtqSize).W)))
63    val ren = Input(Vec(numRead, Bool()))
64    val rdata = Output(Vec(numRead, gen))
65    val waddr = Input(UInt(log2Up(FtqSize).W))
66    val wen = Input(Bool())
67    val wdata = Input(gen)
68  })
69
70  for(i <- 0 until numRead){
71    val sram = Module(new SRAMTemplate(gen, FtqSize))
72    sram.io.r.req.valid := io.ren(i)
73    sram.io.r.req.bits.setIdx := io.raddr(i)
74    io.rdata(i) := sram.io.r.resp.data(0)
75    sram.io.w.req.valid := io.wen
76    sram.io.w.req.bits.setIdx := io.waddr
77    sram.io.w.req.bits.data := VecInit(io.wdata)
78  }
79
80}
81
82class Ftq_RF_Components(implicit p: Parameters) extends XSBundle with BPUUtils {
83  val startAddr = UInt(VAddrBits.W)
84  val nextLineAddr = UInt(VAddrBits.W)
85  val isNextMask = Vec(PredictWidth, Bool())
86  val fallThruError = Bool()
87  // val carry = Bool()
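  // getPc reconstructs the PC of the instruction at `offset` within this fetch block:
  // the low bits are the block's start offset plus `offset`, while the high bits come
  // from nextLineAddr when isNextMask(offset) indicates the instruction has wrapped
  // into the next cache line (qualified by the start address's line bit), and from
  // startAddr otherwise.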
88  def getPc(offset: UInt) = {
89    def getHigher(pc: UInt) = pc(VAddrBits-1, log2Ceil(PredictWidth)+instOffsetBits+1)
90    def getOffset(pc: UInt) = pc(log2Ceil(PredictWidth)+instOffsetBits, instOffsetBits)
91    Cat(getHigher(Mux(isNextMask(offset) && startAddr(log2Ceil(PredictWidth)+instOffsetBits), nextLineAddr, startAddr)),
92        getOffset(startAddr)+offset, 0.U(instOffsetBits.W))
93  }
94  def fromBranchPrediction(resp: BranchPredictionBundle) = {
95    def carryPos(addr: UInt) = addr(instOffsetBits+log2Ceil(PredictWidth)+1)
96    this.startAddr := resp.pc(3)
97    this.nextLineAddr := resp.pc(3) + (FetchWidth * 4 * 2).U // may be broken on other configs
98    this.isNextMask := VecInit((0 until PredictWidth).map(i =>
99      (resp.pc(3)(log2Ceil(PredictWidth), 1) +& i.U)(log2Ceil(PredictWidth)).asBool
100    ))
101    this.fallThruError := resp.fallThruError(3)
102    this
103  }
104  override def toPrintable: Printable = {
105    p"startAddr:${Hexadecimal(startAddr)}"
106  }
107}
108
109class Ftq_pd_Entry(implicit p: Parameters) extends XSBundle {
110  val brMask = Vec(PredictWidth, Bool())
111  val jmpInfo = ValidUndirectioned(Vec(3, Bool()))
112  val jmpOffset = UInt(log2Ceil(PredictWidth).W)
113  val jalTarget = UInt(VAddrBits.W)
114  val rvcMask = Vec(PredictWidth, Bool())
115  def hasJal  = jmpInfo.valid && !jmpInfo.bits(0)
116  def hasJalr = jmpInfo.valid && jmpInfo.bits(0)
117  def hasCall = jmpInfo.valid && jmpInfo.bits(1)
118  def hasRet  = jmpInfo.valid && jmpInfo.bits(2)
119
120  def fromPdWb(pdWb: PredecodeWritebackBundle) = {
121    val pds = pdWb.pd
122    this.brMask := VecInit(pds.map(pd => pd.isBr && pd.valid))
123    this.jmpInfo.valid := VecInit(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid)).asUInt.orR
124    this.jmpInfo.bits := ParallelPriorityMux(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid),
125                                             pds.map(pd => VecInit(pd.isJalr, pd.isCall, pd.isRet)))
126    this.jmpOffset := ParallelPriorityEncoder(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid))
127    this.rvcMask := VecInit(pds.map(pd => pd.isRVC))
128    this.jalTarget := pdWb.jalTarget
129  }
130
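  // toPd reconstructs a PreDecodeInfo for the instruction at `offset`.
  // brType is encoded as Cat(isJmp, isJalr || isBr), i.e. non-CFI, branch, jal, jalr.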
131  def toPd(offset: UInt) = {
132    require(offset.getWidth == log2Ceil(PredictWidth))
133    val pd = Wire(new PreDecodeInfo)
134    pd.valid := true.B
135    pd.isRVC := rvcMask(offset)
136    val isBr = brMask(offset)
137    val isJalr = offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(0)
138    pd.brType := Cat(offset === jmpOffset && jmpInfo.valid, isJalr || isBr)
139    pd.isCall := offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(1)
140    pd.isRet  := offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(2)
141    pd
142  }
143}
144
145class PrefetchPtrDB(implicit p: Parameters) extends Bundle {
146  val fromFtqPtr  = UInt(log2Up(p(XSCoreParamsKey).FtqSize).W)
147  val fromIfuPtr  = UInt(log2Up(p(XSCoreParamsKey).FtqSize).W)
148}
149
150class Ftq_Redirect_SRAMEntry(implicit p: Parameters) extends SpeculativeInfo {
151  val sc_disagree = if (!env.FPGAPlatform) Some(Vec(numBr, Bool())) else None
152}
153
154class Ftq_1R_SRAMEntry(implicit p: Parameters) extends XSBundle with HasBPUConst {
155  val meta = UInt(MaxMetaLength.W)
156}
157
158class Ftq_Pred_Info(implicit p: Parameters) extends XSBundle {
159  val target = UInt(VAddrBits.W)
160  val cfiIndex = ValidUndirectioned(UInt(log2Ceil(PredictWidth).W))
161}
162
163
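// FtqRead is a handshake-less read port into the FTQ: the consumer drives ptr and
// offset and samples data; apply() does both in a single call.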
164class FtqRead[T <: Data](private val gen: T)(implicit p: Parameters) extends XSBundle {
165  val ptr = Output(new FtqPtr)
166  val offset = Output(UInt(log2Ceil(PredictWidth).W))
167  val data = Input(gen)
168  def apply(ptr: FtqPtr, offset: UInt) = {
169    this.ptr := ptr
170    this.offset := offset
171    this.data
172  }
173}
174
175
176class FtqToBpuIO(implicit p: Parameters) extends XSBundle {
177  val redirect = Valid(new BranchPredictionRedirect)
178  val update = Valid(new BranchPredictionUpdate)
179  val enq_ptr = Output(new FtqPtr)
180}
181
182class FtqToIfuIO(implicit p: Parameters) extends XSBundle with HasCircularQueuePtrHelper {
183  val req = Decoupled(new FetchRequestBundle)
184  val redirect = Valid(new BranchPredictionRedirect)
185  val topdown_redirect = Valid(new BranchPredictionRedirect)
186  val flushFromBpu = new Bundle {
187    // when the IFU pipeline is not stalled,
188    // a packet from BPU s3 can have reached stage f1 at most
189    val s2 = Valid(new FtqPtr)
190    val s3 = Valid(new FtqPtr)
191    def shouldFlushBy(src: Valid[FtqPtr], idx_to_flush: FtqPtr) = {
192      src.valid && !isAfter(src.bits, idx_to_flush)
193    }
194    def shouldFlushByStage2(idx: FtqPtr) = shouldFlushBy(s2, idx)
195    def shouldFlushByStage3(idx: FtqPtr) = shouldFlushBy(s3, idx)
196  }
197}
198
199class FtqToICacheIO(implicit p: Parameters) extends XSBundle with HasCircularQueuePtrHelper {
200  // NOTE: req.bits must be prepared in cycle T,
201  // while req.valid is set true in cycle T + 1
202  val req = Decoupled(new FtqToICacheRequestBundle)
203}
204
205trait HasBackendRedirectInfo extends HasXSParameter {
206  def numRedirectPcRead = exuParameters.JmpCnt + exuParameters.AluCnt + 1
207  def isLoadReplay(r: Valid[Redirect]) = r.bits.flushItself()
208}
209
210class FtqToCtrlIO(implicit p: Parameters) extends XSBundle with HasBackendRedirectInfo {
211  // write to backend pc mem
212  val pc_mem_wen = Output(Bool())
213  val pc_mem_waddr = Output(UInt(log2Ceil(FtqSize).W))
214  val pc_mem_wdata = Output(new Ftq_RF_Components)
215  // newest target
216  val newest_entry_target = Output(UInt(VAddrBits.W))
217  val newest_entry_ptr = Output(new FtqPtr)
218}
219
220
221class FTBEntryGen(implicit p: Parameters) extends XSModule with HasBackendRedirectInfo with HasBPUParameter {
222  val io = IO(new Bundle {
223    val start_addr = Input(UInt(VAddrBits.W))
224    val old_entry = Input(new FTBEntry)
225    val pd = Input(new Ftq_pd_Entry)
226    val cfiIndex = Flipped(Valid(UInt(log2Ceil(PredictWidth).W)))
227    val target = Input(UInt(VAddrBits.W))
228    val hit = Input(Bool())
229    val mispredict_vec = Input(Vec(PredictWidth, Bool()))
230
231    val new_entry = Output(new FTBEntry)
232    val new_br_insert_pos = Output(Vec(numBr, Bool()))
233    val taken_mask = Output(Vec(numBr, Bool()))
234    val jmp_taken = Output(Bool())
235    val mispred_mask = Output(Vec(numBr+1, Bool()))
236
237    // for perf counters
238    val is_init_entry = Output(Bool())
239    val is_old_entry = Output(Bool())
240    val is_new_br = Output(Bool())
241    val is_jalr_target_modified = Output(Bool())
242    val is_always_taken_modified = Output(Bool())
243    val is_br_full = Output(Bool())
244  })
245
246  // no mispredictions detected at predecode
247  val hit = io.hit
248  val pd = io.pd
249
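  // Entry generation overview: when the old entry did not hit, a fresh init_entry is
  // built from predecode info; on a hit, the old entry is selectively patched, yielding
  // one of three derived versions: old_entry_modified (a newly discovered branch is
  // inserted), old_entry_jmp_target_modified (the recorded jalr target is corrected),
  // or old_entry_always_taken (stale always_taken bits are cleared).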
250  val init_entry = WireInit(0.U.asTypeOf(new FTBEntry))
251
252
253  val cfi_is_br = pd.brMask(io.cfiIndex.bits) && io.cfiIndex.valid
254  val entry_has_jmp = pd.jmpInfo.valid
255  val new_jmp_is_jal  = entry_has_jmp && !pd.jmpInfo.bits(0) && io.cfiIndex.valid
256  val new_jmp_is_jalr = entry_has_jmp &&  pd.jmpInfo.bits(0) && io.cfiIndex.valid
257  val new_jmp_is_call = entry_has_jmp &&  pd.jmpInfo.bits(1) && io.cfiIndex.valid
258  val new_jmp_is_ret  = entry_has_jmp &&  pd.jmpInfo.bits(2) && io.cfiIndex.valid
259  val last_jmp_rvi = entry_has_jmp && pd.jmpOffset === (PredictWidth-1).U && !pd.rvcMask.last
260  // val last_br_rvi = cfi_is_br && io.cfiIndex.bits === (PredictWidth-1).U && !pd.rvcMask.last
261
262  val cfi_is_jal = io.cfiIndex.bits === pd.jmpOffset && new_jmp_is_jal
263  val cfi_is_jalr = io.cfiIndex.bits === pd.jmpOffset && new_jmp_is_jalr
264
265  def carryPos = log2Ceil(PredictWidth)+instOffsetBits
266  def getLower(pc: UInt) = pc(carryPos-1, instOffsetBits)
267  // if not hit, establish a new entry
268  init_entry.valid := true.B
269  // tag is left for ftb to assign
270
271  // case br
272  val init_br_slot = init_entry.getSlotForBr(0)
273  when (cfi_is_br) {
274    init_br_slot.valid := true.B
275    init_br_slot.offset := io.cfiIndex.bits
276    init_br_slot.setLowerStatByTarget(io.start_addr, io.target, numBr == 1)
277    init_entry.always_taken(0) := true.B // set to always taken on init
278  }
279
280  // case jmp
281  when (entry_has_jmp) {
282    init_entry.tailSlot.offset := pd.jmpOffset
283    init_entry.tailSlot.valid := new_jmp_is_jal || new_jmp_is_jalr
284    init_entry.tailSlot.setLowerStatByTarget(io.start_addr, Mux(cfi_is_jalr, io.target, pd.jalTarget), isShare=false)
285  }
286
287  val jmpPft = getLower(io.start_addr) +& pd.jmpOffset +& Mux(pd.rvcMask(pd.jmpOffset), 1.U, 2.U)
288  init_entry.pftAddr := Mux(entry_has_jmp && !last_jmp_rvi, jmpPft, getLower(io.start_addr))
289  init_entry.carry   := Mux(entry_has_jmp && !last_jmp_rvi, jmpPft(carryPos-instOffsetBits), true.B)
290  init_entry.isJalr := new_jmp_is_jalr
291  init_entry.isCall := new_jmp_is_call
292  init_entry.isRet  := new_jmp_is_ret
293  // if the jump in the last slot is a non-RVC (RVI) call, the fall-through address points to the middle of that instruction
294  init_entry.last_may_be_rvi_call := pd.jmpOffset === (PredictWidth-1).U && !pd.rvcMask(pd.jmpOffset)
295
296  // if hit, check whether a new cfi(only br is possible) is detected
297  val oe = io.old_entry
298  val br_recorded_vec = oe.getBrRecordedVec(io.cfiIndex.bits)
299  val br_recorded = br_recorded_vec.asUInt.orR
300  val is_new_br = cfi_is_br && !br_recorded
301  val new_br_offset = io.cfiIndex.bits
302  // vec(i) means new br will be inserted BEFORE old br(i)
303  val allBrSlotsVec = oe.allSlotsForBr
304  val new_br_insert_onehot = VecInit((0 until numBr).map{
305    i => i match {
306      case 0 =>
307        !allBrSlotsVec(0).valid || new_br_offset < allBrSlotsVec(0).offset
308      case idx =>
309        allBrSlotsVec(idx-1).valid && new_br_offset > allBrSlotsVec(idx-1).offset &&
310        (!allBrSlotsVec(idx).valid || new_br_offset < allBrSlotsVec(idx).offset)
311    }
312  })
313
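  // Insert the new branch at its position: branches recorded at larger offsets shift
  // one slot toward the tail (fromAnotherSlot), while always_taken is cleared for
  // branches at smaller offsets, since they were evidently not taken this time.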
314  val old_entry_modified = WireInit(io.old_entry)
315  for (i <- 0 until numBr) {
316    val slot = old_entry_modified.allSlotsForBr(i)
317    when (new_br_insert_onehot(i)) {
318      slot.valid := true.B
319      slot.offset := new_br_offset
320      slot.setLowerStatByTarget(io.start_addr, io.target, i == numBr-1)
321      old_entry_modified.always_taken(i) := true.B
322    }.elsewhen (new_br_offset > oe.allSlotsForBr(i).offset) {
323      old_entry_modified.always_taken(i) := false.B
324      // all other fields remain unchanged
325    }.otherwise {
326      // case i == 0, remain unchanged
327      if (i != 0) {
328        val noNeedToMoveFromFormerSlot = (i == numBr-1).B && !oe.brSlots.last.valid
329        when (!noNeedToMoveFromFormerSlot) {
330          slot.fromAnotherSlot(oe.allSlotsForBr(i-1))
331          old_entry_modified.always_taken(i) := oe.always_taken(i)
332        }
333      }
334    }
335  }
336
337  // two circumstances:
338  // 1. oe: | br | j  |, new br should be in front of j, thus addr of j should be new pft
339  // 2. oe: | br | br |, new br could be anywhere between, thus new pft is the addr of either
340  //        the previous last br or the new br
341  val may_have_to_replace = oe.noEmptySlotForNewBr
342  val pft_need_to_change = is_new_br && may_have_to_replace
343  // it should either be the given last br or the new br
344  when (pft_need_to_change) {
345    val new_pft_offset =
346      Mux(!new_br_insert_onehot.asUInt.orR,
347        new_br_offset, oe.allSlotsForBr.last.offset)
348
349    // set jmp to invalid
350    old_entry_modified.pftAddr := getLower(io.start_addr) + new_pft_offset
351    old_entry_modified.carry := (getLower(io.start_addr) +& new_pft_offset).head(1).asBool
352    old_entry_modified.last_may_be_rvi_call := false.B
353    old_entry_modified.isCall := false.B
354    old_entry_modified.isRet := false.B
355    old_entry_modified.isJalr := false.B
356  }
357
358  val old_entry_jmp_target_modified = WireInit(oe)
359  val old_target = oe.tailSlot.getTarget(io.start_addr) // may be wrong because only the lowest 20 bits of the target are stored
360  val old_tail_is_jmp = !oe.tailSlot.sharing
361  val jalr_target_modified = cfi_is_jalr && (old_target =/= io.target) && old_tail_is_jmp // TODO: pass full jalr target
362  when (jalr_target_modified) {
363    old_entry_jmp_target_modified.setByJmpTarget(io.start_addr, io.target)
364    old_entry_jmp_target_modified.always_taken := 0.U.asTypeOf(Vec(numBr, Bool()))
365  }
366
367  val old_entry_always_taken = WireInit(oe)
368  val always_taken_modified_vec = Wire(Vec(numBr, Bool())) // whether modified or not
369  for (i <- 0 until numBr) {
370    old_entry_always_taken.always_taken(i) :=
371      oe.always_taken(i) && io.cfiIndex.valid && oe.brValids(i) && io.cfiIndex.bits === oe.brOffset(i)
372    always_taken_modified_vec(i) := oe.always_taken(i) && !old_entry_always_taken.always_taken(i)
373  }
374  val always_taken_modified = always_taken_modified_vec.reduce(_||_)
375
376
377
378  val derived_from_old_entry =
379    Mux(is_new_br, old_entry_modified,
380      Mux(jalr_target_modified, old_entry_jmp_target_modified, old_entry_always_taken))
381
382
383  io.new_entry := Mux(!hit, init_entry, derived_from_old_entry)
384
385  io.new_br_insert_pos := new_br_insert_onehot
386  io.taken_mask := VecInit((io.new_entry.brOffset zip io.new_entry.brValids).map{
387    case (off, v) => io.cfiIndex.bits === off && io.cfiIndex.valid && v
388  })
389  io.jmp_taken := io.new_entry.jmpValid && io.new_entry.tailSlot.offset === io.cfiIndex.bits
390  for (i <- 0 until numBr) {
391    io.mispred_mask(i) := io.new_entry.brValids(i) && io.mispredict_vec(io.new_entry.brOffset(i))
392  }
393  io.mispred_mask.last := io.new_entry.jmpValid && io.mispredict_vec(pd.jmpOffset)
394
395  // for perf counters
396  io.is_init_entry := !hit
397  io.is_old_entry := hit && !is_new_br && !jalr_target_modified && !always_taken_modified
398  io.is_new_br := hit && is_new_br
399  io.is_jalr_target_modified := hit && jalr_target_modified
400  io.is_always_taken_modified := hit && always_taken_modified
401  io.is_br_full := hit && is_new_br && may_have_to_replace
402}
403
404class FtqPcMemWrapper(numOtherReads: Int)(implicit p: Parameters) extends XSModule with HasBackendRedirectInfo {
405  val io = IO(new Bundle {
406    val ifuPtr_w       = Input(new FtqPtr)
407    val ifuPtrPlus1_w  = Input(new FtqPtr)
408    val ifuPtrPlus2_w  = Input(new FtqPtr)
409    val commPtr_w      = Input(new FtqPtr)
410    val commPtrPlus1_w = Input(new FtqPtr)
411    val ifuPtr_rdata       = Output(new Ftq_RF_Components)
412    val ifuPtrPlus1_rdata  = Output(new Ftq_RF_Components)
413    val ifuPtrPlus2_rdata  = Output(new Ftq_RF_Components)
414    val commPtr_rdata      = Output(new Ftq_RF_Components)
415    val commPtrPlus1_rdata = Output(new Ftq_RF_Components)
416
417    val other_raddrs = Input(Vec(numOtherReads, UInt(log2Ceil(FtqSize).W)))
418    val other_rdatas = Output(Vec(numOtherReads, new Ftq_RF_Components))
419
420    val wen = Input(Bool())
421    val waddr = Input(UInt(log2Ceil(FtqSize).W))
422    val wdata = Input(new Ftq_RF_Components)
423  })
424
425  val num_pc_read = numOtherReads + 5
426  val mem = Module(new SyncDataModuleTemplate(new Ftq_RF_Components, FtqSize,
427    num_pc_read, 1, "FtqPC"))
428  mem.io.wen(0)   := io.wen
429  mem.io.waddr(0) := io.waddr
430  mem.io.wdata(0) := io.wdata
431
432  // read one cycle ahead for ftq local reads
433  val raddr_vec = VecInit(io.other_raddrs ++
434    Seq(io.ifuPtr_w.value, io.ifuPtrPlus1_w.value, io.ifuPtrPlus2_w.value, io.commPtrPlus1_w.value, io.commPtr_w.value))
435
436  mem.io.raddr := raddr_vec
437
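  // the last five read ports are, in order: ifuPtr, ifuPtrPlus1, ifuPtrPlus2,
  // commPtrPlus1 and commPtr; the leading ports serve other_raddrs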
438  io.other_rdatas       := mem.io.rdata.dropRight(5)
439  io.ifuPtr_rdata       := mem.io.rdata.dropRight(4).last
440  io.ifuPtrPlus1_rdata  := mem.io.rdata.dropRight(3).last
441  io.ifuPtrPlus2_rdata  := mem.io.rdata.dropRight(2).last
442  io.commPtrPlus1_rdata := mem.io.rdata.dropRight(1).last
443  io.commPtr_rdata      := mem.io.rdata.last
444}
445
446class Ftq(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelper
447  with HasBackendRedirectInfo with BPUUtils with HasBPUConst with HasPerfEvents
448  with HasICacheParameters{
449  val io = IO(new Bundle {
450    val fromBpu = Flipped(new BpuToFtqIO)
451    val fromIfu = Flipped(new IfuToFtqIO)
452    val fromBackend = Flipped(new CtrlToFtqIO)
453
454    val toBpu = new FtqToBpuIO
455    val toIfu = new FtqToIfuIO
456    val toICache = new FtqToICacheIO
457    val toBackend = new FtqToCtrlIO
458
459    val toPrefetch = new FtqPrefechBundle
460
461    val bpuInfo = new Bundle {
462      val bpRight = Output(UInt(XLEN.W))
463      val bpWrong = Output(UInt(XLEN.W))
464    }
465
466    val mmioCommitRead = Flipped(new mmioCommitRead)
467
468    // for perf
469    val ControlBTBMissBubble = Output(Bool())
470    val TAGEMissBubble = Output(Bool())
471    val SCMissBubble = Output(Bool())
472    val ITTAGEMissBubble = Output(Bool())
473    val RASMissBubble = Output(Bool())
474  })
475  io.bpuInfo := DontCare
476
477  val topdown_stage = RegInit(0.U.asTypeOf(new FrontendTopDownBundle))
478  // only driven by clock, not valid-ready
479  topdown_stage := io.fromBpu.resp.bits.topdown_info
480  io.toIfu.req.bits.topdown_info := topdown_stage
481
482  val ifuRedirected = RegInit(VecInit(Seq.fill(FtqSize)(false.B)))
483
484  val backendRedirect = Wire(Valid(new BranchPredictionRedirect))
485  when(io.fromBackend.redirect.valid) {
486    assert(RegNext(io.fromBackend.ftqIdxAhead.map(_.valid).reduce(_|_)))
487    assert(io.fromBackend.ftqIdxSelOH.valid)
488    assert(PopCount(io.fromBackend.ftqIdxSelOH.bits) === 1.U)
489  }
490
491  val stage2Flush = backendRedirect.valid
492  val backendFlush = stage2Flush || RegNext(stage2Flush)
493  val ifuFlush = Wire(Bool())
494
495  val flush = stage2Flush || RegNext(stage2Flush)
496
497  val allowBpuIn, allowToIfu = WireInit(false.B)
498  val flushToIfu = !allowToIfu
499  allowBpuIn := !ifuFlush && !backendRedirect.valid
500  allowToIfu := !ifuFlush && !backendRedirect.valid
501
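  // Queue pointers: bpuPtr marks the enqueue position of BPU predictions, ifuPtr the
  // next request to send to the IFU, ifuWbPtr the next expected predecode writeback,
  // and commPtr the next entry to commit. The Plus1/Plus2 variants are kept as
  // pre-incremented registers, and the copied_* pointers duplicate ifuPtr/bpuPtr to
  // cut fanout.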
502  def copyNum = 5
503  val bpuPtr, ifuPtr, ifuWbPtr, commPtr = RegInit(FtqPtr(false.B, 0.U))
504  val ifuPtrPlus1 = RegInit(FtqPtr(false.B, 1.U))
505  val ifuPtrPlus2 = RegInit(FtqPtr(false.B, 2.U))
506  val commPtrPlus1 = RegInit(FtqPtr(false.B, 1.U))
507  val copied_ifu_ptr = Seq.fill(copyNum)(RegInit(FtqPtr(false.B, 0.U)))
508  val copied_bpu_ptr = Seq.fill(copyNum)(RegInit(FtqPtr(false.B, 0.U)))
509  require(FtqSize >= 4)
510  val ifuPtr_write       = WireInit(ifuPtr)
511  val ifuPtrPlus1_write  = WireInit(ifuPtrPlus1)
512  val ifuPtrPlus2_write  = WireInit(ifuPtrPlus2)
513  val ifuWbPtr_write     = WireInit(ifuWbPtr)
514  val commPtr_write      = WireInit(commPtr)
515  val commPtrPlus1_write = WireInit(commPtrPlus1)
516  ifuPtr       := ifuPtr_write
517  ifuPtrPlus1  := ifuPtrPlus1_write
518  ifuPtrPlus2  := ifuPtrPlus2_write
519  ifuWbPtr     := ifuWbPtr_write
520  commPtr      := commPtr_write
521  commPtrPlus1 := commPtrPlus1_write
522  copied_ifu_ptr.map{ptr =>
523    ptr := ifuPtr_write
524    dontTouch(ptr)
525  }
526  val validEntries = distanceBetween(bpuPtr, commPtr)
527  val canCommit = Wire(Bool())
528
529  // **********************************************************************
530  // **************************** enq from bpu ****************************
531  // **********************************************************************
532  val new_entry_ready = validEntries < FtqSize.U || canCommit
533  io.fromBpu.resp.ready := new_entry_ready
534
535  val bpu_s2_resp = io.fromBpu.resp.bits.s2
536  val bpu_s3_resp = io.fromBpu.resp.bits.s3
537  val bpu_s2_redirect = bpu_s2_resp.valid(3) && bpu_s2_resp.hasRedirect(3)
538  val bpu_s3_redirect = bpu_s3_resp.valid(3) && bpu_s3_resp.hasRedirect(3)
539
540  io.toBpu.enq_ptr := bpuPtr
541  val enq_fire = io.fromBpu.resp.fire && allowBpuIn // from bpu s1
542  val bpu_in_fire = (io.fromBpu.resp.fire || bpu_s2_redirect || bpu_s3_redirect) && allowBpuIn
543
544  val bpu_in_resp = io.fromBpu.resp.bits.selectedResp
545  val bpu_in_stage = io.fromBpu.resp.bits.selectedRespIdxForFtq
546  val bpu_in_resp_ptr = Mux(bpu_in_stage === BP_S1, bpuPtr, bpu_in_resp.ftq_idx)
547  val bpu_in_resp_idx = bpu_in_resp_ptr.value
548
549  // read ports:      prefetchReq ++  ifuReq1 + ifuReq2 + ifuReq3 + commitUpdate2 + commitUpdate
550  val ftq_pc_mem = Module(new FtqPcMemWrapper(1))
551  // write the selected BPU response into the pc mem
552  ftq_pc_mem.io.wen := bpu_in_fire
553  ftq_pc_mem.io.waddr := bpu_in_resp_idx
554  ftq_pc_mem.io.wdata.fromBranchPrediction(bpu_in_resp)
555
556  //                                                            ifuRedirect + backendRedirect + commit
557  val ftq_redirect_sram = Module(new FtqNRSRAM(new Ftq_Redirect_SRAMEntry, 1+BackendRedirectNum+1))
558  // this info is intended to be enqueued at the last stage of the BPU
559  ftq_redirect_sram.io.wen := io.fromBpu.resp.bits.lastStage.valid(3)
560  ftq_redirect_sram.io.waddr := io.fromBpu.resp.bits.lastStage.ftq_idx.value
561  ftq_redirect_sram.io.wdata := io.fromBpu.resp.bits.last_stage_spec_info
562  println(f"ftq redirect SRAM: entry ${ftq_redirect_sram.io.wdata.getWidth} * ${FtqSize} * 3")
563  println(f"ftq redirect SRAM: ahead fh ${ftq_redirect_sram.io.wdata.afhob.getWidth} * ${FtqSize} * 3")
564
565  val ftq_meta_1r_sram = Module(new FtqNRSRAM(new Ftq_1R_SRAMEntry, 1))
566  // this info is intended to be enqueued at the last stage of the BPU
567  ftq_meta_1r_sram.io.wen := io.fromBpu.resp.bits.lastStage.valid(3)
568  ftq_meta_1r_sram.io.waddr := io.fromBpu.resp.bits.lastStage.ftq_idx.value
569  ftq_meta_1r_sram.io.wdata.meta := io.fromBpu.resp.bits.last_stage_meta
570  //                                                            ifuRedirect + backendRedirect + commit
571  val ftb_entry_mem = Module(new SyncDataModuleTemplate(new FTBEntry, FtqSize, 1+BackendRedirectNum+1, 1))
572  ftb_entry_mem.io.wen(0) := io.fromBpu.resp.bits.lastStage.valid(3)
573  ftb_entry_mem.io.waddr(0) := io.fromBpu.resp.bits.lastStage.ftq_idx.value
574  ftb_entry_mem.io.wdata(0) := io.fromBpu.resp.bits.last_stage_ftb_entry
575
576
577  // multi-write
578  val update_target = Reg(Vec(FtqSize, UInt(VAddrBits.W))) // could be taken target or fallThrough //TODO: remove this
579  val newest_entry_target = Reg(UInt(VAddrBits.W))
580  val newest_entry_ptr = Reg(new FtqPtr)
581  val cfiIndex_vec = Reg(Vec(FtqSize, ValidUndirectioned(UInt(log2Ceil(PredictWidth).W))))
582  val mispredict_vec = Reg(Vec(FtqSize, Vec(PredictWidth, Bool())))
583  val pred_stage = Reg(Vec(FtqSize, UInt(2.W)))
584  val pred_s1_cycle = if (!env.FPGAPlatform) Some(Reg(Vec(FtqSize, UInt(64.W)))) else None
585
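  // Per-entry bookkeeping: commitStateQueue tracks the commit state of every
  // instruction slot (invalid / valid / committed), entry_fetch_status records whether
  // an entry still has to be sent to the IFU, and entry_hit_status records the FTB hit
  // status used later for BPU updates.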
586  val c_invalid :: c_valid :: c_commited :: Nil = Enum(3)
587  val commitStateQueue = RegInit(VecInit(Seq.fill(FtqSize) {
588    VecInit(Seq.fill(PredictWidth)(c_invalid))
589  }))
590
591  val f_to_send :: f_sent :: Nil = Enum(2)
592  val entry_fetch_status = RegInit(VecInit(Seq.fill(FtqSize)(f_sent)))
593
594  val h_not_hit :: h_false_hit :: h_hit :: Nil = Enum(3)
595  val entry_hit_status = RegInit(VecInit(Seq.fill(FtqSize)(h_not_hit)))
596
597  // modify registers one cycle later to cut critical path
598  val last_cycle_bpu_in = RegNext(bpu_in_fire)
599  val last_cycle_bpu_in_ptr = RegNext(bpu_in_resp_ptr)
600  val last_cycle_bpu_in_idx = last_cycle_bpu_in_ptr.value
601  val last_cycle_bpu_target = RegNext(bpu_in_resp.getTarget(3))
602  val last_cycle_cfiIndex = RegNext(bpu_in_resp.cfiIndex(3))
603  val last_cycle_bpu_in_stage = RegNext(bpu_in_stage)
604
605  def extra_copyNum_for_commitStateQueue = 2
606  val copied_last_cycle_bpu_in = VecInit(Seq.fill(copyNum+extra_copyNum_for_commitStateQueue)(RegNext(bpu_in_fire)))
607  val copied_last_cycle_bpu_in_ptr_for_ftq = VecInit(Seq.fill(extra_copyNum_for_commitStateQueue)(RegNext(bpu_in_resp_ptr)))
608
609  when (last_cycle_bpu_in) {
610    entry_fetch_status(last_cycle_bpu_in_idx) := f_to_send
611    cfiIndex_vec(last_cycle_bpu_in_idx) := last_cycle_cfiIndex
612    pred_stage(last_cycle_bpu_in_idx) := last_cycle_bpu_in_stage
613
614    update_target(last_cycle_bpu_in_idx) := last_cycle_bpu_target // TODO: remove this
615    newest_entry_target := last_cycle_bpu_target
616    newest_entry_ptr := last_cycle_bpu_in_ptr
617  }
618
619  // reduce fanout by delaying the write for one cycle
620  when (RegNext(last_cycle_bpu_in)) {
621    mispredict_vec(RegNext(last_cycle_bpu_in_idx)) := WireInit(VecInit(Seq.fill(PredictWidth)(false.B)))
622  }
623
624  // record s1 pred cycles
625  pred_s1_cycle.map(vec => {
626    when (bpu_in_fire && (bpu_in_stage === BP_S1)) {
627      vec(bpu_in_resp_ptr.value) := bpu_in_resp.full_pred(0).predCycle.getOrElse(0.U)
628    }
629  })
630
631  // reduce fanout using copied last_cycle_bpu_in and copied last_cycle_bpu_in_ptr
632  val copied_last_cycle_bpu_in_for_ftq = copied_last_cycle_bpu_in.takeRight(extra_copyNum_for_commitStateQueue)
633  copied_last_cycle_bpu_in_for_ftq.zip(copied_last_cycle_bpu_in_ptr_for_ftq).zipWithIndex.map {
634    case ((in, ptr), i) =>
635      when (in) {
636        val perSetEntries = FtqSize / extra_copyNum_for_commitStateQueue // 32
637        require(FtqSize % extra_copyNum_for_commitStateQueue == 0)
638        for (j <- 0 until perSetEntries) {
639          when (ptr.value === (i*perSetEntries+j).U) {
640            commitStateQueue(i*perSetEntries+j) := VecInit(Seq.fill(PredictWidth)(c_invalid))
641          }
642        }
643      }
644  }
645
646  // the number of cycles is fixed
647  io.toBackend.newest_entry_ptr := RegNext(newest_entry_ptr)
648  io.toBackend.newest_entry_target := RegNext(newest_entry_target)
649
650
651  bpuPtr := bpuPtr + enq_fire
652  copied_bpu_ptr.map(_ := bpuPtr + enq_fire)
653  when (io.toIfu.req.fire && allowToIfu) {
654    ifuPtr_write := ifuPtrPlus1
655    ifuPtrPlus1_write := ifuPtrPlus2
656    ifuPtrPlus2_write := ifuPtrPlus2 + 1.U
657  }
658
659  // only use ftb result to assign hit status
660  when (bpu_s2_resp.valid(3)) {
661    entry_hit_status(bpu_s2_resp.ftq_idx.value) := Mux(bpu_s2_resp.full_pred(3).hit, h_hit, h_not_hit)
662  }
663
664
665  io.toIfu.flushFromBpu.s2.valid := bpu_s2_redirect
666  io.toIfu.flushFromBpu.s2.bits := bpu_s2_resp.ftq_idx
667  when (bpu_s2_redirect) {
668    bpuPtr := bpu_s2_resp.ftq_idx + 1.U
669    copied_bpu_ptr.map(_ := bpu_s2_resp.ftq_idx + 1.U)
670    // only when ifuPtr runs ahead of bpu s2 resp should we recover it
671    when (!isBefore(ifuPtr, bpu_s2_resp.ftq_idx)) {
672      ifuPtr_write := bpu_s2_resp.ftq_idx
673      ifuPtrPlus1_write := bpu_s2_resp.ftq_idx + 1.U
674      ifuPtrPlus2_write := bpu_s2_resp.ftq_idx + 2.U
675    }
676  }
677
678  io.toIfu.flushFromBpu.s3.valid := bpu_s3_redirect
679  io.toIfu.flushFromBpu.s3.bits := bpu_s3_resp.ftq_idx
680  when (bpu_s3_redirect) {
681    bpuPtr := bpu_s3_resp.ftq_idx + 1.U
682    copied_bpu_ptr.map(_ := bpu_s3_resp.ftq_idx + 1.U)
683    // only when ifuPtr runs ahead of bpu s3 resp should we recover it
684    when (!isBefore(ifuPtr, bpu_s3_resp.ftq_idx)) {
685      ifuPtr_write := bpu_s3_resp.ftq_idx
686      ifuPtrPlus1_write := bpu_s3_resp.ftq_idx + 1.U
687      ifuPtrPlus2_write := bpu_s3_resp.ftq_idx + 2.U
688    }
689  }
690
691  XSError(isBefore(bpuPtr, ifuPtr) && !isFull(bpuPtr, ifuPtr), "\nifuPtr is before bpuPtr!\n")
692  XSError(isBefore(ifuWbPtr, commPtr) && !isFull(ifuWbPtr, commPtr), "\ncommPtr is before ifuWbPtr!\n")
693
694  (0 until copyNum).map{i =>
695    XSError(copied_bpu_ptr(i) =/= bpuPtr, "\ncopiedBpuPtr is different from bpuPtr!\n")
696  }
697
698  // ****************************************************************
699  // **************************** to ifu ****************************
700  // ****************************************************************
701  // 0  for ifu, and 1-4 for ICache
702  val bpu_in_bypass_buf = RegEnable(ftq_pc_mem.io.wdata, bpu_in_fire)
703  val copied_bpu_in_bypass_buf = VecInit(Seq.fill(copyNum)(RegEnable(ftq_pc_mem.io.wdata, bpu_in_fire)))
704  val bpu_in_bypass_buf_for_ifu = bpu_in_bypass_buf
705  val bpu_in_bypass_ptr = RegNext(bpu_in_resp_ptr)
706  val last_cycle_to_ifu_fire = RegNext(io.toIfu.req.fire)
707
708  val copied_bpu_in_bypass_ptr = VecInit(Seq.fill(copyNum)(RegNext(bpu_in_resp_ptr)))
709  val copied_last_cycle_to_ifu_fire = VecInit(Seq.fill(copyNum)(RegNext(io.toIfu.req.fire)))
710
711  // read pc and target
712  ftq_pc_mem.io.ifuPtr_w       := ifuPtr_write
713  ftq_pc_mem.io.ifuPtrPlus1_w  := ifuPtrPlus1_write
714  ftq_pc_mem.io.ifuPtrPlus2_w  := ifuPtrPlus2_write
715  ftq_pc_mem.io.commPtr_w      := commPtr_write
716  ftq_pc_mem.io.commPtrPlus1_w := commPtrPlus1_write
717
718
719  io.toIfu.req.bits.ftqIdx := ifuPtr
720
721  val toICachePcBundle = Wire(Vec(copyNum,new Ftq_RF_Components))
722  val toICacheEntryToSend = Wire(Vec(copyNum,Bool()))
723  val toIfuPcBundle = Wire(new Ftq_RF_Components)
724  val entry_is_to_send = WireInit(entry_fetch_status(ifuPtr.value) === f_to_send)
725  val entry_ftq_offset = WireInit(cfiIndex_vec(ifuPtr.value))
726  val entry_next_addr  = Wire(UInt(VAddrBits.W))
727
728  val pc_mem_ifu_ptr_rdata   = VecInit(Seq.fill(copyNum)(RegNext(ftq_pc_mem.io.ifuPtr_rdata)))
729  val pc_mem_ifu_plus1_rdata = VecInit(Seq.fill(copyNum)(RegNext(ftq_pc_mem.io.ifuPtrPlus1_rdata)))
730  val diff_entry_next_addr = WireInit(update_target(ifuPtr.value)) //TODO: remove this
731
732  val copied_ifu_plus1_to_send = VecInit(Seq.fill(copyNum)(RegNext(entry_fetch_status(ifuPtrPlus1.value) === f_to_send) || RegNext(last_cycle_bpu_in && bpu_in_bypass_ptr === (ifuPtrPlus1))))
733  val copied_ifu_ptr_to_send   = VecInit(Seq.fill(copyNum)(RegNext(entry_fetch_status(ifuPtr.value) === f_to_send) || RegNext(last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr)))
734
735  for(i <- 0 until copyNum){
736    when(copied_last_cycle_bpu_in(i) && copied_bpu_in_bypass_ptr(i) === copied_ifu_ptr(i)){
737      toICachePcBundle(i) := copied_bpu_in_bypass_buf(i)
738      toICacheEntryToSend(i)   := true.B
739    }.elsewhen(copied_last_cycle_to_ifu_fire(i)){
740      toICachePcBundle(i) := pc_mem_ifu_plus1_rdata(i)
741      toICacheEntryToSend(i)   := copied_ifu_plus1_to_send(i)
742    }.otherwise{
743      toICachePcBundle(i) := pc_mem_ifu_ptr_rdata(i)
744      toICacheEntryToSend(i)   := copied_ifu_ptr_to_send(i)
745    }
746  }
747
748  // TODO: reconsider target address bypass logic
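  // Three sources for the next IFU request: (1) bypass the entry that the BPU wrote
  // last cycle when it matches ifuPtr, (2) after a fire last cycle, advance to the
  // pre-read ifuPtrPlus1 entry, (3) otherwise replay the pc-mem read at ifuPtr.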
749  when (last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr) {
750    toIfuPcBundle := bpu_in_bypass_buf_for_ifu
751    entry_is_to_send := true.B
752    entry_next_addr := last_cycle_bpu_target
753    entry_ftq_offset := last_cycle_cfiIndex
754    diff_entry_next_addr := last_cycle_bpu_target // TODO: remove this
755  }.elsewhen (last_cycle_to_ifu_fire) {
756    toIfuPcBundle := RegNext(ftq_pc_mem.io.ifuPtrPlus1_rdata)
757    entry_is_to_send := RegNext(entry_fetch_status(ifuPtrPlus1.value) === f_to_send) ||
758                        RegNext(last_cycle_bpu_in && bpu_in_bypass_ptr === (ifuPtrPlus1)) // reduce potential bubbles
759    entry_next_addr := Mux(last_cycle_bpu_in && bpu_in_bypass_ptr === (ifuPtrPlus1),
760                          bpu_in_bypass_buf_for_ifu.startAddr,
761                          Mux(ifuPtr === newest_entry_ptr,
762                            newest_entry_target,
763                            RegNext(ftq_pc_mem.io.ifuPtrPlus2_rdata.startAddr))) // ifuPtr+2
764  }.otherwise {
765    toIfuPcBundle := RegNext(ftq_pc_mem.io.ifuPtr_rdata)
766    entry_is_to_send := RegNext(entry_fetch_status(ifuPtr.value) === f_to_send) ||
767                        RegNext(last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr) // reduce potential bubbles
768    entry_next_addr := Mux(last_cycle_bpu_in && bpu_in_bypass_ptr === (ifuPtrPlus1),
769                          bpu_in_bypass_buf_for_ifu.startAddr,
770                          Mux(ifuPtr === newest_entry_ptr,
771                            newest_entry_target,
772                            RegNext(ftq_pc_mem.io.ifuPtrPlus1_rdata.startAddr))) // ifuPtr+1
773  }
774
775  io.toIfu.req.valid := entry_is_to_send && ifuPtr =/= bpuPtr
776  io.toIfu.req.bits.nextStartAddr := entry_next_addr
777  io.toIfu.req.bits.ftqOffset := entry_ftq_offset
778  io.toIfu.req.bits.fromFtqPcBundle(toIfuPcBundle)
779
780  io.toICache.req.valid := entry_is_to_send && ifuPtr =/= bpuPtr
781  io.toICache.req.bits.readValid.zipWithIndex.map{case(copy, i) => copy := toICacheEntryToSend(i) && copied_ifu_ptr(i) =/= copied_bpu_ptr(i)}
782  io.toICache.req.bits.pcMemRead.zipWithIndex.map{case(copy,i) => copy.fromFtqPcBundle(toICachePcBundle(i))}
783  // io.toICache.req.bits.bypassSelect := last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr
784  // io.toICache.req.bits.bpuBypassWrite.zipWithIndex.map{case(bypassWrtie, i) =>
785  //   bypassWrtie.startAddr := bpu_in_bypass_buf.tail(i).startAddr
786  //   bypassWrtie.nextlineStart := bpu_in_bypass_buf.tail(i).nextLineAddr
787  // }
788
789  // TODO: remove this
790  XSError(io.toIfu.req.valid && diff_entry_next_addr =/= entry_next_addr,
791          p"\nifu_req_target wrong! ifuPtr: ${ifuPtr}, entry_next_addr: ${Hexadecimal(entry_next_addr)} diff_entry_next_addr: ${Hexadecimal(diff_entry_next_addr)}\n")
792
793  // when fall through is smaller in value than start address, there must be a false hit
794  when (toIfuPcBundle.fallThruError && entry_hit_status(ifuPtr.value) === h_hit) {
795    when (io.toIfu.req.fire &&
796      !(bpu_s2_redirect && bpu_s2_resp.ftq_idx === ifuPtr) &&
797      !(bpu_s3_redirect && bpu_s3_resp.ftq_idx === ifuPtr)
798    ) {
799      entry_hit_status(ifuPtr.value) := h_false_hit
800      // XSError(true.B, "FTB false hit by fallThroughError, startAddr: %x, fallTHru: %x\n", io.toIfu.req.bits.startAddr, io.toIfu.req.bits.nextStartAddr)
801    }
802    XSDebug(true.B, "fallThruError! start:%x, fallThru:%x\n", io.toIfu.req.bits.startAddr, io.toIfu.req.bits.nextStartAddr)
803  }
804
805  XSPerfAccumulate(f"fall_through_error_to_ifu", toIfuPcBundle.fallThruError && entry_hit_status(ifuPtr.value) === h_hit &&
806    io.toIfu.req.fire && !(bpu_s2_redirect && bpu_s2_resp.ftq_idx === ifuPtr) && !(bpu_s3_redirect && bpu_s3_resp.ftq_idx === ifuPtr))
807
808  val ifu_req_should_be_flushed =
809    io.toIfu.flushFromBpu.shouldFlushByStage2(io.toIfu.req.bits.ftqIdx) ||
810    io.toIfu.flushFromBpu.shouldFlushByStage3(io.toIfu.req.bits.ftqIdx)
811
812  when (io.toIfu.req.fire && !ifu_req_should_be_flushed) {
813    entry_fetch_status(ifuPtr.value) := f_sent
814  }
815
816  // *********************************************************************
817  // **************************** wb from ifu ****************************
818  // *********************************************************************
819  val pdWb = io.fromIfu.pdWb
820  val pds = pdWb.bits.pd
821  val ifu_wb_valid = pdWb.valid
822  val ifu_wb_idx = pdWb.bits.ftqIdx.value
823  // read ports:                                                         commit update
824  val ftq_pd_mem = Module(new SyncDataModuleTemplate(new Ftq_pd_Entry, FtqSize, 1, 1))
825  ftq_pd_mem.io.wen(0) := ifu_wb_valid
826  ftq_pd_mem.io.waddr(0) := pdWb.bits.ftqIdx.value
827  ftq_pd_mem.io.wdata(0).fromPdWb(pdWb.bits)
828
829  val hit_pd_valid = entry_hit_status(ifu_wb_idx) === h_hit && ifu_wb_valid
830  val hit_pd_mispred = hit_pd_valid && pdWb.bits.misOffset.valid
831  val hit_pd_mispred_reg = RegNext(hit_pd_mispred, init=false.B)
832  val pd_reg       = RegEnable(pds,             pdWb.valid)
833  val start_pc_reg = RegEnable(pdWb.bits.pc(0), pdWb.valid)
834  val wb_idx_reg   = RegEnable(ifu_wb_idx,      pdWb.valid)
835
836  when (ifu_wb_valid) {
837    val comm_stq_wen = VecInit(pds.map(_.valid).zip(pdWb.bits.instrRange).map{
838      case (v, inRange) => v && inRange
839    })
840    (commitStateQueue(ifu_wb_idx) zip comm_stq_wen).map{
841      case (qe, v) => when (v) { qe := c_valid }
842    }
843  }
844
845  when (ifu_wb_valid) {
846    ifuWbPtr_write := ifuWbPtr + 1.U
847  }
848
849  XSError(ifu_wb_valid && isAfter(pdWb.bits.ftqIdx, ifuPtr), "IFU returned a predecode before its req, check IFU")
850
851  ftb_entry_mem.io.raddr.head := ifu_wb_idx
852  val has_false_hit = WireInit(false.B)
853  when (RegNext(hit_pd_valid)) {
854    // check for false hit
855    val pred_ftb_entry = ftb_entry_mem.io.rdata.head
856    val brSlots = pred_ftb_entry.brSlots
857    val tailSlot = pred_ftb_entry.tailSlot
858    // we check cfis that bpu predicted
859
860    // bpu predicted branches but denied by predecode
861    val br_false_hit =
862      brSlots.map{
863        s => s.valid && !(pd_reg(s.offset).valid && pd_reg(s.offset).isBr)
864      }.reduce(_||_) ||
865      (tailSlot.valid && pred_ftb_entry.tailSlot.sharing &&
866        !(pd_reg(tailSlot.offset).valid && pd_reg(tailSlot.offset).isBr))
867
868    val jmpOffset = tailSlot.offset
869    val jmp_pd = pd_reg(jmpOffset)
870    val jal_false_hit = pred_ftb_entry.jmpValid &&
871      ((pred_ftb_entry.isJal  && !(jmp_pd.valid && jmp_pd.isJal)) ||
872       (pred_ftb_entry.isJalr && !(jmp_pd.valid && jmp_pd.isJalr)) ||
873       (pred_ftb_entry.isCall && !(jmp_pd.valid && jmp_pd.isCall)) ||
874       (pred_ftb_entry.isRet  && !(jmp_pd.valid && jmp_pd.isRet))
875      )
876
877    has_false_hit := br_false_hit || jal_false_hit || hit_pd_mispred_reg
878    XSDebug(has_false_hit, "FTB false hit by br or jal or hit_pd, startAddr: %x\n", pdWb.bits.pc(0))
879
880    // assert(!has_false_hit)
881  }
882
883  when (has_false_hit) {
884    entry_hit_status(wb_idx_reg) := h_false_hit
885  }
886
887
888  // **********************************************************************
889  // ***************************** to backend *****************************
890  // **********************************************************************
891  // to backend pc mem / target
892  io.toBackend.pc_mem_wen   := RegNext(last_cycle_bpu_in)
893  io.toBackend.pc_mem_waddr := RegNext(last_cycle_bpu_in_idx)
894  io.toBackend.pc_mem_wdata := RegNext(bpu_in_bypass_buf_for_ifu)
895
896  // *******************************************************************************
897  // **************************** redirect from backend ****************************
898  // *******************************************************************************
899
900  // redirect read cfiInfo, couples to redirectGen s2
901  val ftq_redirect_rdata = Wire(Vec(BackendRedirectNum, new Ftq_Redirect_SRAMEntry))
902  val ftb_redirect_rdata = Wire(Vec(BackendRedirectNum, new FTBEntry))
903  for (i <- 0 until BackendRedirectNum) {
904    ftq_redirect_sram.io.ren(i + 1) := io.fromBackend.ftqIdxAhead(i).valid
905    ftq_redirect_sram.io.raddr(i + 1) := io.fromBackend.ftqIdxAhead(i).bits.value
906    ftb_entry_mem.io.raddr(i + 1)     := io.fromBackend.ftqIdxAhead(i).bits.value
907
908    ftq_redirect_rdata(i) := ftq_redirect_sram.io.rdata(i + 1)
909    ftb_redirect_rdata(i) := ftb_entry_mem.io.rdata(i + 1)
910  }
911  val stage3CfiInfo = Mux1H(io.fromBackend.ftqIdxSelOH.bits, ftq_redirect_rdata)
912  val fromBackendRedirect = WireInit(backendRedirect)
913  val backendRedirectCfi = fromBackendRedirect.bits.cfiUpdate
914  backendRedirectCfi.fromFtqRedirectSram(stage3CfiInfo)
915
916
917  val r_ftb_entry = Mux1H(io.fromBackend.ftqIdxSelOH.bits, ftb_redirect_rdata)
918  val r_ftqOffset = fromBackendRedirect.bits.ftqOffset
919
920  backendRedirectCfi.br_hit := r_ftb_entry.brIsSaved(r_ftqOffset)
921  backendRedirectCfi.jr_hit := r_ftb_entry.isJalr && r_ftb_entry.tailSlot.offset === r_ftqOffset
922  // FIXME: not portable
923  val sc_disagree = stage3CfiInfo.sc_disagree.getOrElse(VecInit(Seq.fill(numBr)(false.B)))
924  backendRedirectCfi.sc_hit := backendRedirectCfi.br_hit && Mux(r_ftb_entry.brSlots(0).offset === r_ftqOffset,
925    sc_disagree(0), sc_disagree(1))
926
927  when (entry_hit_status(fromBackendRedirect.bits.ftqIdx.value) === h_hit) {
928    backendRedirectCfi.shift := PopCount(r_ftb_entry.getBrMaskByOffset(r_ftqOffset)) +&
929      (backendRedirectCfi.pd.isBr && !r_ftb_entry.brIsSaved(r_ftqOffset) &&
930      !r_ftb_entry.newBrCanNotInsert(r_ftqOffset))
931
932    backendRedirectCfi.addIntoHist := backendRedirectCfi.pd.isBr && (r_ftb_entry.brIsSaved(r_ftqOffset) ||
933        !r_ftb_entry.newBrCanNotInsert(r_ftqOffset))
934  }.otherwise {
935    backendRedirectCfi.shift := (backendRedirectCfi.pd.isBr && backendRedirectCfi.taken).asUInt
936    backendRedirectCfi.addIntoHist := backendRedirectCfi.pd.isBr.asUInt
937  }
938
939
940  // ***************************************************************************
941  // **************************** redirect from ifu ****************************
942  // ***************************************************************************
943  val fromIfuRedirect = WireInit(0.U.asTypeOf(Valid(new BranchPredictionRedirect)))
944  fromIfuRedirect.valid := pdWb.valid && pdWb.bits.misOffset.valid && !backendFlush
945  fromIfuRedirect.bits.ftqIdx := pdWb.bits.ftqIdx
946  fromIfuRedirect.bits.ftqOffset := pdWb.bits.misOffset.bits
947  fromIfuRedirect.bits.level := RedirectLevel.flushAfter
948  fromIfuRedirect.bits.BTBMissBubble := true.B
949  fromIfuRedirect.bits.debugIsMemVio := false.B
950  fromIfuRedirect.bits.debugIsCtrl := false.B
951
952  val ifuRedirectCfiUpdate = fromIfuRedirect.bits.cfiUpdate
953  ifuRedirectCfiUpdate.pc := pdWb.bits.pc(pdWb.bits.misOffset.bits)
954  ifuRedirectCfiUpdate.pd := pdWb.bits.pd(pdWb.bits.misOffset.bits)
955  ifuRedirectCfiUpdate.predTaken := cfiIndex_vec(pdWb.bits.ftqIdx.value).valid
956  ifuRedirectCfiUpdate.target := pdWb.bits.target
957  ifuRedirectCfiUpdate.taken := pdWb.bits.cfiOffset.valid
958  ifuRedirectCfiUpdate.isMisPred := pdWb.bits.misOffset.valid
959
960  val ifuRedirectReg = RegNext(fromIfuRedirect, init=0.U.asTypeOf(Valid(new BranchPredictionRedirect)))
961  val ifuRedirectToBpu = WireInit(ifuRedirectReg)
962  ifuFlush := fromIfuRedirect.valid || ifuRedirectToBpu.valid
963
964  ftq_redirect_sram.io.ren.head := fromIfuRedirect.valid
965  ftq_redirect_sram.io.raddr.head := fromIfuRedirect.bits.ftqIdx.value
966
967  ftb_entry_mem.io.raddr.head := fromIfuRedirect.bits.ftqIdx.value
968
969  val toBpuCfi = ifuRedirectToBpu.bits.cfiUpdate
970  toBpuCfi.fromFtqRedirectSram(ftq_redirect_sram.io.rdata.head)
971  when (ifuRedirectReg.bits.cfiUpdate.pd.isRet && ifuRedirectReg.bits.cfiUpdate.pd.valid) {
972    toBpuCfi.target := toBpuCfi.topAddr
973  }
974
975  when (ifuRedirectReg.valid) {
976    ifuRedirected(ifuRedirectReg.bits.ftqIdx.value) := true.B
977  } .elsewhen(RegNext(pdWb.valid)) {
978    // if pdWb arrives and there is no redirect, clear the flag
979    ifuRedirected(last_cycle_bpu_in_ptr.value) := false.B
980  }
981
982  // *********************************************************************
983  // **************************** wb from exu ****************************
984  // *********************************************************************
985
986  backendRedirect.valid := io.fromBackend.redirect.valid
987  backendRedirect.bits.connectRedirect(io.fromBackend.redirect.bits)
988  backendRedirect.bits.BTBMissBubble := false.B
989
990
991  def extractRedirectInfo(wb: Valid[Redirect]) = {
992    val ftqPtr = wb.bits.ftqIdx
993    val ftqOffset = wb.bits.ftqOffset
994    val taken = wb.bits.cfiUpdate.taken
995    val mispred = wb.bits.cfiUpdate.isMisPred
996    (wb.valid, ftqPtr, ftqOffset, taken, mispred)
997  }
998
999  // fix mispredict entry
1000  val lastIsMispredict = RegNext(
1001    backendRedirect.valid && backendRedirect.bits.level === RedirectLevel.flushAfter, init = false.B
1002  )
1003
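  // updateCfiInfo repairs the recorded control-flow info of an FTQ entry on a redirect:
  // a taken redirect at an earlier offset moves cfiIndex there, a redirect at the
  // recorded offset updates its valid bit according to taken, and a not-taken redirect
  // at a different offset invalidates it; mispredict_vec is only touched for backend
  // redirects.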
1004  def updateCfiInfo(redirect: Valid[Redirect], isBackend: Boolean = true) = {
1005    val (r_valid, r_ptr, r_offset, r_taken, r_mispred) = extractRedirectInfo(redirect)
1006    val r_idx = r_ptr.value
1007    val cfiIndex_bits_wen = r_valid && r_taken && r_offset < cfiIndex_vec(r_idx).bits
1008    val cfiIndex_valid_wen = r_valid && r_offset === cfiIndex_vec(r_idx).bits
1009    when (cfiIndex_bits_wen || cfiIndex_valid_wen) {
1010      cfiIndex_vec(r_idx).valid := cfiIndex_bits_wen || cfiIndex_valid_wen && r_taken
1011    } .elsewhen (r_valid && !r_taken && r_offset =/= cfiIndex_vec(r_idx).bits) {
1012      cfiIndex_vec(r_idx).valid :=false.B
1013    }
1014    when (cfiIndex_bits_wen) {
1015      cfiIndex_vec(r_idx).bits := r_offset
1016    }
1017    newest_entry_target := redirect.bits.cfiUpdate.target
1018    newest_entry_ptr := r_ptr
1019    update_target(r_idx) := redirect.bits.cfiUpdate.target // TODO: remove this
1020    if (isBackend) {
1021      mispredict_vec(r_idx)(r_offset) := r_mispred
1022    }
1023  }
1024
1025  when(backendRedirect.valid) {
1026    updateCfiInfo(backendRedirect)
1027  }.elsewhen (ifuRedirectToBpu.valid) {
1028    updateCfiInfo(ifuRedirectToBpu, isBackend=false)
1029  }
1030
1031  when (backendRedirect.valid) {
1032    when (backendRedirect.bits.ControlRedirectBubble) {
1033      when (fromBackendRedirect.bits.ControlBTBMissBubble) {
1034        topdown_stage.reasons(TopDownCounters.BTBMissBubble.id) := true.B
1035        io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.BTBMissBubble.id) := true.B
1036      } .elsewhen (fromBackendRedirect.bits.TAGEMissBubble) {
1037        topdown_stage.reasons(TopDownCounters.TAGEMissBubble.id) := true.B
1038        io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.TAGEMissBubble.id) := true.B
1039      } .elsewhen (fromBackendRedirect.bits.SCMissBubble) {
1040        topdown_stage.reasons(TopDownCounters.SCMissBubble.id) := true.B
1041        io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.SCMissBubble.id) := true.B
1042      } .elsewhen (fromBackendRedirect.bits.ITTAGEMissBubble) {
1043        topdown_stage.reasons(TopDownCounters.ITTAGEMissBubble.id) := true.B
1044        io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.ITTAGEMissBubble.id) := true.B
1045      } .elsewhen (fromBackendRedirect.bits.RASMissBubble) {
1046        topdown_stage.reasons(TopDownCounters.RASMissBubble.id) := true.B
1047        io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.RASMissBubble.id) := true.B
1048      }
1049
1050
1051    } .elsewhen (backendRedirect.bits.MemVioRedirectBubble) {
1052      topdown_stage.reasons(TopDownCounters.MemVioRedirectBubble.id) := true.B
1053      io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.MemVioRedirectBubble.id) := true.B
1054    } .otherwise {
1055      topdown_stage.reasons(TopDownCounters.OtherRedirectBubble.id) := true.B
1056      io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.OtherRedirectBubble.id) := true.B
1057    }
1058  } .elsewhen (ifuRedirectReg.valid) {
1059    topdown_stage.reasons(TopDownCounters.BTBMissBubble.id) := true.B
1060    io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.BTBMissBubble.id) := true.B
1061  }
1062
1063  io.ControlBTBMissBubble := fromBackendRedirect.bits.ControlBTBMissBubble
1064  io.TAGEMissBubble := fromBackendRedirect.bits.TAGEMissBubble
1065  io.SCMissBubble := fromBackendRedirect.bits.SCMissBubble
1066  io.ITTAGEMissBubble := fromBackendRedirect.bits.ITTAGEMissBubble
1067  io.RASMissBubble := fromBackendRedirect.bits.RASMissBubble
1068
1069  // ***********************************************************************************
1070  // **************************** flush ptr and state queue ****************************
1071  // ***********************************************************************************
1072
1073  val redirectVec = VecInit(backendRedirect, fromIfuRedirect)
1074
1075  // when redirect, we should reset ptrs and status queues
1076  when(redirectVec.map(r => r.valid).reduce(_||_)){
1077    val r = PriorityMux(redirectVec.map(r => (r.valid -> r.bits)))
1078    val notIfu = redirectVec.dropRight(1).map(r => r.valid).reduce(_||_)
1079    val (idx, offset, flushItSelf) = (r.ftqIdx, r.ftqOffset, RedirectLevel.flushItself(r.level))
1080    val next = idx + 1.U
1081    bpuPtr := next
1082    copied_bpu_ptr.map(_ := next)
1083    ifuPtr_write := next
1084    ifuWbPtr_write := next
1085    ifuPtrPlus1_write := idx + 2.U
1086    ifuPtrPlus2_write := idx + 3.U
1087
1088  }
1089  when(RegNext(redirectVec.map(r => r.valid).reduce(_||_))){
1090    val r = PriorityMux(redirectVec.map(r => (r.valid -> r.bits)))
1091    val notIfu = redirectVec.dropRight(1).map(r => r.valid).reduce(_||_)
1092    val (idx, offset, flushItSelf) = (r.ftqIdx, r.ftqOffset, RedirectLevel.flushItself(r.level))
1093    when (RegNext(notIfu)) {
1094      commitStateQueue(RegNext(idx.value)).zipWithIndex.foreach({ case (s, i) =>
1095        when(i.U > RegNext(offset) || i.U === RegNext(offset) && RegNext(flushItSelf)){
1096          s := c_invalid
1097        }
1098      })
1099    }
1100  }
1101
1102
1103  // only the valid bit is actually needed
1104  io.toIfu.redirect.bits    := backendRedirect.bits
1105  io.toIfu.redirect.valid   := stage2Flush
1106  io.toIfu.topdown_redirect := fromBackendRedirect
1107
1108  // commit
1109  for (c <- io.fromBackend.rob_commits) {
1110    when(c.valid) {
1111      commitStateQueue(c.bits.ftqIdx.value)(c.bits.ftqOffset) := c_commited
1112      // TODO: remove this
1113      // For instruction fusions, we also update the next instruction
1114      when (c.bits.commitType === 4.U) {
1115        commitStateQueue(c.bits.ftqIdx.value)(c.bits.ftqOffset + 1.U) := c_commited
1116      }.elsewhen(c.bits.commitType === 5.U) {
1117        commitStateQueue(c.bits.ftqIdx.value)(c.bits.ftqOffset + 2.U) := c_commited
1118      }.elsewhen(c.bits.commitType === 6.U) {
1119        val index = (c.bits.ftqIdx + 1.U).value
1120        commitStateQueue(index)(0) := c_commited
1121      }.elsewhen(c.bits.commitType === 7.U) {
1122        val index = (c.bits.ftqIdx + 1.U).value
1123        commitStateQueue(index)(1) := c_commited
1124      }
1125    }
1126  }
1127
1128  // ****************************************************************
1129  // **************************** to bpu ****************************
1130  // ****************************************************************
1131
1132  io.toBpu.redirect := Mux(fromBackendRedirect.valid, fromBackendRedirect, ifuRedirectToBpu)
1133  val dummy_s1_pred_cycle_vec = VecInit(List.tabulate(FtqSize)(_=>0.U(64.W)))
1134  val redirect_latency = GTimer() - pred_s1_cycle.getOrElse(dummy_s1_pred_cycle_vec)(io.toBpu.redirect.bits.ftqIdx.value) + 1.U
1135  XSPerfHistogram("backend_redirect_latency", redirect_latency, fromBackendRedirect.valid, 0, 60, 1)
1136  XSPerfHistogram("ifu_redirect_latency", redirect_latency, !fromBackendRedirect.valid && ifuRedirectToBpu.valid, 0, 60, 1)
1137
1138  XSError(io.toBpu.redirect.valid && isBefore(io.toBpu.redirect.bits.ftqIdx, commPtr), "Ftq received a redirect after its commit, check backend or replay")
1139
1140  val may_have_stall_from_bpu = Wire(Bool())
1141  val bpu_ftb_update_stall = RegInit(0.U(2.W)) // 2-cycle stall, so we need 3 states
1142  may_have_stall_from_bpu := bpu_ftb_update_stall =/= 0.U
1143  canCommit := commPtr =/= ifuWbPtr && !may_have_stall_from_bpu &&
1144    Cat(commitStateQueue(commPtr.value).map(s => {
1145      s === c_invalid || s === c_commited
1146    })).andR
1147
1148  val mmioReadPtr = io.mmioCommitRead.mmioFtqPtr
1149  val mmioLastCommit = isBefore(commPtr, mmioReadPtr) && (isAfter(ifuPtr,mmioReadPtr)  ||  mmioReadPtr ===   ifuPtr) &&
1150                       Cat(commitStateQueue(mmioReadPtr.value).map(s => { s === c_invalid || s === c_commited})).andR
1151  io.mmioCommitRead.mmioLastCommit := RegNext(mmioLastCommit)
1152
1153  // commit reads
1154  val commit_pc_bundle = RegNext(ftq_pc_mem.io.commPtr_rdata)
1155  val commit_target =
1156    Mux(RegNext(commPtr === newest_entry_ptr),
1157      RegNext(newest_entry_target),
1158      RegNext(ftq_pc_mem.io.commPtrPlus1_rdata.startAddr))
1159  ftq_pd_mem.io.raddr.last := commPtr.value
1160  val commit_pd = ftq_pd_mem.io.rdata.last
1161  ftq_redirect_sram.io.ren.last := canCommit
1162  ftq_redirect_sram.io.raddr.last := commPtr.value
1163  val commit_spec_meta = ftq_redirect_sram.io.rdata.last
1164  ftq_meta_1r_sram.io.ren(0) := canCommit
1165  ftq_meta_1r_sram.io.raddr(0) := commPtr.value
1166  val commit_meta = ftq_meta_1r_sram.io.rdata(0)
1167  ftb_entry_mem.io.raddr.last := commPtr.value
1168  val commit_ftb_entry = ftb_entry_mem.io.rdata.last
1169
1170  // need one cycle to read mem and srams
1171  val do_commit_ptr = RegNext(commPtr)
1172  val do_commit = RegNext(canCommit, init=false.B)
1173  when (canCommit) {
1174    commPtr_write := commPtrPlus1
1175    commPtrPlus1_write := commPtrPlus1 + 1.U
1176  }
1177  val commit_state = RegNext(commitStateQueue(commPtr.value))
1178  val can_commit_cfi = WireInit(cfiIndex_vec(commPtr.value))
1179  val do_commit_cfi = WireInit(cfiIndex_vec(do_commit_ptr.value))
1180  //
1181  //when (commitStateQueue(commPtr.value)(can_commit_cfi.bits) =/= c_commited) {
1182  //  can_commit_cfi.valid := false.B
1183  //}
1184  val commit_cfi = RegNext(can_commit_cfi)
1185  val debug_cfi = commitStateQueue(do_commit_ptr.value)(do_commit_cfi.bits) =/= c_commited && do_commit_cfi.valid
1186
1187  val commit_mispredict  : Vec[Bool] = VecInit((RegNext(mispredict_vec(commPtr.value)) zip commit_state).map {
1188    case (mis, state) => mis && state === c_commited
1189  })
1190  val commit_instCommited: Vec[Bool] = VecInit(commit_state.map(_ === c_commited)) // [PredictWidth]
1191  val can_commit_hit                 = entry_hit_status(commPtr.value)
1192  val commit_hit                     = RegNext(can_commit_hit)
1193  val diff_commit_target             = RegNext(update_target(commPtr.value)) // TODO: remove this
1194  val commit_stage                   = RegNext(pred_stage(commPtr.value))
1195  val commit_valid                   = commit_hit === h_hit || commit_cfi.valid // hit or taken
1196
1197  val to_bpu_hit = can_commit_hit === h_hit || can_commit_hit === h_false_hit
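  // If the committed prediction missed the FTB, a new FTB entry has to be generated
  // and written back by the BPU, so commits are stalled for two cycles
  // (states 2 -> 1 -> 0); state 3 should be unreachable.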
1198  switch (bpu_ftb_update_stall) {
1199    is (0.U) {
1200      when (can_commit_cfi.valid && !to_bpu_hit && canCommit) {
1201        bpu_ftb_update_stall := 2.U // 2-cycle stall
1202      }
1203    }
1204    is (2.U) {
1205      bpu_ftb_update_stall := 1.U
1206    }
1207    is (1.U) {
1208      bpu_ftb_update_stall := 0.U
1209    }
1210    is (3.U) {
1211      XSError(true.B, "bpu_ftb_update_stall should be 0, 1 or 2")
1212    }
1213  }
1214
1215  // TODO: remove this
1216  XSError(do_commit && diff_commit_target =/= commit_target, "\ncommit target should be the same as update target\n")
1217
1218  // update latency stats
1219  val update_latency = GTimer() - pred_s1_cycle.getOrElse(dummy_s1_pred_cycle_vec)(do_commit_ptr.value) + 1.U
1220  XSPerfHistogram("bpu_update_latency", update_latency, io.toBpu.update.valid, 0, 64, 2)
1221
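  // Assemble the BPU update request from the just-committed entry: its start PC, predictor
  // metadata, committed CFI, resolved target, and the stage that produced the prediction.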
1222  io.toBpu.update := DontCare
1223  io.toBpu.update.valid := commit_valid && do_commit
1224  val update = io.toBpu.update.bits
1225  update.false_hit   := commit_hit === h_false_hit
1226  update.pc          := commit_pc_bundle.startAddr
1227  update.meta        := commit_meta.meta
1228  update.cfi_idx     := commit_cfi
1229  update.full_target := commit_target
1230  update.from_stage  := commit_stage
1231  update.spec_info   := commit_spec_meta
1232  XSError(commit_valid && do_commit && debug_cfi, "\ncommitted cfi should not be in a non-c_commited state\n")
1233
1234  val commit_real_hit = commit_hit === h_hit
1235  val update_ftb_entry = update.ftb_entry
1236
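  // FTBEntryGen rebuilds the FTB entry for the committing block from its start address,
  // predecode info, committed CFI, resolved target, the old entry read from ftb_entry_mem and
  // the per-slot mispredict flags; its outputs fill in the remaining update fields below.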
1237  val ftbEntryGen = Module(new FTBEntryGen).io
1238  ftbEntryGen.start_addr     := commit_pc_bundle.startAddr
1239  ftbEntryGen.old_entry      := commit_ftb_entry
1240  ftbEntryGen.pd             := commit_pd
1241  ftbEntryGen.cfiIndex       := commit_cfi
1242  ftbEntryGen.target         := commit_target
1243  ftbEntryGen.hit            := commit_real_hit
1244  ftbEntryGen.mispredict_vec := commit_mispredict
1245
1246  update_ftb_entry         := ftbEntryGen.new_entry
1247  update.new_br_insert_pos := ftbEntryGen.new_br_insert_pos
1248  update.mispred_mask      := ftbEntryGen.mispred_mask
1249  update.old_entry         := ftbEntryGen.is_old_entry
1250  update.pred_hit          := commit_hit === h_hit || commit_hit === h_false_hit
1251  update.br_taken_mask     := ftbEntryGen.taken_mask
1252  update.br_committed      := (ftbEntryGen.new_entry.brValids zip ftbEntryGen.new_entry.brOffset) map {
1253    case (valid, offset) => valid && commit_instCommited(offset)
1254  }
1255  update.jmp_taken         := ftbEntryGen.jmp_taken
1256
1257  // update.full_pred.fromFtbEntry(ftbEntryGen.new_entry, update.pc)
1258  // update.full_pred.jalr_target := commit_target
1259  // update.full_pred.hit := true.B
1260  // when (update.full_pred.is_jalr) {
1261  //   update.full_pred.targets.last := commit_target
1262  // }
1263
1264  // ****************************************************************
1265  // *********************** to prefetch ****************************
1266  // ****************************************************************
1267  /**
1268    ******************************************************************************
1269    * prefetchPtr control
1270    * - 1. prefetchPtr is incremented when toPrefetch fires, but only while it stays more than 2 entries behind bpuPtr
1271    * - 2. prefetchPtr is limited to the range [ifuPtr + minRange, ifuPtr + maxRange]
1272    * - 3. prefetchPtr is flushed when a redirect arrives from the IFU or the backend
1273    ******************************************************************************
1274    */
1275  val prefetchPtr = RegInit(FtqPtr(false.B, 0.U))
1276  val nextPrefetchPtr = WireInit(prefetchPtr)
1277
1278  prefetchPtr := nextPrefetchPtr
1279
1280  // TODO: consider requests that cross a cacheline
1281  when(io.toPrefetch.req.fire) {
1282    when(prefetchPtr < bpuPtr - 2.U) {
1283      nextPrefetchPtr := prefetchPtr + 1.U
1284    }
1285  }
1286
1287  when(prefetchPtr < ifuPtr + minRangeFromIFUptr.U) {
1288    nextPrefetchPtr := ifuPtr + minRangeFromIFUptr.U
1289  }.elsewhen(prefetchPtr > ifuPtr + maxRangeFromIFUptr.U) {
1290    nextPrefetchPtr := ifuPtr + maxRangeFromIFUptr.U
1291  }
1292
1293  when(redirectVec.map(r => r.valid).reduce(_||_)){
1294    val r = PriorityMux(redirectVec.map(r => (r.valid -> r.bits)))
1295    val next = r.ftqIdx + minRangeFromIFUptr.U
1296    nextPrefetchPtr := next
1297  }
1298
1299  // data from ftq_pc_mem has 1 cycle delay
1300  io.toPrefetch.req.valid := RegNext(entry_fetch_status(nextPrefetchPtr.value) === f_to_send)
1301  ftq_pc_mem.io.other_raddrs(0) := nextPrefetchPtr.value
1302  io.toPrefetch.req.bits.target := RegNext(ftq_pc_mem.io.other_rdatas(0).startAddr)
1303
1304  // record the positional relationship between ifuPtr, prefetchPtr and bpuPtr
1305  val isWritePrefetchPtrTable = WireInit(Constantin.createRecord("isWritePrefetchPtrTable" + p(XSCoreParamsKey).HartId.toString))
1306  val prefetchPtrTable = ChiselDB.createTable("PrefetchPtrTable" + p(XSCoreParamsKey).HartId.toString, new PrefetchPtrDB)
1307  val prefetchPtrDumpData = Wire(new PrefetchPtrDB)
1308  prefetchPtrDumpData.fromFtqPtr  := distanceBetween(bpuPtr, prefetchPtr)
1309  prefetchPtrDumpData.fromIfuPtr  := distanceBetween(prefetchPtr, ifuPtr)
1310
1311  prefetchPtrTable.log(
1312    data = prefetchPtrDumpData,
1313    en = isWritePrefetchPtrTable.orR && io.toPrefetch.req.fire,
1314    site = "FTQ" + p(XSCoreParamsKey).HartId.toString,
1315    clock = clock,
1316    reset = reset
1317  )
1318
1319
1320  // ******************************************************************************
1321  // **************************** commit perf counters ****************************
1322  // ******************************************************************************
1323
1324  val commit_inst_mask    = VecInit(commit_state.map(c => c === c_commited && do_commit)).asUInt
1325  val commit_mispred_mask = commit_mispredict.asUInt
1326  val commit_not_mispred_mask = ~commit_mispred_mask
1327
1328  val commit_br_mask = commit_pd.brMask.asUInt
1329  val commit_jmp_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.jmpInfo.valid.asTypeOf(UInt(1.W)))
1330  val commit_cfi_mask = (commit_br_mask | commit_jmp_mask)
1331
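  // mbpInstrs marks committed control-flow instructions; they are split into correctly
  // (mbpRights) and incorrectly (mbpWrongs) predicted ones for the prediction counters.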
1332  val mbpInstrs = commit_inst_mask & commit_cfi_mask
1333
1334  val mbpRights = mbpInstrs & commit_not_mispred_mask
1335  val mbpWrongs = mbpInstrs & commit_mispred_mask
1336
1337  io.bpuInfo.bpRight := PopCount(mbpRights)
1338  io.bpuInfo.bpWrong := PopCount(mbpWrongs)
1339
1340  val isWriteFTQTable = WireInit(Constantin.createRecord("isWriteFTQTable" + p(XSCoreParamsKey).HartId.toString))
1341  val ftqBranchTraceDB = ChiselDB.createTable("FTQTable" + p(XSCoreParamsKey).HartId.toString, new FtqDebugBundle)
1342  // Cfi Info
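  // For every committed control-flow instruction, print its details via XSDebug and, when
  // isWriteFTQTable is set, log an FtqDebugBundle record (pc, target, type, taken, mispredict,
  // predicting stage) to the per-hart FTQTable in ChiselDB.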
1343  for (i <- 0 until PredictWidth) {
1344    val pc = commit_pc_bundle.startAddr + (i * instBytes).U
1345    val v = commit_state(i) === c_commited
1346    val isBr = commit_pd.brMask(i)
1347    val isJmp = commit_pd.jmpInfo.valid && commit_pd.jmpOffset === i.U
1348    val isCfi = isBr || isJmp
1349    val isTaken = commit_cfi.valid && commit_cfi.bits === i.U
1350    val misPred = commit_mispredict(i)
1351    // val ghist = commit_spec_meta.ghist.predHist
1352    val histPtr = commit_spec_meta.histPtr
1353    val predCycle = commit_meta.meta(63, 0)
1354    val target = commit_target
1355
1356    val brIdx = OHToUInt(Reverse(Cat(update_ftb_entry.brValids.zip(update_ftb_entry.brOffset).map{case(v, offset) => v && offset === i.U})))
1357    val inFtbEntry = update_ftb_entry.brValids.zip(update_ftb_entry.brOffset).map{case(v, offset) => v && offset === i.U}.reduce(_||_)
1358    val addIntoHist = (commit_hit === h_hit && inFtbEntry) || (commit_hit =/= h_hit && i.U === commit_cfi.bits && isBr && commit_cfi.valid)
1359    XSDebug(v && do_commit && isCfi, p"cfi_update: isBr(${isBr}) pc(${Hexadecimal(pc)}) " +
1360    p"taken(${isTaken}) mispred(${misPred}) cycle($predCycle) hist(${histPtr.value}) " +
1361    p"startAddr(${Hexadecimal(commit_pc_bundle.startAddr)}) AddIntoHist(${addIntoHist}) " +
1362    p"brInEntry(${inFtbEntry}) brIdx(${brIdx}) target(${Hexadecimal(target)})\n")
1363
1364    val logbundle = Wire(new FtqDebugBundle)
1365    logbundle.pc := pc
1366    logbundle.target := target
1367    logbundle.isBr := isBr
1368    logbundle.isJmp := isJmp
1369    logbundle.isCall := isJmp && commit_pd.hasCall
1370    logbundle.isRet := isJmp && commit_pd.hasRet
1371    logbundle.misPred := misPred
1372    logbundle.isTaken := isTaken
1373    logbundle.predStage := commit_stage
1374
1375    ftqBranchTraceDB.log(
1376      data = logbundle /* hardware of type T */,
1377      en = isWriteFTQTable.orR && v && do_commit && isCfi,
1378      site = "FTQ" + p(XSCoreParamsKey).HartId.toString,
1379      clock = clock,
1380      reset = reset
1381    )
1382  }
1383
1384  val enq = io.fromBpu.resp
1385  val perf_redirect = backendRedirect
1386
1387  XSPerfAccumulate("entry", validEntries)
1388  XSPerfAccumulate("bpu_to_ftq_stall", enq.valid && !enq.ready)
1389  XSPerfAccumulate("mispredictRedirect", perf_redirect.valid && RedirectLevel.flushAfter === perf_redirect.bits.level)
1390  XSPerfAccumulate("replayRedirect", perf_redirect.valid && RedirectLevel.flushItself(perf_redirect.bits.level))
1391  XSPerfAccumulate("predecodeRedirect", fromIfuRedirect.valid)
1392
1393  XSPerfAccumulate("to_ifu_bubble", io.toIfu.req.ready && !io.toIfu.req.valid)
1394
1395  XSPerfAccumulate("to_ifu_stall", io.toIfu.req.valid && !io.toIfu.req.ready)
1396  XSPerfAccumulate("from_bpu_real_bubble", !enq.valid && enq.ready && allowBpuIn)
1397  XSPerfAccumulate("bpu_to_ifu_bubble", bpuPtr === ifuPtr)
1398  XSPerfAccumulate("bpu_to_ifu_bubble_when_ftq_full", (bpuPtr === ifuPtr) && isFull(bpuPtr, commPtr) && io.toIfu.req.ready)
1399
1400  XSPerfAccumulate("redirectAhead_ValidNum", io.fromBackend.ftqIdxAhead.map(_.valid).reduce(_|_))
1401  XSPerfAccumulate("fromBackendRedirect_ValidNum", io.fromBackend.redirect.valid)
1402  XSPerfAccumulate("toBpuRedirect_ValidNum", io.toBpu.redirect.valid)
1403
1404  val from_bpu = io.fromBpu.resp.bits
1405  val to_ifu = io.toIfu.req.bits
1406
1407
1408  XSPerfHistogram("commit_num_inst", PopCount(commit_inst_mask), do_commit, 0, PredictWidth+1, 1)
1409
1410
1411
1412
1413  val commit_jal_mask  = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasJal.asTypeOf(UInt(1.W)))
1414  val commit_jalr_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasJalr.asTypeOf(UInt(1.W)))
1415  val commit_call_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasCall.asTypeOf(UInt(1.W)))
1416  val commit_ret_mask  = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasRet.asTypeOf(UInt(1.W)))
1417
1418
1419  val mbpBRights = mbpRights & commit_br_mask
1420  val mbpJRights = mbpRights & commit_jal_mask
1421  val mbpIRights = mbpRights & commit_jalr_mask
1422  val mbpCRights = mbpRights & commit_call_mask
1423  val mbpRRights = mbpRights & commit_ret_mask
1424
1425  val mbpBWrongs = mbpWrongs & commit_br_mask
1426  val mbpJWrongs = mbpWrongs & commit_jal_mask
1427  val mbpIWrongs = mbpWrongs & commit_jalr_mask
1428  val mbpCWrongs = mbpWrongs & commit_call_mask
1429  val mbpRWrongs = mbpWrongs & commit_ret_mask
1430
1431  val commit_pred_stage = RegNext(pred_stage(commPtr.value))
1432
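  // Break a committed-instruction mask down by the BPU stage that produced the final
  // prediction for the committing entry, yielding one counter per stage.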
1433  def pred_stage_map(src: UInt, name: String) = {
1434    (0 until numBpStages).map(i =>
1435      f"${name}_stage_${i+1}" -> PopCount(src.asBools.map(_ && commit_pred_stage === BP_STAGES(i)))
1436    ).foldLeft(Map[String, UInt]())(_+_)
1437  }
1438
1439  val mispred_stage_map      = pred_stage_map(mbpWrongs,  "mispredict")
1440  val br_mispred_stage_map   = pred_stage_map(mbpBWrongs, "br_mispredict")
1441  val jalr_mispred_stage_map = pred_stage_map(mbpIWrongs, "jalr_mispredict")
1442  val correct_stage_map      = pred_stage_map(mbpRights,  "correct")
1443  val br_correct_stage_map   = pred_stage_map(mbpBRights, "br_correct")
1444  val jalr_correct_stage_map = pred_stage_map(mbpIRights, "jalr_correct")
1445
1446  val update_valid = io.toBpu.update.valid
1447  def u(cond: Bool) = update_valid && cond
1448  val ftb_false_hit = u(update.false_hit)
1449  // assert(!ftb_false_hit)
1450  val ftb_hit = u(commit_hit === h_hit)
1451
1452  val ftb_new_entry = u(ftbEntryGen.is_init_entry)
1453  val ftb_new_entry_only_br = ftb_new_entry && !update_ftb_entry.jmpValid
1454  val ftb_new_entry_only_jmp = ftb_new_entry && !update_ftb_entry.brValids(0)
1455  val ftb_new_entry_has_br_and_jmp = ftb_new_entry && update_ftb_entry.brValids(0) && update_ftb_entry.jmpValid
1456
1457  val ftb_old_entry = u(ftbEntryGen.is_old_entry)
1458
1459  val ftb_modified_entry = u(ftbEntryGen.is_new_br || ftbEntryGen.is_jalr_target_modified || ftbEntryGen.is_always_taken_modified)
1460  val ftb_modified_entry_new_br = u(ftbEntryGen.is_new_br)
1461  val ftb_modified_entry_ifu_redirected = u(ifuRedirected(do_commit_ptr.value))
1462  val ftb_modified_entry_jalr_target_modified = u(ftbEntryGen.is_jalr_target_modified)
1463  val ftb_modified_entry_br_full = ftb_modified_entry && ftbEntryGen.is_br_full
1464  val ftb_modified_entry_always_taken = ftb_modified_entry && ftbEntryGen.is_always_taken_modified
1465
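  // Length of an FTB entry in instructions: the distance from its start PC to its
  // fall-through address.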
1466  def getFtbEntryLen(pc: UInt, entry: FTBEntry) = (entry.getFallThrough(pc) - pc) >> instOffsetBits
1467  val gen_ftb_entry_len = getFtbEntryLen(update.pc, ftbEntryGen.new_entry)
1468  XSPerfHistogram("ftb_init_entry_len", gen_ftb_entry_len, ftb_new_entry, 0, PredictWidth+1, 1)
1469  XSPerfHistogram("ftb_modified_entry_len", gen_ftb_entry_len, ftb_modified_entry, 0, PredictWidth+1, 1)
1470  val s3_ftb_entry_len = getFtbEntryLen(from_bpu.s3.pc(0), from_bpu.last_stage_ftb_entry)
1471  XSPerfHistogram("s3_ftb_entry_len", s3_ftb_entry_len, from_bpu.s3.valid(0), 0, PredictWidth+1, 1)
1472
1473  XSPerfHistogram("ftq_has_entry", validEntries, true.B, 0, FtqSize+1, 1)
1474
1475  val perfCountsMap = Map(
1476    "BpInstr" -> PopCount(mbpInstrs),
1477    "BpBInstr" -> PopCount(mbpBRights | mbpBWrongs),
1478    "BpRight"  -> PopCount(mbpRights),
1479    "BpWrong"  -> PopCount(mbpWrongs),
1480    "BpBRight" -> PopCount(mbpBRights),
1481    "BpBWrong" -> PopCount(mbpBWrongs),
1482    "BpJRight" -> PopCount(mbpJRights),
1483    "BpJWrong" -> PopCount(mbpJWrongs),
1484    "BpIRight" -> PopCount(mbpIRights),
1485    "BpIWrong" -> PopCount(mbpIWrongs),
1486    "BpCRight" -> PopCount(mbpCRights),
1487    "BpCWrong" -> PopCount(mbpCWrongs),
1488    "BpRRight" -> PopCount(mbpRRights),
1489    "BpRWrong" -> PopCount(mbpRWrongs),
1490
1491    "ftb_false_hit"                -> PopCount(ftb_false_hit),
1492    "ftb_hit"                      -> PopCount(ftb_hit),
1493    "ftb_new_entry"                -> PopCount(ftb_new_entry),
1494    "ftb_new_entry_only_br"        -> PopCount(ftb_new_entry_only_br),
1495    "ftb_new_entry_only_jmp"       -> PopCount(ftb_new_entry_only_jmp),
1496    "ftb_new_entry_has_br_and_jmp" -> PopCount(ftb_new_entry_has_br_and_jmp),
1497    "ftb_old_entry"                -> PopCount(ftb_old_entry),
1498    "ftb_modified_entry"           -> PopCount(ftb_modified_entry),
1499    "ftb_modified_entry_new_br"    -> PopCount(ftb_modified_entry_new_br),
1500    "ftb_jalr_target_modified"     -> PopCount(ftb_modified_entry_jalr_target_modified),
1501    "ftb_modified_entry_br_full"   -> PopCount(ftb_modified_entry_br_full),
1502    "ftb_modified_entry_always_taken" -> PopCount(ftb_modified_entry_always_taken)
1503  ) ++ mispred_stage_map ++ br_mispred_stage_map ++ jalr_mispred_stage_map ++
1504       correct_stage_map ++ br_correct_stage_map ++ jalr_correct_stage_map
1505
1506  for((key, value) <- perfCountsMap) {
1507    XSPerfAccumulate(key, value)
1508  }
1509
1510  // --------------------------- Debug --------------------------------
1511  // XSDebug(enq_fire, p"enq! " + io.fromBpu.resp.bits.toPrintable)
1512  XSDebug(io.toIfu.req.fire, p"fire to ifu " + io.toIfu.req.bits.toPrintable)
1513  XSDebug(do_commit, p"deq! [ptr] $do_commit_ptr\n")
1514  XSDebug(true.B, p"[bpuPtr] $bpuPtr, [ifuPtr] $ifuPtr, [ifuWbPtr] $ifuWbPtr [commPtr] $commPtr\n")
1515  XSDebug(true.B, p"[in] v:${io.fromBpu.resp.valid} r:${io.fromBpu.resp.ready} " +
1516    p"[out] v:${io.toIfu.req.valid} r:${io.toIfu.req.ready}\n")
1517  XSDebug(do_commit, p"[deq info] cfiIndex: $commit_cfi, $commit_pc_bundle, target: ${Hexadecimal(commit_target)}\n")
1518
1519  //   def ubtbCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1520  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1521  //       case (((valid, pd), ans), taken) =>
1522  //       Mux(valid && pd.isBr,
1523  //         isWrong ^ Mux(ans.hit.asBool,
1524  //           Mux(ans.taken.asBool, taken && ans.target === commitEntry.target,
1525  //           !taken),
1526  //         !taken),
1527  //       false.B)
1528  //     }
1529  //   }
1530
1531  //   def btbCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1532  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1533  //       case (((valid, pd), ans), taken) =>
1534  //       Mux(valid && pd.isBr,
1535  //         isWrong ^ Mux(ans.hit.asBool,
1536  //           Mux(ans.taken.asBool, taken && ans.target === commitEntry.target,
1537  //           !taken),
1538  //         !taken),
1539  //       false.B)
1540  //     }
1541  //   }
1542
1543  //   def tageCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1544  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1545  //       case (((valid, pd), ans), taken) =>
1546  //       Mux(valid && pd.isBr,
1547  //         isWrong ^ (ans.taken.asBool === taken),
1548  //       false.B)
1549  //     }
1550  //   }
1551
1552  //   def loopCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1553  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1554  //       case (((valid, pd), ans), taken) =>
1555  //       Mux(valid && (pd.isBr) && ans.hit.asBool,
1556  //         isWrong ^ (!taken),
1557  //           false.B)
1558  //     }
1559  //   }
1560
1561  //   def rasCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1562  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1563  //       case (((valid, pd), ans), taken) =>
1564  //       Mux(valid && pd.isRet.asBool /*&& taken*/ && ans.hit.asBool,
1565  //         isWrong ^ (ans.target === commitEntry.target),
1566  //           false.B)
1567  //     }
1568  //   }
1569
1570  //   val ubtbRights = ubtbCheck(commitEntry, commitEntry.metas.map(_.ubtbAns), false.B)
1571  //   val ubtbWrongs = ubtbCheck(commitEntry, commitEntry.metas.map(_.ubtbAns), true.B)
1572  //   // btb and ubtb pred jal and jalr as well
1573  //   val btbRights = btbCheck(commitEntry, commitEntry.metas.map(_.btbAns), false.B)
1574  //   val btbWrongs = btbCheck(commitEntry, commitEntry.metas.map(_.btbAns), true.B)
1575  //   val tageRights = tageCheck(commitEntry, commitEntry.metas.map(_.tageAns), false.B)
1576  //   val tageWrongs = tageCheck(commitEntry, commitEntry.metas.map(_.tageAns), true.B)
1577
1578  //   val loopRights = loopCheck(commitEntry, commitEntry.metas.map(_.loopAns), false.B)
1579  //   val loopWrongs = loopCheck(commitEntry, commitEntry.metas.map(_.loopAns), true.B)
1580
1581  //   val rasRights = rasCheck(commitEntry, commitEntry.metas.map(_.rasAns), false.B)
1582  //   val rasWrongs = rasCheck(commitEntry, commitEntry.metas.map(_.rasAns), true.B)
1583
1584  val perfEvents = Seq(
1585    ("bpu_s2_redirect        ", bpu_s2_redirect                                                             ),
1586    ("bpu_s3_redirect        ", bpu_s3_redirect                                                             ),
1587    ("bpu_to_ftq_stall       ", enq.valid && ~enq.ready                                                     ),
1588    ("mispredictRedirect     ", perf_redirect.valid && RedirectLevel.flushAfter === perf_redirect.bits.level),
1589    ("replayRedirect         ", perf_redirect.valid && RedirectLevel.flushItself(perf_redirect.bits.level)  ),
1590    ("predecodeRedirect      ", fromIfuRedirect.valid                                                       ),
1591    ("to_ifu_bubble          ", io.toIfu.req.ready && !io.toIfu.req.valid                                   ),
1592    ("from_bpu_real_bubble   ", !enq.valid && enq.ready && allowBpuIn                                       ),
1593    ("BpInstr                ", PopCount(mbpInstrs)                                                         ),
1594    ("BpBInstr               ", PopCount(mbpBRights | mbpBWrongs)                                           ),
1595    ("BpRight                ", PopCount(mbpRights)                                                         ),
1596    ("BpWrong                ", PopCount(mbpWrongs)                                                         ),
1597    ("BpBRight               ", PopCount(mbpBRights)                                                        ),
1598    ("BpBWrong               ", PopCount(mbpBWrongs)                                                        ),
1599    ("BpJRight               ", PopCount(mbpJRights)                                                        ),
1600    ("BpJWrong               ", PopCount(mbpJWrongs)                                                        ),
1601    ("BpIRight               ", PopCount(mbpIRights)                                                        ),
1602    ("BpIWrong               ", PopCount(mbpIWrongs)                                                        ),
1603    ("BpCRight               ", PopCount(mbpCRights)                                                        ),
1604    ("BpCWrong               ", PopCount(mbpCWrongs)                                                        ),
1605    ("BpRRight               ", PopCount(mbpRRights)                                                        ),
1606    ("BpRWrong               ", PopCount(mbpRWrongs)                                                        ),
1607    ("ftb_false_hit          ", PopCount(ftb_false_hit)                                                     ),
1608    ("ftb_hit                ", PopCount(ftb_hit)                                                           ),
1609  )
1610  generatePerfEvent()
1611}